1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
20#include "RISCVSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
42#include "llvm/Support/Debug.h"
48#include <optional>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "riscv-lower"
53
54STATISTIC(NumTailCalls, "Number of tail calls");
55
56static cl::opt<unsigned> ExtensionMaxWebSize(
57 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
58 cl::desc("Give the maximum size (in number of nodes) of the web of "
59 "instructions that we will consider for VW expansion"),
60 cl::init(18));
61
62static cl::opt<bool>
63 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
64 cl::desc("Allow the formation of VW_W operations (e.g., "
65 "VWADD_W) with splat constants"),
66 cl::init(false));
67
68static cl::opt<unsigned> NumRepeatedDivisors(
69 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
70 cl::desc("Set the minimum number of repetitions of a divisor to allow "
71 "transformation to multiplications by the reciprocal"),
72 cl::init(2));
73
74static cl::opt<int>
75 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
76 cl::desc("Give the maximum number of instructions that we will "
77 "use for creating a floating-point immediate value"),
78 cl::init(2));
79
80RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
81 const RISCVSubtarget &STI)
82 : TargetLowering(TM), Subtarget(STI) {
83
84 RISCVABI::ABI ABI = Subtarget.getTargetABI();
85 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
86
87 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
88 !Subtarget.hasStdExtF()) {
89 errs() << "Hard-float 'f' ABI can't be used for a target that "
90 "doesn't support the F instruction set extension (ignoring "
91 "target-abi)\n";
93 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
94 !Subtarget.hasStdExtD()) {
95 errs() << "Hard-float 'd' ABI can't be used for a target that "
96 "doesn't support the D instruction set extension (ignoring "
97 "target-abi)\n";
99 }
100
101 switch (ABI) {
102 default:
103 report_fatal_error("Don't know how to lower this ABI");
104 case RISCVABI::ABI_ILP32:
105 case RISCVABI::ABI_ILP32E:
106 case RISCVABI::ABI_LP64E:
107 case RISCVABI::ABI_ILP32F:
108 case RISCVABI::ABI_ILP32D:
109 case RISCVABI::ABI_LP64:
110 case RISCVABI::ABI_LP64F:
111 case RISCVABI::ABI_LP64D:
112 break;
113 }
114
115 MVT XLenVT = Subtarget.getXLenVT();
116
117 // Set up the register classes.
118 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
119
120 if (Subtarget.hasStdExtZfhmin())
121 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
122 if (Subtarget.hasStdExtZfbfmin())
123 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
124 if (Subtarget.hasStdExtF())
125 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
126 if (Subtarget.hasStdExtD())
127 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
128 if (Subtarget.hasStdExtZhinxmin())
129 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
130 if (Subtarget.hasStdExtZfinx())
131 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
132 if (Subtarget.hasStdExtZdinx()) {
133 if (Subtarget.is64Bit())
134 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
135 else
136 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
137 }
138
139 static const MVT::SimpleValueType BoolVecVTs[] = {
140 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
141 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
142 static const MVT::SimpleValueType IntVecVTs[] = {
143 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
144 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
145 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
146 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
147 MVT::nxv4i64, MVT::nxv8i64};
148 static const MVT::SimpleValueType F16VecVTs[] = {
149 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
150 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
151 static const MVT::SimpleValueType BF16VecVTs[] = {
152 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
153 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
154 static const MVT::SimpleValueType F32VecVTs[] = {
155 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
156 static const MVT::SimpleValueType F64VecVTs[] = {
157 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
158 static const MVT::SimpleValueType VecTupleVTs[] = {
159 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
160 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
161 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
162 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
163 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
164 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
165 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
166 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
167 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
168 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
169 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
170
171 if (Subtarget.hasVInstructions()) {
172 auto addRegClassForRVV = [this](MVT VT) {
173 // Disable the smallest fractional LMUL types if ELEN is less than
174 // RVVBitsPerBlock.
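 // For example, with ELEN=32 (a Zve32* configuration) MinElts works out to
 // RVVBitsPerBlock / 32 = 64 / 32 = 2, so the single-element nxv1iXX/nxv1fXX
 // types get no register class here and stay illegal; with ELEN=64 nothing
 // is filtered out.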
175 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
176 if (VT.getVectorMinNumElements() < MinElts)
177 return;
178
179 unsigned Size = VT.getSizeInBits().getKnownMinValue();
180 const TargetRegisterClass *RC;
181 if (Size <= RISCV::RVVBitsPerBlock)
182 RC = &RISCV::VRRegClass;
183 else if (Size == 2 * RISCV::RVVBitsPerBlock)
184 RC = &RISCV::VRM2RegClass;
185 else if (Size == 4 * RISCV::RVVBitsPerBlock)
186 RC = &RISCV::VRM4RegClass;
187 else if (Size == 8 * RISCV::RVVBitsPerBlock)
188 RC = &RISCV::VRM8RegClass;
189 else
190 llvm_unreachable("Unexpected size");
191
192 addRegisterClass(VT, RC);
193 };
194
195 for (MVT VT : BoolVecVTs)
196 addRegClassForRVV(VT);
197 for (MVT VT : IntVecVTs) {
198 if (VT.getVectorElementType() == MVT::i64 &&
199 !Subtarget.hasVInstructionsI64())
200 continue;
201 addRegClassForRVV(VT);
202 }
203
204 if (Subtarget.hasVInstructionsF16Minimal())
205 for (MVT VT : F16VecVTs)
206 addRegClassForRVV(VT);
207
208 if (Subtarget.hasVInstructionsBF16Minimal())
209 for (MVT VT : BF16VecVTs)
210 addRegClassForRVV(VT);
211
212 if (Subtarget.hasVInstructionsF32())
213 for (MVT VT : F32VecVTs)
214 addRegClassForRVV(VT);
215
216 if (Subtarget.hasVInstructionsF64())
217 for (MVT VT : F64VecVTs)
218 addRegClassForRVV(VT);
219
220 if (Subtarget.useRVVForFixedLengthVectors()) {
221 auto addRegClassForFixedVectors = [this](MVT VT) {
222 MVT ContainerVT = getContainerForFixedLengthVector(VT);
223 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
224 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
225 addRegisterClass(VT, TRI.getRegClass(RCID));
226 };
227 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
228 if (useRVVForFixedLengthVectorVT(VT))
229 addRegClassForFixedVectors(VT);
230
231 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
232 if (useRVVForFixedLengthVectorVT(VT))
233 addRegClassForFixedVectors(VT);
234 }
235
236 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
237 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
243 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
250 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
257 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
264 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
267 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
268 }
269
270 // Compute derived properties from the register classes.
272
274
276 MVT::i1, Promote);
277 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
279 MVT::i1, Promote);
280
281 // TODO: add all necessary setOperationAction calls.
283
288
293 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
296 }
297
299
302
303 if (!Subtarget.hasVendorXTHeadBb())
305
307
308 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
309 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
310 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
311
312 if (Subtarget.is64Bit()) {
314
317 MVT::i32, Custom);
319 if (!Subtarget.hasStdExtZbb())
322 Custom);
324 }
325 if (!Subtarget.hasStdExtZmmul()) {
327 } else if (Subtarget.is64Bit()) {
330 } else {
332 }
333
334 if (!Subtarget.hasStdExtM()) {
336 Expand);
337 } else if (Subtarget.is64Bit()) {
339 {MVT::i8, MVT::i16, MVT::i32}, Custom);
340 }
341
344 Expand);
345
347 Custom);
348
349 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
350 if (Subtarget.is64Bit())
352 } else if (Subtarget.hasVendorXTHeadBb()) {
353 if (Subtarget.is64Bit())
356 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
358 } else {
360 }
361
362 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
363 // pattern match it directly in isel.
365 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
366 Subtarget.hasVendorXTHeadBb())
367 ? Legal
368 : Expand);
369
370 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
372 } else {
373 // Zbkb can use rev8+brev8 to implement bitreverse.
375 Subtarget.hasStdExtZbkb() ? Custom : Expand);
376 }
377
378 if (Subtarget.hasStdExtZbb() ||
379 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
381 Legal);
382 }
383
384 if (Subtarget.hasStdExtZbb() ||
385 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
386 if (Subtarget.is64Bit())
388 } else {
390 }
391
392 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
393 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
394 // We need the custom lowering to make sure that the resulting sequence
395 // for the 32bit case is efficient on 64bit targets.
396 if (Subtarget.is64Bit())
398 } else {
400 }
401
402 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
404 } else if (Subtarget.hasShortForwardBranchOpt()) {
405 // We can use PseudoCCSUB to implement ABS.
407 } else if (Subtarget.is64Bit()) {
409 }
410
411 if (!Subtarget.hasVendorXTHeadCondMov())
413
414 static const unsigned FPLegalNodeTypes[] = {
422
423 static const ISD::CondCode FPCCToExpand[] = {
427
428 static const unsigned FPOpToExpand[] = {
430 ISD::FREM};
431
432 static const unsigned FPRndMode[] = {
435
436 static const unsigned ZfhminZfbfminPromoteOps[] = {
446
447 if (Subtarget.hasStdExtZfbfmin()) {
453 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
460 }
461
462 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
463 if (Subtarget.hasStdExtZfhOrZhinx()) {
464 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
465 setOperationAction(FPRndMode, MVT::f16,
466 Subtarget.hasStdExtZfa() ? Legal : Custom);
469 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 if (Subtarget.hasStdExtZfa())
472 } else {
473 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
478 setOperationAction(Op, MVT::f16, Custom);
484 }
485
487
490 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
494
496 ISD::FNEARBYINT, MVT::f16,
497 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
502 MVT::f16, Promote);
503
504 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
505 // complete support for all operations in LegalizeDAG.
510 MVT::f16, Promote);
511
512 // We need to custom promote this.
513 if (Subtarget.is64Bit())
515 }
516
517 if (Subtarget.hasStdExtFOrZfinx()) {
518 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
519 setOperationAction(FPRndMode, MVT::f32,
520 Subtarget.hasStdExtZfa() ? Legal : Custom);
521 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
525 setOperationAction(FPOpToExpand, MVT::f32, Expand);
526 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
527 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
528 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
529 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
533 Subtarget.isSoftFPABI() ? LibCall : Custom);
538
539 if (Subtarget.hasStdExtZfa()) {
543 } else {
545 }
546 }
547
548 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
550
551 if (Subtarget.hasStdExtDOrZdinx()) {
552 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
553
554 if (!Subtarget.is64Bit())
556
557 if (Subtarget.hasStdExtZfa()) {
559 setOperationAction(FPRndMode, MVT::f64, Legal);
562 } else {
563 if (Subtarget.is64Bit())
564 setOperationAction(FPRndMode, MVT::f64, Custom);
565
567 }
568
571 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
575 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
576 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
577 setOperationAction(FPOpToExpand, MVT::f64, Expand);
578 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
579 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
580 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
581 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
585 Subtarget.isSoftFPABI() ? LibCall : Custom);
590 }
591
592 if (Subtarget.is64Bit()) {
595 MVT::i32, Custom);
597 }
598
599 if (Subtarget.hasStdExtFOrZfinx()) {
601 Custom);
602
603 // f16/bf16 require custom handling.
605 Custom);
607 Custom);
608
611 }
612
615 XLenVT, Custom);
616
618
619 if (Subtarget.is64Bit())
621
622 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
623 // Unfortunately this can't be determined just from the ISA naming string.
625 Subtarget.is64Bit() ? Legal : Custom);
627 Subtarget.is64Bit() ? Legal : Custom);
628
629 if (Subtarget.is64Bit()) {
632 }
633
636 if (Subtarget.is64Bit())
638
639 if (Subtarget.hasStdExtZicbop()) {
641 }
642
643 if (Subtarget.hasStdExtA()) {
645 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
647 else
649 } else if (Subtarget.hasForcedAtomics()) {
651 } else {
653 }
654
656
658
659 if (getTargetMachine().getTargetTriple().isOSLinux()) {
660 // Custom lowering of llvm.clear_cache.
662 }
663
664 if (Subtarget.hasVInstructions()) {
666
668
669 // RVV intrinsics may have illegal operands.
670 // We also need to custom legalize vmv.x.s.
673 {MVT::i8, MVT::i16}, Custom);
674 if (Subtarget.is64Bit())
676 MVT::i32, Custom);
677 else
679 MVT::i64, Custom);
680
682 MVT::Other, Custom);
683
684 static const unsigned IntegerVPOps[] = {
685 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
686 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
687 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
688 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
689 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
690 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
691 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
692 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
693 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
694 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
695 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
696 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
697 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
698 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
699 ISD::EXPERIMENTAL_VP_SPLAT};
700
701 static const unsigned FloatingPointVPOps[] = {
702 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
703 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
704 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
705 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
706 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
707 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
708 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
709 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
710 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
711 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
712 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
713 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
714 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
715 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
716
717 static const unsigned IntegerVecReduceOps[] = {
721
722 static const unsigned FloatingPointVecReduceOps[] = {
725
726 static const unsigned FloatingPointLibCallOps[] = {
729
730 if (!Subtarget.is64Bit()) {
731 // We must custom-lower certain vXi64 operations on RV32 due to the vector
732 // element type being illegal.
734 MVT::i64, Custom);
735
736 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
737
738 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
739 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
740 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
741 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
742 MVT::i64, Custom);
743 }
744
745 for (MVT VT : BoolVecVTs) {
746 if (!isTypeLegal(VT))
747 continue;
748
750
751 // Mask VTs are custom-expanded into a series of standard nodes
755 VT, Custom);
756
758 Custom);
759
761 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
762 Expand);
763 setOperationAction(ISD::VP_MERGE, VT, Custom);
764
765 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
766 Custom);
767
768 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
769
772 Custom);
773
775 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
776 Custom);
777
778 // RVV has native int->float & float->int conversions where the
779 // element type sizes are within one power-of-two of each other. Any
780 // wider distances between type sizes have to be lowered as sequences
781 // which progressively narrow the gap in stages.
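 // For example, an i8 -> f64 conversion (a factor of eight apart) is not a
 // single vfwcvt; the custom lowering first brings the integer within range
 // (roughly i8 -> i32) and then performs one widening convert to f64.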
786 VT, Custom);
788 Custom);
789
790 // Expand all extending loads to types larger than this, and truncating
791 // stores from types larger than this.
793 setTruncStoreAction(VT, OtherVT, Expand);
795 OtherVT, Expand);
796 }
797
798 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
799 ISD::VP_TRUNCATE, ISD::VP_SETCC},
800 VT, Custom);
801
804
806
807 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
808 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
809
812 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
813 }
814
815 for (MVT VT : IntVecVTs) {
816 if (!isTypeLegal(VT))
817 continue;
818
821
822 // Vectors implement MULHS/MULHU.
824
825 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
826 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
828
830 Legal);
831
833
834 // Custom-lower extensions and truncations from/to mask types.
836 VT, Custom);
837
838 // RVV has native int->float & float->int conversions where the
839 // element type sizes are within one power-of-two of each other. Any
840 // wider distances between type sizes have to be lowered as sequences
841 // which progressively narrow the gap in stages.
846 VT, Custom);
848 Custom);
852 VT, Legal);
853
854 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
855 // nodes which truncate by one power of two at a time.
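 // e.g. an nxv2i64 -> nxv2i8 truncate becomes i64 -> i32 -> i16 -> i8, with
 // each step selected as a narrowing shift (vnsrl) by zero.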
858 Custom);
859
860 // Custom-lower insert/extract operations to simplify patterns.
862 Custom);
863
864 // Custom-lower reduction operations to set up the corresponding custom
865 // nodes' operands.
866 setOperationAction(IntegerVecReduceOps, VT, Custom);
867
868 setOperationAction(IntegerVPOps, VT, Custom);
869
871
873 VT, Custom);
874
876 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
877 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
878 VT, Custom);
879
882 VT, Custom);
883
886
888
890 setTruncStoreAction(VT, OtherVT, Expand);
892 OtherVT, Expand);
893 }
894
897
898 // Splice
900
901 if (Subtarget.hasStdExtZvkb()) {
903 setOperationAction(ISD::VP_BSWAP, VT, Custom);
904 } else {
905 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
907 }
908
909 if (Subtarget.hasStdExtZvbb()) {
911 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
912 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
913 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
914 VT, Custom);
915 } else {
916 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
918 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
919 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
920 VT, Expand);
921
922 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
923 // range of f32.
924 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
925 if (isTypeLegal(FloatVT)) {
927 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
928 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
929 VT, Custom);
930 }
931 }
932
934 }
935
936 for (MVT VT : VecTupleVTs) {
937 if (!isTypeLegal(VT))
938 continue;
939
941 }
942
943 // Expand various CCs to best match the RVV ISA, which natively supports UNE
944 // but no other unordered comparisons, and supports all ordered comparisons
945 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
946 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
947 // and we pattern-match those back to the "original", swapping operands once
948 // more. This way we catch both operations and both "vf" and "fv" forms with
949 // fewer patterns.
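 // For instance, (setogt x, y) is expanded here to (setolt y, x); isel then
 // matches the swapped form back onto the native comparisons, so one small
 // pattern set covers vmflt/vmfle as well as the "vf"-only vmfgt/vmfge forms.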
950 static const ISD::CondCode VFPCCToExpand[] = {
954 };
955
956 // TODO: support more ops.
957 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
965
966 // TODO: support more vp ops.
967 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
968 ISD::VP_FADD,
969 ISD::VP_FSUB,
970 ISD::VP_FMUL,
971 ISD::VP_FDIV,
972 ISD::VP_FMA,
973 ISD::VP_REDUCE_FMIN,
974 ISD::VP_REDUCE_FMAX,
975 ISD::VP_SQRT,
976 ISD::VP_FMINNUM,
977 ISD::VP_FMAXNUM,
978 ISD::VP_FCEIL,
979 ISD::VP_FFLOOR,
980 ISD::VP_FROUND,
981 ISD::VP_FROUNDEVEN,
982 ISD::VP_FROUNDTOZERO,
983 ISD::VP_FRINT,
984 ISD::VP_FNEARBYINT,
985 ISD::VP_SETCC,
986 ISD::VP_FMINIMUM,
987 ISD::VP_FMAXIMUM,
988 ISD::VP_REDUCE_FMINIMUM,
989 ISD::VP_REDUCE_FMAXIMUM};
990
991 // Sets common operation actions on RVV floating-point vector types.
992 const auto SetCommonVFPActions = [&](MVT VT) {
994 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
995 // sizes are within one power-of-two of each other. Therefore conversions
996 // between vXf16 and vXf64 must be lowered as sequences which convert via
997 // vXf32.
1000 // Custom-lower insert/extract operations to simplify patterns.
1002 Custom);
1003 // Expand various condition codes (explained above).
1004 setCondCodeAction(VFPCCToExpand, VT, Expand);
1005
1008
1012 VT, Custom);
1013
1014 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1015
1016 // Expand FP operations that need libcalls.
1017 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1018
1020
1022
1024 VT, Custom);
1025
1027 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1028 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1029 VT, Custom);
1030
1033
1036 VT, Custom);
1037
1040
1042
1043 setOperationAction(FloatingPointVPOps, VT, Custom);
1044
1046 Custom);
1049 VT, Legal);
1054 VT, Custom);
1055
1057 };
1058
1059 // Sets common extload/truncstore actions on RVV floating-point vector
1060 // types.
1061 const auto SetCommonVFPExtLoadTruncStoreActions =
1062 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1063 for (auto SmallVT : SmallerVTs) {
1064 setTruncStoreAction(VT, SmallVT, Expand);
1065 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1066 }
1067 };
1068
1069 // Sets common actions for f16 and bf16 for when there's only
1070 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
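 // In practice this means e.g. an f16 vector fadd on a Zvfhmin-only target
 // is widened to f32 (vfwcvt.f.f.v), performed as an f32 vfadd, and then
 // narrowed back with vfncvt.f.f.w.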
1071 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1074 Custom);
1075 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1076 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1077 Custom);
1079 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1085 VT, Custom);
1086 MVT EltVT = VT.getVectorElementType();
1087 if (isTypeLegal(EltVT))
1088 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1090 VT, Custom);
1091 else
1092 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1093 EltVT, Custom);
1095 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1096 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1097 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1098 ISD::VP_SCATTER},
1099 VT, Custom);
1100
1104
1105 // Expand FP operations that need libcalls.
1106 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1107
1108 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
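 // (Promoting nxv32f16/nxv32bf16 would need nxv32f32, i.e. LMUL=16, which
 // does not exist, so those types are split into two LMUL=8 halves before
 // promotion.)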
1109 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1110 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1111 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1112 } else {
1113 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1114 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1116 }
1117 };
1118
1119 if (Subtarget.hasVInstructionsF16()) {
1120 for (MVT VT : F16VecVTs) {
1121 if (!isTypeLegal(VT))
1122 continue;
1123 SetCommonVFPActions(VT);
1124 }
1125 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1126 for (MVT VT : F16VecVTs) {
1127 if (!isTypeLegal(VT))
1128 continue;
1129 SetCommonPromoteToF32Actions(VT);
1130 }
1131 }
1132
1133 if (Subtarget.hasVInstructionsBF16Minimal()) {
1134 for (MVT VT : BF16VecVTs) {
1135 if (!isTypeLegal(VT))
1136 continue;
1137 SetCommonPromoteToF32Actions(VT);
1138 }
1139 }
1140
1141 if (Subtarget.hasVInstructionsF32()) {
1142 for (MVT VT : F32VecVTs) {
1143 if (!isTypeLegal(VT))
1144 continue;
1145 SetCommonVFPActions(VT);
1146 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1148 }
1149 }
1150
1151 if (Subtarget.hasVInstructionsF64()) {
1152 for (MVT VT : F64VecVTs) {
1153 if (!isTypeLegal(VT))
1154 continue;
1155 SetCommonVFPActions(VT);
1156 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1159 }
1160 }
1161
1162 if (Subtarget.useRVVForFixedLengthVectors()) {
1164 if (!useRVVForFixedLengthVectorVT(VT))
1165 continue;
1166
1167 // By default everything must be expanded.
1168 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1171 setTruncStoreAction(VT, OtherVT, Expand);
1173 OtherVT, Expand);
1174 }
1175
1176 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1177 // expansion to a build_vector of 0s.
1179
1180 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1182 Custom);
1183
1186 Custom);
1187
1189 VT, Custom);
1190
1192
1194
1196
1198
1201 Custom);
1202
1204
1207 Custom);
1208
1210 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1211 Custom);
1212
1214 {
1223 },
1224 VT, Custom);
1226 Custom);
1227
1229
1230 // Operations below are different for between masks and other vectors.
1231 if (VT.getVectorElementType() == MVT::i1) {
1232 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1233 ISD::OR, ISD::XOR},
1234 VT, Custom);
1235
1236 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1237 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1238 VT, Custom);
1239
1240 setOperationAction(ISD::VP_MERGE, VT, Custom);
1241
1242 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1243 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1244 continue;
1245 }
1246
1247 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1248 // it before type legalization for i64 vectors on RV32. It will then be
1249 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1250 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1251 // improvements first.
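 // Concretely, on RV32 a splat of an i64 element reaches us after type
 // legalization as SPLAT_VECTOR_PARTS with the low and high 32-bit halves
 // as operands.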
1252 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1255 }
1256
1259
1260 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1261 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1262 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1263 ISD::VP_SCATTER},
1264 VT, Custom);
1265
1269 VT, Custom);
1270
1273
1275
1276 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1277 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1279
1283 VT, Custom);
1284
1286
1289
1290 // Custom-lower reduction operations to set up the corresponding custom
1291 // nodes' operands.
1295 VT, Custom);
1296
1297 setOperationAction(IntegerVPOps, VT, Custom);
1298
1299 if (Subtarget.hasStdExtZvkb())
1301
1302 if (Subtarget.hasStdExtZvbb()) {
1305 VT, Custom);
1306 } else {
1307 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1308 // range of f32.
1309 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1310 if (isTypeLegal(FloatVT))
1313 Custom);
1314 }
1315
1317 }
1318
1320 // There are no extending loads or truncating stores.
1321 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1322 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1323 setTruncStoreAction(VT, InnerVT, Expand);
1324 }
1325
1326 if (!useRVVForFixedLengthVectorVT(VT))
1327 continue;
1328
1329 // By default everything must be expanded.
1330 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1332
1333 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1334 // expansion to a build_vector of 0s.
1336
1341 VT, Custom);
1342
1345 VT, Custom);
1346 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1347 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1348 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1349 VT, Custom);
1350
1353 Custom);
1354
1355 if (VT.getVectorElementType() == MVT::f16 &&
1356 !Subtarget.hasVInstructionsF16()) {
1358 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1360 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1361 Custom);
1362 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1363 Custom);
1364 if (Subtarget.hasStdExtZfhmin()) {
1366 } else {
1367 // We need to custom legalize f16 build vectors if Zfhmin isn't
1368 // available.
1370 }
1374 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1375 // Don't promote f16 vector operations to f32 if f32 vector type is
1376 // not legal.
1377 // TODO: could split the f16 vector into two vectors and do promotion.
1378 if (!isTypeLegal(F32VecVT))
1379 continue;
1380 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1382 continue;
1383 }
1384
1385 if (VT.getVectorElementType() == MVT::bf16) {
1387 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1388 if (Subtarget.hasStdExtZfbfmin()) {
1390 } else {
1391 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1392 // available.
1394 }
1396 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1397 Custom);
1398 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1399 // Don't promote bf16 vector operations to f32 if f32 vector type is
1400 // not legal.
1401 // TODO: could split the bf16 vector into two vectors and do promotion.
1402 if (!isTypeLegal(F32VecVT))
1403 continue;
1404 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1405 // TODO: Promote VP ops to fp32.
1406 continue;
1407 }
1408
1410 Custom);
1411
1416 VT, Custom);
1417
1420 VT, Custom);
1421
1422 setCondCodeAction(VFPCCToExpand, VT, Expand);
1423
1426
1428
1429 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1430
1431 setOperationAction(FloatingPointVPOps, VT, Custom);
1432
1439 VT, Custom);
1440 }
1441
1442 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1443 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1444 if (Subtarget.is64Bit())
1446 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1448 if (Subtarget.hasStdExtZfbfmin())
1450 if (Subtarget.hasStdExtFOrZfinx())
1452 if (Subtarget.hasStdExtDOrZdinx())
1454 }
1455 }
1456
1457 if (Subtarget.hasStdExtA())
1459
1460 if (Subtarget.hasForcedAtomics()) {
1461 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1467 XLenVT, LibCall);
1468 }
1469
1470 if (Subtarget.hasVendorXTHeadMemIdx()) {
1471 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1472 setIndexedLoadAction(im, MVT::i8, Legal);
1473 setIndexedStoreAction(im, MVT::i8, Legal);
1474 setIndexedLoadAction(im, MVT::i16, Legal);
1475 setIndexedStoreAction(im, MVT::i16, Legal);
1476 setIndexedLoadAction(im, MVT::i32, Legal);
1477 setIndexedStoreAction(im, MVT::i32, Legal);
1478
1479 if (Subtarget.is64Bit()) {
1480 setIndexedLoadAction(im, MVT::i64, Legal);
1481 setIndexedStoreAction(im, MVT::i64, Legal);
1482 }
1483 }
1484 }
1485
1486 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1490
1494 }
1495
1496 // Function alignments.
1497 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1498 setMinFunctionAlignment(FunctionAlignment);
1499 // Set preferred alignments.
1502
1508
1509 if (Subtarget.hasStdExtFOrZfinx())
1511
1512 if (Subtarget.hasStdExtZbb())
1514
1515 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1516 Subtarget.hasVInstructions())
1518
1519 if (Subtarget.hasStdExtZbkb())
1521
1522 if (Subtarget.hasStdExtFOrZfinx())
1525 if (Subtarget.hasVInstructions())
1527 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1530 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1533 if (Subtarget.hasVendorXTHeadMemPair())
1535 if (Subtarget.useRVVForFixedLengthVectors())
1537
1538 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1539 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1540
1541 // Disable strict node mutation.
1542 IsStrictFPEnabled = true;
1543 EnableExtLdPromotion = true;
1544
1545 // Let the subtarget decide if a predictable select is more expensive than the
1546 // corresponding branch. This information is used in CGP/SelectOpt to decide
1547 // when to convert selects into branches.
1548 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1549
1550 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1551 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1552
1554 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1555 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1556
1557 MaxStoresPerMemmoveOptSize =
1558 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1559 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1560
1561 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1562 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1563}
1564
1565EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1566 LLVMContext &Context,
1567 EVT VT) const {
1568 if (!VT.isVector())
1569 return getPointerTy(DL);
1570 if (Subtarget.hasVInstructions() &&
1571 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1572 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1573 return VT.changeVectorElementTypeToInteger();
1574}
1575
1576MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1577 return Subtarget.getXLenVT();
1578}
1579
1580// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1581bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1582 unsigned VF,
1583 bool IsScalable) const {
1584 if (!Subtarget.hasVInstructions())
1585 return true;
1586
1587 if (!IsScalable)
1588 return true;
1589
1590 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1591 return true;
1592
1593 // Don't allow VF=1 if those types aren't legal.
1594 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1595 return true;
1596
1597 // VLEN=32 support is incomplete.
1598 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1599 return true;
1600
1601 // The maximum VF is for the smallest element width with LMUL=8.
1602 // VF must be a power of 2.
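 // With RVVBitsPerBlock = 64 this evaluates to MaxVF = (64 / 8) * 8 = 64.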
1603 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1604 return VF > MaxVF || !isPowerOf2_32(VF);
1605}
1606
1607bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1608 return !Subtarget.hasVInstructions() ||
1609 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1610}
1611
1612bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1613 const CallInst &I,
1614 MachineFunction &MF,
1615 unsigned Intrinsic) const {
1616 auto &DL = I.getDataLayout();
1617
1618 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1619 bool IsUnitStrided, bool UsePtrVal = false) {
1620 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1621 // We can't use ptrVal if the intrinsic can access memory before the
1622 // pointer. This means we can't use it for strided or indexed intrinsics.
1623 if (UsePtrVal)
1624 Info.ptrVal = I.getArgOperand(PtrOp);
1625 else
1626 Info.fallbackAddressSpace =
1627 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1628 Type *MemTy;
1629 if (IsStore) {
1630 // Store value is the first operand.
1631 MemTy = I.getArgOperand(0)->getType();
1632 } else {
1633 // Use return type. If it's segment load, return type is a struct.
1634 MemTy = I.getType();
1635 if (MemTy->isStructTy())
1636 MemTy = MemTy->getStructElementType(0);
1637 }
1638 if (!IsUnitStrided)
1639 MemTy = MemTy->getScalarType();
1640
1641 Info.memVT = getValueType(DL, MemTy);
1642 if (MemTy->isTargetExtTy()) {
1643 // RISC-V vector tuple type's alignment type should be its element type.
1644 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1645 MemTy = Type::getIntNTy(
1646 MemTy->getContext(),
1647 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1648 ->getZExtValue());
1649 Info.align = DL.getABITypeAlign(MemTy);
1650 } else {
1651 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1652 }
1654 Info.flags |=
1655 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1656 return true;
1657 };
1658
1659 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1660 Info.flags |= MachineMemOperand::MONonTemporal;
1661
1663 switch (Intrinsic) {
1664 default:
1665 return false;
1666 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1667 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1668 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1669 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1670 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1671 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1672 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1674 case Intrinsic::riscv_masked_cmpxchg_i32:
1675 Info.opc = ISD::INTRINSIC_W_CHAIN;
1676 Info.memVT = MVT::i32;
1677 Info.ptrVal = I.getArgOperand(0);
1678 Info.offset = 0;
1679 Info.align = Align(4);
1680 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1681 MachineMemOperand::MOVolatile;
1682 return true;
1683 case Intrinsic::riscv_seg2_load:
1684 case Intrinsic::riscv_seg3_load:
1685 case Intrinsic::riscv_seg4_load:
1686 case Intrinsic::riscv_seg5_load:
1687 case Intrinsic::riscv_seg6_load:
1688 case Intrinsic::riscv_seg7_load:
1689 case Intrinsic::riscv_seg8_load:
1690 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1691 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1692 case Intrinsic::riscv_seg2_store:
1693 case Intrinsic::riscv_seg3_store:
1694 case Intrinsic::riscv_seg4_store:
1695 case Intrinsic::riscv_seg5_store:
1696 case Intrinsic::riscv_seg6_store:
1697 case Intrinsic::riscv_seg7_store:
1698 case Intrinsic::riscv_seg8_store:
1699 // Operands are (vec, ..., vec, ptr, vl)
1700 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1701 /*IsStore*/ true,
1702 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1703 case Intrinsic::riscv_vle:
1704 case Intrinsic::riscv_vle_mask:
1705 case Intrinsic::riscv_vleff:
1706 case Intrinsic::riscv_vleff_mask:
1707 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1708 /*IsStore*/ false,
1709 /*IsUnitStrided*/ true,
1710 /*UsePtrVal*/ true);
1711 case Intrinsic::riscv_vse:
1712 case Intrinsic::riscv_vse_mask:
1713 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1714 /*IsStore*/ true,
1715 /*IsUnitStrided*/ true,
1716 /*UsePtrVal*/ true);
1717 case Intrinsic::riscv_vlse:
1718 case Intrinsic::riscv_vlse_mask:
1719 case Intrinsic::riscv_vloxei:
1720 case Intrinsic::riscv_vloxei_mask:
1721 case Intrinsic::riscv_vluxei:
1722 case Intrinsic::riscv_vluxei_mask:
1723 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1724 /*IsStore*/ false,
1725 /*IsUnitStrided*/ false);
1726 case Intrinsic::riscv_vsse:
1727 case Intrinsic::riscv_vsse_mask:
1728 case Intrinsic::riscv_vsoxei:
1729 case Intrinsic::riscv_vsoxei_mask:
1730 case Intrinsic::riscv_vsuxei:
1731 case Intrinsic::riscv_vsuxei_mask:
1732 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1733 /*IsStore*/ true,
1734 /*IsUnitStrided*/ false);
1735 case Intrinsic::riscv_vlseg2:
1736 case Intrinsic::riscv_vlseg3:
1737 case Intrinsic::riscv_vlseg4:
1738 case Intrinsic::riscv_vlseg5:
1739 case Intrinsic::riscv_vlseg6:
1740 case Intrinsic::riscv_vlseg7:
1741 case Intrinsic::riscv_vlseg8:
1742 case Intrinsic::riscv_vlseg2ff:
1743 case Intrinsic::riscv_vlseg3ff:
1744 case Intrinsic::riscv_vlseg4ff:
1745 case Intrinsic::riscv_vlseg5ff:
1746 case Intrinsic::riscv_vlseg6ff:
1747 case Intrinsic::riscv_vlseg7ff:
1748 case Intrinsic::riscv_vlseg8ff:
1749 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1750 /*IsStore*/ false,
1751 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1752 case Intrinsic::riscv_vlseg2_mask:
1753 case Intrinsic::riscv_vlseg3_mask:
1754 case Intrinsic::riscv_vlseg4_mask:
1755 case Intrinsic::riscv_vlseg5_mask:
1756 case Intrinsic::riscv_vlseg6_mask:
1757 case Intrinsic::riscv_vlseg7_mask:
1758 case Intrinsic::riscv_vlseg8_mask:
1759 case Intrinsic::riscv_vlseg2ff_mask:
1760 case Intrinsic::riscv_vlseg3ff_mask:
1761 case Intrinsic::riscv_vlseg4ff_mask:
1762 case Intrinsic::riscv_vlseg5ff_mask:
1763 case Intrinsic::riscv_vlseg6ff_mask:
1764 case Intrinsic::riscv_vlseg7ff_mask:
1765 case Intrinsic::riscv_vlseg8ff_mask:
1766 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1767 /*IsStore*/ false,
1768 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1769 case Intrinsic::riscv_vlsseg2:
1770 case Intrinsic::riscv_vlsseg3:
1771 case Intrinsic::riscv_vlsseg4:
1772 case Intrinsic::riscv_vlsseg5:
1773 case Intrinsic::riscv_vlsseg6:
1774 case Intrinsic::riscv_vlsseg7:
1775 case Intrinsic::riscv_vlsseg8:
1776 case Intrinsic::riscv_vloxseg2:
1777 case Intrinsic::riscv_vloxseg3:
1778 case Intrinsic::riscv_vloxseg4:
1779 case Intrinsic::riscv_vloxseg5:
1780 case Intrinsic::riscv_vloxseg6:
1781 case Intrinsic::riscv_vloxseg7:
1782 case Intrinsic::riscv_vloxseg8:
1783 case Intrinsic::riscv_vluxseg2:
1784 case Intrinsic::riscv_vluxseg3:
1785 case Intrinsic::riscv_vluxseg4:
1786 case Intrinsic::riscv_vluxseg5:
1787 case Intrinsic::riscv_vluxseg6:
1788 case Intrinsic::riscv_vluxseg7:
1789 case Intrinsic::riscv_vluxseg8:
1790 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1791 /*IsStore*/ false,
1792 /*IsUnitStrided*/ false);
1793 case Intrinsic::riscv_vlsseg2_mask:
1794 case Intrinsic::riscv_vlsseg3_mask:
1795 case Intrinsic::riscv_vlsseg4_mask:
1796 case Intrinsic::riscv_vlsseg5_mask:
1797 case Intrinsic::riscv_vlsseg6_mask:
1798 case Intrinsic::riscv_vlsseg7_mask:
1799 case Intrinsic::riscv_vlsseg8_mask:
1800 case Intrinsic::riscv_vloxseg2_mask:
1801 case Intrinsic::riscv_vloxseg3_mask:
1802 case Intrinsic::riscv_vloxseg4_mask:
1803 case Intrinsic::riscv_vloxseg5_mask:
1804 case Intrinsic::riscv_vloxseg6_mask:
1805 case Intrinsic::riscv_vloxseg7_mask:
1806 case Intrinsic::riscv_vloxseg8_mask:
1807 case Intrinsic::riscv_vluxseg2_mask:
1808 case Intrinsic::riscv_vluxseg3_mask:
1809 case Intrinsic::riscv_vluxseg4_mask:
1810 case Intrinsic::riscv_vluxseg5_mask:
1811 case Intrinsic::riscv_vluxseg6_mask:
1812 case Intrinsic::riscv_vluxseg7_mask:
1813 case Intrinsic::riscv_vluxseg8_mask:
1814 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1815 /*IsStore*/ false,
1816 /*IsUnitStrided*/ false);
1817 case Intrinsic::riscv_vsseg2:
1818 case Intrinsic::riscv_vsseg3:
1819 case Intrinsic::riscv_vsseg4:
1820 case Intrinsic::riscv_vsseg5:
1821 case Intrinsic::riscv_vsseg6:
1822 case Intrinsic::riscv_vsseg7:
1823 case Intrinsic::riscv_vsseg8:
1824 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1825 /*IsStore*/ true,
1826 /*IsUnitStrided*/ false);
1827 case Intrinsic::riscv_vsseg2_mask:
1828 case Intrinsic::riscv_vsseg3_mask:
1829 case Intrinsic::riscv_vsseg4_mask:
1830 case Intrinsic::riscv_vsseg5_mask:
1831 case Intrinsic::riscv_vsseg6_mask:
1832 case Intrinsic::riscv_vsseg7_mask:
1833 case Intrinsic::riscv_vsseg8_mask:
1834 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1835 /*IsStore*/ true,
1836 /*IsUnitStrided*/ false);
1837 case Intrinsic::riscv_vssseg2:
1838 case Intrinsic::riscv_vssseg3:
1839 case Intrinsic::riscv_vssseg4:
1840 case Intrinsic::riscv_vssseg5:
1841 case Intrinsic::riscv_vssseg6:
1842 case Intrinsic::riscv_vssseg7:
1843 case Intrinsic::riscv_vssseg8:
1844 case Intrinsic::riscv_vsoxseg2:
1845 case Intrinsic::riscv_vsoxseg3:
1846 case Intrinsic::riscv_vsoxseg4:
1847 case Intrinsic::riscv_vsoxseg5:
1848 case Intrinsic::riscv_vsoxseg6:
1849 case Intrinsic::riscv_vsoxseg7:
1850 case Intrinsic::riscv_vsoxseg8:
1851 case Intrinsic::riscv_vsuxseg2:
1852 case Intrinsic::riscv_vsuxseg3:
1853 case Intrinsic::riscv_vsuxseg4:
1854 case Intrinsic::riscv_vsuxseg5:
1855 case Intrinsic::riscv_vsuxseg6:
1856 case Intrinsic::riscv_vsuxseg7:
1857 case Intrinsic::riscv_vsuxseg8:
1858 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1859 /*IsStore*/ true,
1860 /*IsUnitStrided*/ false);
1861 case Intrinsic::riscv_vssseg2_mask:
1862 case Intrinsic::riscv_vssseg3_mask:
1863 case Intrinsic::riscv_vssseg4_mask:
1864 case Intrinsic::riscv_vssseg5_mask:
1865 case Intrinsic::riscv_vssseg6_mask:
1866 case Intrinsic::riscv_vssseg7_mask:
1867 case Intrinsic::riscv_vssseg8_mask:
1868 case Intrinsic::riscv_vsoxseg2_mask:
1869 case Intrinsic::riscv_vsoxseg3_mask:
1870 case Intrinsic::riscv_vsoxseg4_mask:
1871 case Intrinsic::riscv_vsoxseg5_mask:
1872 case Intrinsic::riscv_vsoxseg6_mask:
1873 case Intrinsic::riscv_vsoxseg7_mask:
1874 case Intrinsic::riscv_vsoxseg8_mask:
1875 case Intrinsic::riscv_vsuxseg2_mask:
1876 case Intrinsic::riscv_vsuxseg3_mask:
1877 case Intrinsic::riscv_vsuxseg4_mask:
1878 case Intrinsic::riscv_vsuxseg5_mask:
1879 case Intrinsic::riscv_vsuxseg6_mask:
1880 case Intrinsic::riscv_vsuxseg7_mask:
1881 case Intrinsic::riscv_vsuxseg8_mask:
1882 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1883 /*IsStore*/ true,
1884 /*IsUnitStrided*/ false);
1885 }
1886}
1887
1888bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1889 const AddrMode &AM, Type *Ty,
1890 unsigned AS,
1891 Instruction *I) const {
1892 // No global is ever allowed as a base.
1893 if (AM.BaseGV)
1894 return false;
1895
1896 // None of our addressing modes allows a scalable offset
1897 if (AM.ScalableOffset)
1898 return false;
1899
1900 // RVV instructions only support register addressing.
1901 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1902 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1903
1904 // Require a 12-bit signed offset.
1905 if (!isInt<12>(AM.BaseOffs))
1906 return false;
1907
1908 switch (AM.Scale) {
1909 case 0: // "r+i" or just "i", depending on HasBaseReg.
1910 break;
1911 case 1:
1912 if (!AM.HasBaseReg) // allow "r+i".
1913 break;
1914 return false; // disallow "r+r" or "r+r+i".
1915 default:
1916 return false;
1917 }
1918
1919 return true;
1920}
1921
1922bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1923 return isInt<12>(Imm);
1924}
1925
1926bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1927 return isInt<12>(Imm);
1928}
1929
1930// On RV32, 64-bit integers are split into their high and low parts and held
1931// in two different registers, so the trunc is free since the low register can
1932// just be used.
1933// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1934// isTruncateFree?
1935bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1936 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1937 return false;
1938 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1939 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1940 return (SrcBits == 64 && DestBits == 32);
1941}
1942
1943bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1944 // We consider i64->i32 free on RV64 since we have good selection of W
1945 // instructions that make promoting operations back to i64 free in many cases.
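 // For example, an i32 add whose result is sign-extended back to i64 selects
 // to a single addw, so the earlier truncate ends up costing nothing.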
1946 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1947 !DstVT.isInteger())
1948 return false;
1949 unsigned SrcBits = SrcVT.getSizeInBits();
1950 unsigned DestBits = DstVT.getSizeInBits();
1951 return (SrcBits == 64 && DestBits == 32);
1952}
1953
1954bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
1955 EVT SrcVT = Val.getValueType();
1956 // A truncate fed by SRL/SRA is free, since it folds into vnsrl/vnsra.
1957 if (Subtarget.hasVInstructions() &&
1958 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1959 SrcVT.isVector() && VT2.isVector()) {
1960 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1961 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1962 if (SrcBits == DestBits * 2) {
1963 return true;
1964 }
1965 }
1966 return TargetLowering::isTruncateFree(Val, VT2);
1967}
1968
1969bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1970 // Zexts are free if they can be combined with a load.
1971 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1972 // poorly with type legalization of compares preferring sext.
1973 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1974 EVT MemVT = LD->getMemoryVT();
1975 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1976 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1977 LD->getExtensionType() == ISD::ZEXTLOAD))
1978 return true;
1979 }
1980
1981 return TargetLowering::isZExtFree(Val, VT2);
1982}
1983
1984bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1985 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1986}
1987
1988bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1989 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1990}
1991
1992bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1993 return Subtarget.hasStdExtZbb() ||
1994 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1995}
1996
1997bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1998 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1999 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2000}
2001
2002bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2003 const Instruction &AndI) const {
2004 // We expect to be able to match a bit extraction instruction if the Zbs
2005 // extension is supported and the mask is a power of two. However, we
2006 // conservatively return false if the mask would fit in an ANDI instruction,
2007 // on the basis that it's possible the sinking+duplication of the AND in
2008 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2009 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2010 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2011 return false;
2012 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2013 if (!Mask)
2014 return false;
2015 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2016}
2017
2018bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
2019 EVT VT = Y.getValueType();
2020
2021 // FIXME: Support vectors once we have tests.
2022 if (VT.isVector())
2023 return false;
2024
2025 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2026 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2027}
2028
2029bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2030 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2031 if (Subtarget.hasStdExtZbs())
2032 return X.getValueType().isScalarInteger();
2033 auto *C = dyn_cast<ConstantSDNode>(Y);
2034 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2035 if (Subtarget.hasVendorXTHeadBs())
2036 return C != nullptr;
2037 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
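 // (Positions above 10 would push the 1 << Y mask outside the positive range
 // of ANDI's 12-bit signed immediate.)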
2038 return C && C->getAPIntValue().ule(10);
2039}
2040
2042 EVT VT) const {
2043 // Only enable for rvv.
2044 if (!VT.isVector() || !Subtarget.hasVInstructions())
2045 return false;
2046
2047 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2048 return false;
2049
2050 return true;
2051}
2052
2053bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2054 Type *Ty) const {
2055 assert(Ty->isIntegerTy());
2056
2057 unsigned BitSize = Ty->getIntegerBitWidth();
2058 if (BitSize > Subtarget.getXLen())
2059 return false;
2060
2061 // Fast path, assume 32-bit immediates are cheap.
2062 int64_t Val = Imm.getSExtValue();
2063 if (isInt<32>(Val))
2064 return true;
2065
2066 // A constant pool entry may be more aligned than the load we're trying to
2067 // replace. If we don't support unaligned scalar mem, prefer the constant
2068 // pool.
2069 // TODO: Can the caller pass down the alignment?
2070 if (!Subtarget.enableUnalignedScalarMem())
2071 return true;
2072
2073 // Prefer to keep the load if it would require many instructions.
2074 // This uses the same threshold we use for constant pools but doesn't
2075 // check useConstantPoolForLargeInts.
2076 // TODO: Should we keep the load only when we're definitely going to emit a
2077 // constant pool?
2078
2079 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2080 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2081}
2082
2086 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2087 SelectionDAG &DAG) const {
2088 // One interesting pattern that we'd want to form is 'bit extract':
2089 // ((1 >> Y) & 1) ==/!= 0
2090 // But we also need to be careful not to try to reverse that fold.
2091
2092 // Is this '((1 >> Y) & 1)'?
2093 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2094 return false; // Keep the 'bit extract' pattern.
2095
2096 // Will this be '((1 >> Y) & 1)' after the transform?
2097 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2098 return true; // Do form the 'bit extract' pattern.
2099
2100 // If 'X' is a constant, and we transform, then we will immediately
2101 // try to undo the fold, thus causing endless combine loop.
2102 // So only do the transform if X is not a constant. This matches the default
2103 // implementation of this function.
2104 return !XC;
2105}
2106
2107bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2108 unsigned Opc = VecOp.getOpcode();
2109
2110 // Assume target opcodes can't be scalarized.
2111 // TODO - do we have any exceptions?
2112 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2113 return false;
2114
2115 // If the vector op is not supported, try to convert to scalar.
2116 EVT VecVT = VecOp.getValueType();
2117 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2118 return true;
2119
2120 // If the vector op is supported, but the scalar op is not, the transform may
2121 // not be worthwhile.
2122 // Permit the transform if the vector binary operation can be converted to
2123 // a scalar binary operation which is custom lowered with an illegal type.
2124 EVT ScalarVT = VecVT.getScalarType();
2125 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2126 isOperationCustom(Opc, ScalarVT);
2127}
2128
2129bool RISCVTargetLowering::isOffsetFoldingLegal(
2130 const GlobalAddressSDNode *GA) const {
2131 // In order to maximise the opportunity for common subexpression elimination,
2132 // keep a separate ADD node for the global address offset instead of folding
2133 // it in the global address node. Later peephole optimisations may choose to
2134 // fold it back in when profitable.
2135 return false;
2136}
2137
2138// Returns 0-31 if the fli instruction is available for the type and this is
2139// legal FP immediate for the type. Returns -1 otherwise.
2140int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2141 if (!Subtarget.hasStdExtZfa())
2142 return -1;
2143
2144 bool IsSupportedVT = false;
2145 if (VT == MVT::f16) {
2146 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2147 } else if (VT == MVT::f32) {
2148 IsSupportedVT = true;
2149 } else if (VT == MVT::f64) {
2150 assert(Subtarget.hasStdExtD() && "Expect D extension");
2151 IsSupportedVT = true;
2152 }
2153
2154 if (!IsSupportedVT)
2155 return -1;
2156
2157 return RISCVLoadFPImm::getLoadFPImm(Imm);
2158}
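// Worked example (illustrative note, not from the original source): with Zfa,
// the fli.h/fli.s/fli.d instructions load one of 32 fixed constants. Values
// such as -1.0, 0.5, 1.0, 2.0 and +inf are in that table, so getLegalZfaFPImm
// returns their table index; a value like 0.3 is not representable and yields
// -1, falling back to the generic FP immediate handling below.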
2159
2161 bool ForCodeSize) const {
2162 bool IsLegalVT = false;
2163 if (VT == MVT::f16)
2164 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2165 else if (VT == MVT::f32)
2166 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2167 else if (VT == MVT::f64)
2168 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2169 else if (VT == MVT::bf16)
2170 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2171
2172 if (!IsLegalVT)
2173 return false;
2174
2175 if (getLegalZfaFPImm(Imm, VT) >= 0)
2176 return true;
2177
2178 // Cannot create a 64 bit floating-point immediate value for rv32.
2179 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2180 // td can handle +0.0 or -0.0 already.
2181 // -0.0 can be created by fmv + fneg.
2182 return Imm.isZero();
2183 }
2184
2185 // Special case: fmv + fneg
2186 if (Imm.isNegZero())
2187 return true;
2188
2189 // Building an integer and then converting requires a fmv at the end of
2190 // the integer sequence. The fmv is not required for Zfinx.
2191 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2192 const int Cost =
2193 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2194 Subtarget.getXLen(), Subtarget);
2195 return Cost <= FPImmCost;
2196}
2197
2198// TODO: This is very conservative.
2200 unsigned Index) const {
2202 return false;
2203
2204 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2205 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2206 return false;
2207
2208 EVT EltVT = ResVT.getVectorElementType();
2209 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2210
2211 // The smallest type we can slide is i8.
2212 // TODO: We can extract index 0 from a mask vector without a slide.
2213 if (EltVT == MVT::i1)
2214 return false;
2215
2216 unsigned ResElts = ResVT.getVectorNumElements();
2217 unsigned SrcElts = SrcVT.getVectorNumElements();
2218
2219 unsigned MinVLen = Subtarget.getRealMinVLen();
2220 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2221
2222 // If we're extracting only data from the first VLEN bits of the source
2223 // then we can always do this with an m1 vslidedown.vx. Restricting the
2224 // Index ensures we can use a vslidedown.vi.
2225 // TODO: We can generalize this when the exact VLEN is known.
2226 if (Index + ResElts <= MinVLMAX && Index < 31)
2227 return true;
2228
2229 // Conservatively only handle extracting half of a vector.
2230 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2231 // the upper half of a vector until we have more test coverage.
2232 // TODO: For sizes which aren't multiples of VLEN, this may not be
2233 // a cheap extract. However, this case is important in practice for
2234 // shuffled extracts of longer vectors. How should we resolve this?
2235 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2236}
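// Worked example (illustrative, assuming VLEN >= 128): extracting v2i32 from
// v8i32 at index 2 is cheap because Index + ResElts = 4 <= MinVLMAX = 128/32
// and the index fits a vslidedown.vi. Extracting the upper half, v4i32 from
// v8i32 at index 4, is accepted by the half-vector case above. Extracting
// v4i32 at index 1 matches neither case and is reported as not cheap.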
2237
2240 EVT VT) const {
2241 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2242 // We might still end up using a GPR but that will be decided based on ABI.
2243 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2244 !Subtarget.hasStdExtZfhminOrZhinxmin())
2245 return MVT::f32;
2246
2247 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2248
2249 return PartVT;
2250}
2251
2252unsigned
2254 std::optional<MVT> RegisterVT) const {
2255 // Pair inline assembly operand
2256 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2257 *RegisterVT == MVT::Untyped)
2258 return 1;
2259
2260 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2261}
2262
2265 EVT VT) const {
2266 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2267 // We might still end up using a GPR but that will be decided based on ABI.
2268 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2269 !Subtarget.hasStdExtZfhminOrZhinxmin())
2270 return 1;
2271
2272 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2273}
2274
2276 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2277 unsigned &NumIntermediates, MVT &RegisterVT) const {
2278 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2279 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2280
2281 return NumRegs;
2282}
2283
2284// Changes the condition code and swaps operands if necessary, so the SetCC
2285// operation matches one of the comparisons supported directly by branches
2286// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2287// with 1/-1.
2288static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2289 ISD::CondCode &CC, SelectionDAG &DAG) {
2290 // If this is a single bit test that can't be handled by ANDI, shift the
2291 // bit to be tested to the MSB and perform a signed compare with 0.
2292 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2293 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2294 isa<ConstantSDNode>(LHS.getOperand(1))) {
2295 uint64_t Mask = LHS.getConstantOperandVal(1);
2296 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2297 unsigned ShAmt = 0;
2298 if (isPowerOf2_64(Mask)) {
2299 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2300 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2301 } else {
2302 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2303 }
2304
2305 LHS = LHS.getOperand(0);
2306 if (ShAmt != 0)
2307 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2308 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2309 return;
2310 }
2311 }
2312
2313 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2314 int64_t C = RHSC->getSExtValue();
2315 switch (CC) {
2316 default: break;
2317 case ISD::SETGT:
2318 // Convert X > -1 to X >= 0.
2319 if (C == -1) {
2320 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2321 CC = ISD::SETGE;
2322 return;
2323 }
2324 break;
2325 case ISD::SETLT:
2326 // Convert X < 1 to 0 >= X.
2327 if (C == 1) {
2328 RHS = LHS;
2329 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2330 CC = ISD::SETGE;
2331 return;
2332 }
2333 break;
2334 }
2335 }
2336
2337 switch (CC) {
2338 default:
2339 break;
2340 case ISD::SETGT:
2341 case ISD::SETLE:
2342 case ISD::SETUGT:
2343 case ISD::SETULE:
2344 CC = ISD::getSetCCSwappedOperands(CC);
2345 std::swap(LHS, RHS);
2346 break;
2347 }
2348}
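// Worked examples (illustrative): on RV64, (X & 0x800) == 0 cannot use ANDI
// because 0x800 does not fit in a signed 12-bit immediate, so the tested bit
// is shifted to the MSB and the branch checks (X << 52) >= 0 instead.
// Likewise X > -1 becomes X >= 0, X < 1 becomes 0 >= X, and codes without a
// matching branch (e.g. SETGT) are handled by swapping the operands.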
2349
2350RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2351 if (VT.isRISCVVectorTuple()) {
2352 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2353 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2354 return RISCVII::LMUL_F8;
2355 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2356 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2357 return RISCVII::LMUL_F4;
2358 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2359 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2360 return RISCVII::LMUL_F2;
2361 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2362 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2363 return RISCVII::LMUL_1;
2364 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2365 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2366 return RISCVII::LMUL_2;
2367 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2368 return RISCVII::LMUL_4;
2369 llvm_unreachable("Invalid vector tuple type LMUL.");
2370 }
2371
2372 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2373 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2374 if (VT.getVectorElementType() == MVT::i1)
2375 KnownSize *= 8;
2376
2377 switch (KnownSize) {
2378 default:
2379 llvm_unreachable("Invalid LMUL.");
2380 case 8:
2381 return RISCVII::LMUL_F8;
2382 case 16:
2383 return RISCVII::LMUL_F4;
2384 case 32:
2385 return RISCVII::LMUL_F2;
2386 case 64:
2387 return RISCVII::LMUL_1;
2388 case 128:
2389 return RISCVII::LMUL_2;
2390 case 256:
2391 return RISCVII::LMUL_4;
2392 case 512:
2393 return RISCVII::LMUL_8;
2394 }
2395}
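// Worked example (illustrative): nxv4i32 has a known minimum size of 128
// bits, i.e. two 64-bit vector blocks, so it maps to LMUL_2; nxv2i32 (64
// bits) maps to LMUL_1 and nxv1i8 (8 bits) to LMUL_F8. i1 vectors are scaled
// by 8 first, so nxv8i1 also maps to LMUL_1.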
2396
2397unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2398 switch (LMul) {
2399 default:
2400 llvm_unreachable("Invalid LMUL.");
2401 case RISCVII::VLMUL::LMUL_F8:
2402 case RISCVII::VLMUL::LMUL_F4:
2403 case RISCVII::VLMUL::LMUL_F2:
2404 case RISCVII::VLMUL::LMUL_1:
2405 return RISCV::VRRegClassID;
2406 case RISCVII::VLMUL::LMUL_2:
2407 return RISCV::VRM2RegClassID;
2408 case RISCVII::VLMUL::LMUL_4:
2409 return RISCV::VRM4RegClassID;
2410 case RISCVII::VLMUL::LMUL_8:
2411 return RISCV::VRM8RegClassID;
2412 }
2413}
2414
2415unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2416 RISCVII::VLMUL LMUL = getLMUL(VT);
2417 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2418 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2419 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2420 LMUL == RISCVII::VLMUL::LMUL_1) {
2421 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2422 "Unexpected subreg numbering");
2423 return RISCV::sub_vrm1_0 + Index;
2424 }
2425 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2426 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2427 "Unexpected subreg numbering");
2428 return RISCV::sub_vrm2_0 + Index;
2429 }
2430 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2431 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2432 "Unexpected subreg numbering");
2433 return RISCV::sub_vrm4_0 + Index;
2434 }
2435 llvm_unreachable("Invalid vector type.");
2436}
2437
2438unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2439 if (VT.isRISCVVectorTuple()) {
2440 unsigned NF = VT.getRISCVVectorTupleNumFields();
2441 unsigned RegsPerField =
2442 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2443 (NF * RISCV::RVVBitsPerBlock));
2444 switch (RegsPerField) {
2445 case 1:
2446 if (NF == 2)
2447 return RISCV::VRN2M1RegClassID;
2448 if (NF == 3)
2449 return RISCV::VRN3M1RegClassID;
2450 if (NF == 4)
2451 return RISCV::VRN4M1RegClassID;
2452 if (NF == 5)
2453 return RISCV::VRN5M1RegClassID;
2454 if (NF == 6)
2455 return RISCV::VRN6M1RegClassID;
2456 if (NF == 7)
2457 return RISCV::VRN7M1RegClassID;
2458 if (NF == 8)
2459 return RISCV::VRN8M1RegClassID;
2460 break;
2461 case 2:
2462 if (NF == 2)
2463 return RISCV::VRN2M2RegClassID;
2464 if (NF == 3)
2465 return RISCV::VRN3M2RegClassID;
2466 if (NF == 4)
2467 return RISCV::VRN4M2RegClassID;
2468 break;
2469 case 4:
2470 assert(NF == 2);
2471 return RISCV::VRN2M4RegClassID;
2472 default:
2473 break;
2474 }
2475 llvm_unreachable("Invalid vector tuple type RegClass.");
2476 }
2477
2478 if (VT.getVectorElementType() == MVT::i1)
2479 return RISCV::VRRegClassID;
2480 return getRegClassIDForLMUL(getLMUL(VT));
2481}
2482
2483// Attempt to decompose a subvector insert/extract between VecVT and
2484// SubVecVT via subregister indices. Returns the subregister index that
2485// can perform the subvector insert/extract with the given element index, as
2486// well as the index corresponding to any leftover subvectors that must be
2487// further inserted/extracted within the register class for SubVecVT.
2488std::pair<unsigned, unsigned>
2489RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2490 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2491 const RISCVRegisterInfo *TRI) {
2492 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2493 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2494 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2495 "Register classes not ordered");
2496 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2497 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2498
2499 // If VecVT is a vector tuple type, either it's the tuple type with the same
2500 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2501 if (VecVT.isRISCVVectorTuple()) {
2502 if (VecRegClassID == SubRegClassID)
2503 return {RISCV::NoSubRegister, 0};
2504
2505 assert(SubVecVT.isScalableVector() &&
2506 "Only allow scalable vector subvector.");
2507 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2508 "Invalid vector tuple insert/extract for vector and subvector with "
2509 "different LMUL.");
2510 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2511 }
2512
2513 // Try to compose a subregister index that takes us from the incoming
2514 // LMUL>1 register class down to the outgoing one. At each step we halve
2515 // the LMUL:
2516 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2517 // Note that this is not guaranteed to find a subregister index, such as
2518 // when we are extracting from one VR type to another.
2519 unsigned SubRegIdx = RISCV::NoSubRegister;
2520 for (const unsigned RCID :
2521 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2522 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2523 VecVT = VecVT.getHalfNumVectorElementsVT();
2524 bool IsHi =
2525 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2526 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2527 getSubregIndexByMVT(VecVT, IsHi));
2528 if (IsHi)
2529 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2530 }
2531 return {SubRegIdx, InsertExtractIdx};
2532}
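// Worked example (illustrative): extracting nxv4i32 (an LMUL=2 value) at
// element index 4 from nxv8i32 (an LMUL=4 value) halves nxv8i32 once, lands
// in the high VRM2 half, and yields {sub_vrm2_1, 0}. Extracting nxv1i32 from
// nxv2i32 stays within a single VR register, so the result is
// {NoSubRegister, InsertExtractIdx} and the caller must handle the remaining
// index itself.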
2533
2534// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2535// stores for those types.
2536bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2537 return !Subtarget.useRVVForFixedLengthVectors() ||
2538 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2539}
2540
2542 if (!ScalarTy.isSimple())
2543 return false;
2544 switch (ScalarTy.getSimpleVT().SimpleTy) {
2545 case MVT::iPTR:
2546 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2547 case MVT::i8:
2548 case MVT::i16:
2549 case MVT::i32:
2550 return true;
2551 case MVT::i64:
2552 return Subtarget.hasVInstructionsI64();
2553 case MVT::f16:
2554 return Subtarget.hasVInstructionsF16Minimal();
2555 case MVT::bf16:
2556 return Subtarget.hasVInstructionsBF16Minimal();
2557 case MVT::f32:
2558 return Subtarget.hasVInstructionsF32();
2559 case MVT::f64:
2560 return Subtarget.hasVInstructionsF64();
2561 default:
2562 return false;
2563 }
2564}
2565
2566
2567unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2568 return NumRepeatedDivisors;
2569}
2570
2572 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2573 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2574 "Unexpected opcode");
2575 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2576 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2577 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2578 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2579 if (!II)
2580 return SDValue();
2581 return Op.getOperand(II->VLOperand + 1 + HasChain);
2582}
2583
2585 const RISCVSubtarget &Subtarget) {
2586 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2587 if (!Subtarget.useRVVForFixedLengthVectors())
2588 return false;
2589
2590 // We only support a set of vector types with a consistent maximum fixed size
2591 // across all supported vector element types to avoid legalization issues.
2592 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2593 // fixed-length vector type we support is 1024 bytes.
2594 if (VT.getFixedSizeInBits() > 1024 * 8)
2595 return false;
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598
2599 MVT EltVT = VT.getVectorElementType();
2600
2601 // Don't use RVV for vectors we cannot scalarize if required.
2602 switch (EltVT.SimpleTy) {
2603 // i1 is supported but has different rules.
2604 default:
2605 return false;
2606 case MVT::i1:
2607 // Masks can only use a single register.
2608 if (VT.getVectorNumElements() > MinVLen)
2609 return false;
2610 MinVLen /= 8;
2611 break;
2612 case MVT::i8:
2613 case MVT::i16:
2614 case MVT::i32:
2615 break;
2616 case MVT::i64:
2617 if (!Subtarget.hasVInstructionsI64())
2618 return false;
2619 break;
2620 case MVT::f16:
2621 if (!Subtarget.hasVInstructionsF16Minimal())
2622 return false;
2623 break;
2624 case MVT::bf16:
2625 if (!Subtarget.hasVInstructionsBF16Minimal())
2626 return false;
2627 break;
2628 case MVT::f32:
2629 if (!Subtarget.hasVInstructionsF32())
2630 return false;
2631 break;
2632 case MVT::f64:
2633 if (!Subtarget.hasVInstructionsF64())
2634 return false;
2635 break;
2636 }
2637
2638 // Reject elements larger than ELEN.
2639 if (EltVT.getSizeInBits() > Subtarget.getELen())
2640 return false;
2641
2642 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2643 // Don't use RVV for types that don't fit.
2644 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2645 return false;
2646
2647 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2648 // the base fixed length RVV support in place.
2649 if (!VT.isPow2VectorType())
2650 return false;
2651
2652 return true;
2653}
2654
2655bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2656 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2657}
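// Worked example (illustrative, assuming Zvl128b so MinVLen = 128 and the
// default LMUL cap): v8i32 occupies 256 bits, giving LMul = ceil(256/128) = 2,
// so it is handled with RVV. v64i1 first checks 64 <= MinVLen and then divides
// MinVLen by 8 before the LMUL computation. v3i32 is rejected because it is
// not a power-of-2 vector type.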
2658
2659// Return the largest legal scalable vector type that matches VT's element type.
2661 const RISCVSubtarget &Subtarget) {
2662 // This may be called before legal types are set up.
2663 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2664 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2665 "Expected legal fixed length vector!");
2666
2667 unsigned MinVLen = Subtarget.getRealMinVLen();
2668 unsigned MaxELen = Subtarget.getELen();
2669
2670 MVT EltVT = VT.getVectorElementType();
2671 switch (EltVT.SimpleTy) {
2672 default:
2673 llvm_unreachable("unexpected element type for RVV container");
2674 case MVT::i1:
2675 case MVT::i8:
2676 case MVT::i16:
2677 case MVT::i32:
2678 case MVT::i64:
2679 case MVT::bf16:
2680 case MVT::f16:
2681 case MVT::f32:
2682 case MVT::f64: {
2683 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2684 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2685 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2686 unsigned NumElts =
2687 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2688 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2689 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2690 return MVT::getScalableVectorVT(EltVT, NumElts);
2691 }
2692 }
2693}
2694
2696 const RISCVSubtarget &Subtarget) {
2698 Subtarget);
2699}
2700
2702 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2703}
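// Worked example (illustrative, assuming MinVLen = 128 and ELEN = 64): v8i32
// becomes nxv4i32 (NumElts = 8 * 64 / 128 = 4), an LMUL=2 container, while
// v4i16 becomes nxv2i16, a fractional-LMUL container. The std::max clamp
// keeps at least RVVBitsPerBlock / ELEN elements so the container never drops
// below the smallest supported fractional LMUL.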
2704
2705// Grow V to consume an entire RVV register.
2707 const RISCVSubtarget &Subtarget) {
2708 assert(VT.isScalableVector() &&
2709 "Expected to convert into a scalable vector!");
2710 assert(V.getValueType().isFixedLengthVector() &&
2711 "Expected a fixed length vector operand!");
2712 SDLoc DL(V);
2713 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2714 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2715}
2716
2717// Shrink V so it's just big enough to maintain a VT's worth of data.
2719 const RISCVSubtarget &Subtarget) {
2721 "Expected to convert into a fixed length vector!");
2722 assert(V.getValueType().isScalableVector() &&
2723 "Expected a scalable vector operand!");
2724 SDLoc DL(V);
2725 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2726 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2727}
2728
2729/// Return the mask type suitable for masking the provided
2730/// vector type. This is simply an i1 element type vector of the same
2731/// (possibly scalable) length.
2732static MVT getMaskTypeFor(MVT VecVT) {
2733 assert(VecVT.isVector());
2735 return MVT::getVectorVT(MVT::i1, EC);
2736}
2737
2738/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2739/// vector length VL.
2740static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2741 SelectionDAG &DAG) {
2742 MVT MaskVT = getMaskTypeFor(VecVT);
2743 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2744}
2745
2746static std::pair<SDValue, SDValue>
2748 const RISCVSubtarget &Subtarget) {
2749 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2750 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2751 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2752 return {Mask, VL};
2753}
2754
2755static std::pair<SDValue, SDValue>
2756getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2757 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2758 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2759 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2760 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2761 return {Mask, VL};
2762}
2763
2764// Gets the two common "VL" operands: an all-ones mask and the vector length.
2765// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2766// the vector type that the fixed-length vector is contained in. Otherwise if
2767// VecVT is scalable, then ContainerVT should be the same as VecVT.
2768static std::pair<SDValue, SDValue>
2769getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2770 const RISCVSubtarget &Subtarget) {
2771 if (VecVT.isFixedLengthVector())
2772 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2773 Subtarget);
2774 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2775 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2776}
2777
2779 SelectionDAG &DAG) const {
2780 assert(VecVT.isScalableVector() && "Expected scalable vector");
2781 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2782 VecVT.getVectorElementCount());
2783}
2784
2785std::pair<unsigned, unsigned>
2787 const RISCVSubtarget &Subtarget) {
2788 assert(VecVT.isScalableVector() && "Expected scalable vector");
2789
2790 unsigned EltSize = VecVT.getScalarSizeInBits();
2791 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2792
2793 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2794 unsigned MaxVLMAX =
2795 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2796
2797 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2798 unsigned MinVLMAX =
2799 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2800
2801 return std::make_pair(MinVLMAX, MaxVLMAX);
2802}
2803
2804// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2805// of either is (currently) supported. This can get us into an infinite loop
2806// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2807// as a ..., etc.
2808// Until either (or both) of these can reliably lower any node, reporting that
2809// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2810// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2811// which is not desirable.
2813 EVT VT, unsigned DefinedValues) const {
2814 return false;
2815}
2816
2818 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2819 // implementation-defined.
2820 if (!VT.isVector())
2822 unsigned DLenFactor = Subtarget.getDLenFactor();
2823 unsigned Cost;
2824 if (VT.isScalableVector()) {
2825 unsigned LMul;
2826 bool Fractional;
2827 std::tie(LMul, Fractional) =
2829 if (Fractional)
2830 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2831 else
2832 Cost = (LMul * DLenFactor);
2833 } else {
2834 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2835 }
2836 return Cost;
2837}
2838
2839
2840/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2841/// is generally quadratic in the number of vregs implied by LMUL. Note that the
2842/// operands (index and possibly mask) are handled separately.
2844 return getLMULCost(VT) * getLMULCost(VT);
2845}
2846
2847/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2848/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2849/// or may track the vrgather.vv cost. It is implementation-dependent.
2851 return getLMULCost(VT);
2852}
2853
2854/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2855/// for the type VT. (This does not cover the vslide1up or vslide1down
2856/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2857/// or may track the vrgather.vv cost. It is implementation-dependent.
2859 return getLMULCost(VT);
2860}
2861
2862/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2863/// for the type VT. (This does not cover the vslide1up or vslide1down
2864/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2865/// or may track the vrgather.vv cost. It is implementation-dependent.
2867 return getLMULCost(VT);
2868}
2869
2871 const RISCVSubtarget &Subtarget) {
2872 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2873 // bf16 conversions are always promoted to f32.
2874 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2875 Op.getValueType() == MVT::bf16) {
2876 bool IsStrict = Op->isStrictFPOpcode();
2877
2878 SDLoc DL(Op);
2879 if (IsStrict) {
2880 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2881 {Op.getOperand(0), Op.getOperand(1)});
2882 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2883 {Op.getValueType(), MVT::Other},
2884 {Val.getValue(1), Val.getValue(0),
2885 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2886 }
2887 return DAG.getNode(
2888 ISD::FP_ROUND, DL, Op.getValueType(),
2889 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2890 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2891 }
2892
2893 // Other operations are legal.
2894 return Op;
2895}
2896
2898 const RISCVSubtarget &Subtarget) {
2899 // RISC-V FP-to-int conversions saturate to the destination register size, but
2900 // don't produce 0 for nan. We can use a conversion instruction and fix the
2901 // nan case with a compare and a select.
2902 SDValue Src = Op.getOperand(0);
2903
2904 MVT DstVT = Op.getSimpleValueType();
2905 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2906
2907 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2908
2909 if (!DstVT.isVector()) {
2910 // For bf16 or for f16 in the absence of Zfh, promote to f32, then saturate
2911 // the result.
2912 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2913 Src.getValueType() == MVT::bf16) {
2914 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2915 }
2916
2917 unsigned Opc;
2918 if (SatVT == DstVT)
2919 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2920 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2922 else
2923 return SDValue();
2924 // FIXME: Support other SatVTs by clamping before or after the conversion.
2925
2926 SDLoc DL(Op);
2927 SDValue FpToInt = DAG.getNode(
2928 Opc, DL, DstVT, Src,
2930
2931 if (Opc == RISCVISD::FCVT_WU_RV64)
2932 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2933
2934 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2935 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2937 }
2938
2939 // Vectors.
2940
2941 MVT DstEltVT = DstVT.getVectorElementType();
2942 MVT SrcVT = Src.getSimpleValueType();
2943 MVT SrcEltVT = SrcVT.getVectorElementType();
2944 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2945 unsigned DstEltSize = DstEltVT.getSizeInBits();
2946
2947 // Only handle saturating to the destination type.
2948 if (SatVT != DstEltVT)
2949 return SDValue();
2950
2951 MVT DstContainerVT = DstVT;
2952 MVT SrcContainerVT = SrcVT;
2953 if (DstVT.isFixedLengthVector()) {
2954 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2955 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2956 assert(DstContainerVT.getVectorElementCount() ==
2957 SrcContainerVT.getVectorElementCount() &&
2958 "Expected same element count");
2959 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2960 }
2961
2962 SDLoc DL(Op);
2963
2964 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2965
2966 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2967 {Src, Src, DAG.getCondCode(ISD::SETNE),
2968 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2969
2970 // If we need to widen by more than 1 step, promote the FP type, then do a
2971 // widening convert.
2972 if (DstEltSize > (2 * SrcEltSize)) {
2973 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2974 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2975 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2976 }
2977
2978 MVT CvtContainerVT = DstContainerVT;
2979 MVT CvtEltVT = DstEltVT;
2980 if (SrcEltSize > (2 * DstEltSize)) {
2981 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2982 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2983 }
2984
2985 unsigned RVVOpc =
2987 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2988
2989 while (CvtContainerVT != DstContainerVT) {
2990 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2991 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2992 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2993 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2995 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2996 }
2997
2998 SDValue SplatZero = DAG.getNode(
2999 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3000 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3001 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3002 Res, DAG.getUNDEF(DstContainerVT), VL);
3003
3004 if (DstVT.isFixedLengthVector())
3005 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3006
3007 return Res;
3008}
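// Illustrative scalar sketch (editorial, not from the original source): an
// f32 -> i32 fptosi.sat becomes an FCVT_X node (with a round-towards-zero
// rounding-mode operand) followed by a select on "Src unordered with Src",
// so a NaN input produces 0 while out-of-range inputs rely on the saturating
// behaviour of the fcvt instruction itself.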
3009
3011 const RISCVSubtarget &Subtarget) {
3012 bool IsStrict = Op->isStrictFPOpcode();
3013 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3014
3015 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3016 // bf16 conversions are always promoted to f32.
3017 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3018 SrcVal.getValueType() == MVT::bf16) {
3019 SDLoc DL(Op);
3020 if (IsStrict) {
3021 SDValue Ext =
3022 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3023 {Op.getOperand(0), SrcVal});
3024 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3025 {Ext.getValue(1), Ext.getValue(0)});
3026 }
3027 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3028 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3029 }
3030
3031 // Other operations are legal.
3032 return Op;
3033}
3034
3036 switch (Opc) {
3037 case ISD::FROUNDEVEN:
3038 case ISD::STRICT_FROUNDEVEN:
3039 case ISD::VP_FROUNDEVEN:
3040 return RISCVFPRndMode::RNE;
3041 case ISD::FTRUNC:
3042 case ISD::STRICT_FTRUNC:
3043 case ISD::VP_FROUNDTOZERO:
3044 return RISCVFPRndMode::RTZ;
3045 case ISD::FFLOOR:
3046 case ISD::STRICT_FFLOOR:
3047 case ISD::VP_FFLOOR:
3048 return RISCVFPRndMode::RDN;
3049 case ISD::FCEIL:
3050 case ISD::STRICT_FCEIL:
3051 case ISD::VP_FCEIL:
3052 return RISCVFPRndMode::RUP;
3053 case ISD::FROUND:
3054 case ISD::STRICT_FROUND:
3055 case ISD::VP_FROUND:
3056 return RISCVFPRndMode::RMM;
3057 case ISD::FRINT:
3058 case ISD::VP_FRINT:
3059 return RISCVFPRndMode::DYN;
3060 }
3061
3062 return RISCVFPRndMode::Invalid;
3063}
3064
3065// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3066// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3067// the integer domain and back. Taking care to avoid converting values that are
3068// nan or already correct.
3069static SDValue
3071 const RISCVSubtarget &Subtarget) {
3072 MVT VT = Op.getSimpleValueType();
3073 assert(VT.isVector() && "Unexpected type");
3074
3075 SDLoc DL(Op);
3076
3077 SDValue Src = Op.getOperand(0);
3078
3079 MVT ContainerVT = VT;
3080 if (VT.isFixedLengthVector()) {
3081 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3082 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3083 }
3084
3085 SDValue Mask, VL;
3086 if (Op->isVPOpcode()) {
3087 Mask = Op.getOperand(1);
3088 if (VT.isFixedLengthVector())
3089 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3090 Subtarget);
3091 VL = Op.getOperand(2);
3092 } else {
3093 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3094 }
3095
3096 // Freeze the source since we are increasing the number of uses.
3097 Src = DAG.getFreeze(Src);
3098
3099 // We do the conversion on the absolute value and fix the sign at the end.
3100 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3101
3102 // Determine the largest integer that can be represented exactly. This and
3103 // values larger than it don't have any fractional bits so don't need to
3104 // be converted.
3105 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3106 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3107 APFloat MaxVal = APFloat(FltSem);
3108 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3109 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3110 SDValue MaxValNode =
3111 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3112 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3113 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3114
3115 // If abs(Src) was larger than MaxVal or nan, keep it.
3116 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3117 Mask =
3118 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3119 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3120 Mask, Mask, VL});
3121
3122 // Truncate to integer and convert back to FP.
3123 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3124 MVT XLenVT = Subtarget.getXLenVT();
3125 SDValue Truncated;
3126
3127 switch (Op.getOpcode()) {
3128 default:
3129 llvm_unreachable("Unexpected opcode");
3130 case ISD::FRINT:
3131 case ISD::VP_FRINT:
3132 case ISD::FCEIL:
3133 case ISD::VP_FCEIL:
3134 case ISD::FFLOOR:
3135 case ISD::VP_FFLOOR:
3136 case ISD::FROUND:
3137 case ISD::FROUNDEVEN:
3138 case ISD::VP_FROUND:
3139 case ISD::VP_FROUNDEVEN:
3140 case ISD::VP_FROUNDTOZERO: {
3143 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3144 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3145 break;
3146 }
3147 case ISD::FTRUNC:
3148 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3149 Mask, VL);
3150 break;
3151 case ISD::FNEARBYINT:
3152 case ISD::VP_FNEARBYINT:
3153 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3154 Mask, VL);
3155 break;
3156 }
3157
3158 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3159 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3160 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3161 Mask, VL);
3162
3163 // Restore the original sign so that -0.0 is preserved.
3164 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3165 Src, Src, Mask, VL);
3166
3167 if (!VT.isFixedLengthVector())
3168 return Truncated;
3169
3170 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3171}
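// Worked example (illustrative): a vector floor on f32 data uses
// |x| < 2^23 as the active mask (f32 has 24 bits of precision, so anything
// with a larger magnitude is already an integer or NaN), converts the
// selected lanes to integers with RDN rounding, converts back, and finally
// restores the original sign, e.g. -0.3 becomes -1.0 and -0.0 stays -0.0.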
3172
3173// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3174// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3175// qNaNs and converting the new source to integer and back to FP.
3176static SDValue
3178 const RISCVSubtarget &Subtarget) {
3179 SDLoc DL(Op);
3180 MVT VT = Op.getSimpleValueType();
3181 SDValue Chain = Op.getOperand(0);
3182 SDValue Src = Op.getOperand(1);
3183
3184 MVT ContainerVT = VT;
3185 if (VT.isFixedLengthVector()) {
3186 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3187 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3188 }
3189
3190 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3191
3192 // Freeze the source since we are increasing the number of uses.
3193 Src = DAG.getFreeze(Src);
3194
3195 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
3196 MVT MaskVT = Mask.getSimpleValueType();
3198 DAG.getVTList(MaskVT, MVT::Other),
3199 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3200 DAG.getUNDEF(MaskVT), Mask, VL});
3201 Chain = Unorder.getValue(1);
3203 DAG.getVTList(ContainerVT, MVT::Other),
3204 {Chain, Src, Src, Src, Unorder, VL});
3205 Chain = Src.getValue(1);
3206
3207 // We do the conversion on the absolute value and fix the sign at the end.
3208 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3209
3210 // Determine the largest integer that can be represented exactly. This and
3211 // values larger than it don't have any fractional bits so don't need to
3212 // be converted.
3213 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3214 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3215 APFloat MaxVal = APFloat(FltSem);
3216 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3217 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3218 SDValue MaxValNode =
3219 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3220 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3221 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3222
3223 // If abs(Src) was larger than MaxVal or nan, keep it.
3224 Mask = DAG.getNode(
3225 RISCVISD::SETCC_VL, DL, MaskVT,
3226 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3227
3228 // Truncate to integer and convert back to FP.
3229 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3230 MVT XLenVT = Subtarget.getXLenVT();
3231 SDValue Truncated;
3232
3233 switch (Op.getOpcode()) {
3234 default:
3235 llvm_unreachable("Unexpected opcode");
3236 case ISD::STRICT_FCEIL:
3237 case ISD::STRICT_FFLOOR:
3238 case ISD::STRICT_FROUND:
3242 Truncated = DAG.getNode(
3243 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3244 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3245 break;
3246 }
3247 case ISD::STRICT_FTRUNC:
3248 Truncated =
3250 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3251 break;
3254 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3255 Mask, VL);
3256 break;
3257 }
3258 Chain = Truncated.getValue(1);
3259
3260 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3261 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3262 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3263 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3264 Truncated, Mask, VL);
3265 Chain = Truncated.getValue(1);
3266 }
3267
3268 // Restore the original sign so that -0.0 is preserved.
3269 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3270 Src, Src, Mask, VL);
3271
3272 if (VT.isFixedLengthVector())
3273 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3274 return DAG.getMergeValues({Truncated, Chain}, DL);
3275}
3276
3277static SDValue
3279 const RISCVSubtarget &Subtarget) {
3280 MVT VT = Op.getSimpleValueType();
3281 if (VT.isVector())
3282 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3283
3284 if (DAG.shouldOptForSize())
3285 return SDValue();
3286
3287 SDLoc DL(Op);
3288 SDValue Src = Op.getOperand(0);
3289
3290 // Create an integer the size of the mantissa with the MSB set. This and all
3291 // values larger than it don't have any fractional bits so don't need to be
3292 // converted.
3293 const fltSemantics &FltSem = VT.getFltSemantics();
3294 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3295 APFloat MaxVal = APFloat(FltSem);
3296 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3297 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3298 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3299
3301 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3302 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3303}
3304
3305// Expand vector LRINT and LLRINT by converting to the integer domain.
3307 const RISCVSubtarget &Subtarget) {
3308 MVT VT = Op.getSimpleValueType();
3309 assert(VT.isVector() && "Unexpected type");
3310
3311 SDLoc DL(Op);
3312 SDValue Src = Op.getOperand(0);
3313 MVT ContainerVT = VT;
3314
3315 if (VT.isFixedLengthVector()) {
3316 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3317 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3318 }
3319
3320 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3321 SDValue Truncated = DAG.getNode(
3322 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3324 VL);
3325
3326 if (!VT.isFixedLengthVector())
3327 return Truncated;
3328
3329 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3330}
3331
3332static SDValue
3334 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3335 SDValue Offset, SDValue Mask, SDValue VL,
3337 if (Passthru.isUndef())
3339 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3340 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3341 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3342}
3343
3344static SDValue
3345getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3346 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3347 SDValue VL,
3349 if (Passthru.isUndef())
3351 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3352 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3353 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3354}
3355
3356static MVT getLMUL1VT(MVT VT) {
3358 "Unexpected vector MVT");
3362}
3363
3364struct VIDSequence {
3365 int64_t StepNumerator;
3366 unsigned StepDenominator;
3367 int64_t Addend;
3368};
3369
3370static std::optional<APInt> getExactInteger(const APFloat &APF,
3372 // We will use a SINT_TO_FP to materialize this constant so we should use a
3373 // signed APSInt here.
3374 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3375 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3376 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3377 // the rounding mode changes the output value, then it is not an exact
3378 // integer.
3380 bool IsExact;
3381 // If it is out of signed integer range, it will return an invalid operation.
3382 // If it is not an exact integer, IsExact is false.
3383 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3385 !IsExact)
3386 return std::nullopt;
3387 return ValInt.extractBits(BitWidth, 0);
3388}
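// Illustrative behaviour (editorial note): getExactInteger(3.0, 32) yields
// the APInt 3 and getExactInteger(-4.0, 8) yields the 8-bit pattern 0xFC,
// whereas 2.5 cannot be converted without rounding and returns std::nullopt.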
3389
3390// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3391// to the (non-zero) step S and start value X. This can be then lowered as the
3392// RVV sequence (VID * S) + X, for example.
3393// The step S is represented as an integer numerator divided by a positive
3394// denominator. Note that the implementation currently only identifies
3395// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3396// cannot detect 2/3, for example.
3397// Note that this method will also match potentially unappealing index
3398// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3399// determine whether this is worth generating code for.
3400//
3401// EltSizeInBits is the size of the type that the sequence will be calculated
3402// in, i.e. SEW for build_vectors or XLEN for address calculations.
3403static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3404 unsigned EltSizeInBits) {
3405 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3406 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3407 return std::nullopt;
3408 bool IsInteger = Op.getValueType().isInteger();
3409
3410 std::optional<unsigned> SeqStepDenom;
3411 std::optional<APInt> SeqStepNum;
3412 std::optional<APInt> SeqAddend;
3413 std::optional<std::pair<APInt, unsigned>> PrevElt;
3414 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3415
3416 // First extract the ops into a list of constant integer values. This may not
3417 // be possible for floats if they're not all representable as integers.
3419 const unsigned OpSize = Op.getScalarValueSizeInBits();
3420 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3421 if (Elt.isUndef()) {
3422 Elts[Idx] = std::nullopt;
3423 continue;
3424 }
3425 if (IsInteger) {
3426 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3427 } else {
3428 auto ExactInteger =
3429 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3430 if (!ExactInteger)
3431 return std::nullopt;
3432 Elts[Idx] = *ExactInteger;
3433 }
3434 }
3435
3436 for (auto [Idx, Elt] : enumerate(Elts)) {
3437 // Assume undef elements match the sequence; we just have to be careful
3438 // when interpolating across them.
3439 if (!Elt)
3440 continue;
3441
3442 if (PrevElt) {
3443 // Calculate the step since the last non-undef element, and ensure
3444 // it's consistent across the entire sequence.
3445 unsigned IdxDiff = Idx - PrevElt->second;
3446 APInt ValDiff = *Elt - PrevElt->first;
3447
3448 // A zero value difference means that we're somewhere in the middle
3449 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3450 // step change before evaluating the sequence.
3451 if (ValDiff == 0)
3452 continue;
3453
3454 int64_t Remainder = ValDiff.srem(IdxDiff);
3455 // Normalize the step if it's greater than 1.
3456 if (Remainder != ValDiff.getSExtValue()) {
3457 // The difference must cleanly divide the element span.
3458 if (Remainder != 0)
3459 return std::nullopt;
3460 ValDiff = ValDiff.sdiv(IdxDiff);
3461 IdxDiff = 1;
3462 }
3463
3464 if (!SeqStepNum)
3465 SeqStepNum = ValDiff;
3466 else if (ValDiff != SeqStepNum)
3467 return std::nullopt;
3468
3469 if (!SeqStepDenom)
3470 SeqStepDenom = IdxDiff;
3471 else if (IdxDiff != *SeqStepDenom)
3472 return std::nullopt;
3473 }
3474
3475 // Record this non-undef element for later.
3476 if (!PrevElt || PrevElt->first != *Elt)
3477 PrevElt = std::make_pair(*Elt, Idx);
3478 }
3479
3480 // We need to have logged a step for this to count as a legal index sequence.
3481 if (!SeqStepNum || !SeqStepDenom)
3482 return std::nullopt;
3483
3484 // Loop back through the sequence and validate elements we might have skipped
3485 // while waiting for a valid step. While doing this, log any sequence addend.
3486 for (auto [Idx, Elt] : enumerate(Elts)) {
3487 if (!Elt)
3488 continue;
3489 APInt ExpectedVal =
3490 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3491 *SeqStepNum)
3492 .sdiv(*SeqStepDenom);
3493
3494 APInt Addend = *Elt - ExpectedVal;
3495 if (!SeqAddend)
3496 SeqAddend = Addend;
3497 else if (Addend != SeqAddend)
3498 return std::nullopt;
3499 }
3500
3501 assert(SeqAddend && "Must have an addend if we have a step");
3502
3503 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3504 SeqAddend->getSExtValue()};
3505}
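// Worked examples (illustrative): <1, 3, 5, 7> matches with StepNumerator=2,
// StepDenominator=1, Addend=1, so it can be lowered as (vid << 1) + 1.
// <0, 0, 1, 1, 2, 2> matches with StepNumerator=1, StepDenominator=2,
// Addend=0, i.e. vid >> 1. <0, 2, 5, ...> has no consistent step and yields
// std::nullopt.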
3506
3507// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3508// and lower it as a VRGATHER_VX_VL from the source vector.
3509static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3510 SelectionDAG &DAG,
3511 const RISCVSubtarget &Subtarget) {
3512 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3513 return SDValue();
3514 SDValue Vec = SplatVal.getOperand(0);
3515 // Don't perform this optimization for i1 vectors, or if the element types are
3516 // different.
3517 // FIXME: Support i1 vectors, maybe by promoting to i8?
3518 MVT EltTy = VT.getVectorElementType();
3519 if (EltTy == MVT::i1 ||
3521 return SDValue();
3522 SDValue Idx = SplatVal.getOperand(1);
3523 // The index must be a legal type.
3524 if (Idx.getValueType() != Subtarget.getXLenVT())
3525 return SDValue();
3526
3527 // Check that Index lies within VT
3528 // TODO: Can we check if the Index is constant and known in-bounds?
3530 return SDValue();
3531
3532 MVT ContainerVT = VT;
3533 if (VT.isFixedLengthVector())
3534 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3535
3536 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3537 DAG.getUNDEF(ContainerVT), Vec,
3538 DAG.getVectorIdxConstant(0, DL));
3539
3540 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3541
3542 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3543 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3544
3545 if (!VT.isFixedLengthVector())
3546 return Gather;
3547
3548 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3549}
3550
3551/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3552/// which constitute a large proportion of the elements. In such cases we can
3553/// splat a vector with the dominant element and make up the shortfall with
3554/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3555/// Note that this includes vectors of 2 elements by association. The
3556/// upper-most element is the "dominant" one, allowing us to use a splat to
3557/// "insert" the upper element, and an insert of the lower element at position
3558/// 0, which improves codegen.
3560 const RISCVSubtarget &Subtarget) {
3561 MVT VT = Op.getSimpleValueType();
3562 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3563
3564 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3565
3566 SDLoc DL(Op);
3567 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3568
3569 MVT XLenVT = Subtarget.getXLenVT();
3570 unsigned NumElts = Op.getNumOperands();
3571
3572 SDValue DominantValue;
3573 unsigned MostCommonCount = 0;
3574 DenseMap<SDValue, unsigned> ValueCounts;
3575 unsigned NumUndefElts =
3576 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3577
3578 // Track the number of scalar loads we know we'd be inserting, estimated as
3579 // any non-zero floating-point constant. Other kinds of element are either
3580 // already in registers or are materialized on demand. The threshold at which
3581 // a vector load is more desirable than several scalar materializion and
3582 // vector-insertion instructions is not known.
3583 unsigned NumScalarLoads = 0;
3584
3585 for (SDValue V : Op->op_values()) {
3586 if (V.isUndef())
3587 continue;
3588
3589 unsigned &Count = ValueCounts[V];
3590 if (0 == Count)
3591 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3592 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3593
3594 // Is this value dominant? In case of a tie, prefer the highest element as
3595 // it's cheaper to insert near the beginning of a vector than it is at the
3596 // end.
3597 if (++Count >= MostCommonCount) {
3598 DominantValue = V;
3599 MostCommonCount = Count;
3600 }
3601 }
3602
3603 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3604 unsigned NumDefElts = NumElts - NumUndefElts;
3605 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3606
3607 // Don't perform this optimization when optimizing for size, since
3608 // materializing elements and inserting them tends to cause code bloat.
3609 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3610 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3611 ((MostCommonCount > DominantValueCountThreshold) ||
3612 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3613 // Start by splatting the most common element.
3614 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3615
3616 DenseSet<SDValue> Processed{DominantValue};
3617
3618 // We can handle an insert into the last element (of a splat) via
3619 // v(f)slide1down. This is slightly better than the vslideup insert
3620 // lowering as it avoids the need for a vector group temporary. It
3621 // is also better than using vmerge.vx as it avoids the need to
3622 // materialize the mask in a vector register.
3623 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3624 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3625 LastOp != DominantValue) {
3626 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3627 auto OpCode =
3629 if (!VT.isFloatingPoint())
3630 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3631 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3632 LastOp, Mask, VL);
3633 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3634 Processed.insert(LastOp);
3635 }
3636
3637 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3638 for (const auto &OpIdx : enumerate(Op->ops())) {
3639 const SDValue &V = OpIdx.value();
3640 if (V.isUndef() || !Processed.insert(V).second)
3641 continue;
3642 if (ValueCounts[V] == 1) {
3643 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3644 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3645 } else {
3646 // Blend in all instances of this value using a VSELECT, using a
3647 // mask where each bit signals whether that element is the one
3648 // we're after.
3650 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3651 return DAG.getConstant(V == V1, DL, XLenVT);
3652 });
3653 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3654 DAG.getBuildVector(SelMaskTy, DL, Ops),
3655 DAG.getSplatBuildVector(VT, DL, V), Vec);
3656 }
3657 }
3658
3659 return Vec;
3660 }
3661
3662 return SDValue();
3663}
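// Worked example (illustrative): the v8i32 build_vector
// <3, 3, 3, 3, 3, 3, 9, 3> has 3 as its dominant value, so it becomes a
// splat of 3 followed by a single INSERT_VECTOR_ELT of 9 at index 6. If the
// odd value were in the last lane instead, the vslide1down path above would
// be used.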
3664
3666 const RISCVSubtarget &Subtarget) {
3667 MVT VT = Op.getSimpleValueType();
3668 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3669
3670 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3671
3672 SDLoc DL(Op);
3673 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3674
3675 MVT XLenVT = Subtarget.getXLenVT();
3676 unsigned NumElts = Op.getNumOperands();
3677
3678 if (VT.getVectorElementType() == MVT::i1) {
3679 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3680 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3681 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3682 }
3683
3684 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3685 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3686 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3687 }
3688
3689 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3690 // scalar integer chunks whose bit-width depends on the number of mask
3691 // bits and XLEN.
3692 // First, determine the most appropriate scalar integer type to use. This
3693 // is at most XLenVT, but may be shrunk to a smaller vector element type
3694 // according to the size of the final vector - use i8 chunks rather than
3695 // XLenVT if we're producing a v8i1. This results in more consistent
3696 // codegen across RV32 and RV64.
3697 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3698 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3699 // If we have to use more than one INSERT_VECTOR_ELT then this
3700 // optimization is likely to increase code size; avoid performing it in
3701 // such a case. We can use a load from a constant pool in this case.
3702 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3703 return SDValue();
3704 // Now we can create our integer vector type. Note that it may be larger
3705 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3706 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3707 MVT IntegerViaVecVT =
3708 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3709 IntegerViaVecElts);
3710
3711 uint64_t Bits = 0;
3712 unsigned BitPos = 0, IntegerEltIdx = 0;
3713 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3714
3715 for (unsigned I = 0; I < NumElts;) {
3716 SDValue V = Op.getOperand(I);
3717 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3718 Bits |= ((uint64_t)BitValue << BitPos);
3719 ++BitPos;
3720 ++I;
3721
3722 // Once we accumulate enough bits to fill our scalar type or process the
3723 // last element, insert into our vector and clear our accumulated data.
3724 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3725 if (NumViaIntegerBits <= 32)
3726 Bits = SignExtend64<32>(Bits);
3727 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3728 Elts[IntegerEltIdx] = Elt;
3729 Bits = 0;
3730 BitPos = 0;
3731 IntegerEltIdx++;
3732 }
3733 }
3734
3735 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3736
3737 if (NumElts < NumViaIntegerBits) {
3738 // If we're producing a smaller vector than our minimum legal integer
3739 // type, bitcast to the equivalent (known-legal) mask type, and extract
3740 // our final mask.
3741 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3742 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3743 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3744 DAG.getConstant(0, DL, XLenVT));
3745 } else {
3746 // Else we must have produced an integer type with the same size as the
3747 // mask type; bitcast for the final result.
3748 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3749 Vec = DAG.getBitcast(VT, Vec);
3750 }
3751
3752 return Vec;
3753 }
3754
3755 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3756 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3758 if (!VT.isFloatingPoint())
3759 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3760 Splat =
3761 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3762 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3763 }
3764
3765 // Try and match index sequences, which we can lower to the vid instruction
3766 // with optional modifications. An all-undef vector is matched by
3767 // getSplatValue, above.
3768 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3769 int64_t StepNumerator = SimpleVID->StepNumerator;
3770 unsigned StepDenominator = SimpleVID->StepDenominator;
3771 int64_t Addend = SimpleVID->Addend;
3772
3773 assert(StepNumerator != 0 && "Invalid step");
3774 bool Negate = false;
3775 int64_t SplatStepVal = StepNumerator;
3776 unsigned StepOpcode = ISD::MUL;
3777 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3778 // anyway as the shift of 63 won't fit in uimm5.
3779 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3780 isPowerOf2_64(std::abs(StepNumerator))) {
3781 Negate = StepNumerator < 0;
3782 StepOpcode = ISD::SHL;
3783 SplatStepVal = Log2_64(std::abs(StepNumerator));
3784 }
3785
3786    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3787 // threshold since it's the immediate value many RVV instructions accept.
3788 // There is no vmul.vi instruction so ensure multiply constant can fit in
3789 // a single addi instruction.
3790 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3791 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3792 isPowerOf2_32(StepDenominator) &&
3793 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3794      MVT VIDVT =
3795          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3796      MVT VIDContainerVT =
3797 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3798 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3799 // Convert right out of the scalable type so we can use standard ISD
3800 // nodes for the rest of the computation. If we used scalable types with
3801 // these, we'd lose the fixed-length vector info and generate worse
3802 // vsetvli code.
3803 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3804 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3805 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3806 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3807 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3808 }
3809 if (StepDenominator != 1) {
3810 SDValue SplatStep =
3811 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3812 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3813 }
3814 if (Addend != 0 || Negate) {
3815 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3816 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3817 VID);
3818 }
3819 if (VT.isFloatingPoint()) {
3820 // TODO: Use vfwcvt to reduce register pressure.
3821 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3822 }
3823 return VID;
3824 }
3825 }
3826
3827 // For very small build_vectors, use a single scalar insert of a constant.
3828 // TODO: Base this on constant rematerialization cost, not size.
3829 const unsigned EltBitSize = VT.getScalarSizeInBits();
3830  if (VT.getSizeInBits() <= 32 &&
3831      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3832    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3833 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3834 "Unexpected sequence type");
3835 // If we can use the original VL with the modified element type, this
3836 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3837 // be moved into InsertVSETVLI?
3838 unsigned ViaVecLen =
3839 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3840 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3841
3842 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3843 uint64_t SplatValue = 0;
3844 // Construct the amalgamated value at this larger vector type.
3845 for (const auto &OpIdx : enumerate(Op->op_values())) {
3846 const auto &SeqV = OpIdx.value();
3847 if (!SeqV.isUndef())
3848 SplatValue |=
3849 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3850 }
3851
3852 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3853    // achieve better constant materialization.
3854 // On RV32, we need to sign-extend to use getSignedConstant.
3855 if (ViaIntVT == MVT::i32)
3856 SplatValue = SignExtend64<32>(SplatValue);
3857
3858 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3859 DAG.getUNDEF(ViaVecVT),
3860 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3861 DAG.getVectorIdxConstant(0, DL));
3862    if (ViaVecLen != 1)
3863      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3864                        MVT::getVectorVT(ViaIntVT, 1), Vec,
3865 DAG.getConstant(0, DL, XLenVT));
3866 return DAG.getBitcast(VT, Vec);
3867 }
3868
3869
3870 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3871 // when re-interpreted as a vector with a larger element type. For example,
3872 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3873// could instead be splat as
3874 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3875 // TODO: This optimization could also work on non-constant splats, but it
3876 // would require bit-manipulation instructions to construct the splat value.
3877 SmallVector<SDValue> Sequence;
3878 const auto *BV = cast<BuildVectorSDNode>(Op);
3879  if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3880      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3881      BV->getRepeatedSequence(Sequence) &&
3882 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3883 unsigned SeqLen = Sequence.size();
3884 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3885 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3886 ViaIntVT == MVT::i64) &&
3887 "Unexpected sequence type");
3888
3889 // If we can use the original VL with the modified element type, this
3890 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3891 // be moved into InsertVSETVLI?
3892 const unsigned RequiredVL = NumElts / SeqLen;
3893 const unsigned ViaVecLen =
3894 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3895 NumElts : RequiredVL;
3896 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3897
3898 unsigned EltIdx = 0;
3899 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3900 uint64_t SplatValue = 0;
3901 // Construct the amalgamated value which can be splatted as this larger
3902 // vector type.
3903 for (const auto &SeqV : Sequence) {
3904 if (!SeqV.isUndef())
3905 SplatValue |=
3906 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3907 EltIdx++;
3908 }
3909
3910 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3911    // achieve better constant materialization.
3912 // On RV32, we need to sign-extend to use getSignedConstant.
3913 if (ViaIntVT == MVT::i32)
3914 SplatValue = SignExtend64<32>(SplatValue);
3915
3916 // Since we can't introduce illegal i64 types at this stage, we can only
3917 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3918 // way we can use RVV instructions to splat.
3919 assert((ViaIntVT.bitsLE(XLenVT) ||
3920 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3921 "Unexpected bitcast sequence");
3922 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3923 SDValue ViaVL =
3924 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3925 MVT ViaContainerVT =
3926 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3927 SDValue Splat =
3928 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3929 DAG.getUNDEF(ViaContainerVT),
3930 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3931 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3932      if (ViaVecLen != RequiredVL)
3933        Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3934                            MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3935 DAG.getConstant(0, DL, XLenVT));
3936 return DAG.getBitcast(VT, Splat);
3937 }
3938 }
3939
3940 // If the number of signbits allows, see if we can lower as a <N x i8>.
3941 // Our main goal here is to reduce LMUL (and thus work) required to
3942 // build the constant, but we will also narrow if the resulting
3943 // narrow vector is known to materialize cheaply.
3944 // TODO: We really should be costing the smaller vector. There are
3945 // profitable cases this misses.
3946 if (EltBitSize > 8 && VT.isInteger() &&
3947 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3948 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3949 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3950 DL, Op->ops());
3951 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3952 Source, DAG, Subtarget);
3953 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3954 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3955 }
3956
3957 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3958 return Res;
3959
3960 // For constant vectors, use generic constant pool lowering. Otherwise,
3961 // we'd have to materialize constants in GPRs just to move them into the
3962 // vector.
3963 return SDValue();
3964}
3965
3966static unsigned getPACKOpcode(unsigned DestBW,
3967 const RISCVSubtarget &Subtarget) {
3968 switch (DestBW) {
3969 default:
3970 llvm_unreachable("Unsupported pack size");
3971 case 16:
3972 return RISCV::PACKH;
3973 case 32:
3974 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3975 case 64:
3976 assert(Subtarget.is64Bit());
3977 return RISCV::PACK;
3978 }
3979}
3980
3981/// Double the element size of the build vector to reduce the number
3982/// of vslide1down in the build vector chain. In the worst case, this
3983/// trades three scalar operations for 1 vector operation. Scalar
3984/// operations are generally lower latency, and for out-of-order cores
3985/// we also benefit from additional parallelism.
3986static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3987                                          const RISCVSubtarget &Subtarget) {
3988 SDLoc DL(Op);
3989 MVT VT = Op.getSimpleValueType();
3990 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3991 MVT ElemVT = VT.getVectorElementType();
3992 if (!ElemVT.isInteger())
3993 return SDValue();
3994
3995 // TODO: Relax these architectural restrictions, possibly with costing
3996 // of the actual instructions required.
3997 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3998 return SDValue();
3999
4000 unsigned NumElts = VT.getVectorNumElements();
4001 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4002 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4003 NumElts % 2 != 0)
4004 return SDValue();
4005
4006 // Produce [B,A] packed into a type twice as wide. Note that all
4007 // scalars are XLenVT, possibly masked (see below).
4008 MVT XLenVT = Subtarget.getXLenVT();
4009 SDValue Mask = DAG.getConstant(
4010 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4011 auto pack = [&](SDValue A, SDValue B) {
4012 // Bias the scheduling of the inserted operations to near the
4013 // definition of the element - this tends to reduce register
4014 // pressure overall.
4015 SDLoc ElemDL(B);
4016 if (Subtarget.hasStdExtZbkb())
4017 // Note that we're relying on the high bits of the result being
4018 // don't care. For PACKW, the result is *sign* extended.
4019 return SDValue(
4020 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4021 ElemDL, XLenVT, A, B),
4022 0);
4023
4024 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4025 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4026 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4027 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4028                       DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4029                       SDNodeFlags::Disjoint);
4030  };
4031
4032 SmallVector<SDValue> NewOperands;
4033 NewOperands.reserve(NumElts / 2);
4034 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4035 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4036 assert(NumElts == NewOperands.size() * 2);
4037 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4038 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4039 return DAG.getNode(ISD::BITCAST, DL, VT,
4040 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4041}
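// As an illustration, lowerBuildVectorViaPacking packs an i8 build_vector
// {a, b, c, d} pairwise into the i16 values {(b << 8) | a, (d << 8) | c},
// rebuilds them as a v2i16 build_vector, and bitcasts back to v4i8, halving
// the vslide1down chain.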
4042
4043static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4044                                 const RISCVSubtarget &Subtarget) {
4045 MVT VT = Op.getSimpleValueType();
4046 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4047
4048 MVT EltVT = VT.getVectorElementType();
4049 MVT XLenVT = Subtarget.getXLenVT();
4050
4051 SDLoc DL(Op);
4052
4053 // Proper support for f16 requires Zvfh. bf16 always requires special
4054 // handling. We need to cast the scalar to integer and create an integer
4055 // build_vector.
4056 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4057    MVT IVT = VT.changeVectorElementType(MVT::i16);
4058    SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4059    for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4060 SDValue Elem = Op.getOperand(I);
4061 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4062 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4063 // Called by LegalizeDAG, we need to use XLenVT operations since we
4064 // can't create illegal types.
4065 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4066 // Manually constant fold so the integer build_vector can be lowered
4067 // better. Waiting for DAGCombine will be too late.
4068 APInt V =
4069 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4070 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4071 } else {
4072 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4073 }
4074 } else {
4075 // Called by scalar type legalizer, we can use i16.
4076 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4077 }
4078 }
4079 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4080 return DAG.getBitcast(VT, Res);
4081 }
4082
4083  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4084      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4085    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4086
4087 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4088
4089 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4090
4091 if (VT.getVectorElementType() == MVT::i1) {
4092 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4093 // vector type, we have a legal equivalently-sized i8 type, so we can use
4094 // that.
4095 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4096 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4097
4098 SDValue WideVec;
4099 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4100 // For a splat, perform a scalar truncate before creating the wider
4101 // vector.
4102 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4103 DAG.getConstant(1, DL, Splat.getValueType()));
4104 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4105 } else {
4106 SmallVector<SDValue, 8> Ops(Op->op_values());
4107 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4108 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4109 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4110 }
4111
4112 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4113 }
4114
4115 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4116 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4117 return Gather;
4118    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4119                                        : RISCVISD::VMV_V_X_VL;
4120    if (!VT.isFloatingPoint())
4121 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4122 Splat =
4123 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4124 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4125 }
4126
4127 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4128 return Res;
4129
4130 // If we're compiling for an exact VLEN value, we can split our work per
4131 // register in the register group.
4132 if (const auto VLen = Subtarget.getRealVLen();
4133 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4134 MVT ElemVT = VT.getVectorElementType();
4135 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4136 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4137 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4138 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4139 assert(M1VT == getLMUL1VT(M1VT));
4140
4141 // The following semantically builds up a fixed length concat_vector
4142 // of the component build_vectors. We eagerly lower to scalable and
4143 // insert_subvector here to avoid DAG combining it back to a large
4144 // build_vector.
4145 SmallVector<SDValue> BuildVectorOps(Op->ops());
4146 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4147 SDValue Vec = DAG.getUNDEF(ContainerVT);
4148 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4149 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4150 SDValue SubBV =
4151 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4152 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4153 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4154 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4155 DAG.getVectorIdxConstant(InsertIdx, DL));
4156 }
4157 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4158 }
4159
4160 // If we're about to resort to vslide1down (or stack usage), pack our
4161 // elements into the widest scalar type we can. This will force a VL/VTYPE
4162 // toggle, but reduces the critical path, the number of vslide1down ops
4163 // required, and possibly enables scalar folds of the values.
4164 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4165 return Res;
4166
4167 // For m1 vectors, if we have non-undef values in both halves of our vector,
4168 // split the vector into low and high halves, build them separately, then
4169 // use a vselect to combine them. For long vectors, this cuts the critical
4170 // path of the vslide1down sequence in half, and gives us an opportunity
4171 // to special case each half independently. Note that we don't change the
4172  // length of the sub-vectors here, so if both fall back to the generic
4173 // vslide1down path, we should be able to fold the vselect into the final
4174 // vslidedown (for the undef tail) for the first half w/ masking.
4175 unsigned NumElts = VT.getVectorNumElements();
4176 unsigned NumUndefElts =
4177 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4178 unsigned NumDefElts = NumElts - NumUndefElts;
4179 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4180 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4181 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4182 SmallVector<SDValue> MaskVals;
4183 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4184 SubVecAOps.reserve(NumElts);
4185 SubVecBOps.reserve(NumElts);
4186 for (unsigned i = 0; i < NumElts; i++) {
4187 SDValue Elem = Op->getOperand(i);
4188 if (i < NumElts / 2) {
4189 SubVecAOps.push_back(Elem);
4190 SubVecBOps.push_back(UndefElem);
4191 } else {
4192 SubVecAOps.push_back(UndefElem);
4193 SubVecBOps.push_back(Elem);
4194 }
4195 bool SelectMaskVal = (i < NumElts / 2);
4196 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4197 }
4198 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4199 MaskVals.size() == NumElts);
4200
4201 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4202 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4203 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4204 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4205 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4206 }
4207
4208 // Cap the cost at a value linear to the number of elements in the vector.
4209 // The default lowering is to use the stack. The vector store + scalar loads
4210 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4211 // being (at least) linear in LMUL. As a result, using the vslidedown
4212  // lowering for every element ends up being VL*LMUL.
4213 // TODO: Should we be directly costing the stack alternative? Doing so might
4214 // give us a more accurate upper bound.
4215 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4216
4217 // TODO: unify with TTI getSlideCost.
4218 InstructionCost PerSlideCost = 1;
4219 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4220  default: break;
4221  case RISCVII::VLMUL::LMUL_2:
4222    PerSlideCost = 2;
4223    break;
4224  case RISCVII::VLMUL::LMUL_4:
4225    PerSlideCost = 4;
4226    break;
4227  case RISCVII::VLMUL::LMUL_8:
4228    PerSlideCost = 8;
4229    break;
4230 }
4231
4232 // TODO: Should we be using the build instseq then cost + evaluate scheme
4233 // we use for integer constants here?
4234 unsigned UndefCount = 0;
4235 for (const SDValue &V : Op->ops()) {
4236 if (V.isUndef()) {
4237 UndefCount++;
4238 continue;
4239 }
4240 if (UndefCount) {
4241 LinearBudget -= PerSlideCost;
4242 UndefCount = 0;
4243 }
4244 LinearBudget -= PerSlideCost;
4245 }
4246 if (UndefCount) {
4247 LinearBudget -= PerSlideCost;
4248 }
4249
4250 if (LinearBudget < 0)
4251 return SDValue();
4252
4253 assert((!VT.isFloatingPoint() ||
4254 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4255 "Illegal type which will result in reserved encoding");
4256
4257 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4258
4259 SDValue Vec;
4260 UndefCount = 0;
4261 for (SDValue V : Op->ops()) {
4262 if (V.isUndef()) {
4263 UndefCount++;
4264 continue;
4265 }
4266
4267 // Start our sequence with a TA splat in the hopes that hardware is able to
4268 // recognize there's no dependency on the prior value of our temporary
4269 // register.
4270 if (!Vec) {
4271 Vec = DAG.getSplatVector(VT, DL, V);
4272 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4273 UndefCount = 0;
4274 continue;
4275 }
4276
4277 if (UndefCount) {
4278 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4279 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4280 Vec, Offset, Mask, VL, Policy);
4281 UndefCount = 0;
4282 }
4283    auto OpCode =
4284        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4285    if (!VT.isFloatingPoint())
4286 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4287 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4288 V, Mask, VL);
4289 }
4290 if (UndefCount) {
4291 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4292 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4293 Vec, Offset, Mask, VL, Policy);
4294 }
4295 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4296}
4297
4298static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4299                                   SDValue Lo, SDValue Hi, SDValue VL,
4300                                   SelectionDAG &DAG) {
4301 if (!Passthru)
4302 Passthru = DAG.getUNDEF(VT);
4303 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4304 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4305 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4306 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4307 // node in order to try and match RVV vector/scalar instructions.
4308 if ((LoC >> 31) == HiC)
4309 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4310
4311 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4312 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4313 // vlmax vsetvli or vsetivli to change the VL.
4314 // FIXME: Support larger constants?
4315 // FIXME: Support non-constant VLs by saturating?
4316 if (LoC == HiC) {
4317 SDValue NewVL;
4318 if (isAllOnesConstant(VL) ||
4319 (isa<RegisterSDNode>(VL) &&
4320 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4321 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4322 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4323 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4324
4325 if (NewVL) {
4326 MVT InterVT =
4327 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4328 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4329 DAG.getUNDEF(InterVT), Lo, NewVL);
4330 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4331 }
4332 }
4333 }
4334
4335 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4336 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4337 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4338 Hi.getConstantOperandVal(1) == 31)
4339 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4340
4341 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4342 // even if it might be sign extended.
4343 if (Hi.isUndef())
4344 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4345
4346 // Fall back to a stack store and stride x0 vector load.
4347 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4348 Hi, VL);
4349}
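// For example, splatPartsI64WithVL splats the i64 constant 0xFFFFFFFF80000000
// on RV32 as Lo = 0x80000000 and Hi = 0xFFFFFFFF; since (LoC >> 31) == HiC, a
// single vmv.v.x of Lo suffices because the SEW=64 splat sign-extends the XLEN
// scalar.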
4350
4351// Called by type legalization to handle splat of i64 on RV32.
4352// FIXME: We can optimize this when the type has sign or zero bits in one
4353// of the halves.
4354static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4355 SDValue Scalar, SDValue VL,
4356 SelectionDAG &DAG) {
4357 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4358 SDValue Lo, Hi;
4359 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4360 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4361}
4362
4363// This function lowers a splat of a scalar operand Scalar with the vector
4364// length VL. It ensures the final sequence is type legal, which is useful when
4365// lowering a splat after type legalization.
4366static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4367 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4368 const RISCVSubtarget &Subtarget) {
4369 bool HasPassthru = Passthru && !Passthru.isUndef();
4370 if (!HasPassthru && !Passthru)
4371 Passthru = DAG.getUNDEF(VT);
4372
4373 MVT EltVT = VT.getVectorElementType();
4374 MVT XLenVT = Subtarget.getXLenVT();
4375
4376 if (VT.isFloatingPoint()) {
4377 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4378 EltVT == MVT::bf16) {
4379 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4380 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4381 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4382 else
4383 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4384 MVT IVT = VT.changeVectorElementType(MVT::i16);
4385 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4386 SDValue Splat =
4387 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4388 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4389 }
4390 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4391 }
4392
4393 // Simplest case is that the operand needs to be promoted to XLenVT.
4394 if (Scalar.getValueType().bitsLE(XLenVT)) {
4395 // If the operand is a constant, sign extend to increase our chances
4396 // of being able to use a .vi instruction. ANY_EXTEND would become a
4397    // zero extend and the simm5 check in isel would fail.
4398 // FIXME: Should we ignore the upper bits in isel instead?
4399 unsigned ExtOpc =
4400 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4401 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4402 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4403 }
4404
4405 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4406 "Unexpected scalar for splat lowering!");
4407
4408 if (isOneConstant(VL) && isNullConstant(Scalar))
4409 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4410 DAG.getConstant(0, DL, XLenVT), VL);
4411
4412 // Otherwise use the more complicated splatting algorithm.
4413 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4414}
4415
4416// This function lowers an insert of a scalar operand Scalar into lane
4417// 0 of the vector regardless of the value of VL. The contents of the
4418// remaining lanes of the result vector are unspecified. VL is assumed
4419// to be non-zero.
4420static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4421                                 const SDLoc &DL, SelectionDAG &DAG,
4422 const RISCVSubtarget &Subtarget) {
4423 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4424
4425 const MVT XLenVT = Subtarget.getXLenVT();
4426 SDValue Passthru = DAG.getUNDEF(VT);
4427
4428 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4429 isNullConstant(Scalar.getOperand(1))) {
4430 SDValue ExtractedVal = Scalar.getOperand(0);
4431 // The element types must be the same.
4432 if (ExtractedVal.getValueType().getVectorElementType() ==
4433 VT.getVectorElementType()) {
4434 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4435 MVT ExtractedContainerVT = ExtractedVT;
4436 if (ExtractedContainerVT.isFixedLengthVector()) {
4437 ExtractedContainerVT = getContainerForFixedLengthVector(
4438 DAG, ExtractedContainerVT, Subtarget);
4439 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4440 ExtractedVal, DAG, Subtarget);
4441 }
4442 if (ExtractedContainerVT.bitsLE(VT))
4443 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4444 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4445 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4446 DAG.getVectorIdxConstant(0, DL));
4447 }
4448 }
4449
4450
4451 if (VT.isFloatingPoint())
4452 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4453 DAG.getUNDEF(VT), Scalar, VL);
4454
4455 // Avoid the tricky legalization cases by falling back to using the
4456 // splat code which already handles it gracefully.
4457 if (!Scalar.getValueType().bitsLE(XLenVT))
4458 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4459 DAG.getConstant(1, DL, XLenVT),
4460 VT, DL, DAG, Subtarget);
4461
4462 // If the operand is a constant, sign extend to increase our chances
4463 // of being able to use a .vi instruction. ANY_EXTEND would become a
4464  // zero extend and the simm5 check in isel would fail.
4465 // FIXME: Should we ignore the upper bits in isel instead?
4466 unsigned ExtOpc =
4467 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4468 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4469 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4470 VL);
4471}
4472
4473// Can this shuffle be performed on exactly one (possibly larger) input?
4474static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4475 SDValue V2) {
4476
4477  if (V2.isUndef() &&
4478      RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4479    return V1;
4480
4481  // Both inputs must be extracts.
4482 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4483 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4484 return SDValue();
4485
4486 // Extracting from the same source.
4487 SDValue Src = V1.getOperand(0);
4488 if (Src != V2.getOperand(0))
4489 return SDValue();
4490
4491 // Src needs to have twice the number of elements.
4492 unsigned NumElts = VT.getVectorNumElements();
4493 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4494 return SDValue();
4495
4496 // The extracts must extract the two halves of the source.
4497 if (V1.getConstantOperandVal(1) != 0 ||
4498 V2.getConstantOperandVal(1) != NumElts)
4499 return SDValue();
4500
4501 return Src;
4502}
4503
4504/// Is this shuffle interleaving contiguous elements from one vector into the
4505/// even elements and contiguous elements from another vector into the odd
4506/// elements. \p EvenSrc will contain the element that should be in the first
4507/// even element. \p OddSrc will contain the element that should be in the first
4508/// odd element. These can be the first element in a source or the element half
4509/// way through the source.
4510static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4511 int &OddSrc, const RISCVSubtarget &Subtarget) {
4512 // We need to be able to widen elements to the next larger integer type.
4513 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4514 return false;
4515
4516 int Size = Mask.size();
4517 int NumElts = VT.getVectorNumElements();
4518 assert(Size == (int)NumElts && "Unexpected mask size");
4519
4520 SmallVector<unsigned, 2> StartIndexes;
4521 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4522 return false;
4523
4524 EvenSrc = StartIndexes[0];
4525 OddSrc = StartIndexes[1];
4526
4527 // One source should be low half of first vector.
4528 if (EvenSrc != 0 && OddSrc != 0)
4529 return false;
4530
4531  // Subvectors will be extracted from either the start of the two input
4532  // vectors, or the start and middle of the first vector if it's a unary
4533  // interleave.
4534 // In both cases, HalfNumElts will be extracted.
4535 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4536 // we'll create an illegal extract_subvector.
4537 // FIXME: We could support other values using a slidedown first.
4538 int HalfNumElts = NumElts / 2;
4539 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4540}
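// For example, isInterleaveShuffle matches the v4i8 mask <0, 4, 1, 5>, which
// interleaves the low halves of the two sources: the detected start indexes
// are {0, 4}, so EvenSrc = 0 and OddSrc = 4, both multiples of HalfNumElts = 2.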
4541
4542/// Match shuffles that concatenate two vectors, rotate the concatenation,
4543/// and then extract the original number of elements from the rotated result.
4544/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4545/// returned rotation amount is for a rotate right, where elements move from
4546/// higher elements to lower elements. \p LoSrc indicates the first source
4547/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4548/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4549/// 0 or 1 if a rotation is found.
4550///
4551/// NOTE: We talk about rotate to the right which matches how bit shift and
4552/// rotate instructions are described where LSBs are on the right, but LLVM IR
4553/// and the table below write vectors with the lowest elements on the left.
4554static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4555 int Size = Mask.size();
4556
4557 // We need to detect various ways of spelling a rotation:
4558 // [11, 12, 13, 14, 15, 0, 1, 2]
4559 // [-1, 12, 13, 14, -1, -1, 1, -1]
4560 // [-1, -1, -1, -1, -1, -1, 1, 2]
4561 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4562 // [-1, 4, 5, 6, -1, -1, 9, -1]
4563 // [-1, 4, 5, 6, -1, -1, -1, -1]
4564 int Rotation = 0;
4565 LoSrc = -1;
4566 HiSrc = -1;
4567 for (int i = 0; i != Size; ++i) {
4568 int M = Mask[i];
4569 if (M < 0)
4570 continue;
4571
4572 // Determine where a rotate vector would have started.
4573 int StartIdx = i - (M % Size);
4574 // The identity rotation isn't interesting, stop.
4575 if (StartIdx == 0)
4576 return -1;
4577
4578    // If we found the tail of a vector, the rotation must be the missing
4579    // front. If we found the head of a vector, the rotation must be how much
4580    // of the head fits before the end.
4581 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4582
4583 if (Rotation == 0)
4584 Rotation = CandidateRotation;
4585 else if (Rotation != CandidateRotation)
4586 // The rotations don't match, so we can't match this mask.
4587 return -1;
4588
4589 // Compute which value this mask is pointing at.
4590 int MaskSrc = M < Size ? 0 : 1;
4591
4592 // Compute which of the two target values this index should be assigned to.
4593    // This reflects whether the high elements are remaining or the low elements
4594 // are remaining.
4595 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4596
4597 // Either set up this value if we've not encountered it before, or check
4598 // that it remains consistent.
4599 if (TargetSrc < 0)
4600 TargetSrc = MaskSrc;
4601 else if (TargetSrc != MaskSrc)
4602 // This may be a rotation, but it pulls from the inputs in some
4603 // unsupported interleaving.
4604 return -1;
4605 }
4606
4607 // Check that we successfully analyzed the mask, and normalize the results.
4608 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4609 assert((LoSrc >= 0 || HiSrc >= 0) &&
4610 "Failed to find a rotated input vector!");
4611
4612 return Rotation;
4613}
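// For example, isElementRotate recognizes both [11, 12, 13, 14, 15, 0, 1, 2]
// and [3, 4, 5, 6, 7, 8, 9, 10] above as rotations by 3; the first reports
// LoSrc = 0 and HiSrc = 1, while the second reports LoSrc = 1 and HiSrc = 0.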
4614
4615// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4616// 2, 4, 8 and the integer type Factor-times larger than VT's
4617// element type must be a legal element type.
4618// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4619// -> [p, q, r, s] (Factor=2, Index=1)
4620static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4621                                            SDValue Src, unsigned Factor,
4622 unsigned Index, SelectionDAG &DAG) {
4623 unsigned EltBits = VT.getScalarSizeInBits();
4624 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4625 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4626 SrcEC.divideCoefficientBy(Factor));
4627 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4628 SrcEC.divideCoefficientBy(Factor));
4629 Src = DAG.getBitcast(WideSrcVT, Src);
4630
4631 unsigned Shift = Index * EltBits;
4632 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4633 DAG.getConstant(Shift, DL, WideSrcVT));
4634  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4635  MVT IntVT = VT.changeVectorElementTypeToInteger();
4636  Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4637 DAG.getVectorIdxConstant(0, DL));
4638 return DAG.getBitcast(VT, Res);
4639}
4640
4641// Lower the following shuffle to vslidedown.
4642// a)
4643// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4644// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4645// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4646// b)
4647// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4648// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4649// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4650// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4651// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4652// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4653static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4654                                               SDValue V1, SDValue V2,
4655 ArrayRef<int> Mask,
4656 const RISCVSubtarget &Subtarget,
4657 SelectionDAG &DAG) {
4658 auto findNonEXTRACT_SUBVECTORParent =
4659 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4660 uint64_t Offset = 0;
4661 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4662 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4663 // a scalable vector. But we don't want to match the case.
4664 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4665 Offset += Parent.getConstantOperandVal(1);
4666 Parent = Parent.getOperand(0);
4667 }
4668 return std::make_pair(Parent, Offset);
4669 };
4670
4671 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4672 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4673
4674 // Extracting from the same source.
4675 SDValue Src = V1Src;
4676 if (Src != V2Src)
4677 return SDValue();
4678
4679 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4680 SmallVector<int, 16> NewMask(Mask);
4681 for (size_t i = 0; i != NewMask.size(); ++i) {
4682 if (NewMask[i] == -1)
4683 continue;
4684
4685 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4686 NewMask[i] = NewMask[i] + V1IndexOffset;
4687 } else {
4688 // Minus NewMask.size() is needed. Otherwise, the b case would be
4689 // <5,6,7,12> instead of <5,6,7,8>.
4690 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4691 }
4692 }
4693
4694 // First index must be known and non-zero. It will be used as the slidedown
4695 // amount.
4696 if (NewMask[0] <= 0)
4697 return SDValue();
4698
4699 // NewMask is also continuous.
4700 for (unsigned i = 1; i != NewMask.size(); ++i)
4701 if (NewMask[i - 1] + 1 != NewMask[i])
4702 return SDValue();
4703
4704 MVT XLenVT = Subtarget.getXLenVT();
4705 MVT SrcVT = Src.getSimpleValueType();
4706 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4707 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4708 SDValue Slidedown =
4709 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4710 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4711 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4712  return DAG.getNode(
4713      ISD::EXTRACT_SUBVECTOR, DL, VT,
4714      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4715 DAG.getConstant(0, DL, XLenVT));
4716}
4717
4718// Because vslideup leaves the destination elements at the start intact, we can
4719// use it to perform shuffles that insert subvectors:
4720//
4721// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4722// ->
4723// vsetvli zero, 8, e8, mf2, ta, ma
4724// vslideup.vi v8, v9, 4
4725//
4726// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4727// ->
4728// vsetvli zero, 5, e8, mf2, tu, ma
4729// vslideup.vi v8, v9, 2
4730static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4731                                             SDValue V1, SDValue V2,
4732 ArrayRef<int> Mask,
4733 const RISCVSubtarget &Subtarget,
4734 SelectionDAG &DAG) {
4735 unsigned NumElts = VT.getVectorNumElements();
4736 int NumSubElts, Index;
4737 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4738 Index))
4739 return SDValue();
4740
4741 bool OpsSwapped = Mask[Index] < (int)NumElts;
4742 SDValue InPlace = OpsSwapped ? V2 : V1;
4743 SDValue ToInsert = OpsSwapped ? V1 : V2;
4744
4745 MVT XLenVT = Subtarget.getXLenVT();
4746 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4747 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4748 // We slide up by the index that the subvector is being inserted at, and set
4749  // VL to the index + the number of elements being inserted.
4750  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4751  // If we're adding a suffix to the in place vector, i.e. inserting right
4752 // up to the very end of it, then we don't actually care about the tail.
4753 if (NumSubElts + Index >= (int)NumElts)
4754 Policy |= RISCVII::TAIL_AGNOSTIC;
4755
4756 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4757 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4758 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4759
4760 SDValue Res;
4761 // If we're inserting into the lowest elements, use a tail undisturbed
4762 // vmv.v.v.
4763 if (Index == 0)
4764 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4765 VL);
4766 else
4767 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4768 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4769 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4770}
4771
4772/// Match v(f)slide1up/down idioms. These operations involve sliding
4773/// N-1 elements to make room for an inserted scalar at one end.
4774static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4775                                            SDValue V1, SDValue V2,
4776 ArrayRef<int> Mask,
4777 const RISCVSubtarget &Subtarget,
4778 SelectionDAG &DAG) {
4779 bool OpsSwapped = false;
4780 if (!isa<BuildVectorSDNode>(V1)) {
4781 if (!isa<BuildVectorSDNode>(V2))
4782 return SDValue();
4783 std::swap(V1, V2);
4784 OpsSwapped = true;
4785 }
4786 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4787 if (!Splat)
4788 return SDValue();
4789
4790 // Return true if the mask could describe a slide of Mask.size() - 1
4791 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4792 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4793 const unsigned S = (Offset > 0) ? 0 : -Offset;
4794 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4795 for (unsigned i = S; i != E; ++i)
4796 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4797 return false;
4798 return true;
4799 };
4800
4801 const unsigned NumElts = VT.getVectorNumElements();
4802 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4803 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4804 return SDValue();
4805
4806 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4807  // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4808 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4809 return SDValue();
4810
4811 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4812 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4813
4814 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4815 // vslide1{down,up}.vx instead.
4816 if (VT.getVectorElementType() == MVT::bf16 ||
4817 (VT.getVectorElementType() == MVT::f16 &&
4818 !Subtarget.hasVInstructionsF16())) {
4819 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4820 Splat =
4821 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4822 V2 = DAG.getBitcast(
4823 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4824    SDValue Vec = DAG.getNode(
4825        IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4826        IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4827 Vec = DAG.getBitcast(ContainerVT, Vec);
4828 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4829 }
4830
4831  auto OpCode = IsVSlidedown ?
4832    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4833    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4834  if (!VT.isFloatingPoint())
4835 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4836 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4837 DAG.getUNDEF(ContainerVT),
4838 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4839 Splat, TrueMask, VL);
4840 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4841}
4842
4843// Match a mask which "spreads" the leading elements of a vector evenly
4844// across the result. Factor is the spread amount, and Index is the
4845// offset applied. (on success, Index < Factor) This is the inverse
4846// of a deinterleave with the same Factor and Index. This is analogous
4847// to an interleave, except that all but one lane is undef.
4848static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4849 SmallVector<bool> LaneIsUndef(Factor, true);
4850 for (unsigned i = 0; i < Mask.size(); i++)
4851 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4852
4853 bool Found = false;
4854 for (unsigned i = 0; i < Factor; i++) {
4855 if (LaneIsUndef[i])
4856 continue;
4857 if (Found)
4858 return false;
4859 Index = i;
4860 Found = true;
4861 }
4862 if (!Found)
4863 return false;
4864
4865 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4866 unsigned j = i * Factor + Index;
4867 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4868 return false;
4869 }
4870 return true;
4871}
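// For example, isSpreadMask accepts <-1, 0, -1, 1> with Factor = 2 and
// Index = 1: only the odd output lanes are defined, and they carry the leading
// source elements 0 and 1 in order.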
4872
4873// Given a vector a, b, c, d return a vector Factor times longer
4874// with Factor-1 undef's between elements. Ex:
4875// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4876// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4877static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4878 const SDLoc &DL, SelectionDAG &DAG) {
4879
4880 MVT VT = V.getSimpleValueType();
4881  unsigned EltBits = VT.getScalarSizeInBits();
4882  ElementCount EC = VT.getVectorElementCount();
4883  V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4884
4885 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4886
4887 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4888 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4889 // allow the SHL to fold away if Index is 0.
4890 if (Index != 0)
4891 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4892 DAG.getConstant(EltBits * Index, DL, WideVT));
4893  // Make sure to use original element type
4894  MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4895                                  EC.multiplyCoefficientBy(Factor));
4896 return DAG.getBitcast(ResultVT, Result);
4897}
4898
4899// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4900// to create an interleaved vector of <[vscale x] n*2 x ty>.
4901// This requires that the size of ty is less than the subtarget's maximum ELEN.
4902static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4903                                     const SDLoc &DL, SelectionDAG &DAG,
4904 const RISCVSubtarget &Subtarget) {
4905
4906 // FIXME: Not only does this optimize the code, it fixes some correctness
4907 // issues because MIR does not have freeze.
4908 if (EvenV.isUndef())
4909 return getWideningSpread(OddV, 2, 1, DL, DAG);
4910 if (OddV.isUndef())
4911 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4912
4913 MVT VecVT = EvenV.getSimpleValueType();
4914 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4915 // Convert fixed vectors to scalable if needed
4916 if (VecContainerVT.isFixedLengthVector()) {
4917 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4918 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4919 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4920 }
4921
4922 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4923
4924 // We're working with a vector of the same size as the resulting
4925 // interleaved vector, but with half the number of elements and
4926 // twice the SEW (Hence the restriction on not using the maximum
4927 // ELEN)
4928  MVT WideVT =
4929      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4930                       VecVT.getVectorElementCount());
4931 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4932 if (WideContainerVT.isFixedLengthVector())
4933 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4934
4935 // Bitcast the input vectors to integers in case they are FP
4936 VecContainerVT = VecContainerVT.changeTypeToInteger();
4937 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4938 OddV = DAG.getBitcast(VecContainerVT, OddV);
4939
4940 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4941 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4942
4943 SDValue Interleaved;
4944 if (Subtarget.hasStdExtZvbb()) {
4945 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4946 SDValue OffsetVec =
4947 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4948 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4949 OffsetVec, Passthru, Mask, VL);
4950 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4951 Interleaved, EvenV, Passthru, Mask, VL);
4952 } else {
4953 // FIXME: We should freeze the odd vector here. We already handled the case
4954 // of provably undef/poison above.
4955
4956 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4957 // vwaddu.vv
4958 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4959 OddV, Passthru, Mask, VL);
4960
4961    // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. 0xff...ff
4962 SDValue AllOnesVec = DAG.getSplatVector(
4963 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4964 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4965 OddV, AllOnesVec, Passthru, Mask, VL);
4966
4967 // Add the two together so we get
4968 // (OddV * 0xff...ff) + (OddV + EvenV)
4969 // = (OddV * 0x100...00) + EvenV
4970 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4971    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4972 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4973 Interleaved, OddsMul, Passthru, Mask, VL);
4974 }
4975
4976 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4977 MVT ResultContainerVT = MVT::getVectorVT(
4978 VecVT.getVectorElementType(), // Make sure to use original type
4979 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4980 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4981
4982 // Convert back to a fixed vector if needed
4983  MVT ResultVT =
4984      MVT::getVectorVT(VecVT.getVectorElementType(),
4985                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4986  if (ResultVT.isFixedLengthVector())
4987 Interleaved =
4988 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4989
4990 return Interleaved;
4991}
4992
4993// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4994// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4995static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4996                                      SelectionDAG &DAG,
4997 const RISCVSubtarget &Subtarget) {
4998 SDLoc DL(SVN);
4999 MVT VT = SVN->getSimpleValueType(0);
5000 SDValue V = SVN->getOperand(0);
5001 unsigned NumElts = VT.getVectorNumElements();
5002
5003 assert(VT.getVectorElementType() == MVT::i1);
5004
5005  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5006                                        SVN->getMask().size()) ||
5007 !SVN->getOperand(1).isUndef())
5008 return SDValue();
5009
5010 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5011 EVT ViaVT = EVT::getVectorVT(
5012 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5013 EVT ViaBitVT =
5014 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5015
5016 // If we don't have zvbb or the larger element type > ELEN, the operation will
5017  // be illegal.
5018  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5019                                                               ViaVT) ||
5020 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5021 return SDValue();
5022
5023 // If the bit vector doesn't fit exactly into the larger element type, we need
5024 // to insert it into the larger vector and then shift up the reversed bits
5025 // afterwards to get rid of the gap introduced.
5026 if (ViaEltSize > NumElts)
5027 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5028 V, DAG.getVectorIdxConstant(0, DL));
5029
5030 SDValue Res =
5031 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5032
5033 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5034 // element type.
5035 if (ViaEltSize > NumElts)
5036 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5037 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5038
5039 Res = DAG.getBitcast(ViaBitVT, Res);
5040
5041 if (ViaEltSize > NumElts)
5042 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5043 DAG.getVectorIdxConstant(0, DL));
5044 return Res;
5045}
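// For example, lowerBitreverseShuffle reverses a v4i1 mask by widening it to
// v8i1, bitreversing it as a single i8 element (v1i8), and then shifting the
// result right by 4 bits to remove the gap introduced by the widening.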
5046
5047static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5048                             SelectionDAG &DAG,
5049 const RISCVSubtarget &Subtarget,
5050 MVT &RotateVT, unsigned &RotateAmt) {
5051 SDLoc DL(SVN);
5052
5053 EVT VT = SVN->getValueType(0);
5054 unsigned NumElts = VT.getVectorNumElements();
5055 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5056 unsigned NumSubElts;
5057 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5058 NumElts, NumSubElts, RotateAmt))
5059 return false;
5060 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5061 NumElts / NumSubElts);
5062
5063 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5064 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5065}
5066
5067// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5068// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5069// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5070static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5071                                           SelectionDAG &DAG,
5072 const RISCVSubtarget &Subtarget) {
5073 SDLoc DL(SVN);
5074
5075 EVT VT = SVN->getValueType(0);
5076 unsigned RotateAmt;
5077 MVT RotateVT;
5078 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5079 return SDValue();
5080
5081 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5082
5083 SDValue Rotate;
5084 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5085 // so canonicalize to vrev8.
5086 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5087 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5088 else
5089 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5090 DAG.getConstant(RotateAmt, DL, RotateVT));
5091
5092 return DAG.getBitcast(VT, Rotate);
5093}
5094
5095// If compiling with an exactly known VLEN, see if we can split a
5096// shuffle on m2 or larger into a small number of m1 sized shuffles
5097// which write each destination register exactly once.
5098static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5099                                            SelectionDAG &DAG,
5100 const RISCVSubtarget &Subtarget) {
5101 SDLoc DL(SVN);
5102 MVT VT = SVN->getSimpleValueType(0);
5103 SDValue V1 = SVN->getOperand(0);
5104 SDValue V2 = SVN->getOperand(1);
5105 ArrayRef<int> Mask = SVN->getMask();
5106 unsigned NumElts = VT.getVectorNumElements();
5107
5108 // If we don't know exact data layout, not much we can do. If this
5109 // is already m1 or smaller, no point in splitting further.
5110 const auto VLen = Subtarget.getRealVLen();
5111 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5112 return SDValue();
5113
5114 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5115 // expansion for.
5116 unsigned RotateAmt;
5117 MVT RotateVT;
5118 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5119 return SDValue();
5120
5121 MVT ElemVT = VT.getVectorElementType();
5122 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5123 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5124  unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5125  SmallVector<std::pair<int, SmallVector<int>>>
5126      OutMasks(VRegsPerSrc, {-1, {}});
5127
5128 // Check if our mask can be done as a 1-to-1 mapping from source
5129 // to destination registers in the group without needing to
5130 // write each destination more than once.
5131 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5132 int DstVecIdx = DstIdx / ElemsPerVReg;
5133 int DstSubIdx = DstIdx % ElemsPerVReg;
5134 int SrcIdx = Mask[DstIdx];
5135 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5136 continue;
5137 int SrcVecIdx = SrcIdx / ElemsPerVReg;
5138 int SrcSubIdx = SrcIdx % ElemsPerVReg;
5139 if (OutMasks[DstVecIdx].first == -1)
5140 OutMasks[DstVecIdx].first = SrcVecIdx;
5141 if (OutMasks[DstVecIdx].first != SrcVecIdx)
5142 // Note: This case could easily be handled by keeping track of a chain
5143 // of source values and generating two element shuffles below. This is
5144 // less an implementation question, and more a profitability one.
5145 return SDValue();
5146
5147 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5148 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5149 }
5150
5151 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5152 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5153 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5154 assert(M1VT == getLMUL1VT(M1VT));
5155 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5156 SDValue Vec = DAG.getUNDEF(ContainerVT);
5157 // The following semantically builds up a fixed length concat_vector
5158 // of the component shuffle_vectors. We eagerly lower to scalable here
5159 // to avoid DAG combining it back to a large shuffle_vector again.
5160 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5161 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5162 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5163 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5164 if (SrcVecIdx == -1)
5165 continue;
5166 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5167 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5168 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5169 DAG.getVectorIdxConstant(ExtractIdx, DL));
5170 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5171 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5172 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5173 unsigned InsertIdx = DstVecIdx * NumOpElts;
5174 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5175 DAG.getVectorIdxConstant(InsertIdx, DL));
5176 }
5177 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5178}
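// Editor's illustrative sketch (not part of the upstream file): the
// feasibility test above expressed on plain arrays. The helper name and the
// fixed 64-register bound are assumptions made for the example. A shuffle is
// splittable into independent m1 shuffles iff every destination register
// reads from at most one source register.
static bool canSplitShufflePerVReg(const int *Mask, unsigned NumElts,
                                   unsigned ElemsPerVReg) {
  const unsigned NumDstRegs = NumElts / ElemsPerVReg;
  if (NumDstRegs > 64)
    return false; // sketch only: bounded scratch space
  int SrcRegForDst[64];
  for (unsigned I = 0; I < NumDstRegs; ++I)
    SrcRegForDst[I] = -1; // -1 means "no source chosen yet"
  for (unsigned DstIdx = 0; DstIdx < NumElts; ++DstIdx) {
    int SrcIdx = Mask[DstIdx];
    if (SrcIdx < 0)
      continue; // undef lanes place no constraint
    unsigned DstReg = DstIdx / ElemsPerVReg;
    int SrcReg = SrcIdx / (int)ElemsPerVReg;
    if (SrcRegForDst[DstReg] == -1)
      SrcRegForDst[DstReg] = SrcReg;
    else if (SrcRegForDst[DstReg] != SrcReg)
      return false; // this destination would need two source registers
  }
  return true;
}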
5179
5180// Matches a subset of compress masks with a contiguous prefix of output
5181// elements. This could be extended to allow gaps by deciding which
5182// source elements to spuriously demand.
5183 static bool isCompressMask(ArrayRef<int> Mask) {
5184 int Last = -1;
5185 bool SawUndef = false;
5186 for (unsigned i = 0; i < Mask.size(); i++) {
5187 if (Mask[i] == -1) {
5188 SawUndef = true;
5189 continue;
5190 }
5191 if (SawUndef)
5192 return false;
5193 if (i > (unsigned)Mask[i])
5194 return false;
5195 if (Mask[i] <= Last)
5196 return false;
5197 Last = Mask[i];
5198 }
5199 return true;
5200}
5201
5202/// Given a shuffle where the indices are disjoint between the two sources,
5203/// e.g.:
5204///
5205/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5206///
5207/// Merge the two sources into one and do a single source shuffle:
5208///
5209/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5210/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5211///
5212/// A vselect will either be merged into a masked instruction or be lowered as a
5213/// vmerge.vvm, which is cheaper than a vrgather.vv.
5214 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5215 SelectionDAG &DAG,
5216 const RISCVSubtarget &Subtarget) {
5217 MVT VT = SVN->getSimpleValueType(0);
5218 MVT XLenVT = Subtarget.getXLenVT();
5219 SDLoc DL(SVN);
5220
5221 const ArrayRef<int> Mask = SVN->getMask();
5222
5223 // Work out which source each lane will come from.
5224 SmallVector<int, 16> Srcs(Mask.size(), -1);
5225
5226 for (int Idx : Mask) {
5227 if (Idx == -1)
5228 continue;
5229 unsigned SrcIdx = Idx % Mask.size();
5230 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5231 if (Srcs[SrcIdx] == -1)
5232 // Mark this source as using this lane.
5233 Srcs[SrcIdx] = Src;
5234 else if (Srcs[SrcIdx] != Src)
5235 // The other source is using this lane: not disjoint.
5236 return SDValue();
5237 }
5238
5239 SmallVector<SDValue> SelectMaskVals;
5240 for (int Lane : Srcs) {
5241 if (Lane == -1)
5242 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5243 else
5244 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5245 }
5246 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5247 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5248 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5249 SVN->getOperand(0), SVN->getOperand(1));
5250
5251 // Move all indices relative to the first source.
5252 SmallVector<int> NewMask(Mask.size());
5253 for (unsigned I = 0; I < Mask.size(); I++) {
5254 if (Mask[I] == -1)
5255 NewMask[I] = -1;
5256 else
5257 NewMask[I] = Mask[I] % Mask.size();
5258 }
5259
5260 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5261}
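// Editor's illustrative sketch (not part of the upstream file): the
// disjointness test used above, on plain arrays. The helper name and the
// fixed bound are assumptions for the example. Output lane (Idx % NumElts)
// may be claimed by the first source or by the second, but never by both.
static bool shuffleSourcesAreDisjoint(const int *Mask, unsigned NumElts) {
  if (NumElts > 64)
    return false; // sketch only: bounded scratch space
  int LaneSrc[64];
  for (unsigned I = 0; I < NumElts; ++I)
    LaneSrc[I] = -1; // -1 means the lane is unused so far
  for (unsigned I = 0; I < NumElts; ++I) {
    int Idx = Mask[I];
    if (Idx < 0)
      continue;
    unsigned Lane = (unsigned)Idx % NumElts;
    int Src = (unsigned)Idx < NumElts ? 0 : 1;
    if (LaneSrc[Lane] == -1)
      LaneSrc[Lane] = Src;
    else if (LaneSrc[Lane] != Src)
      return false; // both sources want this lane: not disjoint
  }
  return true;
}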
5262
5263 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5264 const RISCVSubtarget &Subtarget) {
5265 SDValue V1 = Op.getOperand(0);
5266 SDValue V2 = Op.getOperand(1);
5267 SDLoc DL(Op);
5268 MVT XLenVT = Subtarget.getXLenVT();
5269 MVT VT = Op.getSimpleValueType();
5270 unsigned NumElts = VT.getVectorNumElements();
5271 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5272
5273 if (VT.getVectorElementType() == MVT::i1) {
5274 // Lower to a vror.vi of a larger element type if possible before we promote
5275 // i1s to i8s.
5276 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5277 return V;
5278 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5279 return V;
5280
5281 // Promote i1 shuffle to i8 shuffle.
5282 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5283 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5284 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5285 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5286 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5287 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5288 ISD::SETNE);
5289 }
5290
5291 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5292
5293 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5294
5295 if (SVN->isSplat()) {
5296 const int Lane = SVN->getSplatIndex();
5297 if (Lane >= 0) {
5298 MVT SVT = VT.getVectorElementType();
5299
5300 // Turn splatted vector load into a strided load with an X0 stride.
5301 SDValue V = V1;
5302 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5303 // with undef.
5304 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5305 int Offset = Lane;
5306 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5307 int OpElements =
5308 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5309 V = V.getOperand(Offset / OpElements);
5310 Offset %= OpElements;
5311 }
5312
5313 // We need to ensure the load isn't atomic or volatile.
5314 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5315 auto *Ld = cast<LoadSDNode>(V);
5316 Offset *= SVT.getStoreSize();
5317 SDValue NewAddr = DAG.getMemBasePlusOffset(
5318 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5319
5320 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5321 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5322 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5323 SDValue IntID =
5324 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5325 SDValue Ops[] = {Ld->getChain(),
5326 IntID,
5327 DAG.getUNDEF(ContainerVT),
5328 NewAddr,
5329 DAG.getRegister(RISCV::X0, XLenVT),
5330 VL};
5331 SDValue NewLoad = DAG.getMemIntrinsicNode(
5332 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5333 DAG.getMachineFunction().getMachineMemOperand(
5334 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5335 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5336 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5337 }
5338
5339 MVT SplatVT = ContainerVT;
5340
5341 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5342 if (SVT == MVT::bf16 ||
5343 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5344 SVT = MVT::i16;
5345 SplatVT = ContainerVT.changeVectorElementType(SVT);
5346 }
5347
5348 // Otherwise use a scalar load and splat. This will give the best
5349 // opportunity to fold a splat into the operation. ISel can turn it into
5350 // the x0 strided load if we aren't able to fold away the select.
5351 if (SVT.isFloatingPoint())
5352 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5353 Ld->getPointerInfo().getWithOffset(Offset),
5354 Ld->getOriginalAlign(),
5355 Ld->getMemOperand()->getFlags());
5356 else
5357 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5358 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5359 Ld->getOriginalAlign(),
5360 Ld->getMemOperand()->getFlags());
5361 DAG.makeEquivalentMemoryOrdering(Ld, V);
5362
5363 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5364 : RISCVISD::VMV_V_X_VL;
5365 SDValue Splat =
5366 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5367 Splat = DAG.getBitcast(ContainerVT, Splat);
5368 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5369 }
5370
5371 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5372 assert(Lane < (int)NumElts && "Unexpected lane!");
5373 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5374 V1, DAG.getConstant(Lane, DL, XLenVT),
5375 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5376 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5377 }
5378 }
5379
5380 // For exact VLEN m2 or greater, try to split to m1 operations if we
5381 // can split cleanly.
5382 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5383 return V;
5384
5385 ArrayRef<int> Mask = SVN->getMask();
5386
5387 if (SDValue V =
5388 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5389 return V;
5390
5391 if (SDValue V =
5392 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5393 return V;
5394
5395 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5396 // available.
5397 if (Subtarget.hasStdExtZvkb())
5398 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5399 return V;
5400
5401 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5402 // be undef which can be handled with a single SLIDEDOWN/UP.
5403 int LoSrc, HiSrc;
5404 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5405 if (Rotation > 0) {
5406 SDValue LoV, HiV;
5407 if (LoSrc >= 0) {
5408 LoV = LoSrc == 0 ? V1 : V2;
5409 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5410 }
5411 if (HiSrc >= 0) {
5412 HiV = HiSrc == 0 ? V1 : V2;
5413 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5414 }
5415
5416 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5417 // to slide LoV up by (NumElts - Rotation).
5418 unsigned InvRotate = NumElts - Rotation;
5419
5420 SDValue Res = DAG.getUNDEF(ContainerVT);
5421 if (HiV) {
5422 // Even though we could use a smaller VL, don't, so that we avoid a
5423 // vsetivli toggle.
5424 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5425 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5426 }
5427 if (LoV)
5428 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5429 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5430 RISCVII::TAIL_AGNOSTIC);
5431
5432 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5433 }
5434
5435 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5436 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5437
5438 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5439 // use shift and truncate to perform the shuffle.
5440 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5441 // shift-and-trunc reducing total cost for everything except an mf8 result.
5442 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5443 // to do the entire operation.
5444 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5445 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5446 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5447 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5448 unsigned Index = 0;
5449 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5450 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5451 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5452 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5453 }
5454 }
5455 }
5456
5457 if (SDValue V =
5458 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5459 return V;
5460
5461 // Detect an interleave shuffle and lower to
5462 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5463 int EvenSrc, OddSrc;
5464 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5465 // Extract the halves of the vectors.
5466 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5467
5468 // Recognize if one half is actually undef; the matching above will
5469 // otherwise reuse the even stream for the undef one. This improves
5470 // spread(2) shuffles.
5471 bool LaneIsUndef[2] = { true, true};
5472 for (unsigned i = 0; i < Mask.size(); i++)
5473 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5474
5475 int Size = Mask.size();
5476 SDValue EvenV, OddV;
5477 if (LaneIsUndef[0]) {
5478 EvenV = DAG.getUNDEF(HalfVT);
5479 } else {
5480 assert(EvenSrc >= 0 && "Undef source?");
5481 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5482 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5483 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5484 }
5485
5486 if (LaneIsUndef[1]) {
5487 OddV = DAG.getUNDEF(HalfVT);
5488 } else {
5489 assert(OddSrc >= 0 && "Undef source?");
5490 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5491 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5492 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5493 }
5494
5495 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5496 }
5497
5498
5499 // Handle any remaining single source shuffles
5500 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5501 if (V2.isUndef()) {
5502 // We might be able to express the shuffle as a bitrotate. But even if we
5503 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5504 // shifts and a vor will have a higher throughput than a vrgather.
5505 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5506 return V;
5507
5508 // Can we generate a vcompress instead of a vrgather? These scale better
5509 // at high LMUL, at the cost of not being able to fold a following select
5510 // into them. The mask constants are also smaller than the index vector
5511 // constants, and thus easier to materialize.
5512 if (isCompressMask(Mask)) {
5513 SmallVector<SDValue> MaskVals(NumElts,
5514 DAG.getConstant(false, DL, XLenVT));
5515 for (auto Idx : Mask) {
5516 if (Idx == -1)
5517 break;
5518 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5519 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5520 }
5521 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5522 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5523 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5524 DAG.getUNDEF(VT));
5525 }
5526
5527 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5528 // is fully covered in interleave(2) above, so it is ignored here.
5529 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5530 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5531 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5532 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5533 unsigned Index;
5534 if (isSpreadMask(Mask, Factor, Index)) {
5535 MVT NarrowVT =
5536 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5537 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5538 DAG.getVectorIdxConstant(0, DL));
5539 return getWideningSpread(Src, Factor, Index, DL, DAG);
5540 }
5541 }
5542 }
5543
5544 if (VT.getScalarSizeInBits() == 8 &&
5545 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5546 // On such a vector we're unable to use i8 as the index type.
5547 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5548 // may involve vector splitting if we're already at LMUL=8, or our
5549 // user-supplied maximum fixed-length LMUL.
5550 return SDValue();
5551 }
5552
5553 // Base case for the two operand recursion below - handle the worst case
5554 // single source shuffle.
5555 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5556 MVT IndexVT = VT.changeTypeToInteger();
5557 // Since we can't introduce illegal index types at this stage, use i16 and
5558 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5559 // than XLenVT.
5560 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5561 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5562 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5563 }
5564
5565 // If the mask allows, we can do all the index computation in 16 bits. This
5566 // requires less work and less register pressure at high LMUL, and creates
5567 // smaller constants which may be cheaper to materialize.
5568 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5569 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5570 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5571 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5572 }
5573
5574 MVT IndexContainerVT =
5575 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5576
5577 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5578 SmallVector<SDValue> GatherIndicesLHS;
5579 for (int MaskIndex : Mask) {
5580 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5581 GatherIndicesLHS.push_back(IsLHSIndex
5582 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5583 : DAG.getUNDEF(XLenVT));
5584 }
5585 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5586 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5587 Subtarget);
5588 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5589 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5590 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5591 }
5592
5593 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5594 // merged with a second vrgather.
5595 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5596
5597 // Now construct the mask that will be used by the blended vrgather operation.
5598 // Construct the appropriate indices into each vector.
5599 for (int MaskIndex : Mask) {
5600 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5601 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5602 ? MaskIndex : -1);
5603 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5604 }
5605
5606 // If the mask indices are disjoint between the two sources, we can lower it
5607 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5608 // operands may end up being lowered to something cheaper than a vrgather.vv.
5609 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5610 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5611 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5612 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5613 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5614 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5615 return V;
5616
5617 // Try to pick a profitable operand order.
5618 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5619 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5620
5621 // Recursively invoke lowering for each operand if we had two
5622 // independent single source shuffles, and then combine the result via a
5623 // vselect. Note that the vselect will likely be folded back into the
5624 // second permute (vrgather, or other) by the post-isel combine.
5625 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5626 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5627
5628 SmallVector<SDValue> MaskVals;
5629 for (int MaskIndex : Mask) {
5630 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5631 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5632 }
5633
5634 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5635 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5636 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5637
5638 if (SwapOps)
5639 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5640 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5641}
5642
5643 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5644 // Support splats for any type. These should type legalize well.
5645 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5646 return true;
5647
5648 // Only support legal VTs for other shuffles for now.
5649 if (!isTypeLegal(VT))
5650 return false;
5651
5652 MVT SVT = VT.getSimpleVT();
5653
5654 // Not for i1 vectors.
5655 if (SVT.getScalarType() == MVT::i1)
5656 return false;
5657
5658 int Dummy1, Dummy2;
5659 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5660 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5661}
5662
5663// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5664// the exponent.
5665SDValue
5666RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5667 SelectionDAG &DAG) const {
5668 MVT VT = Op.getSimpleValueType();
5669 unsigned EltSize = VT.getScalarSizeInBits();
5670 SDValue Src = Op.getOperand(0);
5671 SDLoc DL(Op);
5672 MVT ContainerVT = VT;
5673
5674 SDValue Mask, VL;
5675 if (Op->isVPOpcode()) {
5676 Mask = Op.getOperand(1);
5677 if (VT.isFixedLengthVector())
5678 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5679 Subtarget);
5680 VL = Op.getOperand(2);
5681 }
5682
5683 // We choose an FP type that can represent the value, if possible. Otherwise,
5684 // we use a round-toward-zero conversion so the exponent of the result is correct.
5685 // TODO: Use f16 for i8 when possible?
5686 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5687 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5688 FloatEltVT = MVT::f32;
5689 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5690
5691 // Legal types should have been checked in the RISCVTargetLowering
5692 // constructor.
5693 // TODO: Splitting may make sense in some cases.
5694 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5695 "Expected legal float type!");
5696
5697 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5698 // The trailing zero count is equal to log2 of this single bit value.
5699 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5700 SDValue Neg = DAG.getNegative(Src, DL, VT);
5701 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5702 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5703 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5704 Src, Mask, VL);
5705 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5706 }
5707
5708 // We have a legal FP type, convert to it.
5709 SDValue FloatVal;
5710 if (FloatVT.bitsGT(VT)) {
5711 if (Op->isVPOpcode())
5712 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5713 else
5714 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5715 } else {
5716 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5717 if (VT.isFixedLengthVector()) {
5718 ContainerVT = getContainerForFixedLengthVector(VT);
5719 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5720 }
5721 if (!Op->isVPOpcode())
5722 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5723 SDValue RTZRM =
5724 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5725 MVT ContainerFloatVT =
5726 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5727 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5728 Src, Mask, RTZRM, VL);
5729 if (VT.isFixedLengthVector())
5730 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5731 }
5732 // Bitcast to integer and shift the exponent to the LSB.
5733 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5734 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5735 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5736
5737 SDValue Exp;
5738 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5739 if (Op->isVPOpcode()) {
5740 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5741 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5742 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5743 } else {
5744 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5745 DAG.getConstant(ShiftAmt, DL, IntVT));
5746 if (IntVT.bitsLT(VT))
5747 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5748 else if (IntVT.bitsGT(VT))
5749 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5750 }
5751
5752 // The exponent contains log2 of the value in biased form.
5753 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5754 // For trailing zeros, we just need to subtract the bias.
5755 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5756 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5757 DAG.getConstant(ExponentBias, DL, VT));
5758 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5759 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5760 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5761
5762 // For leading zeros, we need to remove the bias and convert from log2 to
5763 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5764 unsigned Adjust = ExponentBias + (EltSize - 1);
5765 SDValue Res;
5766 if (Op->isVPOpcode())
5767 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5768 Mask, VL);
5769 else
5770 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5771
5772 // With a zero input, the result above equals Adjust, which is greater than
5773 // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5774 if (Op.getOpcode() == ISD::CTLZ)
5775 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5776 else if (Op.getOpcode() == ISD::VP_CTLZ)
5777 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5778 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5779 return Res;
5780}
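// Editor's illustrative sketch (not part of the upstream file): the exponent
// trick above applied to a scalar uint32_t, using the f64 path so the
// conversion is exact and no rounding-mode adjustment is needed. The helper
// names are invented; __builtin_memcpy is the clang/gcc builtin.
static unsigned cttzViaFPExponent(unsigned X) { // precondition: X != 0
  unsigned LowBit = X & (0u - X);               // isolate the lowest set bit
  double D = (double)LowBit;                    // exact for a single-bit value
  unsigned long long Bits;
  __builtin_memcpy(&Bits, &D, sizeof(double));
  unsigned Exp = (unsigned)(Bits >> 52) & 0x7FFu; // biased exponent
  return Exp - 1023;                            // log2 of that single bit
}
static unsigned ctlzViaFPExponent(unsigned X) { // precondition: X != 0
  double D = (double)X;                         // exact for 32-bit inputs
  unsigned long long Bits;
  __builtin_memcpy(&Bits, &D, sizeof(double));
  unsigned Exp = (unsigned)(Bits >> 52) & 0x7FFu;
  return (1023 + 31) - Exp;                     // Adjust = Bias + (EltSize - 1)
}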
5781
5782SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5783 SelectionDAG &DAG) const {
5784 SDLoc DL(Op);
5785 MVT XLenVT = Subtarget.getXLenVT();
5786 SDValue Source = Op->getOperand(0);
5787 MVT SrcVT = Source.getSimpleValueType();
5788 SDValue Mask = Op->getOperand(1);
5789 SDValue EVL = Op->getOperand(2);
5790
5791 if (SrcVT.isFixedLengthVector()) {
5792 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5793 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5794 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5795 Subtarget);
5796 SrcVT = ContainerVT;
5797 }
5798
5799 // Convert to boolean vector.
5800 if (SrcVT.getScalarType() != MVT::i1) {
5801 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5802 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5803 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5804 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5805 DAG.getUNDEF(SrcVT), Mask, EVL});
5806 }
5807
5808 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5809 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5810 // In this case, poison can be interpreted as -1, so there is nothing further to do.
5811 return Res;
5812
5813 // Convert -1 to VL.
5814 SDValue SetCC =
5815 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5816 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5817 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5818}
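// Editor's illustrative sketch (not part of the upstream file): the
// vfirst-style lowering above in scalar form, with an invented helper name.
// "Index of the first set element" is -1 when nothing is set; the
// non-ZERO_UNDEF flavour then clamps that to EVL, exactly like the select on
// (Res < 0) above.
static int cttzElementsScalar(const bool *Elts, int EVL, bool ZeroUndef) {
  int Res = -1;
  for (int I = 0; I < EVL; ++I) {
    if (Elts[I]) {
      Res = I; // what vfirst.m reports for the booleanized source
      break;
    }
  }
  if (ZeroUndef)
    return Res;               // poison-on-zero: -1 may be returned as-is
  return Res < 0 ? EVL : Res; // otherwise map "none found" to EVL
}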
5819
5820// While RVV has alignment restrictions, we should always be able to load as a
5821// legal equivalently-sized byte-typed vector instead. This method is
5822 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5823// the load is already correctly-aligned, it returns SDValue().
5824SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5825 SelectionDAG &DAG) const {
5826 auto *Load = cast<LoadSDNode>(Op);
5827 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5828
5829 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5830 Load->getMemoryVT(),
5831 *Load->getMemOperand()))
5832 return SDValue();
5833
5834 SDLoc DL(Op);
5835 MVT VT = Op.getSimpleValueType();
5836 unsigned EltSizeBits = VT.getScalarSizeInBits();
5837 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5838 "Unexpected unaligned RVV load type");
5839 MVT NewVT =
5840 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5841 assert(NewVT.isValid() &&
5842 "Expecting equally-sized RVV vector types to be legal");
5843 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5844 Load->getPointerInfo(), Load->getOriginalAlign(),
5845 Load->getMemOperand()->getFlags());
5846 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5847}
5848
5849// While RVV has alignment restrictions, we should always be able to store as a
5850// legal equivalently-sized byte-typed vector instead. This method is
5851 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5852// returns SDValue() if the store is already correctly aligned.
5853SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5854 SelectionDAG &DAG) const {
5855 auto *Store = cast<StoreSDNode>(Op);
5856 assert(Store && Store->getValue().getValueType().isVector() &&
5857 "Expected vector store");
5858
5859 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5860 Store->getMemoryVT(),
5861 *Store->getMemOperand()))
5862 return SDValue();
5863
5864 SDLoc DL(Op);
5865 SDValue StoredVal = Store->getValue();
5866 MVT VT = StoredVal.getSimpleValueType();
5867 unsigned EltSizeBits = VT.getScalarSizeInBits();
5868 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5869 "Unexpected unaligned RVV store type");
5870 MVT NewVT =
5871 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5872 assert(NewVT.isValid() &&
5873 "Expecting equally-sized RVV vector types to be legal");
5874 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5875 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5876 Store->getPointerInfo(), Store->getOriginalAlign(),
5877 Store->getMemOperand()->getFlags());
5878}
5879
5880 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5881 const RISCVSubtarget &Subtarget) {
5882 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5883
5884 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5885
5886 // All simm32 constants should be handled by isel.
5887 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5888 // this check redundant, but small immediates are common so this check
5889 // should have better compile time.
5890 if (isInt<32>(Imm))
5891 return Op;
5892
5893 // We only need to cost the immediate, if constant pool lowering is enabled.
5894 if (!Subtarget.useConstantPoolForLargeInts())
5895 return Op;
5896
5897 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5898 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5899 return Op;
5900
5901 // Optimizations below are disabled for opt size. If we're optimizing for
5902 // size, use a constant pool.
5903 if (DAG.shouldOptForSize())
5904 return SDValue();
5905
5906 // Special case: see if we can build the constant as (ADD (SLLI X, C), X); do
5907 // that if it will avoid a constant pool.
5908 // It will require an extra temporary register though.
5909 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5910 // low and high 32 bits are the same and bit 31 and 63 are set.
5911 unsigned ShiftAmt, AddOpc;
5912 RISCVMatInt::InstSeq SeqLo =
5913 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5914 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5915 return Op;
5916
5917 return SDValue();
5918}
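// Editor's illustrative sketch (not part of the upstream file): the identity
// behind the (ADD_UW X, (SLLI X, 32)) special case mentioned above, for
// constants whose low and high 32 bits are identical. Only the low half X
// has to be materialized; (X << 32) plus the zero-extended low 32 bits of X
// rebuilds the full constant. The function name is invented, and the real
// code additionally compares sequence lengths before choosing this form.
static bool equalHalvesRebuildFromLowHalf(long long Imm) {
  unsigned long long U = (unsigned long long)Imm;
  unsigned long long Lo = U & 0xFFFFFFFFull;
  unsigned long long Hi = U >> 32;
  if (Lo != Hi)
    return false; // the trick only targets equal-half constants
  // X as it would sit in a register after materializing the low 32 bits
  // (sign-extended to XLEN=64).
  unsigned long long X = Lo;
  if (Lo & 0x80000000ull)
    X |= 0xFFFFFFFF00000000ull;
  unsigned long long Rebuilt = (X << 32) + (X & 0xFFFFFFFFull);
  return Rebuilt == U; // always true when the halves match
}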
5919
5920SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
5921 SelectionDAG &DAG) const {
5922 MVT VT = Op.getSimpleValueType();
5923 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
5924
5925 // Can this constant be selected by a Zfa FLI instruction?
5926 bool Negate = false;
5927 int Index = getLegalZfaFPImm(Imm, VT);
5928
5929 // If the constant is negative, try negating.
5930 if (Index < 0 && Imm.isNegative()) {
5931 Index = getLegalZfaFPImm(-Imm, VT);
5932 Negate = true;
5933 }
5934
5935 // If we couldn't find a FLI lowering, fall back to generic code.
5936 if (Index < 0)
5937 return SDValue();
5938
5939 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
5940 SDLoc DL(Op);
5941 SDValue Const =
5942 DAG.getNode(RISCVISD::FLI, DL, VT,
5943 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
5944 if (!Negate)
5945 return Const;
5946
5947 return DAG.getNode(ISD::FNEG, DL, VT, Const);
5948}
5949
5950 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5951 const RISCVSubtarget &Subtarget) {
5952 SDLoc dl(Op);
5953 AtomicOrdering FenceOrdering =
5954 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5955 SyncScope::ID FenceSSID =
5956 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5957
5958 if (Subtarget.hasStdExtZtso()) {
5959 // The only fence that needs an instruction is a sequentially-consistent
5960 // cross-thread fence.
5961 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5962 FenceSSID == SyncScope::System)
5963 return Op;
5964
5965 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5966 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5967 }
5968
5969 // singlethread fences only synchronize with signal handlers on the same
5970 // thread and thus only need to preserve instruction order, not actually
5971 // enforce memory ordering.
5972 if (FenceSSID == SyncScope::SingleThread)
5973 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5974 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5975
5976 return Op;
5977}
5978
5979SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5980 SelectionDAG &DAG) const {
5981 SDLoc DL(Op);
5982 MVT VT = Op.getSimpleValueType();
5983 MVT XLenVT = Subtarget.getXLenVT();
5984 unsigned Check = Op.getConstantOperandVal(1);
5985 unsigned TDCMask = 0;
5986 if (Check & fcSNan)
5987 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5988 if (Check & fcQNan)
5989 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5990 if (Check & fcPosInf)
5991 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5992 if (Check & fcNegInf)
5993 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5994 if (Check & fcPosNormal)
5995 TDCMask |= RISCV::FPMASK_Positive_Normal;
5996 if (Check & fcNegNormal)
5997 TDCMask |= RISCV::FPMASK_Negative_Normal;
5998 if (Check & fcPosSubnormal)
5999 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6000 if (Check & fcNegSubnormal)
6001 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6002 if (Check & fcPosZero)
6003 TDCMask |= RISCV::FPMASK_Positive_Zero;
6004 if (Check & fcNegZero)
6005 TDCMask |= RISCV::FPMASK_Negative_Zero;
6006
6007 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6008
6009 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6010
6011 if (VT.isVector()) {
6012 SDValue Op0 = Op.getOperand(0);
6013 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6014
6015 if (VT.isScalableVector()) {
6016 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6017 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6018 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6019 Mask = Op.getOperand(2);
6020 VL = Op.getOperand(3);
6021 }
6022 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6023 VL, Op->getFlags());
6024 if (IsOneBitMask)
6025 return DAG.getSetCC(DL, VT, FPCLASS,
6026 DAG.getConstant(TDCMask, DL, DstVT),
6027 ISD::SETEQ);
6028 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6029 DAG.getConstant(TDCMask, DL, DstVT));
6030 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6031 ISD::SETNE);
6032 }
6033
6034 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6035 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6036 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6037 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6038 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6039 Mask = Op.getOperand(2);
6040 MVT MaskContainerVT =
6041 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6042 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6043 VL = Op.getOperand(3);
6044 }
6045 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6046
6047 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6048 Mask, VL, Op->getFlags());
6049
6050 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6051 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6052 if (IsOneBitMask) {
6053 SDValue VMSEQ =
6054 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6055 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6056 DAG.getUNDEF(ContainerVT), Mask, VL});
6057 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6058 }
6059 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6060 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6061
6062 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6063 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6064 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6065
6066 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6067 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6068 DAG.getUNDEF(ContainerVT), Mask, VL});
6069 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6070 }
6071
6072 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6073 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6074 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6075 ISD::SETNE);
6076 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6077}
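// Editor's illustrative sketch (not part of the upstream file): how the
// scalar tail of the lowering tests the fclass result, with an invented
// helper name. FCLASS sets exactly one of its ten bits, so a single-bit
// class set can be tested with equality (the SETEQ/vmseq form); otherwise a
// masked test against zero (the AND + SETNE form) is used.
static bool fclassMatches(unsigned FClassBits, unsigned TDCMask) {
  bool IsOneBitMask = TDCMask != 0 && (TDCMask & (TDCMask - 1)) == 0;
  if (IsOneBitMask)
    return FClassBits == TDCMask; // equality is enough for one class bit
  return (FClassBits & TDCMask) != 0;
}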
6078
6079// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6080// operations propagate nans.
6081 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6082 const RISCVSubtarget &Subtarget) {
6083 SDLoc DL(Op);
6084 MVT VT = Op.getSimpleValueType();
6085
6086 SDValue X = Op.getOperand(0);
6087 SDValue Y = Op.getOperand(1);
6088
6089 if (!VT.isVector()) {
6090 MVT XLenVT = Subtarget.getXLenVT();
6091
6092 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6093 // ensures that when one input is a nan, the other will also be a nan
6094 // allowing the nan to propagate. If both inputs are nan, this will swap the
6095 // inputs which is harmless.
6096
6097 SDValue NewY = Y;
6098 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6099 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6100 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6101 }
6102
6103 SDValue NewX = X;
6104 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6105 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6106 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6107 }
6108
6109 unsigned Opc =
6110 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6111 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6112 }
6113
6114 // Check for NaNs before converting fixed-length vectors to scalable vectors.
6115 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6116 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6117
6118 MVT ContainerVT = VT;
6119 if (VT.isFixedLengthVector()) {
6120 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6121 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6122 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6123 }
6124
6125 SDValue Mask, VL;
6126 if (Op->isVPOpcode()) {
6127 Mask = Op.getOperand(2);
6128 if (VT.isFixedLengthVector())
6129 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6130 Subtarget);
6131 VL = Op.getOperand(3);
6132 } else {
6133 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6134 }
6135
6136 SDValue NewY = Y;
6137 if (!XIsNeverNan) {
6138 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6139 {X, X, DAG.getCondCode(ISD::SETOEQ),
6140 DAG.getUNDEF(ContainerVT), Mask, VL});
6141 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6142 DAG.getUNDEF(ContainerVT), VL);
6143 }
6144
6145 SDValue NewX = X;
6146 if (!YIsNeverNan) {
6147 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6148 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6149 DAG.getUNDEF(ContainerVT), Mask, VL});
6150 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6151 DAG.getUNDEF(ContainerVT), VL);
6152 }
6153
6154 unsigned Opc =
6155 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6156 ? RISCVISD::VFMAX_VL
6157 : RISCVISD::VFMIN_VL;
6158 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6159 DAG.getUNDEF(ContainerVT), Mask, VL);
6160 if (VT.isFixedLengthVector())
6161 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6162 return Res;
6163}
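// Editor's illustrative sketch (not part of the upstream file): the operand
// swapping trick above on scalar doubles, with an invented helper name. If X
// is NaN it replaces Y, and if Y is NaN it replaces X, so a NaN always
// reaches both inputs of the underlying max and therefore propagates, which
// is what fmaximum requires. The final comparison stands in for fmax.d and
// is only meant to show the NaN handling, not the signed-zero details.
static double fmaximumViaSwap(double X, double Y) {
  double NewY = (X == X) ? Y : X; // X != X detects NaN without <cmath>
  double NewX = (Y == Y) ? X : Y;
  return NewX > NewY ? NewX : NewY;
}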
6164
6165 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6166 const RISCVSubtarget &Subtarget) {
6167 bool IsFABS = Op.getOpcode() == ISD::FABS;
6168 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6169 "Wrong opcode for lowering FABS or FNEG.");
6170
6171 MVT XLenVT = Subtarget.getXLenVT();
6172 MVT VT = Op.getSimpleValueType();
6173 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6174
6175 SDLoc DL(Op);
6176 SDValue Fmv =
6177 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6178
6179 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6180 Mask = Mask.sext(Subtarget.getXLen());
6181
6182 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6183 SDValue Logic =
6184 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6185 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6186}
6187
6188 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6189 const RISCVSubtarget &Subtarget) {
6190 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6191
6192 MVT XLenVT = Subtarget.getXLenVT();
6193 MVT VT = Op.getSimpleValueType();
6194 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6195
6196 SDValue Mag = Op.getOperand(0);
6197 SDValue Sign = Op.getOperand(1);
6198
6199 SDLoc DL(Op);
6200
6201 // Get sign bit into an integer value.
6202 SDValue SignAsInt;
6203 unsigned SignSize = Sign.getValueSizeInBits();
6204 if (SignSize == Subtarget.getXLen()) {
6205 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6206 } else if (SignSize == 16) {
6207 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6208 } else if (SignSize == 32) {
6209 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6210 } else if (SignSize == 64) {
6211 assert(XLenVT == MVT::i32 && "Unexpected type");
6212 // Copy the upper word to integer.
6213 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6214 .getValue(1);
6215 SignSize = 32;
6216 } else
6217 llvm_unreachable("Unexpected sign size");
6218
6219 // Get the signbit at the right position for MagAsInt.
6220 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6221 if (ShiftAmount > 0) {
6222 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6223 DAG.getConstant(ShiftAmount, DL, XLenVT));
6224 } else if (ShiftAmount < 0) {
6225 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6226 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6227 }
6228
6229 // Mask the sign bit and any bits above it. The extra bits will be dropped
6230 // when we convert back to FP.
6231 SDValue SignMask = DAG.getConstant(
6232 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6233 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6234
6235 // Transform Mag value to integer, and clear the sign bit.
6236 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6237 SDValue ClearSignMask = DAG.getConstant(
6238 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6239 SDValue ClearedSign =
6240 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6241
6242 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6243 SDNodeFlags::Disjoint);
6244
6245 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6246}
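// Editor's illustrative sketch (not part of the upstream file): the integer
// bit manipulation above for the 16-bit (f16/bf16) case on raw bit patterns,
// with an invented helper name. The sign bit of Sign is isolated, the sign
// bit of Mag is cleared, and the two pieces are OR'd back together before
// the value is moved back to an FP register.
static unsigned short copysignHalfBits(unsigned short MagBits,
                                       unsigned short SignBits) {
  unsigned SignBit = SignBits & 0x8000u;   // keep only bit 15
  unsigned ClearedMag = MagBits & 0x7FFFu; // drop Mag's own sign bit
  return (unsigned short)(ClearedMag | SignBit);
}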
6247
6248/// Get a RISC-V target specified VL op for a given SDNode.
6249static unsigned getRISCVVLOp(SDValue Op) {
6250#define OP_CASE(NODE) \
6251 case ISD::NODE: \
6252 return RISCVISD::NODE##_VL;
6253#define VP_CASE(NODE) \
6254 case ISD::VP_##NODE: \
6255 return RISCVISD::NODE##_VL;
6256 // clang-format off
6257 switch (Op.getOpcode()) {
6258 default:
6259 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6260 OP_CASE(ADD)
6261 OP_CASE(SUB)
6262 OP_CASE(MUL)
6263 OP_CASE(MULHS)
6264 OP_CASE(MULHU)
6265 OP_CASE(SDIV)
6266 OP_CASE(SREM)
6267 OP_CASE(UDIV)
6268 OP_CASE(UREM)
6269 OP_CASE(SHL)
6270 OP_CASE(SRA)
6271 OP_CASE(SRL)
6272 OP_CASE(ROTL)
6273 OP_CASE(ROTR)
6274 OP_CASE(BSWAP)
6275 OP_CASE(CTTZ)
6276 OP_CASE(CTLZ)
6277 OP_CASE(CTPOP)
6278 OP_CASE(BITREVERSE)
6279 OP_CASE(SADDSAT)
6280 OP_CASE(UADDSAT)
6281 OP_CASE(SSUBSAT)
6282 OP_CASE(USUBSAT)
6283 OP_CASE(AVGFLOORS)
6284 OP_CASE(AVGFLOORU)
6285 OP_CASE(AVGCEILS)
6286 OP_CASE(AVGCEILU)
6287 OP_CASE(FADD)
6288 OP_CASE(FSUB)
6289 OP_CASE(FMUL)
6290 OP_CASE(FDIV)
6291 OP_CASE(FNEG)
6292 OP_CASE(FABS)
6293 OP_CASE(FSQRT)
6294 OP_CASE(SMIN)
6295 OP_CASE(SMAX)
6296 OP_CASE(UMIN)
6297 OP_CASE(UMAX)
6298 OP_CASE(STRICT_FADD)
6299 OP_CASE(STRICT_FSUB)
6300 OP_CASE(STRICT_FMUL)
6301 OP_CASE(STRICT_FDIV)
6302 OP_CASE(STRICT_FSQRT)
6303 VP_CASE(ADD) // VP_ADD
6304 VP_CASE(SUB) // VP_SUB
6305 VP_CASE(MUL) // VP_MUL
6306 VP_CASE(SDIV) // VP_SDIV
6307 VP_CASE(SREM) // VP_SREM
6308 VP_CASE(UDIV) // VP_UDIV
6309 VP_CASE(UREM) // VP_UREM
6310 VP_CASE(SHL) // VP_SHL
6311 VP_CASE(FADD) // VP_FADD
6312 VP_CASE(FSUB) // VP_FSUB
6313 VP_CASE(FMUL) // VP_FMUL
6314 VP_CASE(FDIV) // VP_FDIV
6315 VP_CASE(FNEG) // VP_FNEG
6316 VP_CASE(FABS) // VP_FABS
6317 VP_CASE(SMIN) // VP_SMIN
6318 VP_CASE(SMAX) // VP_SMAX
6319 VP_CASE(UMIN) // VP_UMIN
6320 VP_CASE(UMAX) // VP_UMAX
6321 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6322 VP_CASE(SETCC) // VP_SETCC
6323 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6324 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6325 VP_CASE(BITREVERSE) // VP_BITREVERSE
6326 VP_CASE(SADDSAT) // VP_SADDSAT
6327 VP_CASE(UADDSAT) // VP_UADDSAT
6328 VP_CASE(SSUBSAT) // VP_SSUBSAT
6329 VP_CASE(USUBSAT) // VP_USUBSAT
6330 VP_CASE(BSWAP) // VP_BSWAP
6331 VP_CASE(CTLZ) // VP_CTLZ
6332 VP_CASE(CTTZ) // VP_CTTZ
6333 VP_CASE(CTPOP) // VP_CTPOP
6334 case ISD::CTLZ_ZERO_UNDEF:
6335 case ISD::VP_CTLZ_ZERO_UNDEF:
6336 return RISCVISD::CTLZ_VL;
6337 case ISD::CTTZ_ZERO_UNDEF:
6338 case ISD::VP_CTTZ_ZERO_UNDEF:
6339 return RISCVISD::CTTZ_VL;
6340 case ISD::FMA:
6341 case ISD::VP_FMA:
6342 return RISCVISD::VFMADD_VL;
6343 case ISD::STRICT_FMA:
6344 return RISCVISD::STRICT_VFMADD_VL;
6345 case ISD::AND:
6346 case ISD::VP_AND:
6347 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6348 return RISCVISD::VMAND_VL;
6349 return RISCVISD::AND_VL;
6350 case ISD::OR:
6351 case ISD::VP_OR:
6352 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6353 return RISCVISD::VMOR_VL;
6354 return RISCVISD::OR_VL;
6355 case ISD::XOR:
6356 case ISD::VP_XOR:
6357 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6358 return RISCVISD::VMXOR_VL;
6359 return RISCVISD::XOR_VL;
6360 case ISD::VP_SELECT:
6361 case ISD::VP_MERGE:
6362 return RISCVISD::VMERGE_VL;
6363 case ISD::VP_SRA:
6364 return RISCVISD::SRA_VL;
6365 case ISD::VP_SRL:
6366 return RISCVISD::SRL_VL;
6367 case ISD::VP_SQRT:
6368 return RISCVISD::FSQRT_VL;
6369 case ISD::VP_SIGN_EXTEND:
6370 return RISCVISD::VSEXT_VL;
6371 case ISD::VP_ZERO_EXTEND:
6372 return RISCVISD::VZEXT_VL;
6373 case ISD::VP_FP_TO_SINT:
6374 return RISCVISD::VFCVT_RTZ_X_F_VL;
6375 case ISD::VP_FP_TO_UINT:
6376 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6377 case ISD::FMINNUM:
6378 case ISD::VP_FMINNUM:
6379 return RISCVISD::VFMIN_VL;
6380 case ISD::FMAXNUM:
6381 case ISD::VP_FMAXNUM:
6382 return RISCVISD::VFMAX_VL;
6383 case ISD::LRINT:
6384 case ISD::VP_LRINT:
6385 case ISD::LLRINT:
6386 case ISD::VP_LLRINT:
6387 return RISCVISD::VFCVT_X_F_VL;
6388 }
6389 // clang-format on
6390#undef OP_CASE
6391#undef VP_CASE
6392}
6393
6394/// Return true if a RISC-V target specified op has a passthru operand.
6395static bool hasPassthruOp(unsigned Opcode) {
6396 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6397 Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
6398 "not a RISC-V target specific op");
6399 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6400 127 &&
6401 RISCVISD::LAST_STRICTFP_OPCODE -
6402 RISCVISD::FIRST_STRICTFP_OPCODE ==
6403 21 &&
6404 "adding target specific op should update this function");
6405 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6406 return true;
6407 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6408 return true;
6409 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6410 return true;
6411 if (Opcode == RISCVISD::SETCC_VL)
6412 return true;
6413 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6414 return true;
6415 if (Opcode == RISCVISD::VMERGE_VL)
6416 return true;
6417 return false;
6418}
6419
6420/// Return true if a RISC-V target specified op has a mask operand.
6421static bool hasMaskOp(unsigned Opcode) {
6422 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6423 Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
6424 "not a RISC-V target specific op");
6425 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6426 127 &&
6427 RISCVISD::LAST_STRICTFP_OPCODE -
6428 RISCVISD::FIRST_STRICTFP_OPCODE ==
6429 21 &&
6430 "adding target specific op should update this function");
6431 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6432 return true;
6433 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6434 return true;
6435 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6436 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
6437 return true;
6438 return false;
6439}
6440
6441 static bool isPromotedOpNeedingSplit(SDValue Op,
6442 const RISCVSubtarget &Subtarget) {
6443 if (Op.getValueType() == MVT::nxv32f16 &&
6444 (Subtarget.hasVInstructionsF16Minimal() &&
6445 !Subtarget.hasVInstructionsF16()))
6446 return true;
6447 if (Op.getValueType() == MVT::nxv32bf16)
6448 return true;
6449 return false;
6450}
6451
6452 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6453 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6454 SDLoc DL(Op);
6455
6456 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6457 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6458
6459 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6460 if (!Op.getOperand(j).getValueType().isVector()) {
6461 LoOperands[j] = Op.getOperand(j);
6462 HiOperands[j] = Op.getOperand(j);
6463 continue;
6464 }
6465 std::tie(LoOperands[j], HiOperands[j]) =
6466 DAG.SplitVector(Op.getOperand(j), DL);
6467 }
6468
6469 SDValue LoRes =
6470 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6471 SDValue HiRes =
6472 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6473
6474 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6475}
6476
6477 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6478 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6479 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6480 SDLoc DL(Op);
6481
6482 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6483 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6484
6485 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6486 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6487 std::tie(LoOperands[j], HiOperands[j]) =
6488 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6489 continue;
6490 }
6491 if (!Op.getOperand(j).getValueType().isVector()) {
6492 LoOperands[j] = Op.getOperand(j);
6493 HiOperands[j] = Op.getOperand(j);
6494 continue;
6495 }
6496 std::tie(LoOperands[j], HiOperands[j]) =
6497 DAG.SplitVector(Op.getOperand(j), DL);
6498 }
6499
6500 SDValue LoRes =
6501 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6502 SDValue HiRes =
6503 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6504
6505 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6506}
6507
6508 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6509 SDLoc DL(Op);
6510
6511 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6512 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6513 auto [EVLLo, EVLHi] =
6514 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6515
6516 SDValue ResLo =
6517 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6518 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6519 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6520 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6521}
6522
6523 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6524
6525 assert(Op->isStrictFPOpcode());
6526
6527 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6528
6529 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6530 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6531
6532 SDLoc DL(Op);
6533
6534 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6535 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6536
6537 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6538 if (!Op.getOperand(j).getValueType().isVector()) {
6539 LoOperands[j] = Op.getOperand(j);
6540 HiOperands[j] = Op.getOperand(j);
6541 continue;
6542 }
6543 std::tie(LoOperands[j], HiOperands[j]) =
6544 DAG.SplitVector(Op.getOperand(j), DL);
6545 }
6546
6547 SDValue LoRes =
6548 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6549 HiOperands[0] = LoRes.getValue(1);
6550 SDValue HiRes =
6551 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6552
6553 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6554 LoRes.getValue(0), HiRes.getValue(0));
6555 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6556}
6557
6558 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6559 SelectionDAG &DAG) const {
6560 switch (Op.getOpcode()) {
6561 default:
6562 report_fatal_error("unimplemented operand");
6563 case ISD::ATOMIC_FENCE:
6564 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6565 case ISD::GlobalAddress:
6566 return lowerGlobalAddress(Op, DAG);
6567 case ISD::BlockAddress:
6568 return lowerBlockAddress(Op, DAG);
6569 case ISD::ConstantPool:
6570 return lowerConstantPool(Op, DAG);
6571 case ISD::JumpTable:
6572 return lowerJumpTable(Op, DAG);
6573 case ISD::GlobalTLSAddress:
6574 return lowerGlobalTLSAddress(Op, DAG);
6575 case ISD::Constant:
6576 return lowerConstant(Op, DAG, Subtarget);
6577 case ISD::ConstantFP:
6578 return lowerConstantFP(Op, DAG);
6579 case ISD::SELECT:
6580 return lowerSELECT(Op, DAG);
6581 case ISD::BRCOND:
6582 return lowerBRCOND(Op, DAG);
6583 case ISD::VASTART:
6584 return lowerVASTART(Op, DAG);
6585 case ISD::FRAMEADDR:
6586 return lowerFRAMEADDR(Op, DAG);
6587 case ISD::RETURNADDR:
6588 return lowerRETURNADDR(Op, DAG);
6589 case ISD::SHL_PARTS:
6590 return lowerShiftLeftParts(Op, DAG);
6591 case ISD::SRA_PARTS:
6592 return lowerShiftRightParts(Op, DAG, true);
6593 case ISD::SRL_PARTS:
6594 return lowerShiftRightParts(Op, DAG, false);
6595 case ISD::ROTL:
6596 case ISD::ROTR:
6597 if (Op.getValueType().isFixedLengthVector()) {
6598 assert(Subtarget.hasStdExtZvkb());
6599 return lowerToScalableOp(Op, DAG);
6600 }
6601 assert(Subtarget.hasVendorXTHeadBb() &&
6602 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6603 "Unexpected custom legalization");
6604 // XTHeadBb only supports rotate by constant.
6605 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6606 return SDValue();
6607 return Op;
6608 case ISD::BITCAST: {
6609 SDLoc DL(Op);
6610 EVT VT = Op.getValueType();
6611 SDValue Op0 = Op.getOperand(0);
6612 EVT Op0VT = Op0.getValueType();
6613 MVT XLenVT = Subtarget.getXLenVT();
6614 if (Op0VT == MVT::i16 &&
6615 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6616 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6617 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6618 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6619 }
6620 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6621 Subtarget.hasStdExtFOrZfinx()) {
6622 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6623 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6624 }
6625 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6626 Subtarget.hasStdExtDOrZdinx()) {
6627 SDValue Lo, Hi;
6628 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6629 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6630 }
6631
6632 // Consider other scalar<->scalar casts as legal if the types are legal.
6633 // Otherwise expand them.
6634 if (!VT.isVector() && !Op0VT.isVector()) {
6635 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6636 return Op;
6637 return SDValue();
6638 }
6639
6640 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6641 "Unexpected types");
6642
6643 if (VT.isFixedLengthVector()) {
6644 // We can handle fixed length vector bitcasts with a simple replacement
6645 // in isel.
6646 if (Op0VT.isFixedLengthVector())
6647 return Op;
6648 // When bitcasting from scalar to fixed-length vector, insert the scalar
6649 // into a one-element vector of the result type, and perform a vector
6650 // bitcast.
6651 if (!Op0VT.isVector()) {
6652 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6653 if (!isTypeLegal(BVT))
6654 return SDValue();
6655 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6656 DAG.getUNDEF(BVT), Op0,
6657 DAG.getVectorIdxConstant(0, DL)));
6658 }
6659 return SDValue();
6660 }
6661 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6662 // thus: bitcast the vector to a one-element vector type whose element type
6663 // is the same as the result type, and extract the first element.
6664 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6665 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6666 if (!isTypeLegal(BVT))
6667 return SDValue();
6668 SDValue BVec = DAG.getBitcast(BVT, Op0);
6669 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6670 DAG.getVectorIdxConstant(0, DL));
6671 }
6672 return SDValue();
6673 }
6674 case ISD::INTRINSIC_WO_CHAIN:
6675 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6676 case ISD::INTRINSIC_W_CHAIN:
6677 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6678 case ISD::INTRINSIC_VOID:
6679 return LowerINTRINSIC_VOID(Op, DAG);
6680 case ISD::IS_FPCLASS:
6681 return LowerIS_FPCLASS(Op, DAG);
6682 case ISD::BITREVERSE: {
6683 MVT VT = Op.getSimpleValueType();
6684 if (VT.isFixedLengthVector()) {
6685 assert(Subtarget.hasStdExtZvbb());
6686 return lowerToScalableOp(Op, DAG);
6687 }
6688 SDLoc DL(Op);
6689 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6690 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6691 // Expand bitreverse to a bswap(rev8) followed by brev8.
6692 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6693 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6694 }
6695 case ISD::TRUNCATE:
6696 case ISD::TRUNCATE_SSAT_S:
6697 case ISD::TRUNCATE_USAT_U:
6698 // Only custom-lower vector truncates
6699 if (!Op.getSimpleValueType().isVector())
6700 return Op;
6701 return lowerVectorTruncLike(Op, DAG);
6702 case ISD::ANY_EXTEND:
6703 case ISD::ZERO_EXTEND:
6704 if (Op.getOperand(0).getValueType().isVector() &&
6705 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6706 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6707 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6708 case ISD::SIGN_EXTEND:
6709 if (Op.getOperand(0).getValueType().isVector() &&
6710 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6711 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6712 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6713 case ISD::SPLAT_VECTOR_PARTS:
6714 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6715 case ISD::INSERT_VECTOR_ELT:
6716 return lowerINSERT_VECTOR_ELT(Op, DAG);
6717 case ISD::EXTRACT_VECTOR_ELT:
6718 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6719 case ISD::SCALAR_TO_VECTOR: {
6720 MVT VT = Op.getSimpleValueType();
6721 SDLoc DL(Op);
6722 SDValue Scalar = Op.getOperand(0);
6723 if (VT.getVectorElementType() == MVT::i1) {
6724 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6725 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6726 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6727 }
6728 MVT ContainerVT = VT;
6729 if (VT.isFixedLengthVector())
6730 ContainerVT = getContainerForFixedLengthVector(VT);
6731 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6732
6733 SDValue V;
6734 if (VT.isFloatingPoint()) {
6735 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6736 DAG.getUNDEF(ContainerVT), Scalar, VL);
6737 } else {
6738 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6739 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6740 DAG.getUNDEF(ContainerVT), Scalar, VL);
6741 }
6742 if (VT.isFixedLengthVector())
6743 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6744 return V;
6745 }
6746 case ISD::VSCALE: {
6747 MVT XLenVT = Subtarget.getXLenVT();
6748 MVT VT = Op.getSimpleValueType();
6749 SDLoc DL(Op);
6750 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6751 // We define our scalable vector types for lmul=1 to use a 64 bit known
6752 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6753 // vscale as VLENB / 8.
6754 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6755 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6756 report_fatal_error("Support for VLEN==32 is incomplete.");
6757 // We assume VLENB is a multiple of 8. We manually choose the best shift
6758 // here because SimplifyDemandedBits isn't always able to simplify it.
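// For example, with RVVBitsPerBlock == 64 (so vscale == VLENB / 8):
// vscale * 2 -> VLENB >> 2
// vscale * 16 -> VLENB << 1
// vscale * 24 -> VLENB * 3 (multiple of 8, so no shift is needed)
// vscale * 5 -> (VLENB >> 3) * 5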
6759 uint64_t Val = Op.getConstantOperandVal(0);
6760 if (isPowerOf2_64(Val)) {
6761 uint64_t Log2 = Log2_64(Val);
6762 if (Log2 < 3)
6763 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6764 DAG.getConstant(3 - Log2, DL, VT));
6765 else if (Log2 > 3)
6766 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6767 DAG.getConstant(Log2 - 3, DL, XLenVT));
6768 } else if ((Val % 8) == 0) {
6769 // If the multiplier is a multiple of 8, scale it down to avoid needing
6770 // to shift the VLENB value.
6771 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6772 DAG.getConstant(Val / 8, DL, XLenVT));
6773 } else {
6774 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6775 DAG.getConstant(3, DL, XLenVT));
6776 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6777 DAG.getConstant(Val, DL, XLenVT));
6778 }
6779 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6780 }
6781 case ISD::FPOWI: {
6782 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6783 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6784 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6785 Op.getOperand(1).getValueType() == MVT::i32) {
6786 SDLoc DL(Op);
6787 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6788 SDValue Powi =
6789 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6790 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6791 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6792 }
6793 return SDValue();
6794 }
6795 case ISD::FMAXIMUM:
6796 case ISD::FMINIMUM:
6797 if (isPromotedOpNeedingSplit(Op, Subtarget))
6798 return SplitVectorOp(Op, DAG);
6799 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6800 case ISD::FP_EXTEND:
6801 case ISD::FP_ROUND:
6802 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6803 case ISD::STRICT_FP_ROUND:
6804 case ISD::STRICT_FP_EXTEND:
6805 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6806 case ISD::SINT_TO_FP:
6807 case ISD::UINT_TO_FP:
6808 if (Op.getValueType().isVector() &&
6809 ((Op.getValueType().getScalarType() == MVT::f16 &&
6810 (Subtarget.hasVInstructionsF16Minimal() &&
6811 !Subtarget.hasVInstructionsF16())) ||
6812 Op.getValueType().getScalarType() == MVT::bf16)) {
6813 if (isPromotedOpNeedingSplit(Op, Subtarget))
6814 return SplitVectorOp(Op, DAG);
6815 // int -> f32
6816 SDLoc DL(Op);
6817 MVT NVT =
6818 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6819 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6820 // f32 -> [b]f16
6821 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6822 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6823 }
6824 [[fallthrough]];
6825 case ISD::FP_TO_SINT:
6826 case ISD::FP_TO_UINT:
6827 if (SDValue Op1 = Op.getOperand(0);
6828 Op1.getValueType().isVector() &&
6829 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6830 (Subtarget.hasVInstructionsF16Minimal() &&
6831 !Subtarget.hasVInstructionsF16())) ||
6832 Op1.getValueType().getScalarType() == MVT::bf16)) {
6833 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6834 return SplitVectorOp(Op, DAG);
6835 // [b]f16 -> f32
6836 SDLoc DL(Op);
6837 MVT NVT = MVT::getVectorVT(MVT::f32,
6838 Op1.getValueType().getVectorElementCount());
6839 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6840 // f32 -> int
6841 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6842 }
6843 [[fallthrough]];
6844 case ISD::STRICT_FP_TO_SINT:
6845 case ISD::STRICT_FP_TO_UINT:
6846 case ISD::STRICT_SINT_TO_FP:
6847 case ISD::STRICT_UINT_TO_FP: {
6848 // RVV can only do fp<->int conversions to types half/double the size of
6849 // the source. We custom-lower any conversions that need two hops into multi-step
6850 // sequences.
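// For example, a widening i8 -> f64 conversion is emitted as a sign/zero
// extend to i32 followed by an i32 -> f64 convert, and a widening f16 -> i64
// conversion as an fp_extend to f32 followed by an f32 -> i64 convert.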
6851 MVT VT = Op.getSimpleValueType();
6852 if (VT.isScalarInteger())
6853 return lowerFP_TO_INT(Op, DAG, Subtarget);
6854 bool IsStrict = Op->isStrictFPOpcode();
6855 SDValue Src = Op.getOperand(0 + IsStrict);
6856 MVT SrcVT = Src.getSimpleValueType();
6857 if (SrcVT.isScalarInteger())
6858 return lowerINT_TO_FP(Op, DAG, Subtarget);
6859 if (!VT.isVector())
6860 return Op;
6861 SDLoc DL(Op);
6862 MVT EltVT = VT.getVectorElementType();
6863 MVT SrcEltVT = SrcVT.getVectorElementType();
6864 unsigned EltSize = EltVT.getSizeInBits();
6865 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6866 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6867 "Unexpected vector element types");
6868
6869 bool IsInt2FP = SrcEltVT.isInteger();
6870 // Widening conversions
6871 if (EltSize > (2 * SrcEltSize)) {
6872 if (IsInt2FP) {
6873 // Do a regular integer sign/zero extension then convert to float.
6874 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6875 VT.getVectorElementCount());
6876 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6877 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6878 ? ISD::ZERO_EXTEND
6879 : ISD::SIGN_EXTEND;
6880 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6881 if (IsStrict)
6882 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6883 Op.getOperand(0), Ext);
6884 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6885 }
6886 // FP2Int
6887 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6888 // Do one doubling fp_extend then complete the operation by converting
6889 // to int.
6890 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6891 if (IsStrict) {
6892 auto [FExt, Chain] =
6893 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6894 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6895 }
6896 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6897 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6898 }
6899
6900 // Narrowing conversions
6901 if (SrcEltSize > (2 * EltSize)) {
6902 if (IsInt2FP) {
6903 // One narrowing int_to_fp, then an fp_round.
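// For example, i64 -> f16 is emitted as an i64 -> f32 convert followed by an
// fp_round from f32 to f16.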
6904 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6905 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6906 if (IsStrict) {
6907 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6908 DAG.getVTList(InterimFVT, MVT::Other),
6909 Op.getOperand(0), Src);
6910 SDValue Chain = Int2FP.getValue(1);
6911 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6912 }
6913 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6914 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6915 }
6916 // FP2Int
6917 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6918 // representable by the integer, the result is poison.
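// For example, f64 -> i16 is emitted as an f64 -> i32 convert followed by a
// truncate from i32 to i16.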
6919 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6920 VT.getVectorElementCount());
6921 if (IsStrict) {
6922 SDValue FP2Int =
6923 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6924 Op.getOperand(0), Src);
6925 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6926 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6927 }
6928 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6929 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6930 }
6931
6932 // Scalable vectors can exit here; isel patterns handle conversions that
6933 // keep the element size the same or halve/double it.
6934 if (!VT.isFixedLengthVector())
6935 return Op;
6936
6937 // For fixed-length vectors we lower to a custom "VL" node.
6938 unsigned RVVOpc = 0;
6939 switch (Op.getOpcode()) {
6940 default:
6941 llvm_unreachable("Impossible opcode");
6942 case ISD::FP_TO_SINT:
6943 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6944 break;
6945 case ISD::FP_TO_UINT:
6946 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6947 break;
6948 case ISD::SINT_TO_FP:
6949 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6950 break;
6951 case ISD::UINT_TO_FP:
6952 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6953 break;
6954 case ISD::STRICT_FP_TO_SINT:
6955 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6956 break;
6957 case ISD::STRICT_FP_TO_UINT:
6958 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6959 break;
6960 case ISD::STRICT_SINT_TO_FP:
6961 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6962 break;
6963 case ISD::STRICT_UINT_TO_FP:
6964 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6965 break;
6966 }
6967
6968 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6969 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6970 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6971 "Expected same element count");
6972
6973 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6974
6975 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6976 if (IsStrict) {
6977 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6978 Op.getOperand(0), Src, Mask, VL);
6979 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6980 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6981 }
6982 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6983 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6984 }
6985 case ISD::FP_TO_SINT_SAT:
6986 case ISD::FP_TO_UINT_SAT:
6987 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6988 case ISD::FP_TO_BF16: {
6989 // Custom lower to ensure the libcall return is passed in an FPR on hard
6990 // float ABIs.
6991 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6992 SDLoc DL(Op);
6993 MakeLibCallOptions CallOptions;
6994 RTLIB::Libcall LC =
6995 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6996 SDValue Res =
6997 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6998 if (Subtarget.is64Bit())
6999 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7000 return DAG.getBitcast(MVT::i32, Res);
7001 }
7002 case ISD::BF16_TO_FP: {
7003 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7004 MVT VT = Op.getSimpleValueType();
7005 SDLoc DL(Op);
7006 Op = DAG.getNode(
7007 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7008 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7009 SDValue Res = Subtarget.is64Bit()
7010 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7011 : DAG.getBitcast(MVT::f32, Op);
7012 // fp_extend if the target VT is bigger than f32.
7013 if (VT != MVT::f32)
7014 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7015 return Res;
7016 }
7017 case ISD::STRICT_FP_TO_FP16:
7018 case ISD::FP_TO_FP16: {
7019 // Custom lower to ensure the libcall return is passed in an FPR on hard
7020 // float ABIs.
7021 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7022 SDLoc DL(Op);
7023 MakeLibCallOptions CallOptions;
7024 bool IsStrict = Op->isStrictFPOpcode();
7025 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7026 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7027 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7028 SDValue Res;
7029 std::tie(Res, Chain) =
7030 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7031 if (Subtarget.is64Bit())
7032 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7033 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7034 if (IsStrict)
7035 return DAG.getMergeValues({Result, Chain}, DL);
7036 return Result;
7037 }
7038 case ISD::STRICT_FP16_TO_FP:
7039 case ISD::FP16_TO_FP: {
7040 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7041 // float ABIs.
7042 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7043 SDLoc DL(Op);
7044 MakeLibCallOptions CallOptions;
7045 bool IsStrict = Op->isStrictFPOpcode();
7046 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7047 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7048 SDValue Arg = Subtarget.is64Bit()
7049 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7050 : DAG.getBitcast(MVT::f32, Op0);
7051 SDValue Res;
7052 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7053 CallOptions, DL, Chain);
7054 if (IsStrict)
7055 return DAG.getMergeValues({Res, Chain}, DL);
7056 return Res;
7057 }
7058 case ISD::FTRUNC:
7059 case ISD::FCEIL:
7060 case ISD::FFLOOR:
7061 case ISD::FNEARBYINT:
7062 case ISD::FRINT:
7063 case ISD::FROUND:
7064 case ISD::FROUNDEVEN:
7065 if (isPromotedOpNeedingSplit(Op, Subtarget))
7066 return SplitVectorOp(Op, DAG);
7067 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7068 case ISD::LRINT:
7069 case ISD::LLRINT:
7070 if (Op.getValueType().isVector())
7071 return lowerVectorXRINT(Op, DAG, Subtarget);
7072 [[fallthrough]];
7073 case ISD::LROUND:
7074 case ISD::LLROUND: {
7075 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7076 "Unexpected custom legalisation");
7077 SDLoc DL(Op);
7078 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7079 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7080 }
7081 case ISD::STRICT_LRINT:
7082 case ISD::STRICT_LLRINT:
7083 case ISD::STRICT_LROUND:
7084 case ISD::STRICT_LLROUND: {
7085 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7086 "Unexpected custom legalisation");
7087 SDLoc DL(Op);
7088 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7089 {Op.getOperand(0), Op.getOperand(1)});
7090 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7091 {Ext.getValue(1), Ext.getValue(0)});
7092 }
7093 case ISD::VECREDUCE_ADD:
7094 case ISD::VECREDUCE_UMAX:
7095 case ISD::VECREDUCE_SMAX:
7096 case ISD::VECREDUCE_UMIN:
7097 case ISD::VECREDUCE_SMIN:
7098 return lowerVECREDUCE(Op, DAG);
7099 case ISD::VECREDUCE_AND:
7100 case ISD::VECREDUCE_OR:
7101 case ISD::VECREDUCE_XOR:
7102 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7103 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7104 return lowerVECREDUCE(Op, DAG);
7105 case ISD::VECREDUCE_FADD:
7106 case ISD::VECREDUCE_SEQ_FADD:
7107 case ISD::VECREDUCE_FMIN:
7108 case ISD::VECREDUCE_FMAX:
7109 case ISD::VECREDUCE_FMAXIMUM:
7110 case ISD::VECREDUCE_FMINIMUM:
7111 return lowerFPVECREDUCE(Op, DAG);
7112 case ISD::VP_REDUCE_ADD:
7113 case ISD::VP_REDUCE_UMAX:
7114 case ISD::VP_REDUCE_SMAX:
7115 case ISD::VP_REDUCE_UMIN:
7116 case ISD::VP_REDUCE_SMIN:
7117 case ISD::VP_REDUCE_FADD:
7118 case ISD::VP_REDUCE_SEQ_FADD:
7119 case ISD::VP_REDUCE_FMIN:
7120 case ISD::VP_REDUCE_FMAX:
7121 case ISD::VP_REDUCE_FMINIMUM:
7122 case ISD::VP_REDUCE_FMAXIMUM:
7123 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7124 return SplitVectorReductionOp(Op, DAG);
7125 return lowerVPREDUCE(Op, DAG);
7126 case ISD::VP_REDUCE_AND:
7127 case ISD::VP_REDUCE_OR:
7128 case ISD::VP_REDUCE_XOR:
7129 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7130 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7131 return lowerVPREDUCE(Op, DAG);
7132 case ISD::VP_CTTZ_ELTS:
7133 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7134 return lowerVPCttzElements(Op, DAG);
7135 case ISD::UNDEF: {
7136 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7137 return convertFromScalableVector(Op.getSimpleValueType(),
7138 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7139 }
7140 case ISD::INSERT_SUBVECTOR:
7141 return lowerINSERT_SUBVECTOR(Op, DAG);
7142 case ISD::EXTRACT_SUBVECTOR:
7143 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7144 case ISD::VECTOR_DEINTERLEAVE:
7145 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7146 case ISD::VECTOR_INTERLEAVE:
7147 return lowerVECTOR_INTERLEAVE(Op, DAG);
7148 case ISD::STEP_VECTOR:
7149 return lowerSTEP_VECTOR(Op, DAG);
7150 case ISD::VECTOR_REVERSE:
7151 return lowerVECTOR_REVERSE(Op, DAG);
7152 case ISD::VECTOR_SPLICE:
7153 return lowerVECTOR_SPLICE(Op, DAG);
7154 case ISD::BUILD_VECTOR:
7155 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7156 case ISD::SPLAT_VECTOR: {
7157 MVT VT = Op.getSimpleValueType();
7158 MVT EltVT = VT.getVectorElementType();
7159 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7160 EltVT == MVT::bf16) {
7161 SDLoc DL(Op);
7162 SDValue Elt;
7163 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7164 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7165 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7166 Op.getOperand(0));
7167 else
7168 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7169 MVT IVT = VT.changeVectorElementType(MVT::i16);
7170 return DAG.getNode(ISD::BITCAST, DL, VT,
7171 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7172 }
7173
7174 if (EltVT == MVT::i1)
7175 return lowerVectorMaskSplat(Op, DAG);
7176 return SDValue();
7177 }
7178 case ISD::VECTOR_SHUFFLE:
7179 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7180 case ISD::CONCAT_VECTORS: {
7181 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7182 // better than going through the stack, as the default expansion does.
7183 SDLoc DL(Op);
7184 MVT VT = Op.getSimpleValueType();
7185 MVT ContainerVT = VT;
7186 if (VT.isFixedLengthVector())
7187 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7188
7189 // Recursively split concat_vectors with more than 2 operands:
7190 //
7191 // concat_vector op1, op2, op3, op4
7192 // ->
7193 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7194 //
7195 // This reduces the length of the chain of vslideups and allows us to
7196 // perform the vslideups at a smaller LMUL, limited to MF2.
7197 if (Op.getNumOperands() > 2 &&
7198 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7199 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7200 assert(isPowerOf2_32(Op.getNumOperands()));
7201 size_t HalfNumOps = Op.getNumOperands() / 2;
7202 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7203 Op->ops().take_front(HalfNumOps));
7204 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7205 Op->ops().drop_front(HalfNumOps));
7206 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7207 }
7208
7209 unsigned NumOpElts =
7210 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7211 SDValue Vec = DAG.getUNDEF(VT);
7212 for (const auto &OpIdx : enumerate(Op->ops())) {
7213 SDValue SubVec = OpIdx.value();
7214 // Don't insert undef subvectors.
7215 if (SubVec.isUndef())
7216 continue;
7217 Vec =
7218 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7219 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7220 }
7221 return Vec;
7222 }
7223 case ISD::LOAD: {
7224 auto *Load = cast<LoadSDNode>(Op);
7225 EVT VecTy = Load->getMemoryVT();
7226 // Handle normal vector tuple load.
7227 if (VecTy.isRISCVVectorTuple()) {
7228 SDLoc DL(Op);
7229 MVT XLenVT = Subtarget.getXLenVT();
7230 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7231 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7232 unsigned NumElts = Sz / (NF * 8);
7233 int Log2LMUL = Log2_64(NumElts) - 3;
7234
7235 auto Flag = SDNodeFlags();
7236 Flag.setNoUnsignedWrap(true);
7237 SDValue Ret = DAG.getUNDEF(VecTy);
7238 SDValue BasePtr = Load->getBasePtr();
7239 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7240 VROffset =
7241 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7242 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7243 SmallVector<SDValue, 8> OutChains;
7244
7245 // Load NF vector registers and combine them into a vector tuple.
7246 for (unsigned i = 0; i < NF; ++i) {
7247 SDValue LoadVal = DAG.getLoad(
7248 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7249 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7250 OutChains.push_back(LoadVal.getValue(1));
7251 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7252 DAG.getVectorIdxConstant(i, DL));
7253 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7254 }
7255 return DAG.getMergeValues(
7256 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7257 }
7258
7259 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7260 return V;
7261 if (Op.getValueType().isFixedLengthVector())
7262 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7263 return Op;
7264 }
7265 case ISD::STORE: {
7266 auto *Store = cast<StoreSDNode>(Op);
7267 SDValue StoredVal = Store->getValue();
7268 EVT VecTy = StoredVal.getValueType();
7269 // Handle normal vector tuple store.
7270 if (VecTy.isRISCVVectorTuple()) {
7271 SDLoc DL(Op);
7272 MVT XLenVT = Subtarget.getXLenVT();
7273 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7274 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7275 unsigned NumElts = Sz / (NF * 8);
7276 int Log2LMUL = Log2_64(NumElts) - 3;
7277
7278 auto Flag = SDNodeFlags();
7279 Flag.setNoUnsignedWrap(true);
7280 SDValue Ret;
7281 SDValue Chain = Store->getChain();
7282 SDValue BasePtr = Store->getBasePtr();
7283 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7284 VROffset =
7285 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7286 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7287
7288 // Extract subregisters in a vector tuple and store them individually.
7289 for (unsigned i = 0; i < NF; ++i) {
7290 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7291 MVT::getScalableVectorVT(MVT::i8, NumElts),
7292 StoredVal, DAG.getVectorIdxConstant(i, DL));
7293 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7294 MachinePointerInfo(Store->getAddressSpace()),
7295 Store->getOriginalAlign(),
7296 Store->getMemOperand()->getFlags());
7297 Chain = Ret.getValue(0);
7298 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7299 }
7300 return Ret;
7301 }
7302
7303 if (auto V = expandUnalignedRVVStore(Op, DAG))
7304 return V;
7305 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7306 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7307 return Op;
7308 }
7309 case ISD::MLOAD:
7310 case ISD::VP_LOAD:
7311 return lowerMaskedLoad(Op, DAG);
7312 case ISD::MSTORE:
7313 case ISD::VP_STORE:
7314 return lowerMaskedStore(Op, DAG);
7315 case ISD::VECTOR_COMPRESS:
7316 return lowerVectorCompress(Op, DAG);
7317 case ISD::SELECT_CC: {
7318 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7319 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7320 // into separate SETCC+SELECT just like LegalizeDAG.
7321 SDValue Tmp1 = Op.getOperand(0);
7322 SDValue Tmp2 = Op.getOperand(1);
7323 SDValue True = Op.getOperand(2);
7324 SDValue False = Op.getOperand(3);
7325 EVT VT = Op.getValueType();
7326 SDValue CC = Op.getOperand(4);
7327 EVT CmpVT = Tmp1.getValueType();
7328 EVT CCVT =
7329 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7330 SDLoc DL(Op);
7331 SDValue Cond =
7332 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7333 return DAG.getSelect(DL, VT, Cond, True, False);
7334 }
7335 case ISD::SETCC: {
7336 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7337 if (OpVT.isScalarInteger()) {
7338 MVT VT = Op.getSimpleValueType();
7339 SDValue LHS = Op.getOperand(0);
7340 SDValue RHS = Op.getOperand(1);
7341 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7342 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7343 "Unexpected CondCode");
7344
7345 SDLoc DL(Op);
7346
7347 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7348 // convert this to the equivalent of (set(u)ge X, C+1) by using
7349 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7350 // in a register.
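// For example, (setgt X, 5) becomes (xori (slti X, 6), 1): "X > 5" is
// computed as the inverse of "X < 6".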
7351 if (isa<ConstantSDNode>(RHS)) {
7352 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7353 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7354 // If this is an unsigned compare and the constant is -1, incrementing
7355 // the constant would change behavior. The result should be false.
7356 if (CCVal == ISD::SETUGT && Imm == -1)
7357 return DAG.getConstant(0, DL, VT);
7358 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7359 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7360 SDValue SetCC = DAG.getSetCC(
7361 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7362 return DAG.getLogicalNOT(DL, SetCC, VT);
7363 }
7364 }
7365
7366 // Not a constant we could handle, swap the operands and condition code to
7367 // SETLT/SETULT.
7368 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7369 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7370 }
7371
7372 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7373 return SplitVectorOp(Op, DAG);
7374
7375 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7376 }
7377 case ISD::ADD:
7378 case ISD::SUB:
7379 case ISD::MUL:
7380 case ISD::MULHS:
7381 case ISD::MULHU:
7382 case ISD::AND:
7383 case ISD::OR:
7384 case ISD::XOR:
7385 case ISD::SDIV:
7386 case ISD::SREM:
7387 case ISD::UDIV:
7388 case ISD::UREM:
7389 case ISD::BSWAP:
7390 case ISD::CTPOP:
7391 return lowerToScalableOp(Op, DAG);
7392 case ISD::SHL:
7393 case ISD::SRA:
7394 case ISD::SRL:
7395 if (Op.getSimpleValueType().isFixedLengthVector())
7396 return lowerToScalableOp(Op, DAG);
7397 // This can be called for an i32 shift amount that needs to be promoted.
7398 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7399 "Unexpected custom legalisation");
7400 return SDValue();
7401 case ISD::FABS:
7402 case ISD::FNEG:
7403 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7404 return lowerFABSorFNEG(Op, DAG, Subtarget);
7405 [[fallthrough]];
7406 case ISD::FADD:
7407 case ISD::FSUB:
7408 case ISD::FMUL:
7409 case ISD::FDIV:
7410 case ISD::FSQRT:
7411 case ISD::FMA:
7412 case ISD::FMINNUM:
7413 case ISD::FMAXNUM:
7414 if (isPromotedOpNeedingSplit(Op, Subtarget))
7415 return SplitVectorOp(Op, DAG);
7416 [[fallthrough]];
7417 case ISD::AVGFLOORS:
7418 case ISD::AVGFLOORU:
7419 case ISD::AVGCEILS:
7420 case ISD::AVGCEILU:
7421 case ISD::SMIN:
7422 case ISD::SMAX:
7423 case ISD::UMIN:
7424 case ISD::UMAX:
7425 case ISD::UADDSAT:
7426 case ISD::USUBSAT:
7427 case ISD::SADDSAT:
7428 case ISD::SSUBSAT:
7429 return lowerToScalableOp(Op, DAG);
7430 case ISD::ABDS:
7431 case ISD::ABDU: {
7432 SDLoc dl(Op);
7433 EVT VT = Op->getValueType(0);
7434 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7435 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7436 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7437
7438 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7439 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7440 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7441 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7442 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7443 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7444 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7445 }
7446 case ISD::ABS:
7447 case ISD::VP_ABS:
7448 return lowerABS(Op, DAG);
7449 case ISD::CTLZ:
7450 case ISD::CTLZ_ZERO_UNDEF:
7451 case ISD::CTTZ:
7452 case ISD::CTTZ_ZERO_UNDEF:
7453 if (Subtarget.hasStdExtZvbb())
7454 return lowerToScalableOp(Op, DAG);
7455 assert(Op.getOpcode() != ISD::CTTZ);
7456 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7457 case ISD::VSELECT:
7458 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7459 case ISD::FCOPYSIGN:
7460 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7461 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7462 if (isPromotedOpNeedingSplit(Op, Subtarget))
7463 return SplitVectorOp(Op, DAG);
7464 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7465 case ISD::STRICT_FADD:
7466 case ISD::STRICT_FSUB:
7467 case ISD::STRICT_FMUL:
7468 case ISD::STRICT_FDIV:
7469 case ISD::STRICT_FSQRT:
7470 case ISD::STRICT_FMA:
7471 if (isPromotedOpNeedingSplit(Op, Subtarget))
7472 return SplitStrictFPVectorOp(Op, DAG);
7473 return lowerToScalableOp(Op, DAG);
7474 case ISD::STRICT_FSETCC:
7475 case ISD::STRICT_FSETCCS:
7476 return lowerVectorStrictFSetcc(Op, DAG);
7477 case ISD::STRICT_FCEIL:
7478 case ISD::STRICT_FRINT:
7479 case ISD::STRICT_FFLOOR:
7480 case ISD::STRICT_FTRUNC:
7481 case ISD::STRICT_FNEARBYINT:
7482 case ISD::STRICT_FROUND:
7483 case ISD::STRICT_FROUNDEVEN:
7484 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7485 case ISD::MGATHER:
7486 case ISD::VP_GATHER:
7487 return lowerMaskedGather(Op, DAG);
7488 case ISD::MSCATTER:
7489 case ISD::VP_SCATTER:
7490 return lowerMaskedScatter(Op, DAG);
7491 case ISD::GET_ROUNDING:
7492 return lowerGET_ROUNDING(Op, DAG);
7493 case ISD::SET_ROUNDING:
7494 return lowerSET_ROUNDING(Op, DAG);
7495 case ISD::EH_DWARF_CFA:
7496 return lowerEH_DWARF_CFA(Op, DAG);
7497 case ISD::VP_MERGE:
7498 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7499 return lowerVPMergeMask(Op, DAG);
7500 [[fallthrough]];
7501 case ISD::VP_SELECT:
7502 case ISD::VP_ADD:
7503 case ISD::VP_SUB:
7504 case ISD::VP_MUL:
7505 case ISD::VP_SDIV:
7506 case ISD::VP_UDIV:
7507 case ISD::VP_SREM:
7508 case ISD::VP_UREM:
7509 case ISD::VP_UADDSAT:
7510 case ISD::VP_USUBSAT:
7511 case ISD::VP_SADDSAT:
7512 case ISD::VP_SSUBSAT:
7513 case ISD::VP_LRINT:
7514 case ISD::VP_LLRINT:
7515 return lowerVPOp(Op, DAG);
7516 case ISD::VP_AND:
7517 case ISD::VP_OR:
7518 case ISD::VP_XOR:
7519 return lowerLogicVPOp(Op, DAG);
7520 case ISD::VP_FADD:
7521 case ISD::VP_FSUB:
7522 case ISD::VP_FMUL:
7523 case ISD::VP_FDIV:
7524 case ISD::VP_FNEG:
7525 case ISD::VP_FABS:
7526 case ISD::VP_SQRT:
7527 case ISD::VP_FMA:
7528 case ISD::VP_FMINNUM:
7529 case ISD::VP_FMAXNUM:
7530 case ISD::VP_FCOPYSIGN:
7531 if (isPromotedOpNeedingSplit(Op, Subtarget))
7532 return SplitVPOp(Op, DAG);
7533 [[fallthrough]];
7534 case ISD::VP_SRA:
7535 case ISD::VP_SRL:
7536 case ISD::VP_SHL:
7537 return lowerVPOp(Op, DAG);
7538 case ISD::VP_IS_FPCLASS:
7539 return LowerIS_FPCLASS(Op, DAG);
7540 case ISD::VP_SIGN_EXTEND:
7541 case ISD::VP_ZERO_EXTEND:
7542 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7543 return lowerVPExtMaskOp(Op, DAG);
7544 return lowerVPOp(Op, DAG);
7545 case ISD::VP_TRUNCATE:
7546 return lowerVectorTruncLike(Op, DAG);
7547 case ISD::VP_FP_EXTEND:
7548 case ISD::VP_FP_ROUND:
7549 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7550 case ISD::VP_SINT_TO_FP:
7551 case ISD::VP_UINT_TO_FP:
7552 if (Op.getValueType().isVector() &&
7553 ((Op.getValueType().getScalarType() == MVT::f16 &&
7554 (Subtarget.hasVInstructionsF16Minimal() &&
7555 !Subtarget.hasVInstructionsF16())) ||
7556 Op.getValueType().getScalarType() == MVT::bf16)) {
7557 if (isPromotedOpNeedingSplit(Op, Subtarget))
7558 return SplitVectorOp(Op, DAG);
7559 // int -> f32
7560 SDLoc DL(Op);
7561 MVT NVT =
7562 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7563 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7564 // f32 -> [b]f16
7565 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7566 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7567 }
7568 [[fallthrough]];
7569 case ISD::VP_FP_TO_SINT:
7570 case ISD::VP_FP_TO_UINT:
7571 if (SDValue Op1 = Op.getOperand(0);
7572 Op1.getValueType().isVector() &&
7573 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7574 (Subtarget.hasVInstructionsF16Minimal() &&
7575 !Subtarget.hasVInstructionsF16())) ||
7576 Op1.getValueType().getScalarType() == MVT::bf16)) {
7577 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7578 return SplitVectorOp(Op, DAG);
7579 // [b]f16 -> f32
7580 SDLoc DL(Op);
7581 MVT NVT = MVT::getVectorVT(MVT::f32,
7582 Op1.getValueType().getVectorElementCount());
7583 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7584 // f32 -> int
7585 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7586 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7587 }
7588 return lowerVPFPIntConvOp(Op, DAG);
7589 case ISD::VP_SETCC:
7590 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7591 return SplitVPOp(Op, DAG);
7592 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7593 return lowerVPSetCCMaskOp(Op, DAG);
7594 [[fallthrough]];
7595 case ISD::VP_SMIN:
7596 case ISD::VP_SMAX:
7597 case ISD::VP_UMIN:
7598 case ISD::VP_UMAX:
7599 case ISD::VP_BITREVERSE:
7600 case ISD::VP_BSWAP:
7601 return lowerVPOp(Op, DAG);
7602 case ISD::VP_CTLZ:
7603 case ISD::VP_CTLZ_ZERO_UNDEF:
7604 if (Subtarget.hasStdExtZvbb())
7605 return lowerVPOp(Op, DAG);
7606 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7607 case ISD::VP_CTTZ:
7608 case ISD::VP_CTTZ_ZERO_UNDEF:
7609 if (Subtarget.hasStdExtZvbb())
7610 return lowerVPOp(Op, DAG);
7611 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7612 case ISD::VP_CTPOP:
7613 return lowerVPOp(Op, DAG);
7614 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7615 return lowerVPStridedLoad(Op, DAG);
7616 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7617 return lowerVPStridedStore(Op, DAG);
7618 case ISD::VP_FCEIL:
7619 case ISD::VP_FFLOOR:
7620 case ISD::VP_FRINT:
7621 case ISD::VP_FNEARBYINT:
7622 case ISD::VP_FROUND:
7623 case ISD::VP_FROUNDEVEN:
7624 case ISD::VP_FROUNDTOZERO:
7625 if (isPromotedOpNeedingSplit(Op, Subtarget))
7626 return SplitVPOp(Op, DAG);
7627 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7628 case ISD::VP_FMAXIMUM:
7629 case ISD::VP_FMINIMUM:
7630 if (isPromotedOpNeedingSplit(Op, Subtarget))
7631 return SplitVPOp(Op, DAG);
7632 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7633 case ISD::EXPERIMENTAL_VP_SPLICE:
7634 return lowerVPSpliceExperimental(Op, DAG);
7635 case ISD::EXPERIMENTAL_VP_REVERSE:
7636 return lowerVPReverseExperimental(Op, DAG);
7637 case ISD::EXPERIMENTAL_VP_SPLAT:
7638 return lowerVPSplatExperimental(Op, DAG);
7639 case ISD::CLEAR_CACHE: {
7640 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7641 "llvm.clear_cache only needs custom lower on Linux targets");
7642 SDLoc DL(Op);
7643 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7644 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7645 Op.getOperand(2), Flags, DL);
7646 }
7647 case ISD::INIT_TRAMPOLINE:
7648 return lowerINIT_TRAMPOLINE(Op, DAG);
7649 case ISD::ADJUST_TRAMPOLINE:
7650 return lowerADJUST_TRAMPOLINE(Op, DAG);
7651 }
7652}
7653
7654SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7655 SDValue Start, SDValue End,
7656 SDValue Flags, SDLoc DL) const {
7657 MakeLibCallOptions CallOptions;
7658 std::pair<SDValue, SDValue> CallResult =
7659 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7660 {Start, End, Flags}, CallOptions, DL, InChain);
7661
7662 // This function returns void so only the out chain matters.
7663 return CallResult.second;
7664}
7665
7666SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7667 SelectionDAG &DAG) const {
7668 if (!Subtarget.is64Bit())
7669 llvm::report_fatal_error("Trampolines only implemented for RV64");
7670
7671 // Create an MCCodeEmitter to encode instructions.
7672 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7673 assert(TLO);
7674 MCContext &MCCtx = TLO->getContext();
7675
7676 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7677 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7678
7679 SDValue Root = Op.getOperand(0);
7680 SDValue Trmp = Op.getOperand(1); // trampoline
7681 SDLoc dl(Op);
7682
7683 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7684
7685 // We store in the trampoline buffer the following instructions and data.
7686 // Offset:
7687 // 0: auipc t2, 0
7688 // 4: ld t0, 24(t2)
7689 // 8: ld t2, 16(t2)
7690 // 12: jalr t0
7691 // 16: <StaticChainOffset>
7692 // 24: <FunctionAddressOffset>
7693 // 32:
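// Each of the four encoded instructions is 4 bytes, so the code occupies
// bytes [0, 16) and the two 8-byte data slots follow at offsets 16 and 24,
// for a total trampoline size of 32 bytes.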
7694
7695 constexpr unsigned StaticChainOffset = 16;
7696 constexpr unsigned FunctionAddressOffset = 24;
7697
7698 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7699 assert(STI);
7700 auto GetEncoding = [&](const MCInst &MC) {
7701 SmallVector<char, 32> CB;
7702 SmallVector<MCFixup> Fixups;
7703 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7704 uint32_t Encoding = support::endian::read32le(CB.data());
7705 return Encoding;
7706 };
7707
7708 SDValue OutChains[6];
7709
7710 uint32_t Encodings[] = {
7711 // auipc t2, 0
7712 // Loads the current PC into t2.
7713 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7714 // ld t0, 24(t2)
7715 // Loads the function address into t0. Note that we are using offsets
7716 // pc-relative to the first instruction of the trampoline.
7717 GetEncoding(
7718 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7719 FunctionAddressOffset)),
7720 // ld t2, 16(t2)
7721 // Load the value of the static chain.
7722 GetEncoding(
7723 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7724 StaticChainOffset)),
7725 // jalr t0
7726 // Jump to the function.
7727 GetEncoding(MCInstBuilder(RISCV::JALR)
7728 .addReg(RISCV::X0)
7729 .addReg(RISCV::X5)
7730 .addImm(0))};
7731
7732 // Store encoded instructions.
7733 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7734 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7735 DAG.getConstant(Idx * 4, dl, MVT::i64))
7736 : Trmp;
7737 OutChains[Idx] = DAG.getTruncStore(
7738 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7739 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7740 }
7741
7742 // Now store the variable part of the trampoline.
7743 SDValue FunctionAddress = Op.getOperand(2);
7744 SDValue StaticChain = Op.getOperand(3);
7745
7746 // Store the given static chain and function pointer in the trampoline buffer.
7747 struct OffsetValuePair {
7748 const unsigned Offset;
7749 const SDValue Value;
7750 SDValue Addr = SDValue(); // Used to cache the address.
7751 } OffsetValues[] = {
7752 {StaticChainOffset, StaticChain},
7753 {FunctionAddressOffset, FunctionAddress},
7754 };
7755 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7756 SDValue Addr =
7757 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7758 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7759 OffsetValue.Addr = Addr;
7760 OutChains[Idx + 4] =
7761 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7762 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7763 }
7764
7765 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7766
7767 // The end of the trampoline's instructions is the same address as the
7768 // static chain slot we computed earlier.
7769 SDValue EndOfTrmp = OffsetValues[0].Addr;
7770
7771 // Call clear cache on the trampoline instructions.
7772 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7773 Trmp, EndOfTrmp);
7774
7775 return Chain;
7776}
7777
7778SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7779 SelectionDAG &DAG) const {
7780 if (!Subtarget.is64Bit())
7781 llvm::report_fatal_error("Trampolines only implemented for RV64");
7782
7783 return Op.getOperand(0);
7784}
7785
7786 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7787 SelectionDAG &DAG, unsigned Flags) {
7788 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7789}
7790
7791 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7792 SelectionDAG &DAG, unsigned Flags) {
7793 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7794 Flags);
7795}
7796
7797 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7798 SelectionDAG &DAG, unsigned Flags) {
7799 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7800 N->getOffset(), Flags);
7801}
7802
7803 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7804 SelectionDAG &DAG, unsigned Flags) {
7805 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7806}
7807
7808 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, SDLoc DL,
7809 EVT Ty, SelectionDAG &DAG) {
7810 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7811 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7812 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7813 return DAG.getLoad(
7814 Ty, DL, DAG.getEntryNode(), LC,
7815 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7816 }
7817
7818 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, SDLoc DL,
7819 EVT Ty, SelectionDAG &DAG) {
7820 RISCVConstantPoolValue *CPV =
7821 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7822 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7823 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7824 return DAG.getLoad(
7825 Ty, DL, DAG.getEntryNode(), LC,
7826 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7827 }
7828
7829template <class NodeTy>
7830SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7831 bool IsLocal, bool IsExternWeak) const {
7832 SDLoc DL(N);
7833 EVT Ty = getPointerTy(DAG.getDataLayout());
7834
7835 // When HWASAN is used and tagging of global variables is enabled
7836 // they should be accessed via the GOT, since the tagged address of a global
7837 // is incompatible with existing code models. This also applies to non-pic
7838 // mode.
7839 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7840 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7841 if (IsLocal && !Subtarget.allowTaggedGlobals())
7842 // Use PC-relative addressing to access the symbol. This generates the
7843 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7844 // %pcrel_lo(auipc)).
7845 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7846
7847 // Use PC-relative addressing to access the GOT for this symbol, then load
7848 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7849 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7850 SDValue Load =
7851 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7852 MachineFunction &MF = DAG.getMachineFunction();
7853 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7854 MachinePointerInfo::getGOT(MF),
7855 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7856 MachineMemOperand::MOInvariant,
7857 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7858 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7859 return Load;
7860 }
7861
7862 switch (getTargetMachine().getCodeModel()) {
7863 default:
7864 report_fatal_error("Unsupported code model for lowering");
7865 case CodeModel::Small: {
7866 // Generate a sequence for accessing addresses within the first 2 GiB of
7867 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7868 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7869 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7870 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7871 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7872 }
7873 case CodeModel::Medium: {
7874 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7875 if (IsExternWeak) {
7876 // An extern weak symbol may be undefined, i.e. have value 0, which may
7877 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7878 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7879 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7880 SDValue Load =
7881 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7882 MachineFunction &MF = DAG.getMachineFunction();
7883 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7884 MachinePointerInfo::getGOT(MF),
7885 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7886 MachineMemOperand::MOInvariant,
7887 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7888 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7889 return Load;
7890 }
7891
7892 // Generate a sequence for accessing addresses within any 2GiB range within
7893 // the address space. This generates the pattern (PseudoLLA sym), which
7894 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7895 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7896 }
7897 case CodeModel::Large: {
7898 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
7899 return getLargeGlobalAddress(G, DL, Ty, DAG);
7900
7901 // Use PC-relative addressing for other node types.
7902 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7903 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7904 }
7905 }
7906}
7907
7908SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7909 SelectionDAG &DAG) const {
7910 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7911 assert(N->getOffset() == 0 && "unexpected offset in global node");
7912 const GlobalValue *GV = N->getGlobal();
7913 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7914}
7915
7916SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7917 SelectionDAG &DAG) const {
7918 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7919
7920 return getAddr(N, DAG);
7921}
7922
7923SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7924 SelectionDAG &DAG) const {
7925 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7926
7927 return getAddr(N, DAG);
7928}
7929
7930SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7931 SelectionDAG &DAG) const {
7932 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7933
7934 return getAddr(N, DAG);
7935}
7936
7937SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7938 SelectionDAG &DAG,
7939 bool UseGOT) const {
7940 SDLoc DL(N);
7941 EVT Ty = getPointerTy(DAG.getDataLayout());
7942 const GlobalValue *GV = N->getGlobal();
7943 MVT XLenVT = Subtarget.getXLenVT();
7944
7945 if (UseGOT) {
7946 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7947 // load the address from the GOT and add the thread pointer. This generates
7948 // the pattern (PseudoLA_TLS_IE sym), which expands to
7949 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7950 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7951 SDValue Load =
7952 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7953 MachineFunction &MF = DAG.getMachineFunction();
7954 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7955 MachinePointerInfo::getGOT(MF),
7956 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7957 MachineMemOperand::MOInvariant,
7958 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7959 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7960
7961 // Add the thread pointer.
7962 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7963 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7964 }
7965
7966 // Generate a sequence for accessing the address relative to the thread
7967 // pointer, with the appropriate adjustment for the thread pointer offset.
7968 // This generates the pattern
7969 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7970 SDValue AddrHi =
7971 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7972 SDValue AddrAdd =
7973 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7974 SDValue AddrLo =
7975 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7976
7977 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7978 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7979 SDValue MNAdd =
7980 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7981 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7982}
7983
7984SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7985 SelectionDAG &DAG) const {
7986 SDLoc DL(N);
7987 EVT Ty = getPointerTy(DAG.getDataLayout());
7988 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7989 const GlobalValue *GV = N->getGlobal();
7990
7991 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7992 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7993 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7994 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7995 SDValue Load =
7996 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7997
7998 // Prepare argument list to generate call.
7999 ArgListTy Args;
8000 ArgListEntry Entry;
8001 Entry.Node = Load;
8002 Entry.Ty = CallTy;
8003 Args.push_back(Entry);
8004
8005 // Setup call to __tls_get_addr.
8006 TargetLowering::CallLoweringInfo CLI(DAG);
8007 CLI.setDebugLoc(DL)
8008 .setChain(DAG.getEntryNode())
8009 .setLibCallee(CallingConv::C, CallTy,
8010 DAG.getExternalSymbol("__tls_get_addr", Ty),
8011 std::move(Args));
8012
8013 return LowerCallTo(CLI).first;
8014}
8015
8016SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8017 SelectionDAG &DAG) const {
8018 SDLoc DL(N);
8019 EVT Ty = getPointerTy(DAG.getDataLayout());
8020 const GlobalValue *GV = N->getGlobal();
8021
8022 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8023 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8024 //
8025 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8026 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8027 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8028 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8029 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8030 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8031}
8032
8033SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8034 SelectionDAG &DAG) const {
8035 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8036 assert(N->getOffset() == 0 && "unexpected offset in global node");
8037
8038 if (DAG.getTarget().useEmulatedTLS())
8039 return LowerToTLSEmulatedModel(N, DAG);
8040
8040
8041 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8042
8043 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8044 CallingConv::GHC)
8045 report_fatal_error("In GHC calling convention TLS is not supported");
8046
8047 SDValue Addr;
8048 switch (Model) {
8049 case TLSModel::LocalExec:
8050 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8051 break;
8052 case TLSModel::InitialExec:
8053 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8054 break;
8055 case TLSModel::LocalDynamic:
8056 case TLSModel::GeneralDynamic:
8057 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8058 : getDynamicTLSAddr(N, DAG);
8059 break;
8060 }
8061
8062 return Addr;
8063}
8064
8065// Return true if Val is equal to (setcc LHS, RHS, CC).
8066// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8067// Otherwise, return std::nullopt.
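// For example, if Val is (setcc a, b, setlt), then matchSetCC(a, b, setlt, Val)
// returns true, matchSetCC(a, b, setge, Val) returns false, and a query with
// unrelated operands returns std::nullopt.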
8068static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8069 ISD::CondCode CC, SDValue Val) {
8070 assert(Val->getOpcode() == ISD::SETCC);
8071 SDValue LHS2 = Val.getOperand(0);
8072 SDValue RHS2 = Val.getOperand(1);
8073 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8074
8075 if (LHS == LHS2 && RHS == RHS2) {
8076 if (CC == CC2)
8077 return true;
8078 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8079 return false;
8080 } else if (LHS == RHS2 && RHS == LHS2) {
8081 CC2 = ISD::getSetCCSwappedOperands(CC2);
8082 if (CC == CC2)
8083 return true;
8084 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8085 return false;
8086 }
8087
8088 return std::nullopt;
8089}
8090
8091 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8092 const RISCVSubtarget &Subtarget) {
8093 SDValue CondV = N->getOperand(0);
8094 SDValue TrueV = N->getOperand(1);
8095 SDValue FalseV = N->getOperand(2);
8096 MVT VT = N->getSimpleValueType(0);
8097 SDLoc DL(N);
8098
8099 if (!Subtarget.hasConditionalMoveFusion()) {
8100 // (select c, -1, y) -> -c | y
8101 if (isAllOnesConstant(TrueV)) {
8102 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8103 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8104 }
8105 // (select c, y, -1) -> (c-1) | y
8106 if (isAllOnesConstant(FalseV)) {
8107 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8108 DAG.getAllOnesConstant(DL, VT));
8109 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8110 }
8111
8112 // (select c, 0, y) -> (c-1) & y
8113 if (isNullConstant(TrueV)) {
8114 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8115 DAG.getAllOnesConstant(DL, VT));
8116 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8117 }
8118 // (select c, y, 0) -> -c & y
8119 if (isNullConstant(FalseV)) {
8120 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8121 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8122 }
8123 }
8124
8125 // select c, ~x, x --> xor -c, x
8126 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8127 const APInt &TrueVal = TrueV->getAsAPIntVal();
8128 const APInt &FalseVal = FalseV->getAsAPIntVal();
8129 if (~TrueVal == FalseVal) {
8130 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8131 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8132 }
8133 }
8134
8135 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8136 // when both truev and falsev are also setcc.
8137 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8138 FalseV.getOpcode() == ISD::SETCC) {
8139 SDValue LHS = CondV.getOperand(0);
8140 SDValue RHS = CondV.getOperand(1);
8141 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8142
8143 // (select x, x, y) -> x | y
8144 // (select !x, x, y) -> x & y
8145 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8146 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8147 DAG.getFreeze(FalseV));
8148 }
8149 // (select x, y, x) -> x & y
8150 // (select !x, y, x) -> x | y
8151 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8152 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8153 DAG.getFreeze(TrueV), FalseV);
8154 }
8155 }
8156
8157 return SDValue();
8158}
8159
8160// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8161// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8162// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8163// being `0` or `-1`. In such cases we can replace `select` with `and`.
8164// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8165// than `c0`?
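// For example, (and (select cond, X, 12), 3) becomes
// (select cond, (and X, 3), 0), and the resulting select of 0 can then be
// lowered with a simple AND mask instead of a branch.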
8166static SDValue
8167 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8168 const RISCVSubtarget &Subtarget) {
8169 if (Subtarget.hasShortForwardBranchOpt())
8170 return SDValue();
8171
8172 unsigned SelOpNo = 0;
8173 SDValue Sel = BO->getOperand(0);
8174 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8175 SelOpNo = 1;
8176 Sel = BO->getOperand(1);
8177 }
8178
8179 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8180 return SDValue();
8181
8182 unsigned ConstSelOpNo = 1;
8183 unsigned OtherSelOpNo = 2;
8184 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8185 ConstSelOpNo = 2;
8186 OtherSelOpNo = 1;
8187 }
8188 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8189 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8190 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8191 return SDValue();
8192
8193 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8194 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8195 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8196 return SDValue();
8197
8198 SDLoc DL(Sel);
8199 EVT VT = BO->getValueType(0);
8200
8201 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8202 if (SelOpNo == 1)
8203 std::swap(NewConstOps[0], NewConstOps[1]);
8204
8205 SDValue NewConstOp =
8206 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8207 if (!NewConstOp)
8208 return SDValue();
8209
8210 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8211 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8212 return SDValue();
8213
8214 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8215 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8216 if (SelOpNo == 1)
8217 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8218 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8219
8220 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8221 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8222 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8223}
8224
8225SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8226 SDValue CondV = Op.getOperand(0);
8227 SDValue TrueV = Op.getOperand(1);
8228 SDValue FalseV = Op.getOperand(2);
8229 SDLoc DL(Op);
8230 MVT VT = Op.getSimpleValueType();
8231 MVT XLenVT = Subtarget.getXLenVT();
8232
8233 // Lower vector SELECTs to VSELECTs by splatting the condition.
8234 if (VT.isVector()) {
8235 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8236 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8237 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8238 }
8239
8240 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8241 // nodes to implement the SELECT. Performing the lowering here allows for
8242 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8243 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8244 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8245 VT.isScalarInteger()) {
8246 // (select c, t, 0) -> (czero_eqz t, c)
8247 if (isNullConstant(FalseV))
8248 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8249 // (select c, 0, f) -> (czero_nez f, c)
8250 if (isNullConstant(TrueV))
8251 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8252
8253 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8254 if (TrueV.getOpcode() == ISD::AND &&
8255 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8256 return DAG.getNode(
8257 ISD::OR, DL, VT, TrueV,
8258 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8259 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8260 if (FalseV.getOpcode() == ISD::AND &&
8261 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8262 return DAG.getNode(
8263 ISD::OR, DL, VT, FalseV,
8264 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8265
8266 // Try some other optimizations before falling back to generic lowering.
8267 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8268 return V;
8269
8270 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8271 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
8272 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8273 const APInt &TrueVal = TrueV->getAsAPIntVal();
8274 const APInt &FalseVal = FalseV->getAsAPIntVal();
8275 const int TrueValCost = RISCVMatInt::getIntMatCost(
8276 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8277 const int FalseValCost = RISCVMatInt::getIntMatCost(
8278 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8279 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8280 SDValue LHSVal = DAG.getConstant(
8281 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8282 SDValue RHSVal =
8283 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8284 SDValue CMOV =
8285 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8286 DL, VT, LHSVal, CondV);
8287 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8288 }
8289
8290 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8291 // Unless we have the short forward branch optimization.
8292 if (!Subtarget.hasConditionalMoveFusion())
8293 return DAG.getNode(
8294 ISD::OR, DL, VT,
8295 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8296 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8297 }
8298
8299 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8300 return V;
8301
8302 if (Op.hasOneUse()) {
8303 unsigned UseOpc = Op->user_begin()->getOpcode();
8304 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8305 SDNode *BinOp = *Op->user_begin();
8306 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8307 DAG, Subtarget)) {
8308 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8309 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8310 // may return a constant node and cause a crash in lowerSELECT.
8311 if (NewSel.getOpcode() == ISD::SELECT)
8312 return lowerSELECT(NewSel, DAG);
8313 return NewSel;
8314 }
8315 }
8316 }
8317
8318 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8319 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8320 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8321 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8322 if (FPTV && FPFV) {
8323 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8324 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8325 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8326 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8327 DAG.getConstant(1, DL, XLenVT));
8328 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8329 }
8330 }
8331
8332 // If the condition is not an integer SETCC which operates on XLenVT, we need
8333 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8334 // (select condv, truev, falsev)
8335 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8336 if (CondV.getOpcode() != ISD::SETCC ||
8337 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8338 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8339 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8340
8341 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8342
8343 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8344 }
8345
8346 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8347 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8348 // advantage of the integer compare+branch instructions. i.e.:
8349 // (select (setcc lhs, rhs, cc), truev, falsev)
8350 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8351 SDValue LHS = CondV.getOperand(0);
8352 SDValue RHS = CondV.getOperand(1);
8353 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8354
8355 // Special case for a select of 2 constants that have a difference of 1.
8356 // Normally this is done by DAGCombine, but if the select is introduced by
8357 // type legalization or op legalization, we miss it. Restricting to SETLT
8358 // case for now because that is what signed saturating add/sub need.
8359 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8360 // but we would probably want to swap the true/false values if the condition
8361 // is SETGE/SETLE to avoid an XORI.
8362 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8363 CCVal == ISD::SETLT) {
8364 const APInt &TrueVal = TrueV->getAsAPIntVal();
8365 const APInt &FalseVal = FalseV->getAsAPIntVal();
8366 if (TrueVal - 1 == FalseVal)
8367 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8368 if (TrueVal + 1 == FalseVal)
8369 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8370 }
8371
8372 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8373 // 1 < x ? x : 1 -> 0 < x ? x : 1
8374 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8375 RHS == TrueV && LHS == FalseV) {
8376 LHS = DAG.getConstant(0, DL, VT);
8377 // 0 <u x is the same as x != 0.
8378 if (CCVal == ISD::SETULT) {
8379 std::swap(LHS, RHS);
8380 CCVal = ISD::SETNE;
8381 }
8382 }
8383
8384 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8385 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8386 RHS == FalseV) {
8387 RHS = DAG.getConstant(0, DL, VT);
8388 }
8389
8390 SDValue TargetCC = DAG.getCondCode(CCVal);
8391
8392 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8393 // (select (setcc lhs, rhs, CC), constant, falsev)
8394 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8395 std::swap(TrueV, FalseV);
8396 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8397 }
8398
8399 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8400 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8401}
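// For illustration (schematic; the exact choice depends on the subtarget):
// with Zicond or XVentanaCondOps an integer (select %c, %t, %f) is emitted as
// the branchless pair
//   (or (czero_eqz %t, %c), (czero_nez %f, %c))
// while without those extensions, or when short-forward-branch fusion is
// preferred, it becomes a compare-and-branch style node, e.g.
//   (riscvisd::select_cc %c, 0, setne, %t, %f)
// or, if %c is itself an XLenVT setcc, the compare operands are merged in
// directly.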
8402
8403SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8404 SDValue CondV = Op.getOperand(1);
8405 SDLoc DL(Op);
8406 MVT XLenVT = Subtarget.getXLenVT();
8407
8408 if (CondV.getOpcode() == ISD::SETCC &&
8409 CondV.getOperand(0).getValueType() == XLenVT) {
8410 SDValue LHS = CondV.getOperand(0);
8411 SDValue RHS = CondV.getOperand(1);
8412 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8413
8414 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8415
8416 SDValue TargetCC = DAG.getCondCode(CCVal);
8417 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8418 LHS, RHS, TargetCC, Op.getOperand(2));
8419 }
8420
8421 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8422 CondV, DAG.getConstant(0, DL, XLenVT),
8423 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8424}
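// For example (schematic): a branch on an XLenVT integer compare such as
//   (brcond (setcc %a, %b, setlt), %dest)
// is folded into (riscvisd::br_cc %a, %b, setlt, %dest), while any other i1
// condition %c is branched on as (riscvisd::br_cc %c, 0, setne, %dest).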
8425
8426SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8427 MachineFunction &MF = DAG.getMachineFunction();
8428 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8429
8430 SDLoc DL(Op);
8431 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8432 getPointerTy(MF.getDataLayout()));
8433
8434 // vastart just stores the address of the VarArgsFrameIndex slot into the
8435 // memory location argument.
8436 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8437 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8438 MachinePointerInfo(SV));
8439}
8440
8441SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8442 SelectionDAG &DAG) const {
8443 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8444 MachineFunction &MF = DAG.getMachineFunction();
8445 MachineFrameInfo &MFI = MF.getFrameInfo();
8446 MFI.setFrameAddressIsTaken(true);
8447 Register FrameReg = RI.getFrameRegister(MF);
8448 int XLenInBytes = Subtarget.getXLen() / 8;
8449
8450 EVT VT = Op.getValueType();
8451 SDLoc DL(Op);
8452 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8453 unsigned Depth = Op.getConstantOperandVal(0);
8454 while (Depth--) {
8455 int Offset = -(XLenInBytes * 2);
8456 SDValue Ptr = DAG.getNode(
8457 ISD::ADD, DL, VT, FrameAddr,
8458 DAG.getSignedConstant(Offset, DL, VT));
8459 FrameAddr =
8460 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8461 }
8462 return FrameAddr;
8463}
8464
8465SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8466 SelectionDAG &DAG) const {
8467 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8468 MachineFunction &MF = DAG.getMachineFunction();
8469 MachineFrameInfo &MFI = MF.getFrameInfo();
8470 MFI.setReturnAddressIsTaken(true);
8471 MVT XLenVT = Subtarget.getXLenVT();
8472 int XLenInBytes = Subtarget.getXLen() / 8;
8473
8474 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8475 return SDValue();
8476
8477 EVT VT = Op.getValueType();
8478 SDLoc DL(Op);
8479 unsigned Depth = Op.getConstantOperandVal(0);
8480 if (Depth) {
8481 int Off = -XLenInBytes;
8482 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8483 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8484 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8485 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8486 MachinePointerInfo());
8487 }
8488
8489 // Return the value of the return address register, marking it an implicit
8490 // live-in.
8491 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8492 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8493}
8494
8495SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8496 SelectionDAG &DAG) const {
8497 SDLoc DL(Op);
8498 SDValue Lo = Op.getOperand(0);
8499 SDValue Hi = Op.getOperand(1);
8500 SDValue Shamt = Op.getOperand(2);
8501 EVT VT = Lo.getValueType();
8502
8503 // if Shamt-XLEN < 0: // Shamt < XLEN
8504 // Lo = Lo << Shamt
8505 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8506 // else:
8507 // Lo = 0
8508 // Hi = Lo << (Shamt-XLEN)
8509
8510 SDValue Zero = DAG.getConstant(0, DL, VT);
8511 SDValue One = DAG.getConstant(1, DL, VT);
8512 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8513 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8514 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8515 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8516
8517 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8518 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8519 SDValue ShiftRightLo =
8520 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8521 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8522 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8523 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8524
8525 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8526
8527 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8528 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8529
8530 SDValue Parts[2] = {Lo, Hi};
8531 return DAG.getMergeValues(Parts, DL);
8532}
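// Worked example of the expansion above on RV32 (XLEN = 32), purely for
// illustration: shifting the pair {Lo, Hi} left by Shamt = 40 takes the
// "else" arm (40 - 32 >= 0), so Lo becomes 0 and Hi becomes Lo << 8. For
// Shamt = 4 the first arm is taken: Lo becomes Lo << 4 and Hi becomes
// (Hi << 4) | ((Lo >>u 1) >>u 27), i.e. the top 4 bits of Lo move into the
// low bits of Hi.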
8533
8534SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8535 bool IsSRA) const {
8536 SDLoc DL(Op);
8537 SDValue Lo = Op.getOperand(0);
8538 SDValue Hi = Op.getOperand(1);
8539 SDValue Shamt = Op.getOperand(2);
8540 EVT VT = Lo.getValueType();
8541
8542 // SRA expansion:
8543 // if Shamt-XLEN < 0: // Shamt < XLEN
8544 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8545 // Hi = Hi >>s Shamt
8546 // else:
8547 // Lo = Hi >>s (Shamt-XLEN);
8548 // Hi = Hi >>s (XLEN-1)
8549 //
8550 // SRL expansion:
8551 // if Shamt-XLEN < 0: // Shamt < XLEN
8552 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8553 // Hi = Hi >>u Shamt
8554 // else:
8555 // Lo = Hi >>u (Shamt-XLEN);
8556 // Hi = 0;
8557
8558 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8559
8560 SDValue Zero = DAG.getConstant(0, DL, VT);
8561 SDValue One = DAG.getConstant(1, DL, VT);
8562 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8563 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8564 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8565 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8566
8567 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8568 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8569 SDValue ShiftLeftHi =
8570 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8571 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8572 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8573 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8574 SDValue HiFalse =
8575 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8576
8577 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8578
8579 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8580 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8581
8582 SDValue Parts[2] = {Lo, Hi};
8583 return DAG.getMergeValues(Parts, DL);
8584}
8585
8586// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8587// legal equivalently-sized i8 type, so we can use that as a go-between.
8588SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8589 SelectionDAG &DAG) const {
8590 SDLoc DL(Op);
8591 MVT VT = Op.getSimpleValueType();
8592 SDValue SplatVal = Op.getOperand(0);
8593 // All-zeros or all-ones splats are handled specially.
8594 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8595 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8596 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8597 }
8598 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8599 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8600 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8601 }
8602 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8603 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8604 DAG.getConstant(1, DL, SplatVal.getValueType()));
8605 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8606 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8607 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8608}
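// For illustration: splatting a non-constant i1 value %b to nxv4i1 is
// lowered roughly as
//   %m = and %b, 1
//   %v = splat_vector nxv4i8 %m
//   %r = setcc %v, (splat_vector nxv4i8 0), ne
// whereas all-ones and all-zeros splats become VMSET_VL / VMCLR_VL directly.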
8609
8610// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8611// illegal (currently only vXi64 RV32).
8612// FIXME: We could also catch non-constant sign-extended i32 values and lower
8613// them to VMV_V_X_VL.
8614SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8615 SelectionDAG &DAG) const {
8616 SDLoc DL(Op);
8617 MVT VecVT = Op.getSimpleValueType();
8618 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8619 "Unexpected SPLAT_VECTOR_PARTS lowering");
8620
8621 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8622 SDValue Lo = Op.getOperand(0);
8623 SDValue Hi = Op.getOperand(1);
8624
8625 MVT ContainerVT = VecVT;
8626 if (VecVT.isFixedLengthVector())
8627 ContainerVT = getContainerForFixedLengthVector(VecVT);
8628
8629 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8630
8631 SDValue Res =
8632 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8633
8634 if (VecVT.isFixedLengthVector())
8635 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8636
8637 return Res;
8638}
8639
8640// Custom-lower extensions from mask vectors by using a vselect either with 1
8641// for zero/any-extension or -1 for sign-extension:
8642// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8643// Note that any-extension is lowered identically to zero-extension.
8644SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8645 int64_t ExtTrueVal) const {
8646 SDLoc DL(Op);
8647 MVT VecVT = Op.getSimpleValueType();
8648 SDValue Src = Op.getOperand(0);
8649 // Only custom-lower extensions from mask types
8650 assert(Src.getValueType().isVector() &&
8651 Src.getValueType().getVectorElementType() == MVT::i1);
8652
8653 if (VecVT.isScalableVector()) {
8654 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8655 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8656 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8657 }
8658
8659 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8660 MVT I1ContainerVT =
8661 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8662
8663 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8664
8665 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8666
8667 MVT XLenVT = Subtarget.getXLenVT();
8668 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8669 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8670
8671 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8672 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8673 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8674 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8675 SDValue Select =
8676 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8677 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8678
8679 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8680}
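// For example (schematic): sign-extending a mask, nxv4i1 -> nxv4i32, becomes
//   (vselect %mask, (splat -1), (splat 0))
// and zero/any-extension uses 1 instead of -1 for the true arm; fixed-length
// vectors build the same thing with VMERGE_VL on the scalable container type.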
8681
8682SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8683 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8684 MVT ExtVT = Op.getSimpleValueType();
8685 // Only custom-lower extensions from fixed-length vector types.
8686 if (!ExtVT.isFixedLengthVector())
8687 return Op;
8688 MVT VT = Op.getOperand(0).getSimpleValueType();
8689 // Grab the canonical container type for the extended type. Infer the smaller
8690 // type from that to ensure the same number of vector elements, as we know
8691 // the LMUL will be sufficient to hold the smaller type.
8692 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8693 // Get the extended container type manually to ensure the same number of
8694 // vector elements between source and dest.
8695 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8696 ContainerExtVT.getVectorElementCount());
8697
8698 SDValue Op1 =
8699 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8700
8701 SDLoc DL(Op);
8702 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8703
8704 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8705
8706 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8707}
8708
8709// Custom-lower truncations from vectors to mask vectors by using a mask and a
8710// setcc operation:
8711// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8712SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8713 SelectionDAG &DAG) const {
8714 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8715 SDLoc DL(Op);
8716 EVT MaskVT = Op.getValueType();
8717 // Only expect to custom-lower truncations to mask types
8718 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8719 "Unexpected type for vector mask lowering");
8720 SDValue Src = Op.getOperand(0);
8721 MVT VecVT = Src.getSimpleValueType();
8722 SDValue Mask, VL;
8723 if (IsVPTrunc) {
8724 Mask = Op.getOperand(1);
8725 VL = Op.getOperand(2);
8726 }
8727 // If this is a fixed vector, we need to convert it to a scalable vector.
8728 MVT ContainerVT = VecVT;
8729
8730 if (VecVT.isFixedLengthVector()) {
8731 ContainerVT = getContainerForFixedLengthVector(VecVT);
8732 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8733 if (IsVPTrunc) {
8734 MVT MaskContainerVT =
8735 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8736 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8737 }
8738 }
8739
8740 if (!IsVPTrunc) {
8741 std::tie(Mask, VL) =
8742 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8743 }
8744
8745 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8746 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8747
8748 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8749 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8750 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8751 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8752
8753 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8754 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8755 DAG.getUNDEF(ContainerVT), Mask, VL);
8756 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8757 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8758 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8759 if (MaskVT.isFixedLengthVector())
8760 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8761 return Trunc;
8762}
8763
8764SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8765 SelectionDAG &DAG) const {
8766 unsigned Opc = Op.getOpcode();
8767 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8768 SDLoc DL(Op);
8769
8770 MVT VT = Op.getSimpleValueType();
8771 // Only custom-lower vector truncates
8772 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8773
8774 // Truncates to mask types are handled differently
8775 if (VT.getVectorElementType() == MVT::i1)
8776 return lowerVectorMaskTruncLike(Op, DAG);
8777
8778 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8779 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8780 // truncate by one power of two at a time.
8781 MVT DstEltVT = VT.getVectorElementType();
8782
8783 SDValue Src = Op.getOperand(0);
8784 MVT SrcVT = Src.getSimpleValueType();
8785 MVT SrcEltVT = SrcVT.getVectorElementType();
8786
8787 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8788 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8789 "Unexpected vector truncate lowering");
8790
8791 MVT ContainerVT = SrcVT;
8792 SDValue Mask, VL;
8793 if (IsVPTrunc) {
8794 Mask = Op.getOperand(1);
8795 VL = Op.getOperand(2);
8796 }
8797 if (SrcVT.isFixedLengthVector()) {
8798 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8799 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8800 if (IsVPTrunc) {
8801 MVT MaskVT = getMaskTypeFor(ContainerVT);
8802 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8803 }
8804 }
8805
8806 SDValue Result = Src;
8807 if (!IsVPTrunc) {
8808 std::tie(Mask, VL) =
8809 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8810 }
8811
8812 unsigned NewOpc;
8813 if (Opc == ISD::TRUNCATE_SSAT_S)
8814 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8815 else if (Opc == ISD::TRUNCATE_USAT_U)
8816 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8817 else
8818 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8819
8820 do {
8821 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8822 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8823 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8824 } while (SrcEltVT != DstEltVT);
8825
8826 if (SrcVT.isFixedLengthVector())
8827 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8828
8829 return Result;
8830}
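// For illustration: truncating nxv2i64 -> nxv2i8 cannot be done in a single
// RVV narrowing step, so the loop above emits a chain of SEW-halving
// truncates, conceptually i64 -> i32 -> i16 -> i8, with one
// TRUNCATE_VECTOR_VL (or its SSAT/USAT saturating variant) per step.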
8831
8832SDValue
8833RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8834 SelectionDAG &DAG) const {
8835 SDLoc DL(Op);
8836 SDValue Chain = Op.getOperand(0);
8837 SDValue Src = Op.getOperand(1);
8838 MVT VT = Op.getSimpleValueType();
8839 MVT SrcVT = Src.getSimpleValueType();
8840 MVT ContainerVT = VT;
8841 if (VT.isFixedLengthVector()) {
8842 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8843 ContainerVT =
8844 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8845 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8846 }
8847
8848 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8849
8850 // RVV can only widen/truncate fp to types double/half the size of the source.
8851 if ((VT.getVectorElementType() == MVT::f64 &&
8852 (SrcVT.getVectorElementType() == MVT::f16 ||
8853 SrcVT.getVectorElementType() == MVT::bf16)) ||
8854 ((VT.getVectorElementType() == MVT::f16 ||
8855 VT.getVectorElementType() == MVT::bf16) &&
8856 SrcVT.getVectorElementType() == MVT::f64)) {
8857 // For double rounding, the intermediate rounding should be round-to-odd.
8858 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8859 ? RISCVISD::STRICT_FP_EXTEND_VL
8860 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8861 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8862 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8863 Chain, Src, Mask, VL);
8864 Chain = Src.getValue(1);
8865 }
8866
8867 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8868 ? RISCVISD::STRICT_FP_EXTEND_VL
8869 : RISCVISD::STRICT_FP_ROUND_VL;
8870 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8871 Chain, Src, Mask, VL);
8872 if (VT.isFixedLengthVector()) {
8873 // StrictFP operations have two result values. Their lowered result should
8874 // have the same result count.
8875 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8876 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8877 }
8878 return Res;
8879}
8880
8881SDValue
8882RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8883 SelectionDAG &DAG) const {
8884 bool IsVP =
8885 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8886 bool IsExtend =
8887 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8888 // RVV can only truncate fp to types half the size of the source. We
8889 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8890 // conversion instruction.
8891 SDLoc DL(Op);
8892 MVT VT = Op.getSimpleValueType();
8893
8894 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8895
8896 SDValue Src = Op.getOperand(0);
8897 MVT SrcVT = Src.getSimpleValueType();
8898
8899 bool IsDirectExtend =
8900 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8901 (SrcVT.getVectorElementType() != MVT::f16 &&
8902 SrcVT.getVectorElementType() != MVT::bf16));
8903 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8904 VT.getVectorElementType() != MVT::bf16) ||
8905 SrcVT.getVectorElementType() != MVT::f64);
8906
8907 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8908
8909 // Prepare any fixed-length vector operands.
8910 MVT ContainerVT = VT;
8911 SDValue Mask, VL;
8912 if (IsVP) {
8913 Mask = Op.getOperand(1);
8914 VL = Op.getOperand(2);
8915 }
8916 if (VT.isFixedLengthVector()) {
8917 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8918 ContainerVT =
8919 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8920 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8921 if (IsVP) {
8922 MVT MaskVT = getMaskTypeFor(ContainerVT);
8923 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8924 }
8925 }
8926
8927 if (!IsVP)
8928 std::tie(Mask, VL) =
8929 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8930
8931 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8932
8933 if (IsDirectConv) {
8934 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8935 if (VT.isFixedLengthVector())
8936 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8937 return Src;
8938 }
8939
8940 unsigned InterConvOpc =
8941 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8942
8943 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8944 SDValue IntermediateConv =
8945 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8946 SDValue Result =
8947 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8948 if (VT.isFixedLengthVector())
8949 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8950 return Result;
8951}
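// For example (schematic): an f16 -> f64 extend is not a single widening
// step, so it is emitted as f16 -> f32 -> f64 with two FP_EXTEND_VL nodes;
// an f64 -> f16 round goes f64 -> f32 -> f16, where the first narrowing uses
// the round-to-odd conversion (VFNCVT_ROD_VL) so that rounding twice does not
// change the final result.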
8952
8953// Given a scalable vector type and an index into it, returns the type for the
8954// smallest subvector that the index fits in. This can be used to reduce LMUL
8955// for operations like vslidedown.
8956//
8957 // E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
8958static std::optional<MVT>
8959getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8960 const RISCVSubtarget &Subtarget) {
8961 assert(VecVT.isScalableVector());
8962 const unsigned EltSize = VecVT.getScalarSizeInBits();
8963 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8964 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8965 MVT SmallerVT;
8966 if (MaxIdx < MinVLMAX)
8967 SmallerVT = getLMUL1VT(VecVT);
8968 else if (MaxIdx < MinVLMAX * 2)
8969 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8970 else if (MaxIdx < MinVLMAX * 4)
8971 SmallerVT = getLMUL1VT(VecVT)
8972 .getDoubleNumVectorElementsVT()
8973 .getDoubleNumVectorElementsVT();
8974 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8975 return std::nullopt;
8976 return SmallerVT;
8977}
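// Worked example of the bound above, for illustration: with Zvl128b
// (RealMinVLen = 128) and VecVT = nxv4i32, EltSize = 32 so MinVLMAX = 4. A
// MaxIdx of 3 is below MinVLMAX, so the LMUL1 type nxv2i32 is returned and
// the operation can run at LMUL=1 instead of LMUL=2.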
8978
8979// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8980// first position of a vector, and that vector is slid up to the insert index.
8981// By limiting the active vector length to index+1 and merging with the
8982// original vector (with an undisturbed tail policy for elements >= VL), we
8983// achieve the desired result of leaving all elements untouched except the one
8984// at VL-1, which is replaced with the desired value.
8985SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8986 SelectionDAG &DAG) const {
8987 SDLoc DL(Op);
8988 MVT VecVT = Op.getSimpleValueType();
8989 MVT XLenVT = Subtarget.getXLenVT();
8990 SDValue Vec = Op.getOperand(0);
8991 SDValue Val = Op.getOperand(1);
8992 MVT ValVT = Val.getSimpleValueType();
8993 SDValue Idx = Op.getOperand(2);
8994
8995 if (VecVT.getVectorElementType() == MVT::i1) {
8996 // FIXME: For now we just promote to an i8 vector and insert into that,
8997 // but this is probably not optimal.
8998 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8999 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9000 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9001 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9002 }
9003
9004 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9005 ValVT == MVT::bf16) {
9006 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9007 MVT IntVT = VecVT.changeTypeToInteger();
9008 SDValue IntInsert = DAG.getNode(
9009 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9010 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9011 return DAG.getBitcast(VecVT, IntInsert);
9012 }
9013
9014 MVT ContainerVT = VecVT;
9015 // If the operand is a fixed-length vector, convert to a scalable one.
9016 if (VecVT.isFixedLengthVector()) {
9017 ContainerVT = getContainerForFixedLengthVector(VecVT);
9018 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9019 }
9020
9021 // If we know the index we're going to insert at, we can shrink Vec so that
9022 // we're performing the scalar inserts and slideup on a smaller LMUL.
9023 MVT OrigContainerVT = ContainerVT;
9024 SDValue OrigVec = Vec;
9025 SDValue AlignedIdx;
9026 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9027 const unsigned OrigIdx = IdxC->getZExtValue();
9028 // Do we know an upper bound on LMUL?
9029 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9030 DL, DAG, Subtarget)) {
9031 ContainerVT = *ShrunkVT;
9032 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9033 }
9034
9035 // If we're compiling for an exact VLEN value, we can always perform
9036 // the insert in m1 as we can determine the register corresponding to
9037 // the index in the register group.
9038 const MVT M1VT = getLMUL1VT(ContainerVT);
9039 if (auto VLEN = Subtarget.getRealVLen();
9040 VLEN && ContainerVT.bitsGT(M1VT)) {
9041 EVT ElemVT = VecVT.getVectorElementType();
9042 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9043 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9044 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9045 unsigned ExtractIdx =
9046 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9047 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9048 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9049 ContainerVT = M1VT;
9050 }
9051
9052 if (AlignedIdx)
9053 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9054 AlignedIdx);
9055 }
9056
9057 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9058 // Even i64-element vectors on RV32 can be lowered without scalar
9059 // legalization if the most-significant 32 bits of the value are not affected
9060 // by the sign-extension of the lower 32 bits.
9061 // TODO: We could also catch sign extensions of a 32-bit value.
9062 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9063 const auto *CVal = cast<ConstantSDNode>(Val);
9064 if (isInt<32>(CVal->getSExtValue())) {
9065 IsLegalInsert = true;
9066 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9067 }
9068 }
9069
9070 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9071
9072 SDValue ValInVec;
9073
9074 if (IsLegalInsert) {
9075 unsigned Opc =
9076 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9077 if (isNullConstant(Idx)) {
9078 if (!VecVT.isFloatingPoint())
9079 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9080 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9081
9082 if (AlignedIdx)
9083 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9084 Vec, AlignedIdx);
9085 if (!VecVT.isFixedLengthVector())
9086 return Vec;
9087 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9088 }
9089 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9090 } else {
9091 // On RV32, i64-element vectors must be specially handled to place the
9092 // value at element 0, by using two vslide1down instructions in sequence on
9093 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9094 // this.
9095 SDValue ValLo, ValHi;
9096 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9097 MVT I32ContainerVT =
9098 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9099 SDValue I32Mask =
9100 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9101 // Limit the active VL to two.
9102 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9103 // If the Idx is 0 we can insert directly into the vector.
9104 if (isNullConstant(Idx)) {
9105 // First slide in the lo value, then the hi in above it. We use slide1down
9106 // to avoid the register group overlap constraint of vslide1up.
9107 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9108 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9109 // If the source vector is undef don't pass along the tail elements from
9110 // the previous slide1down.
9111 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9112 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9113 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9114 // Bitcast back to the right container type.
9115 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9116
9117 if (AlignedIdx)
9118 ValInVec =
9119 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9120 ValInVec, AlignedIdx);
9121 if (!VecVT.isFixedLengthVector())
9122 return ValInVec;
9123 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9124 }
9125
9126 // First slide in the lo value, then the hi in above it. We use slide1down
9127 // to avoid the register group overlap constraint of vslide1up.
9128 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9129 DAG.getUNDEF(I32ContainerVT),
9130 DAG.getUNDEF(I32ContainerVT), ValLo,
9131 I32Mask, InsertI64VL);
9132 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9133 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9134 I32Mask, InsertI64VL);
9135 // Bitcast back to the right container type.
9136 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9137 }
9138
9139 // Now that the value is in a vector, slide it into position.
9140 SDValue InsertVL =
9141 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9142
9143 // Use tail agnostic policy if Idx is the last index of Vec.
9144 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9145 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9146 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9147 Policy = RISCVII::TAIL_AGNOSTIC;
9148 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9149 Idx, Mask, InsertVL, Policy);
9150
9151 if (AlignedIdx)
9152 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9153 Slideup, AlignedIdx);
9154 if (!VecVT.isFixedLengthVector())
9155 return Slideup;
9156 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9157}
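// For illustration (schematic): inserting %val at index %i of an integer
// vector is built roughly as
//   %v = vmv.s.x undef, %val            ; value placed at element 0
//   vslideup.vx %vec, %v, %i            ; VL = %i + 1, tail undisturbed
// so only element %i of %vec is changed; index 0 instead writes %val into
// element 0 of %vec directly with vmv.s.x / vfmv.s.f.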
9158
9159// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9160// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9161// types this is done using VMV_X_S to allow us to glean information about the
9162// sign bits of the result.
9163SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9164 SelectionDAG &DAG) const {
9165 SDLoc DL(Op);
9166 SDValue Idx = Op.getOperand(1);
9167 SDValue Vec = Op.getOperand(0);
9168 EVT EltVT = Op.getValueType();
9169 MVT VecVT = Vec.getSimpleValueType();
9170 MVT XLenVT = Subtarget.getXLenVT();
9171
9172 if (VecVT.getVectorElementType() == MVT::i1) {
9173 // Use vfirst.m to extract the first bit.
9174 if (isNullConstant(Idx)) {
9175 MVT ContainerVT = VecVT;
9176 if (VecVT.isFixedLengthVector()) {
9177 ContainerVT = getContainerForFixedLengthVector(VecVT);
9178 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9179 }
9180 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9181 SDValue Vfirst =
9182 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9183 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9184 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9185 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9186 }
9187 if (VecVT.isFixedLengthVector()) {
9188 unsigned NumElts = VecVT.getVectorNumElements();
9189 if (NumElts >= 8) {
9190 MVT WideEltVT;
9191 unsigned WidenVecLen;
9192 SDValue ExtractElementIdx;
9193 SDValue ExtractBitIdx;
9194 unsigned MaxEEW = Subtarget.getELen();
9195 MVT LargestEltVT = MVT::getIntegerVT(
9196 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9197 if (NumElts <= LargestEltVT.getSizeInBits()) {
9198 assert(isPowerOf2_32(NumElts) &&
9199 "the number of elements should be power of 2");
9200 WideEltVT = MVT::getIntegerVT(NumElts);
9201 WidenVecLen = 1;
9202 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9203 ExtractBitIdx = Idx;
9204 } else {
9205 WideEltVT = LargestEltVT;
9206 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9207 // extract element index = index / element width
9208 ExtractElementIdx = DAG.getNode(
9209 ISD::SRL, DL, XLenVT, Idx,
9210 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9211 // mask bit index = index % element width
9212 ExtractBitIdx = DAG.getNode(
9213 ISD::AND, DL, XLenVT, Idx,
9214 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9215 }
9216 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9217 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9218 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9219 Vec, ExtractElementIdx);
9220 // Extract the bit from GPR.
9221 SDValue ShiftRight =
9222 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9223 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9224 DAG.getConstant(1, DL, XLenVT));
9225 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9226 }
9227 }
9228 // Otherwise, promote to an i8 vector and extract from that.
9229 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9230 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9231 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9232 }
9233
9234 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9235 EltVT == MVT::bf16) {
9236 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9237 MVT IntVT = VecVT.changeTypeToInteger();
9238 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9239 SDValue IntExtract =
9240 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9241 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9242 }
9243
9244 // If this is a fixed vector, we need to convert it to a scalable vector.
9245 MVT ContainerVT = VecVT;
9246 if (VecVT.isFixedLengthVector()) {
9247 ContainerVT = getContainerForFixedLengthVector(VecVT);
9248 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9249 }
9250
9251 // If we're compiling for an exact VLEN value and we have a known
9252 // constant index, we can always perform the extract in m1 (or
9253 // smaller) as we can determine the register corresponding to
9254 // the index in the register group.
9255 const auto VLen = Subtarget.getRealVLen();
9256 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9257 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9258 MVT M1VT = getLMUL1VT(ContainerVT);
9259 unsigned OrigIdx = IdxC->getZExtValue();
9260 EVT ElemVT = VecVT.getVectorElementType();
9261 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9262 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9263 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9264 unsigned ExtractIdx =
9265 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9266 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9267 DAG.getVectorIdxConstant(ExtractIdx, DL));
9268 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9269 ContainerVT = M1VT;
9270 }
9271
9272 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9273 // contains our index.
9274 std::optional<uint64_t> MaxIdx;
9275 if (VecVT.isFixedLengthVector())
9276 MaxIdx = VecVT.getVectorNumElements() - 1;
9277 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9278 MaxIdx = IdxC->getZExtValue();
9279 if (MaxIdx) {
9280 if (auto SmallerVT =
9281 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9282 ContainerVT = *SmallerVT;
9283 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9284 DAG.getConstant(0, DL, XLenVT));
9285 }
9286 }
9287
9288 // If after narrowing, the required slide is still greater than LMUL2,
9289 // fallback to generic expansion and go through the stack. This is done
9290 // for a subtle reason: extracting *all* elements out of a vector is
9291 // widely expected to be linear in vector size, but because vslidedown
9292 // is linear in LMUL, performing N extracts using vslidedown becomes
9293 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9294 // seems to have the same problem (the store is linear in LMUL), but the
9295 // generic expansion *memoizes* the store, and thus for many extracts of
9296 // the same vector we end up with one store and a bunch of loads.
9297 // TODO: We don't have the same code for insert_vector_elt because we
9298 // have BUILD_VECTOR and handle the degenerate case there. Should we
9299 // consider adding an inverse BUILD_VECTOR node?
9300 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9301 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9302 return SDValue();
9303
9304 // If the index is 0, the vector is already in the right position.
9305 if (!isNullConstant(Idx)) {
9306 // Use a VL of 1 to avoid processing more elements than we need.
9307 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9308 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9309 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9310 }
9311
9312 if (!EltVT.isInteger()) {
9313 // Floating-point extracts are handled in TableGen.
9314 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9315 DAG.getVectorIdxConstant(0, DL));
9316 }
9317
9318 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9319 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9320}
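// For illustration (schematic, ignoring the LMUL-shrinking above): extracting
// element %i of an integer vector is roughly
//   vslidedown.vx %v, %vec, %i          ; with VL = 1
//   vmv.x.s %r, %v
// followed by a truncate to the element type; index 0 skips the slide, and
// floating-point extracts of the slid-down element are matched by TableGen
// patterns instead.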
9321
9322// Some RVV intrinsics may claim that they want an integer operand to be
9323// promoted or expanded.
9324static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9325 const RISCVSubtarget &Subtarget) {
9326 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9327 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9328 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9329 "Unexpected opcode");
9330
9331 if (!Subtarget.hasVInstructions())
9332 return SDValue();
9333
9334 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9335 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9336 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9337
9338 SDLoc DL(Op);
9339
9340 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9341 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9342 if (!II || !II->hasScalarOperand())
9343 return SDValue();
9344
9345 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9346 assert(SplatOp < Op.getNumOperands());
9347
9348 SmallVector<SDValue, 8> Operands(Op->ops());
9349 SDValue &ScalarOp = Operands[SplatOp];
9350 MVT OpVT = ScalarOp.getSimpleValueType();
9351 MVT XLenVT = Subtarget.getXLenVT();
9352
9353 // If this isn't a scalar, or its type is XLenVT we're done.
9354 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9355 return SDValue();
9356
9357 // Simplest case is that the operand needs to be promoted to XLenVT.
9358 if (OpVT.bitsLT(XLenVT)) {
9359 // If the operand is a constant, sign extend to increase our chances
9360 // of being able to use a .vi instruction. ANY_EXTEND would become a
9361 // zero extend and the simm5 check in isel would fail.
9362 // FIXME: Should we ignore the upper bits in isel instead?
9363 unsigned ExtOpc =
9364 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9365 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9366 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9367 }
9368
9369 // Use the previous operand to get the vXi64 VT. The result might be a mask
9370 // VT for compares. Using the previous operand assumes that the previous
9371 // operand will never have a smaller element size than a scalar operand and
9372 // that a widening operation never uses SEW=64.
9373 // NOTE: If this fails the below assert, we can probably just find the
9374 // element count from any operand or result and use it to construct the VT.
9375 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9376 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9377
9378 // The more complex case is when the scalar is larger than XLenVT.
9379 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9380 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9381
9382 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9383 // instruction to sign-extend since SEW>XLEN.
9384 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9385 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9386 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9387 }
9388
9389 switch (IntNo) {
9390 case Intrinsic::riscv_vslide1up:
9391 case Intrinsic::riscv_vslide1down:
9392 case Intrinsic::riscv_vslide1up_mask:
9393 case Intrinsic::riscv_vslide1down_mask: {
9394 // We need to special case these when the scalar is larger than XLen.
9395 unsigned NumOps = Op.getNumOperands();
9396 bool IsMasked = NumOps == 7;
9397
9398 // Convert the vector source to the equivalent nxvXi32 vector.
9399 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9400 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9401 SDValue ScalarLo, ScalarHi;
9402 std::tie(ScalarLo, ScalarHi) =
9403 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9404
9405 // Double the VL since we halved SEW.
9406 SDValue AVL = getVLOperand(Op);
9407 SDValue I32VL;
9408
9409 // Optimize for constant AVL
9410 if (isa<ConstantSDNode>(AVL)) {
9411 const auto [MinVLMAX, MaxVLMAX] =
9412 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9413
9414 uint64_t AVLInt = AVL->getAsZExtVal();
9415 if (AVLInt <= MinVLMAX) {
9416 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9417 } else if (AVLInt >= 2 * MaxVLMAX) {
9418 // Just set vl to VLMAX in this situation
9419 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9420 } else {
9421 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9422 // is related to the hardware implementation.
9423 // So let the following code handle it.
9424 }
9425 }
9426 if (!I32VL) {
9427 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9428 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9429 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9430 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9431 SDValue SETVL =
9432 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9433 // Use a vsetvli instruction to get the actually used length, which is
9434 // related to the hardware implementation.
9435 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9436 SEW, LMUL);
9437 I32VL =
9438 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9439 }
9440
9441 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9442
9443 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9444 // instructions.
9445 SDValue Passthru;
9446 if (IsMasked)
9447 Passthru = DAG.getUNDEF(I32VT);
9448 else
9449 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9450
9451 if (IntNo == Intrinsic::riscv_vslide1up ||
9452 IntNo == Intrinsic::riscv_vslide1up_mask) {
9453 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9454 ScalarHi, I32Mask, I32VL);
9455 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9456 ScalarLo, I32Mask, I32VL);
9457 } else {
9458 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9459 ScalarLo, I32Mask, I32VL);
9460 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9461 ScalarHi, I32Mask, I32VL);
9462 }
9463
9464 // Convert back to nxvXi64.
9465 Vec = DAG.getBitcast(VT, Vec);
9466
9467 if (!IsMasked)
9468 return Vec;
9469 // Apply mask after the operation.
9470 SDValue Mask = Operands[NumOps - 3];
9471 SDValue MaskedOff = Operands[1];
9472 // Assume Policy operand is the last operand.
9473 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9474 // We don't need to select maskedoff if it's undef.
9475 if (MaskedOff.isUndef())
9476 return Vec;
9477 // TAMU
9478 if (Policy == RISCVII::TAIL_AGNOSTIC)
9479 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9480 DAG.getUNDEF(VT), AVL);
9481 // TUMA or TUMU: currently we always emit the tumu policy regardless of tuma.
9482 // It's fine because vmerge does not care about the mask policy.
9483 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9484 MaskedOff, AVL);
9485 }
9486 }
9487
9488 // We need to convert the scalar to a splat vector.
9489 SDValue VL = getVLOperand(Op);
9490 assert(VL.getValueType() == XLenVT);
9491 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9492 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9493}
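// For illustration: on RV32 a vslide1up/vslide1down intrinsic with an i64
// scalar cannot pass the scalar in a single GPR, so the code above bitcasts
// the source to the equivalent i32 vector, doubles VL (using a vsetvli when
// the AVL is not a small constant), and performs two SEW=32 slide1up steps,
// high half first and then low half (the slide1down case uses the opposite
// order), before bitcasting back to the i64 element type.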
9494
9495// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9496// scalable vector llvm.get.vector.length for now.
9497//
9498// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9499// (vscale * VF). The vscale and VF are independent of element width. We use
9500// SEW=8 for the vsetvli because it is the only element width that supports all
9501 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9502 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9503// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9504// SEW and LMUL are better for the surrounding vector instructions.
9505static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9506 const RISCVSubtarget &Subtarget) {
9507 MVT XLenVT = Subtarget.getXLenVT();
9508
9509 // The smallest LMUL is only valid for the smallest element width.
9510 const unsigned ElementWidth = 8;
9511
9512 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9513 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9514 // We don't support VF==1 with ELEN==32.
9515 [[maybe_unused]] unsigned MinVF =
9516 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9517
9518 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9519 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9520 "Unexpected VF");
9521
9522 bool Fractional = VF < LMul1VF;
9523 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9524 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9525 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9526
9527 SDLoc DL(N);
9528
9529 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9530 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9531
9532 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9533
9534 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9535 SDValue Res =
9536 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9537 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9538}
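// Worked example, for illustration: for a scalable VF of 4, LMul1VF is
// RVVBitsPerBlock / 8 = 8, so the LMUL is fractional with LMulVal = 2 (mf2).
// The emitted "vsetvli e8, mf2" has VLMax = (VLEN/8)/2 = VLEN/16, which is
// exactly vscale * 4, the requested element count.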
9539
9540static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9541 const RISCVSubtarget &Subtarget) {
9542 SDValue Op0 = N->getOperand(1);
9543 MVT OpVT = Op0.getSimpleValueType();
9544 MVT ContainerVT = OpVT;
9545 if (OpVT.isFixedLengthVector()) {
9546 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9547 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9548 }
9549 MVT XLenVT = Subtarget.getXLenVT();
9550 SDLoc DL(N);
9551 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9552 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9553 if (isOneConstant(N->getOperand(2)))
9554 return Res;
9555
9556 // Convert -1 to VL.
9557 SDValue Setcc =
9558 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9559 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9560 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9561}
9562
9563static inline void promoteVCIXScalar(const SDValue &Op,
9564 SmallVectorImpl<SDValue> &Operands,
9565 SelectionDAG &DAG) {
9566 const RISCVSubtarget &Subtarget =
9567 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9568
9569 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9570 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9571 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9572 SDLoc DL(Op);
9573
9574 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9575 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9576 if (!II || !II->hasScalarOperand())
9577 return;
9578
9579 unsigned SplatOp = II->ScalarOperand + 1;
9580 assert(SplatOp < Op.getNumOperands());
9581
9582 SDValue &ScalarOp = Operands[SplatOp];
9583 MVT OpVT = ScalarOp.getSimpleValueType();
9584 MVT XLenVT = Subtarget.getXLenVT();
9585
9586 // The code below is partially copied from lowerVectorIntrinsicScalars.
9587 // If this isn't a scalar, or its type is XLenVT we're done.
9588 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9589 return;
9590
9591 // Manually emit promote operation for scalar operation.
9592 if (OpVT.bitsLT(XLenVT)) {
9593 unsigned ExtOpc =
9594 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9595 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9596 }
9597}
9598
9599static void processVCIXOperands(SDValue &OrigOp,
9600 SmallVectorImpl<SDValue> &Operands,
9601 SelectionDAG &DAG) {
9602 promoteVCIXScalar(OrigOp, Operands, DAG);
9603 const RISCVSubtarget &Subtarget =
9604 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9605 for (SDValue &V : Operands) {
9606 EVT ValType = V.getValueType();
9607 if (ValType.isVector() && ValType.isFloatingPoint()) {
9608 MVT InterimIVT =
9609 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9610 ValType.getVectorElementCount());
9611 V = DAG.getBitcast(InterimIVT, V);
9612 }
9613 if (ValType.isFixedLengthVector()) {
9614 MVT OpContainerVT = getContainerForFixedLengthVector(
9615 DAG, V.getSimpleValueType(), Subtarget);
9616 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9617 }
9618 }
9619}
9620
9621// LMUL * VLEN should be greater than or equal to EGS * SEW
9622static inline bool isValidEGW(int EGS, EVT VT,
9623 const RISCVSubtarget &Subtarget) {
9624 return (Subtarget.getRealMinVLen() *
9625 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9626 EGS * VT.getScalarSizeInBits();
9627}
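// Worked example, for illustration: with EGS = 4, a nxv4i32 operand
// (128 known-minimum bits) and RealMinVLen = 128, the left-hand side is
// (128 * 128) / RVVBitsPerBlock = 256 and the right-hand side is 4 * 32 =
// 128, so the element-group-width requirement holds.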
9628
9629SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9630 SelectionDAG &DAG) const {
9631 unsigned IntNo = Op.getConstantOperandVal(0);
9632 SDLoc DL(Op);
9633 MVT XLenVT = Subtarget.getXLenVT();
9634
9635 switch (IntNo) {
9636 default:
9637 break; // Don't custom lower most intrinsics.
9638 case Intrinsic::riscv_tuple_insert: {
9639 SDValue Vec = Op.getOperand(1);
9640 SDValue SubVec = Op.getOperand(2);
9641 SDValue Index = Op.getOperand(3);
9642
9643 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9644 SubVec, Index);
9645 }
9646 case Intrinsic::riscv_tuple_extract: {
9647 SDValue Vec = Op.getOperand(1);
9648 SDValue Index = Op.getOperand(2);
9649
9650 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9651 Index);
9652 }
9653 case Intrinsic::thread_pointer: {
9654 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9655 return DAG.getRegister(RISCV::X4, PtrVT);
9656 }
9657 case Intrinsic::riscv_orc_b:
9658 case Intrinsic::riscv_brev8:
9659 case Intrinsic::riscv_sha256sig0:
9660 case Intrinsic::riscv_sha256sig1:
9661 case Intrinsic::riscv_sha256sum0:
9662 case Intrinsic::riscv_sha256sum1:
9663 case Intrinsic::riscv_sm3p0:
9664 case Intrinsic::riscv_sm3p1: {
9665 unsigned Opc;
9666 switch (IntNo) {
9667 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9668 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9669 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9670 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9671 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9672 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9673 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9674 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9675 }
9676
9677 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9678 }
9679 case Intrinsic::riscv_sm4ks:
9680 case Intrinsic::riscv_sm4ed: {
9681 unsigned Opc =
9682 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9683
9684 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9685 Op.getOperand(3));
9686 }
9687 case Intrinsic::riscv_zip:
9688 case Intrinsic::riscv_unzip: {
9689 unsigned Opc =
9690 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9691 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9692 }
9693 case Intrinsic::riscv_mopr:
9694 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9695 Op.getOperand(2));
9696
9697 case Intrinsic::riscv_moprr: {
9698 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9699 Op.getOperand(2), Op.getOperand(3));
9700 }
9701 case Intrinsic::riscv_clmul:
9702 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9703 Op.getOperand(2));
9704 case Intrinsic::riscv_clmulh:
9705 case Intrinsic::riscv_clmulr: {
9706 unsigned Opc =
9707 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9708 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9709 }
9710 case Intrinsic::experimental_get_vector_length:
9711 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9712 case Intrinsic::experimental_cttz_elts:
9713 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9714 case Intrinsic::riscv_vmv_x_s: {
9715 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9716 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9717 }
9718 case Intrinsic::riscv_vfmv_f_s:
9719 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9720 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9721 case Intrinsic::riscv_vmv_v_x:
9722 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9723 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9724 Subtarget);
9725 case Intrinsic::riscv_vfmv_v_f:
9726 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9727 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9728 case Intrinsic::riscv_vmv_s_x: {
9729 SDValue Scalar = Op.getOperand(2);
9730
9731 if (Scalar.getValueType().bitsLE(XLenVT)) {
9732 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9733 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9734 Op.getOperand(1), Scalar, Op.getOperand(3));
9735 }
9736
9737 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9738
9739 // This is an i64 value that lives in two scalar registers. We have to
9740    // insert this in a convoluted way. First we build a vXi64 splat containing
9741 // the two values that we assemble using some bit math. Next we'll use
9742 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9743 // to merge element 0 from our splat into the source vector.
9744 // FIXME: This is probably not the best way to do this, but it is
9745 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9746 // point.
9747 // sw lo, (a0)
9748 // sw hi, 4(a0)
9749 // vlse vX, (a0)
9750 //
9751 // vid.v vVid
9752 // vmseq.vx mMask, vVid, 0
9753 // vmerge.vvm vDest, vSrc, vVal, mMask
9754 MVT VT = Op.getSimpleValueType();
9755 SDValue Vec = Op.getOperand(1);
9756 SDValue VL = getVLOperand(Op);
9757
9758 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9759 if (Op.getOperand(1).isUndef())
9760 return SplattedVal;
9761 SDValue SplattedIdx =
9762 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9763 DAG.getConstant(0, DL, MVT::i32), VL);
9764
9765 MVT MaskVT = getMaskTypeFor(VT);
9766 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9767 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9768 SDValue SelectCond =
9769 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9770 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9771 DAG.getUNDEF(MaskVT), Mask, VL});
9772 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9773 Vec, DAG.getUNDEF(VT), VL);
9774 }
9775 case Intrinsic::riscv_vfmv_s_f:
9776 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9777 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9778 // EGS * EEW >= 128 bits
9779 case Intrinsic::riscv_vaesdf_vv:
9780 case Intrinsic::riscv_vaesdf_vs:
9781 case Intrinsic::riscv_vaesdm_vv:
9782 case Intrinsic::riscv_vaesdm_vs:
9783 case Intrinsic::riscv_vaesef_vv:
9784 case Intrinsic::riscv_vaesef_vs:
9785 case Intrinsic::riscv_vaesem_vv:
9786 case Intrinsic::riscv_vaesem_vs:
9787 case Intrinsic::riscv_vaeskf1:
9788 case Intrinsic::riscv_vaeskf2:
9789 case Intrinsic::riscv_vaesz_vs:
9790 case Intrinsic::riscv_vsm4k:
9791 case Intrinsic::riscv_vsm4r_vv:
9792 case Intrinsic::riscv_vsm4r_vs: {
9793 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9794 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9795 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9796 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9797 return Op;
9798 }
9799 // EGS * EEW >= 256 bits
9800 case Intrinsic::riscv_vsm3c:
9801 case Intrinsic::riscv_vsm3me: {
9802 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9803 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9804 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9805 return Op;
9806 }
9807 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9808 case Intrinsic::riscv_vsha2ch:
9809 case Intrinsic::riscv_vsha2cl:
9810 case Intrinsic::riscv_vsha2ms: {
9811 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9812 !Subtarget.hasStdExtZvknhb())
9813 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9814 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9815 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9816 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9817 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9818 return Op;
9819 }
9820 case Intrinsic::riscv_sf_vc_v_x:
9821 case Intrinsic::riscv_sf_vc_v_i:
9822 case Intrinsic::riscv_sf_vc_v_xv:
9823 case Intrinsic::riscv_sf_vc_v_iv:
9824 case Intrinsic::riscv_sf_vc_v_vv:
9825 case Intrinsic::riscv_sf_vc_v_fv:
9826 case Intrinsic::riscv_sf_vc_v_xvv:
9827 case Intrinsic::riscv_sf_vc_v_ivv:
9828 case Intrinsic::riscv_sf_vc_v_vvv:
9829 case Intrinsic::riscv_sf_vc_v_fvv:
9830 case Intrinsic::riscv_sf_vc_v_xvw:
9831 case Intrinsic::riscv_sf_vc_v_ivw:
9832 case Intrinsic::riscv_sf_vc_v_vvw:
9833 case Intrinsic::riscv_sf_vc_v_fvw: {
9834 MVT VT = Op.getSimpleValueType();
9835
9836 SmallVector<SDValue> Operands{Op->op_values()};
9837    processVCIXOperands(Op, Operands, DAG);
9838
9839 MVT RetVT = VT;
9840 if (VT.isFixedLengthVector())
9841      RetVT = getContainerForFixedLengthVector(VT);
9842    else if (VT.isFloatingPoint())
9843      RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9844                               VT.getVectorElementCount());
9845
9846 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9847
9848 if (VT.isFixedLengthVector())
9849 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9850 else if (VT.isFloatingPoint())
9851 NewNode = DAG.getBitcast(VT, NewNode);
9852
9853 if (Op == NewNode)
9854 break;
9855
9856 return NewNode;
9857 }
9858 }
9859
9860 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9861}
9862
9863static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9864                                    unsigned Type) {
9865 SDLoc DL(Op);
9866 SmallVector<SDValue> Operands{Op->op_values()};
9867 Operands.erase(Operands.begin() + 1);
9868
9869 const RISCVSubtarget &Subtarget =
9870      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9871  MVT VT = Op.getSimpleValueType();
9872 MVT RetVT = VT;
9873 MVT FloatVT = VT;
9874
9875 if (VT.isFloatingPoint()) {
9876 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9877                             VT.getVectorElementCount());
9878    FloatVT = RetVT;
9879 }
9880 if (VT.isFixedLengthVector())
9881    RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9882                                             Subtarget);
9883
9884  processVCIXOperands(Op, Operands, DAG);
9885
9886 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9887 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9888 SDValue Chain = NewNode.getValue(1);
9889
9890 if (VT.isFixedLengthVector())
9891 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9892 if (VT.isFloatingPoint())
9893 NewNode = DAG.getBitcast(VT, NewNode);
9894
9895 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9896
9897 return NewNode;
9898}
9899
9900static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9901                                  unsigned Type) {
9902 SmallVector<SDValue> Operands{Op->op_values()};
9903 Operands.erase(Operands.begin() + 1);
9904  processVCIXOperands(Op, Operands, DAG);
9905
9906 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9907}
9908
9909SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9910 SelectionDAG &DAG) const {
9911 unsigned IntNo = Op.getConstantOperandVal(1);
9912 switch (IntNo) {
9913 default:
9914 break;
9915 case Intrinsic::riscv_seg2_load:
9916 case Intrinsic::riscv_seg3_load:
9917 case Intrinsic::riscv_seg4_load:
9918 case Intrinsic::riscv_seg5_load:
9919 case Intrinsic::riscv_seg6_load:
9920 case Intrinsic::riscv_seg7_load:
9921 case Intrinsic::riscv_seg8_load: {
9922 SDLoc DL(Op);
9923 static const Intrinsic::ID VlsegInts[7] = {
9924 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9925 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9926 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9927 Intrinsic::riscv_vlseg8};
9928 unsigned NF = Op->getNumValues() - 1;
9929 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9930 MVT XLenVT = Subtarget.getXLenVT();
9931 MVT VT = Op->getSimpleValueType(0);
9932 MVT ContainerVT = getContainerForFixedLengthVector(VT);
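    // The vector tuple type covers NF fields, so its total size is
    // NF * (minimum element count of the container) * SEW bits.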
9933 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
9934 ContainerVT.getScalarSizeInBits();
9935 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
9936
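    // For the fixed-length form, the AVL is simply the number of elements in
    // each result vector.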
9937 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9938 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9939 auto *Load = cast<MemIntrinsicSDNode>(Op);
9940
9941 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
9942 SDValue Ops[] = {
9943 Load->getChain(),
9944 IntID,
9945 DAG.getUNDEF(VecTupTy),
9946 Op.getOperand(2),
9947 VL,
9948 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
9949 SDValue Result =
9950        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9951                                Load->getMemoryVT(), Load->getMemOperand());
9952    SmallVector<SDValue, 9> Results;
9953    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
9954 SDValue SubVec =
9955 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
9956 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
9957 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
9958 }
9959 Results.push_back(Result.getValue(1));
9960 return DAG.getMergeValues(Results, DL);
9961 }
9962  case Intrinsic::riscv_sf_vc_v_x_se:
9963    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9964  case Intrinsic::riscv_sf_vc_v_i_se:
9965    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9966  case Intrinsic::riscv_sf_vc_v_xv_se:
9967    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9968  case Intrinsic::riscv_sf_vc_v_iv_se:
9969    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9970  case Intrinsic::riscv_sf_vc_v_vv_se:
9971    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9972  case Intrinsic::riscv_sf_vc_v_fv_se:
9973    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9974  case Intrinsic::riscv_sf_vc_v_xvv_se:
9975    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9976  case Intrinsic::riscv_sf_vc_v_ivv_se:
9977    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9978  case Intrinsic::riscv_sf_vc_v_vvv_se:
9979    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9980  case Intrinsic::riscv_sf_vc_v_fvv_se:
9981    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9982  case Intrinsic::riscv_sf_vc_v_xvw_se:
9983    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9984  case Intrinsic::riscv_sf_vc_v_ivw_se:
9985    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9986  case Intrinsic::riscv_sf_vc_v_vvw_se:
9987    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9988  case Intrinsic::riscv_sf_vc_v_fvw_se:
9989    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9990  }
9991
9992 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9993}
9994
9995SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9996 SelectionDAG &DAG) const {
9997 unsigned IntNo = Op.getConstantOperandVal(1);
9998 switch (IntNo) {
9999 default:
10000 break;
10001 case Intrinsic::riscv_seg2_store:
10002 case Intrinsic::riscv_seg3_store:
10003 case Intrinsic::riscv_seg4_store:
10004 case Intrinsic::riscv_seg5_store:
10005 case Intrinsic::riscv_seg6_store:
10006 case Intrinsic::riscv_seg7_store:
10007 case Intrinsic::riscv_seg8_store: {
10008 SDLoc DL(Op);
10009 static const Intrinsic::ID VssegInts[] = {
10010 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10011 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10012 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10013 Intrinsic::riscv_vsseg8};
10014 // Operands are (chain, int_id, vec*, ptr, vl)
10015 unsigned NF = Op->getNumOperands() - 4;
10016 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10017 MVT XLenVT = Subtarget.getXLenVT();
10018 MVT VT = Op->getOperand(2).getSimpleValueType();
10019 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10020 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10021 ContainerVT.getScalarSizeInBits();
10022 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10023
10024 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10025 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10026 SDValue Ptr = Op->getOperand(NF + 2);
10027
10028 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10029
10030 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10031 for (unsigned i = 0; i < NF; i++)
10032 StoredVal = DAG.getNode(
10033 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10034          convertToScalableVector(
10035              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10036 DAG.getVectorIdxConstant(i, DL));
10037
10038 SDValue Ops[] = {
10039 FixedIntrinsic->getChain(),
10040 IntID,
10041 StoredVal,
10042 Ptr,
10043 VL,
10044 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10045
10046 return DAG.getMemIntrinsicNode(
10047 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10048 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10049 }
10050  case Intrinsic::riscv_sf_vc_xv_se:
10051    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10052  case Intrinsic::riscv_sf_vc_iv_se:
10053    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10054  case Intrinsic::riscv_sf_vc_vv_se:
10055    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10056  case Intrinsic::riscv_sf_vc_fv_se:
10057    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10058  case Intrinsic::riscv_sf_vc_xvv_se:
10059    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10060  case Intrinsic::riscv_sf_vc_ivv_se:
10061    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10062  case Intrinsic::riscv_sf_vc_vvv_se:
10063    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10064  case Intrinsic::riscv_sf_vc_fvv_se:
10065    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10066  case Intrinsic::riscv_sf_vc_xvw_se:
10067    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10068  case Intrinsic::riscv_sf_vc_ivw_se:
10069    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10070  case Intrinsic::riscv_sf_vc_vvw_se:
10071    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10072  case Intrinsic::riscv_sf_vc_fvw_se:
10073    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10074  }
10075
10076 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10077}
10078
10079static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10080 switch (ISDOpcode) {
10081 default:
10082 llvm_unreachable("Unhandled reduction");
10083  case ISD::VP_REDUCE_ADD:
10084  case ISD::VECREDUCE_ADD:
10085    return RISCVISD::VECREDUCE_ADD_VL;
10086  case ISD::VP_REDUCE_UMAX:
10087  case ISD::VECREDUCE_UMAX:
10088    return RISCVISD::VECREDUCE_UMAX_VL;
10089  case ISD::VP_REDUCE_SMAX:
10090  case ISD::VECREDUCE_SMAX:
10091    return RISCVISD::VECREDUCE_SMAX_VL;
10092  case ISD::VP_REDUCE_UMIN:
10093  case ISD::VECREDUCE_UMIN:
10094    return RISCVISD::VECREDUCE_UMIN_VL;
10095  case ISD::VP_REDUCE_SMIN:
10096  case ISD::VECREDUCE_SMIN:
10097    return RISCVISD::VECREDUCE_SMIN_VL;
10098  case ISD::VP_REDUCE_AND:
10099  case ISD::VECREDUCE_AND:
10100    return RISCVISD::VECREDUCE_AND_VL;
10101  case ISD::VP_REDUCE_OR:
10102  case ISD::VECREDUCE_OR:
10103    return RISCVISD::VECREDUCE_OR_VL;
10104  case ISD::VP_REDUCE_XOR:
10105  case ISD::VECREDUCE_XOR:
10106    return RISCVISD::VECREDUCE_XOR_VL;
10107  case ISD::VP_REDUCE_FADD:
10108    return RISCVISD::VECREDUCE_FADD_VL;
10109  case ISD::VP_REDUCE_SEQ_FADD:
10110    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10111  case ISD::VP_REDUCE_FMAX:
10112  case ISD::VP_REDUCE_FMAXIMUM:
10113    return RISCVISD::VECREDUCE_FMAX_VL;
10114  case ISD::VP_REDUCE_FMIN:
10115  case ISD::VP_REDUCE_FMINIMUM:
10116    return RISCVISD::VECREDUCE_FMIN_VL;
10117  }
10118
10119}
10120
10121SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10122 SelectionDAG &DAG,
10123 bool IsVP) const {
10124 SDLoc DL(Op);
10125 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10126 MVT VecVT = Vec.getSimpleValueType();
10127 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10128 Op.getOpcode() == ISD::VECREDUCE_OR ||
10129 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10130 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10131 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10132 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10133 "Unexpected reduction lowering");
10134
10135 MVT XLenVT = Subtarget.getXLenVT();
10136
10137 MVT ContainerVT = VecVT;
10138 if (VecVT.isFixedLengthVector()) {
10139 ContainerVT = getContainerForFixedLengthVector(VecVT);
10140 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10141 }
10142
10143 SDValue Mask, VL;
10144 if (IsVP) {
10145 Mask = Op.getOperand(2);
10146 VL = Op.getOperand(3);
10147 } else {
10148 std::tie(Mask, VL) =
10149 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10150 }
10151
10152  ISD::CondCode CC;
10153  switch (Op.getOpcode()) {
10154 default:
10155 llvm_unreachable("Unhandled reduction");
10156 case ISD::VECREDUCE_AND:
10157 case ISD::VP_REDUCE_AND: {
10158 // vcpop ~x == 0
10159 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10160 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10161 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10162 CC = ISD::SETEQ;
10163 break;
10164 }
10165 case ISD::VECREDUCE_OR:
10166 case ISD::VP_REDUCE_OR:
10167 // vcpop x != 0
10168 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10169 CC = ISD::SETNE;
10170 break;
10171 case ISD::VECREDUCE_XOR:
10172 case ISD::VP_REDUCE_XOR: {
10173 // ((vcpop x) & 1) != 0
10174 SDValue One = DAG.getConstant(1, DL, XLenVT);
10175 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10176 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10177 CC = ISD::SETNE;
10178 break;
10179 }
10180 }
10181
10182 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10183 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10184 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10185
10186 if (!IsVP)
10187 return SetCC;
10188
10189 // Now include the start value in the operation.
10190 // Note that we must return the start value when no elements are operated
10191 // upon. The vcpop instructions we've emitted in each case above will return
10192 // 0 for an inactive vector, and so we've already received the neutral value:
10193 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10194 // can simply include the start value.
10195 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10196 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10197}
10198
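// An AVL is known to be non-zero if it is the X0 sentinel (which requests
// VLMAX) or a constant of at least one.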
10199static bool isNonZeroAVL(SDValue AVL) {
10200 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10201 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10202 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10203 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10204}
10205
10206/// Helper to lower a reduction sequence of the form:
10207/// scalar = reduce_op vec, scalar_start
10208static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10209 SDValue StartValue, SDValue Vec, SDValue Mask,
10210 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10211 const RISCVSubtarget &Subtarget) {
10212 const MVT VecVT = Vec.getSimpleValueType();
10213 const MVT M1VT = getLMUL1VT(VecVT);
10214 const MVT XLenVT = Subtarget.getXLenVT();
10215 const bool NonZeroAVL = isNonZeroAVL(VL);
10216
10217 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10218 // or the original VT if fractional.
10219 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10220 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10221 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10222 // be the result of the reduction operation.
10223 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10224 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10225 DAG, Subtarget);
10226 if (M1VT != InnerVT)
10227 InitialValue =
10228 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10229 InitialValue, DAG.getVectorIdxConstant(0, DL));
10230 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10231 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10232 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10233 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10234 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10235 DAG.getVectorIdxConstant(0, DL));
10236}
10237
10238SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10239 SelectionDAG &DAG) const {
10240 SDLoc DL(Op);
10241 SDValue Vec = Op.getOperand(0);
10242 EVT VecEVT = Vec.getValueType();
10243
10244 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10245
10246 // Due to ordering in legalize types we may have a vector type that needs to
10247 // be split. Do that manually so we can get down to a legal type.
10248 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10249         TargetLowering::TypeSplitVector) {
10250    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10251 VecEVT = Lo.getValueType();
10252 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10253 }
10254
10255 // TODO: The type may need to be widened rather than split. Or widened before
10256 // it can be split.
10257 if (!isTypeLegal(VecEVT))
10258 return SDValue();
10259
10260 MVT VecVT = VecEVT.getSimpleVT();
10261 MVT VecEltVT = VecVT.getVectorElementType();
10262 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10263
10264 MVT ContainerVT = VecVT;
10265 if (VecVT.isFixedLengthVector()) {
10266 ContainerVT = getContainerForFixedLengthVector(VecVT);
10267 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10268 }
10269
10270 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10271
10272 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
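  // These operations are idempotent (x op x == x), so the first vector element
  // can double as the start value; this avoids materializing the neutral
  // element as a separate scalar constant.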
10273 switch (BaseOpc) {
10274 case ISD::AND:
10275 case ISD::OR:
10276 case ISD::UMAX:
10277 case ISD::UMIN:
10278 case ISD::SMAX:
10279 case ISD::SMIN:
10280 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10281 DAG.getVectorIdxConstant(0, DL));
10282 }
10283 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10284 Mask, VL, DL, DAG, Subtarget);
10285}
10286
10287// Given a reduction op, this function returns the matching reduction opcode,
10288// the vector SDValue and the scalar SDValue required to lower this to a
10289// RISCVISD node.
10290static std::tuple<unsigned, SDValue, SDValue>
10292 const RISCVSubtarget &Subtarget) {
10293 SDLoc DL(Op);
10294 auto Flags = Op->getFlags();
10295 unsigned Opcode = Op.getOpcode();
10296 switch (Opcode) {
10297 default:
10298 llvm_unreachable("Unhandled reduction");
10299 case ISD::VECREDUCE_FADD: {
10300 // Use positive zero if we can. It is cheaper to materialize.
10301 SDValue Zero =
10302 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10303 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10304 }
10305  case ISD::VECREDUCE_SEQ_FADD:
10306    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10307                           Op.getOperand(0));
10308  case ISD::VECREDUCE_FMINIMUM:
10309  case ISD::VECREDUCE_FMAXIMUM:
10310  case ISD::VECREDUCE_FMIN:
10311  case ISD::VECREDUCE_FMAX: {
10312 SDValue Front =
10313 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10314 DAG.getVectorIdxConstant(0, DL));
10315    unsigned RVVOpc =
10316        (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10317            ? RISCVISD::VECREDUCE_FMIN_VL
10318            : RISCVISD::VECREDUCE_FMAX_VL;
10319    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10320 }
10321 }
10322}
10323
10324SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10325 SelectionDAG &DAG) const {
10326 SDLoc DL(Op);
10327 MVT VecEltVT = Op.getSimpleValueType();
10328
10329 unsigned RVVOpcode;
10330 SDValue VectorVal, ScalarVal;
10331 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10332 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10333 MVT VecVT = VectorVal.getSimpleValueType();
10334
10335 MVT ContainerVT = VecVT;
10336 if (VecVT.isFixedLengthVector()) {
10337 ContainerVT = getContainerForFixedLengthVector(VecVT);
10338 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10339 }
10340
10341 MVT ResVT = Op.getSimpleValueType();
10342 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10343 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10344 VL, DL, DAG, Subtarget);
10345 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10346 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10347 return Res;
10348
10349 if (Op->getFlags().hasNoNaNs())
10350 return Res;
10351
10352  // Force the output to NaN if any element is NaN.
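  // An FP SETNE compare of a value against itself is true exactly for NaN
  // lanes, so a non-zero vcpop of that mask means at least one element is NaN.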
10353 SDValue IsNan =
10354 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10355 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10356 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10357 MVT XLenVT = Subtarget.getXLenVT();
10358 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10359 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10360 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10361 return DAG.getSelect(
10362 DL, ResVT, NoNaNs, Res,
10363 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10364}
10365
10366SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10367 SelectionDAG &DAG) const {
10368 SDLoc DL(Op);
10369 unsigned Opc = Op.getOpcode();
10370 SDValue Start = Op.getOperand(0);
10371 SDValue Vec = Op.getOperand(1);
10372 EVT VecEVT = Vec.getValueType();
10373 MVT XLenVT = Subtarget.getXLenVT();
10374
10375 // TODO: The type may need to be widened rather than split. Or widened before
10376 // it can be split.
10377 if (!isTypeLegal(VecEVT))
10378 return SDValue();
10379
10380 MVT VecVT = VecEVT.getSimpleVT();
10381 unsigned RVVOpcode = getRVVReductionOp(Opc);
10382
10383 if (VecVT.isFixedLengthVector()) {
10384 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10385 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10386 }
10387
10388 SDValue VL = Op.getOperand(3);
10389 SDValue Mask = Op.getOperand(2);
10390 SDValue Res =
10391 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10392 Vec, Mask, VL, DL, DAG, Subtarget);
10393 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10394 Op->getFlags().hasNoNaNs())
10395 return Res;
10396
10397 // Propagate NaNs.
10398 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10399 // Check if any of the elements in Vec is NaN.
10400 SDValue IsNaN = DAG.getNode(
10401 RISCVISD::SETCC_VL, DL, PredVT,
10402 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10403 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10404 // Check if the start value is NaN.
10405 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10406 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10407 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10408 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10409 MVT ResVT = Res.getSimpleValueType();
10410 return DAG.getSelect(
10411 DL, ResVT, NoNaNs, Res,
10412 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10413}
10414
10415SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10416 SelectionDAG &DAG) const {
10417 SDValue Vec = Op.getOperand(0);
10418 SDValue SubVec = Op.getOperand(1);
10419 MVT VecVT = Vec.getSimpleValueType();
10420 MVT SubVecVT = SubVec.getSimpleValueType();
10421
10422 SDLoc DL(Op);
10423 MVT XLenVT = Subtarget.getXLenVT();
10424 unsigned OrigIdx = Op.getConstantOperandVal(2);
10425 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10426
10427 if (OrigIdx == 0 && Vec.isUndef())
10428 return Op;
10429
10430 // We don't have the ability to slide mask vectors up indexed by their i1
10431 // elements; the smallest we can do is i8. Often we are able to bitcast to
10432 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10433 // into a scalable one, we might not necessarily have enough scalable
10434 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10435 if (SubVecVT.getVectorElementType() == MVT::i1) {
10436 if (VecVT.getVectorMinNumElements() >= 8 &&
10437 SubVecVT.getVectorMinNumElements() >= 8) {
10438 assert(OrigIdx % 8 == 0 && "Invalid index");
10439 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10440 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10441 "Unexpected mask vector lowering");
10442 OrigIdx /= 8;
10443 SubVecVT =
10444 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10445 SubVecVT.isScalableVector());
10446 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10447 VecVT.isScalableVector());
10448 Vec = DAG.getBitcast(VecVT, Vec);
10449 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10450 } else {
10451 // We can't slide this mask vector up indexed by its i1 elements.
10452 // This poses a problem when we wish to insert a scalable vector which
10453 // can't be re-expressed as a larger type. Just choose the slow path and
10454 // extend to a larger type, then truncate back down.
10455 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10456 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10457 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10458 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10459 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10460 Op.getOperand(2));
10461 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10462 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10463 }
10464 }
10465
10466  // If the subvector is a fixed-length type and we don't know VLEN
10467 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10468 // don't know which register of a LMUL group contains the specific subvector
10469 // as we only know the minimum register size. Therefore we must slide the
10470 // vector group up the full amount.
10471 const auto VLen = Subtarget.getRealVLen();
10472 if (SubVecVT.isFixedLengthVector() && !VLen) {
10473 MVT ContainerVT = VecVT;
10474 if (VecVT.isFixedLengthVector()) {
10475 ContainerVT = getContainerForFixedLengthVector(VecVT);
10476 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10477 }
10478
10479 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10480 DAG.getUNDEF(ContainerVT), SubVec,
10481 DAG.getVectorIdxConstant(0, DL));
10482
10483 SDValue Mask =
10484 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10485 // Set the vector length to only the number of elements we care about. Note
10486 // that for slideup this includes the offset.
10487 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10488 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10489
10490 // Use tail agnostic policy if we're inserting over Vec's tail.
10491    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10492    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10493 Policy = RISCVII::TAIL_AGNOSTIC;
10494
10495 // If we're inserting into the lowest elements, use a tail undisturbed
10496 // vmv.v.v.
10497 if (OrigIdx == 0) {
10498 SubVec =
10499 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10500 } else {
10501 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10502 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10503 SlideupAmt, Mask, VL, Policy);
10504 }
10505
10506 if (VecVT.isFixedLengthVector())
10507 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10508 return DAG.getBitcast(Op.getValueType(), SubVec);
10509 }
10510
10511 MVT ContainerVecVT = VecVT;
10512 if (VecVT.isFixedLengthVector()) {
10513 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10514 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10515 }
10516
10517 MVT ContainerSubVecVT = SubVecVT;
10518 if (SubVecVT.isFixedLengthVector()) {
10519 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10520 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10521 }
10522
10523 unsigned SubRegIdx;
10524 ElementCount RemIdx;
10525 // insert_subvector scales the index by vscale if the subvector is scalable,
10526 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10527 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10528 if (SubVecVT.isFixedLengthVector()) {
10529 assert(VLen);
10530 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10531 auto Decompose =
10532        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10533            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10534 SubRegIdx = Decompose.first;
10535 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10536 (OrigIdx % Vscale));
10537 } else {
10538 auto Decompose =
10539        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10540            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10541 SubRegIdx = Decompose.first;
10542 RemIdx = ElementCount::getScalable(Decompose.second);
10543 }
10544
10545  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10546  assert(isPowerOf2_64(
10547      Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10548 bool ExactlyVecRegSized =
10549 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10550 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10551
10552 // 1. If the Idx has been completely eliminated and this subvector's size is
10553 // a vector register or a multiple thereof, or the surrounding elements are
10554 // undef, then this is a subvector insert which naturally aligns to a vector
10555 // register. These can easily be handled using subregister manipulation.
10556 // 2. If the subvector isn't an exact multiple of a valid register group size,
10557 // then the insertion must preserve the undisturbed elements of the register.
10558 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10559 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10560 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10561 // of that LMUL=1 type back into the larger vector (resolving to another
10562 // subregister operation). See below for how our VSLIDEUP works. We go via a
10563 // LMUL=1 type to avoid allocating a large register group to hold our
10564 // subvector.
10565 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10566 if (SubVecVT.isFixedLengthVector()) {
10567 // We may get NoSubRegister if inserting at index 0 and the subvec
10568 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10569 if (SubRegIdx == RISCV::NoSubRegister) {
10570 assert(OrigIdx == 0);
10571 return Op;
10572 }
10573
10574      // Use an insert_subvector that will resolve to an insert subreg.
10575 assert(VLen);
10576 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10577 SDValue Insert =
10578 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10579 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10580 if (VecVT.isFixedLengthVector())
10581 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10582 return Insert;
10583 }
10584 return Op;
10585 }
10586
10587 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10588 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10589 // (in our case undisturbed). This means we can set up a subvector insertion
10590 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10591 // size of the subvector.
10592 MVT InterSubVT = ContainerVecVT;
10593 SDValue AlignedExtract = Vec;
10594 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10595 if (SubVecVT.isFixedLengthVector()) {
10596 assert(VLen);
10597 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10598 }
10599 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10600 InterSubVT = getLMUL1VT(ContainerVecVT);
10601 // Extract a subvector equal to the nearest full vector register type. This
10602 // should resolve to a EXTRACT_SUBREG instruction.
10603 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10604 DAG.getVectorIdxConstant(AlignedIdx, DL));
10605 }
10606
10607 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10608 DAG.getUNDEF(InterSubVT), SubVec,
10609 DAG.getVectorIdxConstant(0, DL));
10610
10611 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10612
10613 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10614 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10615
10616 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10617  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10618  if (Subtarget.expandVScale(EndIndex) ==
10619 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10620 Policy = RISCVII::TAIL_AGNOSTIC;
10621
10622 // If we're inserting into the lowest elements, use a tail undisturbed
10623 // vmv.v.v.
10624 if (RemIdx.isZero()) {
10625 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10626 SubVec, VL);
10627 } else {
10628 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10629
10630 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10631 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10632
10633 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10634 SlideupAmt, Mask, VL, Policy);
10635 }
10636
10637 // If required, insert this subvector back into the correct vector register.
10638 // This should resolve to an INSERT_SUBREG instruction.
10639 if (ContainerVecVT.bitsGT(InterSubVT))
10640 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10641 DAG.getVectorIdxConstant(AlignedIdx, DL));
10642
10643 if (VecVT.isFixedLengthVector())
10644 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10645
10646 // We might have bitcast from a mask type: cast back to the original type if
10647 // required.
10648 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10649}
10650
10651SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10652 SelectionDAG &DAG) const {
10653 SDValue Vec = Op.getOperand(0);
10654 MVT SubVecVT = Op.getSimpleValueType();
10655 MVT VecVT = Vec.getSimpleValueType();
10656
10657 SDLoc DL(Op);
10658 MVT XLenVT = Subtarget.getXLenVT();
10659 unsigned OrigIdx = Op.getConstantOperandVal(1);
10660 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10661
10662 // With an index of 0 this is a cast-like subvector, which can be performed
10663 // with subregister operations.
10664 if (OrigIdx == 0)
10665 return Op;
10666
10667 // We don't have the ability to slide mask vectors down indexed by their i1
10668 // elements; the smallest we can do is i8. Often we are able to bitcast to
10669 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10670 // from a scalable one, we might not necessarily have enough scalable
10671 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10672 if (SubVecVT.getVectorElementType() == MVT::i1) {
10673 if (VecVT.getVectorMinNumElements() >= 8 &&
10674 SubVecVT.getVectorMinNumElements() >= 8) {
10675 assert(OrigIdx % 8 == 0 && "Invalid index");
10676 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10677 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10678 "Unexpected mask vector lowering");
10679 OrigIdx /= 8;
10680 SubVecVT =
10681 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10682 SubVecVT.isScalableVector());
10683 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10684 VecVT.isScalableVector());
10685 Vec = DAG.getBitcast(VecVT, Vec);
10686 } else {
10687 // We can't slide this mask vector down, indexed by its i1 elements.
10688 // This poses a problem when we wish to extract a scalable vector which
10689 // can't be re-expressed as a larger type. Just choose the slow path and
10690 // extend to a larger type, then truncate back down.
10691 // TODO: We could probably improve this when extracting certain fixed
10692 // from fixed, where we can extract as i8 and shift the correct element
10693 // right to reach the desired subvector?
10694 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10695 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10696 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10697 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10698 Op.getOperand(1));
10699 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10700 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10701 }
10702 }
10703
10704 const auto VLen = Subtarget.getRealVLen();
10705
10706  // If the subvector is a fixed-length type and we don't know VLEN
10707 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10708 // don't know which register of a LMUL group contains the specific subvector
10709 // as we only know the minimum register size. Therefore we must slide the
10710 // vector group down the full amount.
10711 if (SubVecVT.isFixedLengthVector() && !VLen) {
10712 MVT ContainerVT = VecVT;
10713 if (VecVT.isFixedLengthVector()) {
10714 ContainerVT = getContainerForFixedLengthVector(VecVT);
10715 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10716 }
10717
10718 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10719 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10720 if (auto ShrunkVT =
10721 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10722 ContainerVT = *ShrunkVT;
10723 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10724 DAG.getVectorIdxConstant(0, DL));
10725 }
10726
10727 SDValue Mask =
10728 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10729 // Set the vector length to only the number of elements we care about. This
10730 // avoids sliding down elements we're going to discard straight away.
10731 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10732 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10733 SDValue Slidedown =
10734 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10735 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10736 // Now we can use a cast-like subvector extract to get the result.
10737 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10738 DAG.getVectorIdxConstant(0, DL));
10739 return DAG.getBitcast(Op.getValueType(), Slidedown);
10740 }
10741
10742 if (VecVT.isFixedLengthVector()) {
10743 VecVT = getContainerForFixedLengthVector(VecVT);
10744 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10745 }
10746
10747 MVT ContainerSubVecVT = SubVecVT;
10748 if (SubVecVT.isFixedLengthVector())
10749 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10750
10751 unsigned SubRegIdx;
10752 ElementCount RemIdx;
10753 // extract_subvector scales the index by vscale if the subvector is scalable,
10754 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10755 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10756 if (SubVecVT.isFixedLengthVector()) {
10757 assert(VLen);
10758 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10759 auto Decompose =
10760        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10761            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10762 SubRegIdx = Decompose.first;
10763 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10764 (OrigIdx % Vscale));
10765 } else {
10766 auto Decompose =
10767        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10768            VecVT, ContainerSubVecVT, OrigIdx, TRI);
10769 SubRegIdx = Decompose.first;
10770 RemIdx = ElementCount::getScalable(Decompose.second);
10771 }
10772
10773 // If the Idx has been completely eliminated then this is a subvector extract
10774 // which naturally aligns to a vector register. These can easily be handled
10775 // using subregister manipulation. We use an extract_subvector that will
10776 // resolve to an extract subreg.
10777 if (RemIdx.isZero()) {
10778 if (SubVecVT.isFixedLengthVector()) {
10779 assert(VLen);
10780 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10781 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10782 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10783 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10784 }
10785 return Op;
10786 }
10787
10788 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10789 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10790 // divide exactly.
10791 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10792 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10793
10794 // If the vector type is an LMUL-group type, extract a subvector equal to the
10795 // nearest full vector register type.
10796 MVT InterSubVT = VecVT;
10797 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10798 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10799 // we should have successfully decomposed the extract into a subregister.
10800 // We use an extract_subvector that will resolve to a subreg extract.
10801 assert(SubRegIdx != RISCV::NoSubRegister);
10802 (void)SubRegIdx;
10803 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10804 if (SubVecVT.isFixedLengthVector()) {
10805 assert(VLen);
10806 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10807 }
10808 InterSubVT = getLMUL1VT(VecVT);
10809 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10810 DAG.getConstant(Idx, DL, XLenVT));
10811 }
10812
10813 // Slide this vector register down by the desired number of elements in order
10814 // to place the desired subvector starting at element 0.
10815 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10816 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10817 if (SubVecVT.isFixedLengthVector())
10818 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10819 SDValue Slidedown =
10820 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10821 Vec, SlidedownAmt, Mask, VL);
10822
10823 // Now the vector is in the right position, extract our final subvector. This
10824 // should resolve to a COPY.
10825 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10826 DAG.getVectorIdxConstant(0, DL));
10827
10828 // We might have bitcast from a mask type: cast back to the original type if
10829 // required.
10830 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10831}
10832
10833// Widen a vector's operands to i8, then truncate its results back to the
10834// original type, typically i1. All operand and result types must be the same.
10835static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10836                                  SelectionDAG &DAG) {
10837 MVT VT = N.getSimpleValueType();
10838 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10839  SmallVector<SDValue, 4> WideOps;
10840  for (SDValue Op : N->ops()) {
10841 assert(Op.getSimpleValueType() == VT &&
10842 "Operands and result must be same type");
10843 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10844 }
10845
10846 unsigned NumVals = N->getNumValues();
10847
10848  SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10849      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10850 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10851 SmallVector<SDValue, 4> TruncVals;
10852 for (unsigned I = 0; I < NumVals; I++) {
10853 TruncVals.push_back(
10854 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10855 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10856 }
10857
10858 if (TruncVals.size() > 1)
10859 return DAG.getMergeValues(TruncVals, DL);
10860 return TruncVals.front();
10861}
10862
10863SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10864 SelectionDAG &DAG) const {
10865 SDLoc DL(Op);
10866 MVT VecVT = Op.getSimpleValueType();
10867
10868 assert(VecVT.isScalableVector() &&
10869 "vector_interleave on non-scalable vector!");
10870
10871 // 1 bit element vectors need to be widened to e8
10872 if (VecVT.getVectorElementType() == MVT::i1)
10873 return widenVectorOpsToi8(Op, DL, DAG);
10874
10875 // If the VT is LMUL=8, we need to split and reassemble.
10876 if (VecVT.getSizeInBits().getKnownMinValue() ==
10877 (8 * RISCV::RVVBitsPerBlock)) {
10878 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10879 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10880 EVT SplitVT = Op0Lo.getValueType();
10881
10882    SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10883                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10884    SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10885                                DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10886
10887 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10888 ResLo.getValue(0), ResHi.getValue(0));
10889 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10890 ResHi.getValue(1));
10891 return DAG.getMergeValues({Even, Odd}, DL);
10892 }
10893
10894 // Concatenate the two vectors as one vector to deinterleave
10895  MVT ConcatVT =
10896      MVT::getVectorVT(VecVT.getVectorElementType(),
10897                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10898  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10899 Op.getOperand(0), Op.getOperand(1));
10900
10901 // We can deinterleave through vnsrl.wi if the element type is smaller than
10902 // ELEN
10903 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10904 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
10905 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
10906 return DAG.getMergeValues({Even, Odd}, DL);
10907 }
10908
10909 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
10910 // possibly mask vector, then extract the required subvector. Doing this
10911 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
10912 // creation to be rematerialized during register allocation to reduce
10913 // register pressure if needed.
10914
10915 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
10916
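  // An i8 splat of 0b01010101 bitcast to i1 yields the mask 1,0,1,0,...
  // (bit 0 maps to element 0), i.e. it selects the even elements of Concat;
  // 0b10101010 selects the odd elements.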
10917 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
10918 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
10919 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
10920 DAG.getVectorIdxConstant(0, DL));
10921
10922 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
10923 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
10924 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
10925 DAG.getVectorIdxConstant(0, DL));
10926
10927 // vcompress the even and odd elements into two separate vectors
10928 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
10929 EvenMask, DAG.getUNDEF(ConcatVT));
10930 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
10931 OddMask, DAG.getUNDEF(ConcatVT));
10932
10933 // Extract the result half of the gather for even and odd
10934 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10935 DAG.getVectorIdxConstant(0, DL));
10936 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10937 DAG.getVectorIdxConstant(0, DL));
10938
10939 return DAG.getMergeValues({Even, Odd}, DL);
10940}
10941
10942SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10943 SelectionDAG &DAG) const {
10944 SDLoc DL(Op);
10945 MVT VecVT = Op.getSimpleValueType();
10946
10947 assert(VecVT.isScalableVector() &&
10948 "vector_interleave on non-scalable vector!");
10949
10950 // i1 vectors need to be widened to i8
10951 if (VecVT.getVectorElementType() == MVT::i1)
10952 return widenVectorOpsToi8(Op, DL, DAG);
10953
10954 MVT XLenVT = Subtarget.getXLenVT();
10955 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10956
10957 // If the VT is LMUL=8, we need to split and reassemble.
10958 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10959 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10960 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10961 EVT SplitVT = Op0Lo.getValueType();
10962
10963    SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10964                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10965    SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10966                                DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10967
10968 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10969 ResLo.getValue(0), ResLo.getValue(1));
10970 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10971 ResHi.getValue(0), ResHi.getValue(1));
10972 return DAG.getMergeValues({Lo, Hi}, DL);
10973 }
10974
10975 SDValue Interleaved;
10976
10977 // If the element type is smaller than ELEN, then we can interleave with
10978 // vwaddu.vv and vwmaccu.vx
10979 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10980 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10981 DAG, Subtarget);
10982 } else {
10983    // Otherwise, fall back to using vrgatherei16.vv
10984    MVT ConcatVT =
10985        MVT::getVectorVT(VecVT.getVectorElementType(),
10986                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10987    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10988 Op.getOperand(0), Op.getOperand(1));
10989
10990 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10991
10992 // 0 1 2 3 4 5 6 7 ...
10993 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10994
10995 // 1 1 1 1 1 1 1 1 ...
10996 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10997
10998 // 1 0 1 0 1 0 1 0 ...
10999 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11000 OddMask = DAG.getSetCC(
11001 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11002        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11003        ISD::CondCode::SETNE);
11004
11005 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11006
11007 // Build up the index vector for interleaving the concatenated vector
11008 // 0 0 1 1 2 2 3 3 ...
11009 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11010 // 0 n 1 n+1 2 n+2 3 n+3 ...
11011 Idx =
11012 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11013
11014 // Then perform the interleave
11015 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11016 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11017 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11018 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11019 }
11020
11021 // Extract the two halves from the interleaved result
11022 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11023 DAG.getVectorIdxConstant(0, DL));
11024 SDValue Hi = DAG.getNode(
11025 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11026      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11027
11028 return DAG.getMergeValues({Lo, Hi}, DL);
11029}
11030
11031// Lower step_vector to the vid instruction. Any non-identity step value must
11032// be accounted for by manual expansion.
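// A power-of-two step is expanded as vid.v followed by a shift; any other
// step falls back to a vector multiply.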
11033SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11034 SelectionDAG &DAG) const {
11035 SDLoc DL(Op);
11036 MVT VT = Op.getSimpleValueType();
11037 assert(VT.isScalableVector() && "Expected scalable vector");
11038 MVT XLenVT = Subtarget.getXLenVT();
11039 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11040 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11041 uint64_t StepValImm = Op.getConstantOperandVal(0);
11042 if (StepValImm != 1) {
11043 if (isPowerOf2_64(StepValImm)) {
11044 SDValue StepVal =
11045 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11046 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11047 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11048 } else {
11049 SDValue StepVal = lowerScalarSplat(
11050 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11051 VL, VT, DL, DAG, Subtarget);
11052 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11053 }
11054 }
11055 return StepVec;
11056}
11057
11058// Implement vector_reverse using vrgather.vv with indices determined by
11059// subtracting the id of each element from (VLMAX-1). This will convert
11060// the indices like so:
11061// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11062// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11063SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11064 SelectionDAG &DAG) const {
11065 SDLoc DL(Op);
11066 MVT VecVT = Op.getSimpleValueType();
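  // i1 vectors are reversed by widening to i8, reversing, and narrowing back
  // with a compare against zero.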
11067 if (VecVT.getVectorElementType() == MVT::i1) {
11068 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11069 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11070 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11071 return DAG.getSetCC(DL, VecVT, Op2,
11072 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11073 }
11074
11075 MVT ContainerVT = VecVT;
11076 SDValue Vec = Op.getOperand(0);
11077 if (VecVT.isFixedLengthVector()) {
11078 ContainerVT = getContainerForFixedLengthVector(VecVT);
11079 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11080 }
11081
11082 MVT XLenVT = Subtarget.getXLenVT();
11083 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11084
11085 // On some uarchs vrgather.vv will read from every input register for each
11086 // output register, regardless of the indices. However to reverse a vector
11087 // each output register only needs to read from one register. So decompose it
11088 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
11089 // O(LMUL^2).
11090 //
11091 // vsetvli a1, zero, e64, m4, ta, ma
11092 // vrgatherei16.vv v12, v8, v16
11093 // ->
11094 // vsetvli a1, zero, e64, m1, ta, ma
11095 // vrgather.vv v15, v8, v16
11096 // vrgather.vv v14, v9, v16
11097 // vrgather.vv v13, v10, v16
11098 // vrgather.vv v12, v11, v16
11099 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11100 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11101 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11102 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11103 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11104 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11105
11106 // Fixed length vectors might not fit exactly into their container, and so
11107 // leave a gap in the front of the vector after being reversed. Slide this
11108 // away.
11109 //
11110 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11111 // 0 1 2 3 x x x x <- reverse
11112 // x x x x 0 1 2 3 <- vslidedown.vx
11113 if (VecVT.isFixedLengthVector()) {
11114 SDValue Offset = DAG.getNode(
11115 ISD::SUB, DL, XLenVT,
11116 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11117 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11118 Concat =
11119 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11120 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11121 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11122 }
11123 return Concat;
11124 }
11125
11126 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11127 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11128 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11129 unsigned MaxVLMAX =
11130 VecVT.isFixedLengthVector()
11131 ? VecVT.getVectorNumElements()
11132 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11133
11134 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11135 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11136
11137 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11138 // to use vrgatherei16.vv.
11139 if (MaxVLMAX > 256 && EltSize == 8) {
11140 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11141 // Reverse each half, then reassemble them in reverse order.
11142 // NOTE: It's also possible that after splitting, VLMAX no longer
11143 // requires vrgatherei16.vv.
11144 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11145 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11146 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11147 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11148 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11149 // Reassemble the low and high pieces reversed.
11150 // FIXME: This is a CONCAT_VECTORS.
11151 SDValue Res =
11152 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11153 DAG.getVectorIdxConstant(0, DL));
11154 return DAG.getNode(
11155 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11156 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11157 }
11158
11159 // Just promote the int type to i16 which will double the LMUL.
11160 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11161 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11162 }
11163
11164 // At LMUL > 1, do the index computation in 16 bits to reduce register
11165 // pressure.
11166 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11167 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11168 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11169 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11170 IntVT = IntVT.changeVectorElementType(MVT::i16);
11171 }
11172
11173 // Calculate VLMAX-1 for the desired SEW.
11174 SDValue VLMinus1 = DAG.getNode(
11175 ISD::SUB, DL, XLenVT,
11176 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11177 DAG.getConstant(1, DL, XLenVT));
11178
11179 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11180 bool IsRV32E64 =
11181 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11182 SDValue SplatVL;
11183 if (!IsRV32E64)
11184 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11185 else
11186 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11187 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11188
11189 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11190 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11191 DAG.getUNDEF(IntVT), Mask, VL);
11192
11193 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11194 DAG.getUNDEF(ContainerVT), Mask, VL);
11195 if (VecVT.isFixedLengthVector())
11196 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11197 return Gather;
11198}
11199
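// For example, vector_splice(A, B, 2) takes elements A[2..VLMAX-1] followed
// by B[0..1]; the lowering below slides A down by 2 and then slides B up at
// offset VLMAX-2. A negative immediate instead takes the trailing |imm|
// elements of A followed by the leading elements of B, so the two offsets
// are computed the other way around.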
11200SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11201 SelectionDAG &DAG) const {
11202 SDLoc DL(Op);
11203 SDValue V1 = Op.getOperand(0);
11204 SDValue V2 = Op.getOperand(1);
11205 MVT XLenVT = Subtarget.getXLenVT();
11206 MVT VecVT = Op.getSimpleValueType();
11207
11208 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11209
11210 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11211 SDValue DownOffset, UpOffset;
11212 if (ImmValue >= 0) {
11213 // The operand is a TargetConstant; we need to rebuild it as a regular
11214 // constant.
11215 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11216 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11217 } else {
11218 // The operand is a TargetConstant; we need to rebuild it as a regular
11219 // constant rather than negating the original operand.
11220 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11221 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11222 }
11223
11224 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11225
11226 SDValue SlideDown =
11227 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11228 DownOffset, TrueMask, UpOffset);
11229 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11230 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11231 RISCVII::TAIL_AGNOSTIC);
11232}
11233
11234SDValue
11235RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11236 SelectionDAG &DAG) const {
11237 SDLoc DL(Op);
11238 auto *Load = cast<LoadSDNode>(Op);
11239
11240 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11241 Load->getMemoryVT(),
11242 *Load->getMemOperand()) &&
11243 "Expecting a correctly-aligned load");
11244
11245 MVT VT = Op.getSimpleValueType();
11246 MVT XLenVT = Subtarget.getXLenVT();
11247 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11248
11249 // If we know the exact VLEN and our fixed length vector completely fills
11250 // the container, use a whole register load instead.
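// (For example, assuming an exact VLEN of 128, a v4i32 load fills its LMUL=1
// container exactly, so the VL-governed vle below can be replaced by a plain
// load of the container type, which selects to a whole-register load.)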
11251 const auto [MinVLMAX, MaxVLMAX] =
11252 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11253 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11254 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11255 MachineMemOperand *MMO = Load->getMemOperand();
11256 SDValue NewLoad =
11257 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11258 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11259 MMO->getAAInfo(), MMO->getRanges());
11260 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11261 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11262 }
11263
11264 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11265
11266 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11267 SDValue IntID = DAG.getTargetConstant(
11268 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11269 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11270 if (!IsMaskOp)
11271 Ops.push_back(DAG.getUNDEF(ContainerVT));
11272 Ops.push_back(Load->getBasePtr());
11273 Ops.push_back(VL);
11274 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11275 SDValue NewLoad =
11276 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11277 Load->getMemoryVT(), Load->getMemOperand());
11278
11279 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11280 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11281}
11282
11283SDValue
11284RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11285 SelectionDAG &DAG) const {
11286 SDLoc DL(Op);
11287 auto *Store = cast<StoreSDNode>(Op);
11288
11289 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11290 Store->getMemoryVT(),
11291 *Store->getMemOperand()) &&
11292 "Expecting a correctly-aligned store");
11293
11294 SDValue StoreVal = Store->getValue();
11295 MVT VT = StoreVal.getSimpleValueType();
11296 MVT XLenVT = Subtarget.getXLenVT();
11297
11298 // If the size is less than a byte, we need to pad with zeros to make a byte.
11299 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11300 VT = MVT::v8i1;
11301 StoreVal =
11302 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11303 StoreVal, DAG.getVectorIdxConstant(0, DL));
11304 }
11305
11306 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11307
11308 SDValue NewValue =
11309 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11310
11311 // If we know the exact VLEN and our fixed length vector completely fills
11312 // the container, use a whole register store instead.
11313 const auto [MinVLMAX, MaxVLMAX] =
11314 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11315 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11316 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11317 MachineMemOperand *MMO = Store->getMemOperand();
11318 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11319 MMO->getPointerInfo(), MMO->getBaseAlign(),
11320 MMO->getFlags(), MMO->getAAInfo());
11321 }
11322
11323 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11324
11325 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11326 SDValue IntID = DAG.getTargetConstant(
11327 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11328 return DAG.getMemIntrinsicNode(
11329 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11330 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11331 Store->getMemoryVT(), Store->getMemOperand());
11332}
11333
11334SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11335 SelectionDAG &DAG) const {
11336 SDLoc DL(Op);
11337 MVT VT = Op.getSimpleValueType();
11338
11339 const auto *MemSD = cast<MemSDNode>(Op);
11340 EVT MemVT = MemSD->getMemoryVT();
11341 MachineMemOperand *MMO = MemSD->getMemOperand();
11342 SDValue Chain = MemSD->getChain();
11343 SDValue BasePtr = MemSD->getBasePtr();
11344
11345 SDValue Mask, PassThru, VL;
11346 bool IsExpandingLoad = false;
11347 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11348 Mask = VPLoad->getMask();
11349 PassThru = DAG.getUNDEF(VT);
11350 VL = VPLoad->getVectorLength();
11351 } else {
11352 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11353 Mask = MLoad->getMask();
11354 PassThru = MLoad->getPassThru();
11355 IsExpandingLoad = MLoad->isExpandingLoad();
11356 }
11357
11358 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11359
11360 MVT XLenVT = Subtarget.getXLenVT();
11361
11362 MVT ContainerVT = VT;
11363 if (VT.isFixedLengthVector()) {
11364 ContainerVT = getContainerForFixedLengthVector(VT);
11365 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11366 if (!IsUnmasked) {
11367 MVT MaskVT = getMaskTypeFor(ContainerVT);
11368 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11369 }
11370 }
11371
11372 if (!VL)
11373 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11374
11375 SDValue ExpandingVL;
11376 if (!IsUnmasked && IsExpandingLoad) {
11377 ExpandingVL = VL;
11378 VL =
11379 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11380 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11381 }
11382
11383 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11384 : Intrinsic::riscv_vle_mask;
11385 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11386 if (IntID == Intrinsic::riscv_vle)
11387 Ops.push_back(DAG.getUNDEF(ContainerVT));
11388 else
11389 Ops.push_back(PassThru);
11390 Ops.push_back(BasePtr);
11391 if (IntID == Intrinsic::riscv_vle_mask)
11392 Ops.push_back(Mask);
11393 Ops.push_back(VL);
11394 if (IntID == Intrinsic::riscv_vle_mask)
11395 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11396
11397 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11398
11399 SDValue Result =
11400 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11401 Chain = Result.getValue(1);
11402 if (ExpandingVL) {
11403 MVT IndexVT = ContainerVT;
11404 if (ContainerVT.isFloatingPoint())
11405 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11406
11407 MVT IndexEltVT = IndexVT.getVectorElementType();
11408 bool UseVRGATHEREI16 = false;
11409 // If the index vector is an i8 vector and the element count exceeds 256, we
11410 // should change the element type of the index vector to i16 to avoid
11411 // overflow.
11412 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11413 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11414 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11415 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11416 UseVRGATHEREI16 = true;
11417 }
11418
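// For example, with mask 1 0 1 1 the compacted load above reads three
// contiguous elements a b c; viota.m yields indices 0 1 1 2, and the masked
// gather below places them as a, passthru, b, c.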
11419 SDValue Iota =
11420 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11421 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11422 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11423 Result =
11424 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11425 : RISCVISD::VRGATHER_VV_VL,
11426 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11427 }
11428
11429 if (VT.isFixedLengthVector())
11430 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11431
11432 return DAG.getMergeValues({Result, Chain}, DL);
11433}
11434
11435SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11436 SelectionDAG &DAG) const {
11437 SDLoc DL(Op);
11438
11439 const auto *MemSD = cast<MemSDNode>(Op);
11440 EVT MemVT = MemSD->getMemoryVT();
11441 MachineMemOperand *MMO = MemSD->getMemOperand();
11442 SDValue Chain = MemSD->getChain();
11443 SDValue BasePtr = MemSD->getBasePtr();
11444 SDValue Val, Mask, VL;
11445
11446 bool IsCompressingStore = false;
11447 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11448 Val = VPStore->getValue();
11449 Mask = VPStore->getMask();
11450 VL = VPStore->getVectorLength();
11451 } else {
11452 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11453 Val = MStore->getValue();
11454 Mask = MStore->getMask();
11455 IsCompressingStore = MStore->isCompressingStore();
11456 }
11457
11458 bool IsUnmasked =
11459 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11460
11461 MVT VT = Val.getSimpleValueType();
11462 MVT XLenVT = Subtarget.getXLenVT();
11463
11464 MVT ContainerVT = VT;
11465 if (VT.isFixedLengthVector()) {
11466 ContainerVT = getContainerForFixedLengthVector(VT);
11467
11468 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11469 if (!IsUnmasked || IsCompressingStore) {
11470 MVT MaskVT = getMaskTypeFor(ContainerVT);
11471 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11472 }
11473 }
11474
11475 if (!VL)
11476 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11477
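// For example, a compressing store of a b c d with mask 1 0 1 1 uses
// vcompress below to pack the active elements into a c d and vcpop to shrink
// VL to 3, so the subsequent unmasked vse writes exactly three elements.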
11478 if (IsCompressingStore) {
11479 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11480 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11481 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11482 VL =
11483 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11484 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11485 }
11486
11487 unsigned IntID =
11488 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11489 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11490 Ops.push_back(Val);
11491 Ops.push_back(BasePtr);
11492 if (!IsUnmasked)
11493 Ops.push_back(Mask);
11494 Ops.push_back(VL);
11495
11496 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11497 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11498}
11499
11500SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11501 SelectionDAG &DAG) const {
11502 SDLoc DL(Op);
11503 SDValue Val = Op.getOperand(0);
11504 SDValue Mask = Op.getOperand(1);
11505 SDValue Passthru = Op.getOperand(2);
11506
11507 MVT VT = Val.getSimpleValueType();
11508 MVT XLenVT = Subtarget.getXLenVT();
11509 MVT ContainerVT = VT;
11510 if (VT.isFixedLengthVector()) {
11511 ContainerVT = getContainerForFixedLengthVector(VT);
11512 MVT MaskVT = getMaskTypeFor(ContainerVT);
11513 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11514 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11515 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11516 }
11517
11518 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11519 SDValue Res =
11520 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11521 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11522 Passthru, Val, Mask, VL);
11523
11524 if (VT.isFixedLengthVector())
11525 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11526
11527 return Res;
11528}
11529
11530SDValue
11531RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11532 SelectionDAG &DAG) const {
11533 MVT InVT = Op.getOperand(0).getSimpleValueType();
11534 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11535
11536 MVT VT = Op.getSimpleValueType();
11537
11538 SDValue Op1 =
11539 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11540 SDValue Op2 =
11541 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11542
11543 SDLoc DL(Op);
11544 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11545 DAG, Subtarget);
11546 MVT MaskVT = getMaskTypeFor(ContainerVT);
11547
11548 SDValue Cmp =
11549 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11550 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11551
11552 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11553}
11554
11555SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11556 SelectionDAG &DAG) const {
11557 unsigned Opc = Op.getOpcode();
11558 SDLoc DL(Op);
11559 SDValue Chain = Op.getOperand(0);
11560 SDValue Op1 = Op.getOperand(1);
11561 SDValue Op2 = Op.getOperand(2);
11562 SDValue CC = Op.getOperand(3);
11563 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11564 MVT VT = Op.getSimpleValueType();
11565 MVT InVT = Op1.getSimpleValueType();
11566
11567 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11568 // condition codes.
11569 if (Opc == ISD::STRICT_FSETCCS) {
11570 // Expand strict_fsetccs(x, y, oeq) to
11571 // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
11572 SDVTList VTList = Op->getVTList();
11573 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11574 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11575 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11576 Op2, OLECCVal);
11577 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11578 Op1, OLECCVal);
11579 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11580 Tmp1.getValue(1), Tmp2.getValue(1));
11581 // Tmp1 and Tmp2 might be the same node.
11582 if (Tmp1 != Tmp2)
11583 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11584 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11585 }
11586
11587 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11588 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11589 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11590 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11591 Op2, OEQCCVal);
11592 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11593 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11594 }
11595 }
11596
11597 MVT ContainerInVT = InVT;
11598 if (InVT.isFixedLengthVector()) {
11599 ContainerInVT = getContainerForFixedLengthVector(InVT);
11600 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11601 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11602 }
11603 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11604
11605 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11606
11607 SDValue Res;
11608 if (Opc == ISD::STRICT_FSETCC &&
11609 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11610 CCVal == ISD::SETOLE)) {
11611 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception on qNaN. Generate a mask that is
11612 // only active when both input elements are ordered.
11613 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11614 SDValue OrderMask1 = DAG.getNode(
11615 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11616 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11617 True, VL});
11618 SDValue OrderMask2 = DAG.getNode(
11619 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11620 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11621 True, VL});
11622 Mask =
11623 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11624 // Use Mask as the passthru operand to let the result be 0 if either of the
11625 // inputs is unordered.
11626 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11627 DAG.getVTList(MaskVT, MVT::Other),
11628 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11629 } else {
11630 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11631 : RISCVISD::STRICT_FSETCCS_VL;
11632 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11633 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11634 }
11635
11636 if (VT.isFixedLengthVector()) {
11637 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11638 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11639 }
11640 return Res;
11641}
11642
11643// Lower vector ABS to smax(X, sub(0, X)).
11644SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11645 SDLoc DL(Op);
11646 MVT VT = Op.getSimpleValueType();
11647 SDValue X = Op.getOperand(0);
11648
11649 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11650 "Unexpected type for ISD::ABS");
11651
11652 MVT ContainerVT = VT;
11653 if (VT.isFixedLengthVector()) {
11654 ContainerVT = getContainerForFixedLengthVector(VT);
11655 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11656 }
11657
11658 SDValue Mask, VL;
11659 if (Op->getOpcode() == ISD::VP_ABS) {
11660 Mask = Op->getOperand(1);
11661 if (VT.isFixedLengthVector())
11662 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11663 Subtarget);
11664 VL = Op->getOperand(2);
11665 } else
11666 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11667
11668 SDValue SplatZero = DAG.getNode(
11669 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11670 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11671 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11672 DAG.getUNDEF(ContainerVT), Mask, VL);
11673 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11674 DAG.getUNDEF(ContainerVT), Mask, VL);
11675
11676 if (VT.isFixedLengthVector())
11677 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11678 return Max;
11679}
11680
11681SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11682 SDValue Op, SelectionDAG &DAG) const {
11683 SDLoc DL(Op);
11684 MVT VT = Op.getSimpleValueType();
11685 SDValue Mag = Op.getOperand(0);
11686 SDValue Sign = Op.getOperand(1);
11687 assert(Mag.getValueType() == Sign.getValueType() &&
11688 "Can only handle COPYSIGN with matching types.");
11689
11690 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11691 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11692 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11693
11694 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11695
11696 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11697 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11698
11699 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11700}
11701
11702SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11703 SDValue Op, SelectionDAG &DAG) const {
11704 MVT VT = Op.getSimpleValueType();
11705 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11706
11707 MVT I1ContainerVT =
11708 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11709
11710 SDValue CC =
11711 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11712 SDValue Op1 =
11713 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11714 SDValue Op2 =
11715 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11716
11717 SDLoc DL(Op);
11718 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11719
11720 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11721 Op2, DAG.getUNDEF(ContainerVT), VL);
11722
11723 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11724}
11725
11726SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11727 SelectionDAG &DAG) const {
11728 unsigned NewOpc = getRISCVVLOp(Op);
11729 bool HasPassthruOp = hasPassthruOp(NewOpc);
11730 bool HasMask = hasMaskOp(NewOpc);
11731
11732 MVT VT = Op.getSimpleValueType();
11733 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11734
11735 // Create list of operands by converting existing ones to scalable types.
11736 SmallVector<SDValue, 6> Ops;
11737 for (const SDValue &V : Op->op_values()) {
11738 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11739
11740 // Pass through non-vector operands.
11741 if (!V.getValueType().isVector()) {
11742 Ops.push_back(V);
11743 continue;
11744 }
11745
11746 // "cast" fixed length vector to a scalable vector.
11747 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11748 "Only fixed length vectors are supported!");
11749 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11750 }
11751
11752 SDLoc DL(Op);
11753 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11754 if (HasPassthruOp)
11755 Ops.push_back(DAG.getUNDEF(ContainerVT));
11756 if (HasMask)
11757 Ops.push_back(Mask);
11758 Ops.push_back(VL);
11759
11760 // StrictFP operations have two result values. Their lowered result should
11761 // have the same result count.
11762 if (Op->isStrictFPOpcode()) {
11763 SDValue ScalableRes =
11764 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11765 Op->getFlags());
11766 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11767 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11768 }
11769
11770 SDValue ScalableRes =
11771 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11772 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11773}
11774
11775// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11776// * Operands of each node are assumed to be in the same order.
11777// * The EVL operand is promoted from i32 to i64 on RV64.
11778// * Fixed-length vectors are converted to their scalable-vector container
11779// types.
11780SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11781 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11782 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11783
11784 SDLoc DL(Op);
11785 MVT VT = Op.getSimpleValueType();
11786 SmallVector<SDValue, 16> Ops;
11787
11788 MVT ContainerVT = VT;
11789 if (VT.isFixedLengthVector())
11790 ContainerVT = getContainerForFixedLengthVector(VT);
11791
11792 for (const auto &OpIdx : enumerate(Op->ops())) {
11793 SDValue V = OpIdx.value();
11794 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11795 // Add a dummy passthru value before the mask, or, if there isn't a mask,
11796 // before the EVL.
11797 if (HasPassthruOp) {
11798 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11799 if (MaskIdx) {
11800 if (*MaskIdx == OpIdx.index())
11801 Ops.push_back(DAG.getUNDEF(ContainerVT));
11802 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11803 OpIdx.index()) {
11804 if (Op.getOpcode() == ISD::VP_MERGE) {
11805 // For VP_MERGE, copy the false operand instead of an undef value.
11806 Ops.push_back(Ops.back());
11807 } else {
11808 assert(Op.getOpcode() == ISD::VP_SELECT);
11809 // For VP_SELECT, add an undef value.
11810 Ops.push_back(DAG.getUNDEF(ContainerVT));
11811 }
11812 }
11813 }
11814 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11815 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11816 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11817 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11818 Subtarget.getXLenVT()));
11819 // Pass through operands which aren't fixed-length vectors.
11820 if (!V.getValueType().isFixedLengthVector()) {
11821 Ops.push_back(V);
11822 continue;
11823 }
11824 // "cast" fixed length vector to a scalable vector.
11825 MVT OpVT = V.getSimpleValueType();
11826 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11827 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11828 "Only fixed length vectors are supported!");
11829 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11830 }
11831
11832 if (!VT.isFixedLengthVector())
11833 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11834
11835 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11836
11837 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11838}
11839
11840SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11841 SelectionDAG &DAG) const {
11842 SDLoc DL(Op);
11843 MVT VT = Op.getSimpleValueType();
11844
11845 SDValue Src = Op.getOperand(0);
11846 // NOTE: Mask is dropped.
11847 SDValue VL = Op.getOperand(2);
11848
11849 MVT ContainerVT = VT;
11850 if (VT.isFixedLengthVector()) {
11851 ContainerVT = getContainerForFixedLengthVector(VT);
11852 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11853 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11854 }
11855
11856 MVT XLenVT = Subtarget.getXLenVT();
11857 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11858 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11859 DAG.getUNDEF(ContainerVT), Zero, VL);
11860
11861 SDValue SplatValue = DAG.getSignedConstant(
11862 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11863 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11864 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11865
11866 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11867 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11868 if (!VT.isFixedLengthVector())
11869 return Result;
11870 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11871}
11872
11873SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11874 SelectionDAG &DAG) const {
11875 SDLoc DL(Op);
11876 MVT VT = Op.getSimpleValueType();
11877
11878 SDValue Op1 = Op.getOperand(0);
11879 SDValue Op2 = Op.getOperand(1);
11880 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11881 // NOTE: Mask is dropped.
11882 SDValue VL = Op.getOperand(4);
11883
11884 MVT ContainerVT = VT;
11885 if (VT.isFixedLengthVector()) {
11886 ContainerVT = getContainerForFixedLengthVector(VT);
11887 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11888 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11889 }
11890
11891 SDValue Result;
11892 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11893
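// The identities below rely on i1 true being the bit 1: for signed compares
// a set bit sign-extends to -1 (the smallest value), while for unsigned
// compares it is the largest value. E.g. X <u Y holds exactly when X == 0 and
// Y == 1, i.e. ~X & Y.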
11894 switch (Condition) {
11895 default:
11896 break;
11897 // X != Y --> (X^Y)
11898 case ISD::SETNE:
11899 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11900 break;
11901 // X == Y --> ~(X^Y)
11902 case ISD::SETEQ: {
11903 SDValue Temp =
11904 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11905 Result =
11906 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11907 break;
11908 }
11909 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11910 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11911 case ISD::SETGT:
11912 case ISD::SETULT: {
11913 SDValue Temp =
11914 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11915 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11916 break;
11917 }
11918 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11919 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11920 case ISD::SETLT:
11921 case ISD::SETUGT: {
11922 SDValue Temp =
11923 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11924 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11925 break;
11926 }
11927 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11928 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11929 case ISD::SETGE:
11930 case ISD::SETULE: {
11931 SDValue Temp =
11932 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11933 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11934 break;
11935 }
11936 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11937 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11938 case ISD::SETLE:
11939 case ISD::SETUGE: {
11940 SDValue Temp =
11941 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11942 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11943 break;
11944 }
11945 }
11946
11947 if (!VT.isFixedLengthVector())
11948 return Result;
11949 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11950}
11951
11952// Lower Floating-Point/Integer Type-Convert VP SDNodes
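// For example, a vp.sitofp from nxv2i8 to nxv2f64 first sign-extends the
// source to i32 (half the destination width) and then converts, while a
// vp.fptosi from nxv2f64 to nxv2i8 converts to i32 first and then truncates
// in halving steps.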
11953SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11954 SelectionDAG &DAG) const {
11955 SDLoc DL(Op);
11956
11957 SDValue Src = Op.getOperand(0);
11958 SDValue Mask = Op.getOperand(1);
11959 SDValue VL = Op.getOperand(2);
11960 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11961
11962 MVT DstVT = Op.getSimpleValueType();
11963 MVT SrcVT = Src.getSimpleValueType();
11964 if (DstVT.isFixedLengthVector()) {
11965 DstVT = getContainerForFixedLengthVector(DstVT);
11966 SrcVT = getContainerForFixedLengthVector(SrcVT);
11967 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11968 MVT MaskVT = getMaskTypeFor(DstVT);
11969 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11970 }
11971
11972 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11973 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11974
11975 SDValue Result;
11976 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11977 if (SrcVT.isInteger()) {
11978 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11979
11980 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11981 ? RISCVISD::VSEXT_VL
11982 : RISCVISD::VZEXT_VL;
11983
11984 // Do we need to do any pre-widening before converting?
11985 if (SrcEltSize == 1) {
11986 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11987 MVT XLenVT = Subtarget.getXLenVT();
11988 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11989 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11990 DAG.getUNDEF(IntVT), Zero, VL);
11991 SDValue One = DAG.getSignedConstant(
11992 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11993 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11994 DAG.getUNDEF(IntVT), One, VL);
11995 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11996 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11997 } else if (DstEltSize > (2 * SrcEltSize)) {
11998 // Widen before converting.
11999 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12000 DstVT.getVectorElementCount());
12001 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12002 }
12003
12004 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12005 } else {
12006 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12007 "Wrong input/output vector types");
12008
12009 // Convert f16 to f32 then convert f32 to i64.
12010 if (DstEltSize > (2 * SrcEltSize)) {
12011 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12012 MVT InterimFVT =
12013 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12014 Src =
12015 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12016 }
12017
12018 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12019 }
12020 } else { // Narrowing + Conversion
12021 if (SrcVT.isInteger()) {
12022 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12023 // First do a narrowing conversion to an FP type half the size, then round
12024 // to a smaller FP type if needed.
12025
12026 MVT InterimFVT = DstVT;
12027 if (SrcEltSize > (2 * DstEltSize)) {
12028 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12029 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12030 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12031 }
12032
12033 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12034
12035 if (InterimFVT != DstVT) {
12036 Src = Result;
12037 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12038 }
12039 } else {
12040 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12041 "Wrong input/output vector types");
12042 // First do a narrowing conversion to an integer half the size, then
12043 // truncate if needed.
12044
12045 if (DstEltSize == 1) {
12046 // First convert to the same size integer, then convert to mask using
12047 // setcc.
12048 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12049 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12050 DstVT.getVectorElementCount());
12051 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12052
12053 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12054 // otherwise the conversion was undefined.
12055 MVT XLenVT = Subtarget.getXLenVT();
12056 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12057 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12058 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12059 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12060 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12061 DAG.getUNDEF(DstVT), Mask, VL});
12062 } else {
12063 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12064 DstVT.getVectorElementCount());
12065
12066 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12067
12068 while (InterimIVT != DstVT) {
12069 SrcEltSize /= 2;
12070 Src = Result;
12071 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12072 DstVT.getVectorElementCount());
12073 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12074 Src, Mask, VL);
12075 }
12076 }
12077 }
12078 }
12079
12080 MVT VT = Op.getSimpleValueType();
12081 if (!VT.isFixedLengthVector())
12082 return Result;
12083 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12084}
12085
12086SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12087 SelectionDAG &DAG) const {
12088 SDLoc DL(Op);
12089 MVT VT = Op.getSimpleValueType();
12090 MVT XLenVT = Subtarget.getXLenVT();
12091
12092 SDValue Mask = Op.getOperand(0);
12093 SDValue TrueVal = Op.getOperand(1);
12094 SDValue FalseVal = Op.getOperand(2);
12095 SDValue VL = Op.getOperand(3);
12096
12097 // Use default legalization if a vector of EVL type would be legal.
12098 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12099 VT.getVectorElementCount());
12100 if (isTypeLegal(EVLVecVT))
12101 return SDValue();
12102
12103 MVT ContainerVT = VT;
12104 if (VT.isFixedLengthVector()) {
12105 ContainerVT = getContainerForFixedLengthVector(VT);
12106 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12107 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12108 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12109 }
12110
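// Sketch of the emulation below: promote both i1 operands to i8 vectors of
// 0/1, vmerge them under the VP mask, and compare the merged bytes against
// zero to recover an i1 result.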
12111 // Promote to a vector of i8.
12112 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12113
12114 // Promote TrueVal and FalseVal using VLMax.
12115 // FIXME: Is there a better way to do this?
12116 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12117 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12118 DAG.getUNDEF(PromotedVT),
12119 DAG.getConstant(1, DL, XLenVT), VLMax);
12120 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12121 DAG.getUNDEF(PromotedVT),
12122 DAG.getConstant(0, DL, XLenVT), VLMax);
12123 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12124 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12125 // Any element past VL uses FalseVal, so use VLMax.
12126 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12127 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12128
12129 // VP_MERGE the two promoted values.
12130 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12131 TrueVal, FalseVal, FalseVal, VL);
12132
12133 // Convert back to mask.
12134 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12135 SDValue Result = DAG.getNode(
12136 RISCVISD::SETCC_VL, DL, ContainerVT,
12137 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12138 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12139
12140 if (VT.isFixedLengthVector())
12141 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12142 return Result;
12143}
12144
12145SDValue
12146RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12147 SelectionDAG &DAG) const {
12148 SDLoc DL(Op);
12149
12150 SDValue Op1 = Op.getOperand(0);
12151 SDValue Op2 = Op.getOperand(1);
12152 SDValue Offset = Op.getOperand(2);
12153 SDValue Mask = Op.getOperand(3);
12154 SDValue EVL1 = Op.getOperand(4);
12155 SDValue EVL2 = Op.getOperand(5);
12156
12157 const MVT XLenVT = Subtarget.getXLenVT();
12158 MVT VT = Op.getSimpleValueType();
12159 MVT ContainerVT = VT;
12160 if (VT.isFixedLengthVector()) {
12161 ContainerVT = getContainerForFixedLengthVector(VT);
12162 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12163 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12164 MVT MaskVT = getMaskTypeFor(ContainerVT);
12165 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12166 }
12167
12168 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12169 if (IsMaskVector) {
12170 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12171
12172 // Expand input operands
12173 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12174 DAG.getUNDEF(ContainerVT),
12175 DAG.getConstant(1, DL, XLenVT), EVL1);
12176 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12177 DAG.getUNDEF(ContainerVT),
12178 DAG.getConstant(0, DL, XLenVT), EVL1);
12179 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12180 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12181
12182 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12183 DAG.getUNDEF(ContainerVT),
12184 DAG.getConstant(1, DL, XLenVT), EVL2);
12185 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12186 DAG.getUNDEF(ContainerVT),
12187 DAG.getConstant(0, DL, XLenVT), EVL2);
12188 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12189 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12190 }
12191
12192 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12193 SDValue DownOffset, UpOffset;
12194 if (ImmValue >= 0) {
12195 // The operand is a TargetConstant; we need to rebuild it as a regular
12196 // constant.
12197 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12198 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12199 } else {
12200 // The operand is a TargetConstant; we need to rebuild it as a regular
12201 // constant rather than negating the original operand.
12202 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12203 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12204 }
12205
12206 SDValue SlideDown =
12207 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12208 Op1, DownOffset, Mask, UpOffset);
12209 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12210 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12211
12212 if (IsMaskVector) {
12213 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12214 Result = DAG.getNode(
12215 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12216 {Result, DAG.getConstant(0, DL, ContainerVT),
12217 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12218 Mask, EVL2});
12219 }
12220
12221 if (!VT.isFixedLengthVector())
12222 return Result;
12223 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12224}
12225
12226SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12227 SelectionDAG &DAG) const {
12228 SDLoc DL(Op);
12229 SDValue Val = Op.getOperand(0);
12230 SDValue Mask = Op.getOperand(1);
12231 SDValue VL = Op.getOperand(2);
12232 MVT VT = Op.getSimpleValueType();
12233
12234 MVT ContainerVT = VT;
12235 if (VT.isFixedLengthVector()) {
12236 ContainerVT = getContainerForFixedLengthVector(VT);
12237 MVT MaskVT = getMaskTypeFor(ContainerVT);
12238 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12239 }
12240
12241 SDValue Result =
12242 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12243
12244 if (!VT.isFixedLengthVector())
12245 return Result;
12246 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12247}
12248
12249SDValue
12250RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12251 SelectionDAG &DAG) const {
12252 SDLoc DL(Op);
12253 MVT VT = Op.getSimpleValueType();
12254 MVT XLenVT = Subtarget.getXLenVT();
12255
12256 SDValue Op1 = Op.getOperand(0);
12257 SDValue Mask = Op.getOperand(1);
12258 SDValue EVL = Op.getOperand(2);
12259
12260 MVT ContainerVT = VT;
12261 if (VT.isFixedLengthVector()) {
12262 ContainerVT = getContainerForFixedLengthVector(VT);
12263 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12264 MVT MaskVT = getMaskTypeFor(ContainerVT);
12265 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12266 }
12267
12268 MVT GatherVT = ContainerVT;
12269 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12270 // Check if we are working with mask vectors
12271 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12272 if (IsMaskVector) {
12273 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12274
12275 // Expand input operand
12276 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12277 DAG.getUNDEF(IndicesVT),
12278 DAG.getConstant(1, DL, XLenVT), EVL);
12279 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12280 DAG.getUNDEF(IndicesVT),
12281 DAG.getConstant(0, DL, XLenVT), EVL);
12282 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12283 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12284 }
12285
12286 unsigned EltSize = GatherVT.getScalarSizeInBits();
12287 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12288 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12289 unsigned MaxVLMAX =
12290 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12291
12292 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12293 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12294 // to use vrgatherei16.vv.
12295 // TODO: It's also possible to use vrgatherei16.vv for other types to
12296 // decrease register width for the index calculation.
12297 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12298 if (MaxVLMAX > 256 && EltSize == 8) {
12299 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12300 // Split the vector in half and reverse each half using a full register
12301 // reverse.
12302 // Swap the halves and concatenate them.
12303 // Slide the concatenated result by (VLMax - VL).
12304 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12305 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12306 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12307
12308 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12309 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12310
12311 // Reassemble the low and high pieces reversed.
12312 // NOTE: This Result is unmasked (because we do not need masks for
12313 // shuffles). If in the future this has to change, we can use a SELECT_VL
12314 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
12315 SDValue Result =
12316 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12317
12318 // Slide off any elements from past EVL that were reversed into the low
12319 // elements.
12320 unsigned MinElts = GatherVT.getVectorMinNumElements();
12321 SDValue VLMax =
12322 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12323 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12324
12325 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12326 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12327
12328 if (IsMaskVector) {
12329 // Truncate Result back to a mask vector
12330 Result =
12331 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12332 {Result, DAG.getConstant(0, DL, GatherVT),
12333 DAG.getCondCode(ISD::SETNE),
12334 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12335 }
12336
12337 if (!VT.isFixedLengthVector())
12338 return Result;
12339 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12340 }
12341
12342 // Just promote the int type to i16 which will double the LMUL.
12343 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12344 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12345 }
12346
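// For example, with EVL = 4 the index vector computed below is 3 2 1 0, so
// the gather reverses exactly the first EVL elements of the source.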
12347 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12348 SDValue VecLen =
12349 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12350 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12351 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12352 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12353 DAG.getUNDEF(IndicesVT), Mask, EVL);
12354 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12355 DAG.getUNDEF(GatherVT), Mask, EVL);
12356
12357 if (IsMaskVector) {
12358 // Truncate Result back to a mask vector
12359 Result = DAG.getNode(
12360 RISCVISD::SETCC_VL, DL, ContainerVT,
12361 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12362 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12363 }
12364
12365 if (!VT.isFixedLengthVector())
12366 return Result;
12367 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12368}
12369
12370SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12371 SelectionDAG &DAG) const {
12372 MVT VT = Op.getSimpleValueType();
12373 if (VT.getVectorElementType() != MVT::i1)
12374 return lowerVPOp(Op, DAG);
12375
12376 // It is safe to drop the mask parameter as masked-off elements are undef.
12377 SDValue Op1 = Op->getOperand(0);
12378 SDValue Op2 = Op->getOperand(1);
12379 SDValue VL = Op->getOperand(3);
12380
12381 MVT ContainerVT = VT;
12382 const bool IsFixed = VT.isFixedLengthVector();
12383 if (IsFixed) {
12384 ContainerVT = getContainerForFixedLengthVector(VT);
12385 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12386 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12387 }
12388
12389 SDLoc DL(Op);
12390 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12391 if (!IsFixed)
12392 return Val;
12393 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12394}
12395
12396SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12397 SelectionDAG &DAG) const {
12398 SDLoc DL(Op);
12399 MVT XLenVT = Subtarget.getXLenVT();
12400 MVT VT = Op.getSimpleValueType();
12401 MVT ContainerVT = VT;
12402 if (VT.isFixedLengthVector())
12403 ContainerVT = getContainerForFixedLengthVector(VT);
12404
12405 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12406
12407 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12408 // Check if the mask is known to be all ones
12409 SDValue Mask = VPNode->getMask();
12410 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12411
12412 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12413 : Intrinsic::riscv_vlse_mask,
12414 DL, XLenVT);
12415 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12416 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12417 VPNode->getStride()};
12418 if (!IsUnmasked) {
12419 if (VT.isFixedLengthVector()) {
12420 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12421 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12422 }
12423 Ops.push_back(Mask);
12424 }
12425 Ops.push_back(VPNode->getVectorLength());
12426 if (!IsUnmasked) {
12427 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12428 Ops.push_back(Policy);
12429 }
12430
12431 SDValue Result =
12432 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12433 VPNode->getMemoryVT(), VPNode->getMemOperand());
12434 SDValue Chain = Result.getValue(1);
12435
12436 if (VT.isFixedLengthVector())
12437 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12438
12439 return DAG.getMergeValues({Result, Chain}, DL);
12440}
12441
12442SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12443 SelectionDAG &DAG) const {
12444 SDLoc DL(Op);
12445 MVT XLenVT = Subtarget.getXLenVT();
12446
12447 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12448 SDValue StoreVal = VPNode->getValue();
12449 MVT VT = StoreVal.getSimpleValueType();
12450 MVT ContainerVT = VT;
12451 if (VT.isFixedLengthVector()) {
12452 ContainerVT = getContainerForFixedLengthVector(VT);
12453 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12454 }
12455
12456 // Check if the mask is known to be all ones
12457 SDValue Mask = VPNode->getMask();
12458 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12459
12460 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12461 : Intrinsic::riscv_vsse_mask,
12462 DL, XLenVT);
12463 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12464 VPNode->getBasePtr(), VPNode->getStride()};
12465 if (!IsUnmasked) {
12466 if (VT.isFixedLengthVector()) {
12467 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12468 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12469 }
12470 Ops.push_back(Mask);
12471 }
12472 Ops.push_back(VPNode->getVectorLength());
12473
12474 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12475 Ops, VPNode->getMemoryVT(),
12476 VPNode->getMemOperand());
12477}
12478
12479// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12480 // matched to an RVV indexed load. The RVV indexed load instructions only
12481// support the "unsigned unscaled" addressing mode; indices are implicitly
12482// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12483// signed or scaled indexing is extended to the XLEN value type and scaled
12484// accordingly.
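// For example, a gather of i32 elements whose index vector already holds the
// byte offsets 0, 4, 8, ... is lowered to the riscv_vluxei intrinsic with
// those offsets, which selects to a vluxei indexed load.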
12485SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12486 SelectionDAG &DAG) const {
12487 SDLoc DL(Op);
12488 MVT VT = Op.getSimpleValueType();
12489
12490 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12491 EVT MemVT = MemSD->getMemoryVT();
12492 MachineMemOperand *MMO = MemSD->getMemOperand();
12493 SDValue Chain = MemSD->getChain();
12494 SDValue BasePtr = MemSD->getBasePtr();
12495
12496 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12497 SDValue Index, Mask, PassThru, VL;
12498
12499 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12500 Index = VPGN->getIndex();
12501 Mask = VPGN->getMask();
12502 PassThru = DAG.getUNDEF(VT);
12503 VL = VPGN->getVectorLength();
12504 // VP doesn't support extending loads.
12505 LoadExtType = ISD::NON_EXTLOAD;
12506 } else {
12507 // Else it must be a MGATHER.
12508 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12509 Index = MGN->getIndex();
12510 Mask = MGN->getMask();
12511 PassThru = MGN->getPassThru();
12512 LoadExtType = MGN->getExtensionType();
12513 }
12514
12515 MVT IndexVT = Index.getSimpleValueType();
12516 MVT XLenVT = Subtarget.getXLenVT();
12517
12518 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12519 "Unexpected VTs!");
12520 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12521 // Targets have to explicitly opt in to extending vector loads.
12522 assert(LoadExtType == ISD::NON_EXTLOAD &&
12523 "Unexpected extending MGATHER/VP_GATHER");
12524
12525 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12526 // the selection of the masked intrinsics doesn't do this for us.
12527 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12528
12529 MVT ContainerVT = VT;
12530 if (VT.isFixedLengthVector()) {
12531 ContainerVT = getContainerForFixedLengthVector(VT);
12532 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12533 ContainerVT.getVectorElementCount());
12534
12535 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12536
12537 if (!IsUnmasked) {
12538 MVT MaskVT = getMaskTypeFor(ContainerVT);
12539 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12540 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12541 }
12542 }
12543
12544 if (!VL)
12545 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12546
12547 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12548 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12549 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12550 }
12551
12552 unsigned IntID =
12553 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12554 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12555 if (IsUnmasked)
12556 Ops.push_back(DAG.getUNDEF(ContainerVT));
12557 else
12558 Ops.push_back(PassThru);
12559 Ops.push_back(BasePtr);
12560 Ops.push_back(Index);
12561 if (!IsUnmasked)
12562 Ops.push_back(Mask);
12563 Ops.push_back(VL);
12564 if (!IsUnmasked)
12565    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12566
12567 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12568 SDValue Result =
12569 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12570 Chain = Result.getValue(1);
12571
12572 if (VT.isFixedLengthVector())
12573 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12574
12575 return DAG.getMergeValues({Result, Chain}, DL);
12576}
12577
12578// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12579// matched to a RVV indexed store. The RVV indexed store instructions only
12580// support the "unsigned unscaled" addressing mode; indices are implicitly
12581// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12582// signed or scaled indexing is extended to the XLEN value type and scaled
12583// accordingly.
12584SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12585 SelectionDAG &DAG) const {
12586 SDLoc DL(Op);
12587 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12588 EVT MemVT = MemSD->getMemoryVT();
12589 MachineMemOperand *MMO = MemSD->getMemOperand();
12590 SDValue Chain = MemSD->getChain();
12591 SDValue BasePtr = MemSD->getBasePtr();
12592
12593 [[maybe_unused]] bool IsTruncatingStore = false;
12594 SDValue Index, Mask, Val, VL;
12595
12596 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12597 Index = VPSN->getIndex();
12598 Mask = VPSN->getMask();
12599 Val = VPSN->getValue();
12600 VL = VPSN->getVectorLength();
12601 // VP doesn't support truncating stores.
12602 IsTruncatingStore = false;
12603 } else {
12604 // Else it must be a MSCATTER.
12605 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12606 Index = MSN->getIndex();
12607 Mask = MSN->getMask();
12608 Val = MSN->getValue();
12609 IsTruncatingStore = MSN->isTruncatingStore();
12610 }
12611
12612 MVT VT = Val.getSimpleValueType();
12613 MVT IndexVT = Index.getSimpleValueType();
12614 MVT XLenVT = Subtarget.getXLenVT();
12615
12616  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12617         "Unexpected VTs!");
12618 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12619 // Targets have to explicitly opt-in for extending vector loads and
12620 // truncating vector stores.
12621 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12622
12623 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12624 // the selection of the masked intrinsics doesn't do this for us.
12625 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12626
12627 MVT ContainerVT = VT;
12628 if (VT.isFixedLengthVector()) {
12629 ContainerVT = getContainerForFixedLengthVector(VT);
12630 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12631 ContainerVT.getVectorElementCount());
12632
12633 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12634 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12635
12636 if (!IsUnmasked) {
12637 MVT MaskVT = getMaskTypeFor(ContainerVT);
12638 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12639 }
12640 }
12641
12642 if (!VL)
12643 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12644
12645 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12646 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12647 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12648 }
12649
12650 unsigned IntID =
12651 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12652 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12653 Ops.push_back(Val);
12654 Ops.push_back(BasePtr);
12655 Ops.push_back(Index);
12656 if (!IsUnmasked)
12657 Ops.push_back(Mask);
12658 Ops.push_back(VL);
12659
12660  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12661                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12662}
12663
12664SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12665 SelectionDAG &DAG) const {
12666 const MVT XLenVT = Subtarget.getXLenVT();
12667 SDLoc DL(Op);
12668 SDValue Chain = Op->getOperand(0);
12669 SDValue SysRegNo = DAG.getTargetConstant(
12670 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12671 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12672 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12673
12674  // The encoding used for the rounding mode in RISC-V differs from that used
12675  // in FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
12676  // into a table, which consists of a sequence of 4-bit fields, each
12677  // representing the corresponding FLT_ROUNDS mode.
12678  static const int Table =
12679      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12680      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12681      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12682      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12683      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12684
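  // For illustration, assuming the standard frm encoding (RNE=0, RTZ=1,
  // RDN=2, RUP=3, RMM=4): reading FRM = 1 (RTZ) gives Shift = 1 << 2 = 4
  // below, so (Table >> 4) & 7 extracts the field written for RTZ, i.e. the
  // FLT_ROUNDS value RoundingMode::TowardZero.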
12685 SDValue Shift =
12686 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12687 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12688 DAG.getConstant(Table, DL, XLenVT), Shift);
12689 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12690 DAG.getConstant(7, DL, XLenVT));
12691
12692 return DAG.getMergeValues({Masked, Chain}, DL);
12693}
12694
12695SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12696 SelectionDAG &DAG) const {
12697 const MVT XLenVT = Subtarget.getXLenVT();
12698 SDLoc DL(Op);
12699 SDValue Chain = Op->getOperand(0);
12700 SDValue RMValue = Op->getOperand(1);
12701 SDValue SysRegNo = DAG.getTargetConstant(
12702 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12703
12704  // The encoding used for the rounding mode in RISC-V differs from that used
12705  // in FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12706  // a table, which consists of a sequence of 4-bit fields, each representing
12707  // the corresponding RISC-V mode.
12708  static const unsigned Table =
12709      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12710      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12711      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12712      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12713      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12714
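  // For illustration: a FLT_ROUNDS argument of 0 (round toward zero) gives a
  // shift amount of 0 << 2 = 0 below, so (Table >> 0) & 7 yields
  // RISCVFPRndMode::RTZ, which is then written to the FRM CSR.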
12715 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12716
12717 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12718 DAG.getConstant(2, DL, XLenVT));
12719 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12720 DAG.getConstant(Table, DL, XLenVT), Shift);
12721 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12722 DAG.getConstant(0x7, DL, XLenVT));
12723 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12724 RMValue);
12725}
12726
12727SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12728 SelectionDAG &DAG) const {
12729  MachineFunction &MF = DAG.getMachineFunction();
12730
12731 bool isRISCV64 = Subtarget.is64Bit();
12732 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12733
12734 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12735 return DAG.getFrameIndex(FI, PtrVT);
12736}
12737
12738// Returns the opcode of the target-specific SDNode that implements the 32-bit
12739// form of the given Opcode.
12740static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12741 switch (Opcode) {
12742 default:
12743 llvm_unreachable("Unexpected opcode");
12744 case ISD::SHL:
12745 return RISCVISD::SLLW;
12746 case ISD::SRA:
12747 return RISCVISD::SRAW;
12748 case ISD::SRL:
12749 return RISCVISD::SRLW;
12750 case ISD::SDIV:
12751 return RISCVISD::DIVW;
12752 case ISD::UDIV:
12753 return RISCVISD::DIVUW;
12754 case ISD::UREM:
12755 return RISCVISD::REMUW;
12756 case ISD::ROTL:
12757 return RISCVISD::ROLW;
12758 case ISD::ROTR:
12759 return RISCVISD::RORW;
12760 }
12761}
12762
12763// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12764// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12765// otherwise be promoted to i64, making it difficult to select the
12766// SLLW/DIVUW/.../*W later, because the fact that the operation was originally
12767// of type i8/i16/i32 is lost.
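// As an illustrative sketch: for (udiv i32 %a, %b) on RV64, ReplaceNodeResults
// any-extends both operands to i64, emits RISCVISD::DIVUW (which reads only
// the low 32 bits of its operands and sign-extends its 32-bit result), and
// truncates the i64 result back to i32.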
12768static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12769                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
12770 SDLoc DL(N);
12771 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12772 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12773 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12774 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12775 // ReplaceNodeResults requires we maintain the same type for the return value.
12776 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12777}
12778
12779// Converts the given 32-bit operation to an i64 operation with sign extension
12780// semantics, to reduce the number of sign extension instructions.
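// As an illustrative sketch: (add i32 %a, %b) on RV64 becomes
// trunc(sext_inreg(add(anyext %a, anyext %b), i32)), matching the semantics of
// ADDW and recording that the result is already sign extended.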
12781static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12782  SDLoc DL(N);
12783 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12784 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12785 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12786 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12787 DAG.getValueType(MVT::i32));
12788 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12789}
12790
12791void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12792                                             SmallVectorImpl<SDValue> &Results,
12793                                             SelectionDAG &DAG) const {
12794 SDLoc DL(N);
12795 switch (N->getOpcode()) {
12796 default:
12797 llvm_unreachable("Don't know how to custom type legalize this operation!");
12798  case ISD::STRICT_FP_TO_SINT:
12799  case ISD::STRICT_FP_TO_UINT:
12800  case ISD::FP_TO_SINT:
12801 case ISD::FP_TO_UINT: {
12802 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12803 "Unexpected custom legalisation");
12804 bool IsStrict = N->isStrictFPOpcode();
12805 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12806 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12807 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12808 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12809        TargetLowering::TypeSoftenFloat) {
12810      if (!isTypeLegal(Op0.getValueType()))
12811 return;
12812 if (IsStrict) {
12813 SDValue Chain = N->getOperand(0);
12814        // In absence of Zfh, promote f16 to f32, then convert.
12815 if (Op0.getValueType() == MVT::f16 &&
12816 !Subtarget.hasStdExtZfhOrZhinx()) {
12817 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12818 {Chain, Op0});
12819 Chain = Op0.getValue(1);
12820 }
12821 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12822                                : RISCVISD::STRICT_FCVT_WU_RV64;
12823        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12824 SDValue Res = DAG.getNode(
12825 Opc, DL, VTs, Chain, Op0,
12826 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12827 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12828 Results.push_back(Res.getValue(1));
12829 return;
12830 }
12831      // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
12832 // convert.
12833 if ((Op0.getValueType() == MVT::f16 &&
12834 !Subtarget.hasStdExtZfhOrZhinx()) ||
12835 Op0.getValueType() == MVT::bf16)
12836 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12837
12838 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12839 SDValue Res =
12840 DAG.getNode(Opc, DL, MVT::i64, Op0,
12841 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12842 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12843 return;
12844 }
12845 // If the FP type needs to be softened, emit a library call using the 'si'
12846 // version. If we left it to default legalization we'd end up with 'di'. If
12847 // the FP type doesn't need to be softened just let generic type
12848 // legalization promote the result type.
12849 RTLIB::Libcall LC;
12850 if (IsSigned)
12851 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12852 else
12853 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12854 MakeLibCallOptions CallOptions;
12855 EVT OpVT = Op0.getValueType();
12856 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12857 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12858 SDValue Result;
12859 std::tie(Result, Chain) =
12860 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12861 Results.push_back(Result);
12862 if (IsStrict)
12863 Results.push_back(Chain);
12864 break;
12865 }
12866 case ISD::LROUND: {
12867 SDValue Op0 = N->getOperand(0);
12868 EVT Op0VT = Op0.getValueType();
12869 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12870        TargetLowering::TypeSoftenFloat) {
12871      if (!isTypeLegal(Op0VT))
12872 return;
12873
12874      // In absence of Zfh, promote f16 to f32, then convert.
12875 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12876 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12877
12878 SDValue Res =
12879 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12880 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12881 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12882 return;
12883 }
12884 // If the FP type needs to be softened, emit a library call to lround. We'll
12885 // need to truncate the result. We assume any value that doesn't fit in i32
12886 // is allowed to return an unspecified value.
12887 RTLIB::Libcall LC =
12888 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12889 MakeLibCallOptions CallOptions;
12890 EVT OpVT = Op0.getValueType();
12891 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12892 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12893 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12894 Results.push_back(Result);
12895 break;
12896 }
12897  case ISD::READCYCLECOUNTER:
12898  case ISD::READSTEADYCOUNTER: {
12899    assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12900 "has custom type legalization on riscv32");
12901
12902 SDValue LoCounter, HiCounter;
12903 MVT XLenVT = Subtarget.getXLenVT();
12904 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12905 LoCounter = DAG.getTargetConstant(
12906 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12907 HiCounter = DAG.getTargetConstant(
12908 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12909 } else {
12910 LoCounter = DAG.getTargetConstant(
12911 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12912 HiCounter = DAG.getTargetConstant(
12913 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12914 }
12915 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12916    SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12917                              N->getOperand(0), LoCounter, HiCounter);
12918
12919 Results.push_back(
12920 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12921 Results.push_back(RCW.getValue(2));
12922 break;
12923 }
12924 case ISD::LOAD: {
12925 if (!ISD::isNON_EXTLoad(N))
12926 return;
12927
12928 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12929 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12930 LoadSDNode *Ld = cast<LoadSDNode>(N);
12931
12932 SDLoc dl(N);
12933 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12934 Ld->getBasePtr(), Ld->getMemoryVT(),
12935 Ld->getMemOperand());
12936 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12937 Results.push_back(Res.getValue(1));
12938 return;
12939 }
12940 case ISD::MUL: {
12941 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12942 unsigned XLen = Subtarget.getXLen();
12943 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12944 if (Size > XLen) {
12945 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12946 SDValue LHS = N->getOperand(0);
12947 SDValue RHS = N->getOperand(1);
12948 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12949
12950 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12951 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12952 // We need exactly one side to be unsigned.
12953 if (LHSIsU == RHSIsU)
12954 return;
12955
12956 auto MakeMULPair = [&](SDValue S, SDValue U) {
12957 MVT XLenVT = Subtarget.getXLenVT();
12958 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12959 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12960 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12961 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12962 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12963 };
12964
12965 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12966 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12967
12968 // The other operand should be signed, but still prefer MULH when
12969 // possible.
12970 if (RHSIsU && LHSIsS && !RHSIsS)
12971 Results.push_back(MakeMULPair(LHS, RHS));
12972 else if (LHSIsU && RHSIsS && !LHSIsS)
12973 Results.push_back(MakeMULPair(RHS, LHS));
12974
12975 return;
12976 }
12977 [[fallthrough]];
12978 }
12979 case ISD::ADD:
12980 case ISD::SUB:
12981 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12982 "Unexpected custom legalisation");
12983 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12984 break;
12985 case ISD::SHL:
12986 case ISD::SRA:
12987 case ISD::SRL:
12988 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12989 "Unexpected custom legalisation");
12990 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12991 // If we can use a BSET instruction, allow default promotion to apply.
12992 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12993 isOneConstant(N->getOperand(0)))
12994 break;
12995 Results.push_back(customLegalizeToWOp(N, DAG));
12996 break;
12997 }
12998
12999 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13000 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13001 // shift amount.
13002 if (N->getOpcode() == ISD::SHL) {
13003 SDLoc DL(N);
13004 SDValue NewOp0 =
13005 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13006 SDValue NewOp1 =
13007 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13008 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13009 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13010 DAG.getValueType(MVT::i32));
13011 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13012 }
13013
13014 break;
13015 case ISD::ROTL:
13016 case ISD::ROTR:
13017 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13018 "Unexpected custom legalisation");
13019 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13020 Subtarget.hasVendorXTHeadBb()) &&
13021 "Unexpected custom legalization");
13022 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13023 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13024 return;
13025 Results.push_back(customLegalizeToWOp(N, DAG));
13026 break;
13027 case ISD::CTTZ:
13028  case ISD::CTTZ_ZERO_UNDEF:
13029  case ISD::CTLZ:
13030 case ISD::CTLZ_ZERO_UNDEF: {
13031 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13032 "Unexpected custom legalisation");
13033
13034 SDValue NewOp0 =
13035 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13036 bool IsCTZ =
13037 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13038 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13039 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13040 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13041 return;
13042 }
13043 case ISD::SDIV:
13044 case ISD::UDIV:
13045 case ISD::UREM: {
13046 MVT VT = N->getSimpleValueType(0);
13047 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13048 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13049 "Unexpected custom legalisation");
13050 // Don't promote division/remainder by constant since we should expand those
13051 // to multiply by magic constant.
13052    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13053    if (N->getOperand(1).getOpcode() == ISD::Constant &&
13054 !isIntDivCheap(N->getValueType(0), Attr))
13055 return;
13056
13057 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13058 // the upper 32 bits. For other types we need to sign or zero extend
13059 // based on the opcode.
13060 unsigned ExtOpc = ISD::ANY_EXTEND;
13061 if (VT != MVT::i32)
13062 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13063                                           : ISD::ZERO_EXTEND;
13064
13065 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13066 break;
13067 }
13068 case ISD::SADDO: {
13069 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13070 "Unexpected custom legalisation");
13071
13072 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13073 // use the default legalization.
13074 if (!isa<ConstantSDNode>(N->getOperand(1)))
13075 return;
13076
13077 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13078 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13079 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13080 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13081 DAG.getValueType(MVT::i32));
13082
13083 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13084
13085 // For an addition, the result should be less than one of the operands (LHS)
13086 // if and only if the other operand (RHS) is negative, otherwise there will
13087 // be overflow.
13088 // For a subtraction, the result should be less than one of the operands
13089 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13090 // otherwise there will be overflow.
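    // For illustration (i32): LHS = 0x7fffffff, RHS = 1. The sign-extended
    // 32-bit sum is -2^31, which is less than LHS while RHS is non-negative,
    // so the XOR of the two setccs below reports overflow.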
13091 EVT OType = N->getValueType(1);
13092 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13093 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13094
13095 SDValue Overflow =
13096 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13097 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13098 Results.push_back(Overflow);
13099 return;
13100 }
13101 case ISD::UADDO:
13102 case ISD::USUBO: {
13103 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13104 "Unexpected custom legalisation");
13105 bool IsAdd = N->getOpcode() == ISD::UADDO;
13106 // Create an ADDW or SUBW.
13107 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13108 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13109 SDValue Res =
13110 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13111 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13112 DAG.getValueType(MVT::i32));
13113
13114 SDValue Overflow;
13115 if (IsAdd && isOneConstant(RHS)) {
13116 // Special case uaddo X, 1 overflowed if the addition result is 0.
13117 // The general case (X + C) < C is not necessarily beneficial. Although we
13118 // reduce the live range of X, we may introduce the materialization of
13119      // constant C, especially when the setcc result is used by a branch. We
13120      // have no compare-with-constant-and-branch instructions.
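      // For illustration: uaddo i32 0xffffffff, 1 wraps to 0, and 0 is the
      // only value the truncated sum can take when the add overflows, so
      // "result == 0" is an exact overflow test for X + 1.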
13121 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13122 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13123 } else if (IsAdd && isAllOnesConstant(RHS)) {
13124 // Special case uaddo X, -1 overflowed if X != 0.
13125 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13126 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13127 } else {
13128 // Sign extend the LHS and perform an unsigned compare with the ADDW
13129 // result. Since the inputs are sign extended from i32, this is equivalent
13130 // to comparing the lower 32 bits.
13131 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13132 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13133 IsAdd ? ISD::SETULT : ISD::SETUGT);
13134 }
13135
13136 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13137 Results.push_back(Overflow);
13138 return;
13139 }
13140 case ISD::UADDSAT:
13141 case ISD::USUBSAT: {
13142 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13143 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13144 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13145 // promotion for UADDO/USUBO.
13146 Results.push_back(expandAddSubSat(N, DAG));
13147 return;
13148 }
13149 case ISD::SADDSAT:
13150 case ISD::SSUBSAT: {
13151 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13152 "Unexpected custom legalisation");
13153 Results.push_back(expandAddSubSat(N, DAG));
13154 return;
13155 }
13156 case ISD::ABS: {
13157 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13158 "Unexpected custom legalisation");
13159
13160 if (Subtarget.hasStdExtZbb()) {
13161 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13162 // This allows us to remember that the result is sign extended. Expanding
13163 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13164 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13165 N->getOperand(0));
13166 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13167 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13168 return;
13169 }
13170
13171 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
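    // For illustration: X = -5 gives Y = -1, so (X ^ Y) - Y = (~X) + 1 = 5;
    // for non-negative X, Y = 0 and the sequence returns X unchanged.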
13172 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13173
13174    // Freeze the source so we can increase its use count.
13175 Src = DAG.getFreeze(Src);
13176
13177 // Copy sign bit to all bits using the sraiw pattern.
13178 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13179 DAG.getValueType(MVT::i32));
13180 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13181 DAG.getConstant(31, DL, MVT::i64));
13182
13183 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13184 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13185
13186 // NOTE: The result is only required to be anyextended, but sext is
13187 // consistent with type legalization of sub.
13188 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13189 DAG.getValueType(MVT::i32));
13190 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13191 return;
13192 }
13193 case ISD::BITCAST: {
13194 EVT VT = N->getValueType(0);
13195 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13196 SDValue Op0 = N->getOperand(0);
13197 EVT Op0VT = Op0.getValueType();
13198 MVT XLenVT = Subtarget.getXLenVT();
13199 if (VT == MVT::i16 &&
13200 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13201 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13202 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13203 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13204 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13205 Subtarget.hasStdExtFOrZfinx()) {
13206 SDValue FPConv =
13207 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13208 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13209 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13210 Subtarget.hasStdExtDOrZdinx()) {
13211 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13212 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13213 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13214 NewReg.getValue(0), NewReg.getValue(1));
13215 Results.push_back(RetReg);
13216 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13217 isTypeLegal(Op0VT)) {
13218 // Custom-legalize bitcasts from fixed-length vector types to illegal
13219 // scalar types in order to improve codegen. Bitcast the vector to a
13220 // one-element vector type whose element type is the same as the result
13221 // type, and extract the first element.
13222 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13223 if (isTypeLegal(BVT)) {
13224 SDValue BVec = DAG.getBitcast(BVT, Op0);
13225 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13226 DAG.getVectorIdxConstant(0, DL)));
13227 }
13228 }
13229 break;
13230 }
13231 case RISCVISD::BREV8:
13232 case RISCVISD::ORC_B: {
13233 MVT VT = N->getSimpleValueType(0);
13234 MVT XLenVT = Subtarget.getXLenVT();
13235 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13236 "Unexpected custom legalisation");
13237 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13238 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13239 "Unexpected extension");
13240 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13241 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13242 // ReplaceNodeResults requires we maintain the same type for the return
13243 // value.
13244 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13245 break;
13246 }
13247  case ISD::EXTRACT_VECTOR_ELT: {
13248    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13249 // type is illegal (currently only vXi64 RV32).
13250 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13251 // transferred to the destination register. We issue two of these from the
13252 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13253 // first element.
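    // As an illustrative sketch: extracting element 2 of a v4i64 on RV32
    // slides the vector down by 2, reads the low 32 bits with vmv.x.s, shifts
    // the element right by 32, reads the high 32 bits with a second vmv.x.s,
    // and reassembles the i64 result with BUILD_PAIR.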
13254 SDValue Vec = N->getOperand(0);
13255 SDValue Idx = N->getOperand(1);
13256
13257 // The vector type hasn't been legalized yet so we can't issue target
13258 // specific nodes if it needs legalization.
13259 // FIXME: We would manually legalize if it's important.
13260 if (!isTypeLegal(Vec.getValueType()))
13261 return;
13262
13263 MVT VecVT = Vec.getSimpleValueType();
13264
13265 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13266 VecVT.getVectorElementType() == MVT::i64 &&
13267 "Unexpected EXTRACT_VECTOR_ELT legalization");
13268
13269 // If this is a fixed vector, we need to convert it to a scalable vector.
13270 MVT ContainerVT = VecVT;
13271 if (VecVT.isFixedLengthVector()) {
13272 ContainerVT = getContainerForFixedLengthVector(VecVT);
13273 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13274 }
13275
13276 MVT XLenVT = Subtarget.getXLenVT();
13277
13278 // Use a VL of 1 to avoid processing more elements than we need.
13279 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13280
13281 // Unless the index is known to be 0, we must slide the vector down to get
13282 // the desired element into index 0.
13283 if (!isNullConstant(Idx)) {
13284 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13285 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13286 }
13287
13288 // Extract the lower XLEN bits of the correct vector element.
13289 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13290
13291 // To extract the upper XLEN bits of the vector element, shift the first
13292 // element right by 32 bits and re-extract the lower XLEN bits.
13293 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13294 DAG.getUNDEF(ContainerVT),
13295 DAG.getConstant(32, DL, XLenVT), VL);
13296 SDValue LShr32 =
13297 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13298 DAG.getUNDEF(ContainerVT), Mask, VL);
13299
13300 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13301
13302 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13303 break;
13304 }
13305  case ISD::INTRINSIC_WO_CHAIN: {
13306    unsigned IntNo = N->getConstantOperandVal(0);
13307 switch (IntNo) {
13308 default:
13309      llvm_unreachable(
13310          "Don't know how to custom type legalize this intrinsic!");
13311 case Intrinsic::experimental_get_vector_length: {
13312 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13313 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13314 return;
13315 }
13316 case Intrinsic::experimental_cttz_elts: {
13317 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13318 Results.push_back(
13319 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13320 return;
13321 }
13322 case Intrinsic::riscv_orc_b:
13323 case Intrinsic::riscv_brev8:
13324 case Intrinsic::riscv_sha256sig0:
13325 case Intrinsic::riscv_sha256sig1:
13326 case Intrinsic::riscv_sha256sum0:
13327 case Intrinsic::riscv_sha256sum1:
13328 case Intrinsic::riscv_sm3p0:
13329 case Intrinsic::riscv_sm3p1: {
13330 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13331 return;
13332 unsigned Opc;
13333 switch (IntNo) {
13334 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13335 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13336 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13337 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13338 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13339 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13340 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13341 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13342 }
13343
13344 SDValue NewOp =
13345 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13346 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13347 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13348 return;
13349 }
13350 case Intrinsic::riscv_sm4ks:
13351 case Intrinsic::riscv_sm4ed: {
13352 unsigned Opc =
13353 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13354 SDValue NewOp0 =
13355 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13356 SDValue NewOp1 =
13357 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13358 SDValue Res =
13359 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13360 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13361 return;
13362 }
13363 case Intrinsic::riscv_mopr: {
13364 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13365 return;
13366 SDValue NewOp =
13367 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13368 SDValue Res = DAG.getNode(
13369 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13370 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13371 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13372 return;
13373 }
13374 case Intrinsic::riscv_moprr: {
13375 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13376 return;
13377 SDValue NewOp0 =
13378 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13379 SDValue NewOp1 =
13380 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13381 SDValue Res = DAG.getNode(
13382 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13383 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13384 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13385 return;
13386 }
13387 case Intrinsic::riscv_clmul: {
13388 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13389 return;
13390
13391 SDValue NewOp0 =
13392 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13393 SDValue NewOp1 =
13394 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13395 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13396 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13397 return;
13398 }
13399 case Intrinsic::riscv_clmulh:
13400 case Intrinsic::riscv_clmulr: {
13401 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13402 return;
13403
13404 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13405 // to the full 128-bit clmul result of multiplying two xlen values.
13406 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13407 // upper 32 bits.
13408 //
13409 // The alternative is to mask the inputs to 32 bits and use clmul, but
13410 // that requires two shifts to mask each input without zext.w.
13411 // FIXME: If the inputs are known zero extended or could be freely
13412 // zero extended, the mask form would be better.
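      // For illustration (clmulh): clmul(a << 32, b << 32) == clmul(a, b) << 64,
      // so the upper 64 bits of the 128-bit product are exactly the 64-bit
      // clmul(a, b); clmulh of the shifted operands returns those bits, and the
      // final shift right by 32 leaves the 32-bit clmulh(a, b) result.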
13413 SDValue NewOp0 =
13414 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13415 SDValue NewOp1 =
13416 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13417 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13418 DAG.getConstant(32, DL, MVT::i64));
13419 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13420 DAG.getConstant(32, DL, MVT::i64));
13421 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13422                                                      : RISCVISD::CLMULR;
13423      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13424 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13425 DAG.getConstant(32, DL, MVT::i64));
13426 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13427 return;
13428 }
13429 case Intrinsic::riscv_vmv_x_s: {
13430 EVT VT = N->getValueType(0);
13431 MVT XLenVT = Subtarget.getXLenVT();
13432 if (VT.bitsLT(XLenVT)) {
13433 // Simple case just extract using vmv.x.s and truncate.
13434 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13435 Subtarget.getXLenVT(), N->getOperand(1));
13436 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13437 return;
13438 }
13439
13440 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13441 "Unexpected custom legalization");
13442
13443 // We need to do the move in two steps.
13444 SDValue Vec = N->getOperand(1);
13445 MVT VecVT = Vec.getSimpleValueType();
13446
13447 // First extract the lower XLEN bits of the element.
13448 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13449
13450 // To extract the upper XLEN bits of the vector element, shift the first
13451 // element right by 32 bits and re-extract the lower XLEN bits.
13452 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13453
13454 SDValue ThirtyTwoV =
13455 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13456 DAG.getConstant(32, DL, XLenVT), VL);
13457 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13458 DAG.getUNDEF(VecVT), Mask, VL);
13459 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13460
13461 Results.push_back(
13462 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13463 break;
13464 }
13465 }
13466 break;
13467 }
13468 case ISD::VECREDUCE_ADD:
13469 case ISD::VECREDUCE_AND:
13470 case ISD::VECREDUCE_OR:
13471 case ISD::VECREDUCE_XOR:
13472  case ISD::VECREDUCE_SMAX:
13473  case ISD::VECREDUCE_UMAX:
13474  case ISD::VECREDUCE_SMIN:
13475  case ISD::VECREDUCE_UMIN:
13476    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13477 Results.push_back(V);
13478 break;
13479 case ISD::VP_REDUCE_ADD:
13480 case ISD::VP_REDUCE_AND:
13481 case ISD::VP_REDUCE_OR:
13482 case ISD::VP_REDUCE_XOR:
13483 case ISD::VP_REDUCE_SMAX:
13484 case ISD::VP_REDUCE_UMAX:
13485 case ISD::VP_REDUCE_SMIN:
13486 case ISD::VP_REDUCE_UMIN:
13487 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13488 Results.push_back(V);
13489 break;
13490 case ISD::GET_ROUNDING: {
13491 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13492 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13493 Results.push_back(Res.getValue(0));
13494 Results.push_back(Res.getValue(1));
13495 break;
13496 }
13497 }
13498}
13499
13500/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13501/// which corresponds to it.
13502static unsigned getVecReduceOpcode(unsigned Opc) {
13503 switch (Opc) {
13504 default:
13505    llvm_unreachable("Unhandled binary to transform reduction");
13506 case ISD::ADD:
13507 return ISD::VECREDUCE_ADD;
13508 case ISD::UMAX:
13509 return ISD::VECREDUCE_UMAX;
13510 case ISD::SMAX:
13511 return ISD::VECREDUCE_SMAX;
13512 case ISD::UMIN:
13513 return ISD::VECREDUCE_UMIN;
13514 case ISD::SMIN:
13515 return ISD::VECREDUCE_SMIN;
13516 case ISD::AND:
13517 return ISD::VECREDUCE_AND;
13518 case ISD::OR:
13519 return ISD::VECREDUCE_OR;
13520 case ISD::XOR:
13521 return ISD::VECREDUCE_XOR;
13522 case ISD::FADD:
13523 // Note: This is the associative form of the generic reduction opcode.
13524 return ISD::VECREDUCE_FADD;
13525 }
13526}
13527
13528/// Perform two related transforms whose purpose is to incrementally recognize
13529/// an explode_vector followed by scalar reduction as a vector reduction node.
13530/// This exists to recover from a deficiency in SLP which can't handle
13531/// forests with multiple roots sharing common nodes. In some cases, one
13532/// of the trees will be vectorized, and the other will remain (unprofitably)
13533/// scalarized.
13534static SDValue
13535combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13536                                  const RISCVSubtarget &Subtarget) {
13537
13538  // This transform needs to run before all integer types have been legalized
13539  // to i64 (so that the vector element type matches the add type), and while
13540  // it's safe to introduce odd sized vector types.
13541  if (DAG.NewNodesMustHaveLegalTypes)
13542    return SDValue();
13543
13544 // Without V, this transform isn't useful. We could form the (illegal)
13545 // operations and let them be scalarized again, but there's really no point.
13546 if (!Subtarget.hasVInstructions())
13547 return SDValue();
13548
13549 const SDLoc DL(N);
13550 const EVT VT = N->getValueType(0);
13551 const unsigned Opc = N->getOpcode();
13552
13553 // For FADD, we only handle the case with reassociation allowed. We
13554 // could handle strict reduction order, but at the moment, there's no
13555 // known reason to, and the complexity isn't worth it.
13556 // TODO: Handle fminnum and fmaxnum here
13557 if (!VT.isInteger() &&
13558 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13559 return SDValue();
13560
13561 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13562 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13563 "Inconsistent mappings");
13564 SDValue LHS = N->getOperand(0);
13565 SDValue RHS = N->getOperand(1);
13566
13567 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13568 return SDValue();
13569
13570 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13571 std::swap(LHS, RHS);
13572
13573 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13574 !isa<ConstantSDNode>(RHS.getOperand(1)))
13575 return SDValue();
13576
13577 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13578 SDValue SrcVec = RHS.getOperand(0);
13579 EVT SrcVecVT = SrcVec.getValueType();
13580 assert(SrcVecVT.getVectorElementType() == VT);
13581 if (SrcVecVT.isScalableVector())
13582 return SDValue();
13583
13584 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13585 return SDValue();
13586
13587 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13588 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13589 // root of our reduction tree. TODO: We could extend this to any two
13590 // adjacent aligned constant indices if desired.
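  // For illustration: add (extract_vector_elt v4i32 %v, 0),
  // (extract_vector_elt %v, 1) becomes vecreduce_add of a 2 x i32
  // extract_subvector of %v, forming a root that the combine below can then
  // grow one adjacent lane at a time.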
13591 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13592 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13593 uint64_t LHSIdx =
13594 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13595 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13596 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13597 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13598 DAG.getVectorIdxConstant(0, DL));
13599 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13600 }
13601 }
13602
13603 // Match (binop (reduce (extract_subvector V, 0),
13604 // (extract_vector_elt V, sizeof(SubVec))))
13605 // into a reduction of one more element from the original vector V.
13606 if (LHS.getOpcode() != ReduceOpc)
13607 return SDValue();
13608
13609 SDValue ReduceVec = LHS.getOperand(0);
13610 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13611 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13612 isNullConstant(ReduceVec.getOperand(1)) &&
13613 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13614 // For illegal types (e.g. 3xi32), most will be combined again into a
13615 // wider (hopefully legal) type. If this is a terminal state, we are
13616 // relying on type legalization here to produce something reasonable
13617 // and this lowering quality could probably be improved. (TODO)
13618 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13619 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13620 DAG.getVectorIdxConstant(0, DL));
13621 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13622 ReduceVec->getFlags() & N->getFlags());
13623 }
13624
13625 return SDValue();
13626}
13627
13628
13629// Try to fold (<bop> x, (reduction.<bop> vec, start))
13630static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13631                                    const RISCVSubtarget &Subtarget) {
13632 auto BinOpToRVVReduce = [](unsigned Opc) {
13633 switch (Opc) {
13634 default:
13635      llvm_unreachable("Unhandled binary to transform reduction");
13636    case ISD::ADD:
13637      return RISCVISD::VECREDUCE_ADD_VL;
13638    case ISD::UMAX:
13639      return RISCVISD::VECREDUCE_UMAX_VL;
13640    case ISD::SMAX:
13641      return RISCVISD::VECREDUCE_SMAX_VL;
13642    case ISD::UMIN:
13643      return RISCVISD::VECREDUCE_UMIN_VL;
13644    case ISD::SMIN:
13645      return RISCVISD::VECREDUCE_SMIN_VL;
13646    case ISD::AND:
13647      return RISCVISD::VECREDUCE_AND_VL;
13648    case ISD::OR:
13649      return RISCVISD::VECREDUCE_OR_VL;
13650    case ISD::XOR:
13651      return RISCVISD::VECREDUCE_XOR_VL;
13652    case ISD::FADD:
13653      return RISCVISD::VECREDUCE_FADD_VL;
13654    case ISD::FMAXNUM:
13655      return RISCVISD::VECREDUCE_FMAX_VL;
13656    case ISD::FMINNUM:
13657      return RISCVISD::VECREDUCE_FMIN_VL;
13658 }
13659 };
13660
13661 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13662 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13663 isNullConstant(V.getOperand(1)) &&
13664 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13665 };
13666
13667 unsigned Opc = N->getOpcode();
13668 unsigned ReduceIdx;
13669 if (IsReduction(N->getOperand(0), Opc))
13670 ReduceIdx = 0;
13671 else if (IsReduction(N->getOperand(1), Opc))
13672 ReduceIdx = 1;
13673 else
13674 return SDValue();
13675
13676  // Skip if FADD disallows reassociation but the combiner needs it.
13677 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13678 return SDValue();
13679
13680 SDValue Extract = N->getOperand(ReduceIdx);
13681 SDValue Reduce = Extract.getOperand(0);
13682 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13683 return SDValue();
13684
13685 SDValue ScalarV = Reduce.getOperand(2);
13686 EVT ScalarVT = ScalarV.getValueType();
13687 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13688 ScalarV.getOperand(0)->isUndef() &&
13689 isNullConstant(ScalarV.getOperand(2)))
13690 ScalarV = ScalarV.getOperand(1);
13691
13692 // Make sure that ScalarV is a splat with VL=1.
13693 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13694 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13695 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13696 return SDValue();
13697
13698 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13699 return SDValue();
13700
13701  // Check that the scalar of ScalarV is the neutral element.
13702  // TODO: Deal with values other than the neutral element.
13703 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13704 0))
13705 return SDValue();
13706
13707 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13708 // FIXME: We might be able to improve this if operand 0 is undef.
13709 if (!isNonZeroAVL(Reduce.getOperand(5)))
13710 return SDValue();
13711
13712 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13713
13714 SDLoc DL(N);
13715 SDValue NewScalarV =
13716 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13717 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13718
13719 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13720 if (ScalarVT != ScalarV.getValueType())
13721 NewScalarV =
13722 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13723 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13724
13725 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13726 NewScalarV, Reduce.getOperand(3),
13727 Reduce.getOperand(4), Reduce.getOperand(5)};
13728 SDValue NewReduce =
13729 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13730 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13731 Extract.getOperand(1));
13732}
13733
13734// Optimize (add (shl x, c0), (shl y, c1)) ->
13735// (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
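// For illustration: (add (shl x, 1), (shl y, 3)) with Zba becomes
// (shl (sh2add y, x), 1), since c1 - c0 = 2 and min(c0, c1) = 1.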
13736static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13737                                  const RISCVSubtarget &Subtarget) {
13738 // Perform this optimization only in the zba extension.
13739 if (!Subtarget.hasStdExtZba())
13740 return SDValue();
13741
13742 // Skip for vector types and larger types.
13743 EVT VT = N->getValueType(0);
13744 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13745 return SDValue();
13746
13747 // The two operand nodes must be SHL and have no other use.
13748 SDValue N0 = N->getOperand(0);
13749 SDValue N1 = N->getOperand(1);
13750 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13751 !N0->hasOneUse() || !N1->hasOneUse())
13752 return SDValue();
13753
13754 // Check c0 and c1.
13755 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13756 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13757 if (!N0C || !N1C)
13758 return SDValue();
13759 int64_t C0 = N0C->getSExtValue();
13760 int64_t C1 = N1C->getSExtValue();
13761 if (C0 <= 0 || C1 <= 0)
13762 return SDValue();
13763
13764 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13765 int64_t Bits = std::min(C0, C1);
13766 int64_t Diff = std::abs(C0 - C1);
13767 if (Diff != 1 && Diff != 2 && Diff != 3)
13768 return SDValue();
13769
13770 // Build nodes.
13771 SDLoc DL(N);
13772 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13773 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13774 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13775 DAG.getConstant(Diff, DL, VT), NS);
13776 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13777}
13778
13779// Combine a constant select operand into its use:
13780//
13781// (and (select cond, -1, c), x)
13782// -> (select cond, x, (and x, c)) [AllOnes=1]
13783// (or (select cond, 0, c), x)
13784// -> (select cond, x, (or x, c)) [AllOnes=0]
13785// (xor (select cond, 0, c), x)
13786// -> (select cond, x, (xor x, c)) [AllOnes=0]
13787// (add (select cond, 0, c), x)
13788// -> (select cond, x, (add x, c)) [AllOnes=0]
13789// (sub x, (select cond, 0, c))
13790// -> (select cond, x, (sub x, c)) [AllOnes=0]
13791static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13792                                   SelectionDAG &DAG, bool AllOnes,
13793 const RISCVSubtarget &Subtarget) {
13794 EVT VT = N->getValueType(0);
13795
13796 // Skip vectors.
13797 if (VT.isVector())
13798 return SDValue();
13799
13800 if (!Subtarget.hasConditionalMoveFusion()) {
13801 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13802 if ((!Subtarget.hasStdExtZicond() &&
13803 !Subtarget.hasVendorXVentanaCondOps()) ||
13804 N->getOpcode() != ISD::AND)
13805 return SDValue();
13806
13807    // Maybe harmful when the condition code has multiple uses.
13808 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13809 return SDValue();
13810
13811 // Maybe harmful when VT is wider than XLen.
13812 if (VT.getSizeInBits() > Subtarget.getXLen())
13813 return SDValue();
13814 }
13815
13816 if ((Slct.getOpcode() != ISD::SELECT &&
13817 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13818 !Slct.hasOneUse())
13819 return SDValue();
13820
13821 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13822    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13823  };
13824
13825 bool SwapSelectOps;
13826 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13827 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13828 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13829 SDValue NonConstantVal;
13830 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13831 SwapSelectOps = false;
13832 NonConstantVal = FalseVal;
13833 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13834 SwapSelectOps = true;
13835 NonConstantVal = TrueVal;
13836 } else
13837 return SDValue();
13838
13839  // Slct is now known to be the desired identity constant when CC is true.
13840 TrueVal = OtherOp;
13841 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13842 // Unless SwapSelectOps says the condition should be false.
13843 if (SwapSelectOps)
13844 std::swap(TrueVal, FalseVal);
13845
13846 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13847 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13848 {Slct.getOperand(0), Slct.getOperand(1),
13849 Slct.getOperand(2), TrueVal, FalseVal});
13850
13851 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13852 {Slct.getOperand(0), TrueVal, FalseVal});
13853}
13854
13855// Attempt combineSelectAndUse on each operand of a commutative operator N.
13856static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13857                                              bool AllOnes,
13858 const RISCVSubtarget &Subtarget) {
13859 SDValue N0 = N->getOperand(0);
13860 SDValue N1 = N->getOperand(1);
13861 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13862 return Result;
13863 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13864 return Result;
13865 return SDValue();
13866}
13867
13868// Transform (add (mul x, c0), c1) ->
13869// (add (mul (add x, c1/c0), c0), c1%c0).
13870// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13871// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13872// to an infinite loop in DAGCombine if transformed.
13873// Or transform (add (mul x, c0), c1) ->
13874// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13875// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13876// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13877// lead to an infinite loop in DAGCombine if transformed.
13878// Or transform (add (mul x, c0), c1) ->
13879// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13880// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13881// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13882// lead to an infinite loop in DAGCombine if transformed.
13883// Or transform (add (mul x, c0), c1) ->
13884// (mul (add x, c1/c0), c0).
13885// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
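// For illustration: (add (mul x, 3), 6001): 6001 is not simm12, but
// 6001/3 = 2000 and 6001%3 = 1 are, while 3*2000 = 6000 is not, so this
// becomes (add (mul (add x, 2000), 3), 1).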
13886static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13887                                     const RISCVSubtarget &Subtarget) {
13888 // Skip for vector types and larger types.
13889 EVT VT = N->getValueType(0);
13890 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13891 return SDValue();
13892 // The first operand node must be a MUL and has no other use.
13893 SDValue N0 = N->getOperand(0);
13894 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13895 return SDValue();
13896 // Check if c0 and c1 match above conditions.
13897 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13898 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13899 if (!N0C || !N1C)
13900 return SDValue();
13901 // If N0C has multiple uses it's possible one of the cases in
13902 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13903 // in an infinite loop.
13904 if (!N0C->hasOneUse())
13905 return SDValue();
13906 int64_t C0 = N0C->getSExtValue();
13907 int64_t C1 = N1C->getSExtValue();
13908 int64_t CA, CB;
13909 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13910 return SDValue();
13911 // Search for proper CA (non-zero) and CB that both are simm12.
13912 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13913 !isInt<12>(C0 * (C1 / C0))) {
13914 CA = C1 / C0;
13915 CB = C1 % C0;
13916 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13917 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13918 CA = C1 / C0 + 1;
13919 CB = C1 % C0 - C0;
13920 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13921 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13922 CA = C1 / C0 - 1;
13923 CB = C1 % C0 + C0;
13924 } else
13925 return SDValue();
13926 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13927 SDLoc DL(N);
13928 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13929 DAG.getSignedConstant(CA, DL, VT));
13930 SDValue New1 =
13931 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
13932 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
13933}
13934
13935// add (zext, zext) -> zext (add (zext, zext))
13936// sub (zext, zext) -> sext (sub (zext, zext))
13937// mul (zext, zext) -> zext (mul (zext, zext))
13938// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13939// udiv (zext, zext) -> zext (udiv (zext, zext))
13940// srem (zext, zext) -> zext (srem (zext, zext))
13941// urem (zext, zext) -> zext (urem (zext, zext))
13942//
13943// where the sum of the extend widths matches, and the range of the bin op
13944// fits inside the width of the narrower bin op. (For profitability on rvv, we
13945// use a power of two for both inner and outer extend.)
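// For illustration: add (zext <4 x i8> %a to <4 x i32>), (zext %b ...) becomes
// zext (add (zext %a to <4 x i16>), (zext %b to <4 x i16>)) to <4 x i32>,
// since an i16 add of two zero-extended i8 values cannot overflow.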
13946static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13947
13948 EVT VT = N->getValueType(0);
13949 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13950 return SDValue();
13951
13952 SDValue N0 = N->getOperand(0);
13953 SDValue N1 = N->getOperand(1);
13954  if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13955    return SDValue();
13956 if (!N0.hasOneUse() || !N1.hasOneUse())
13957 return SDValue();
13958
13959 SDValue Src0 = N0.getOperand(0);
13960 SDValue Src1 = N1.getOperand(0);
13961 EVT SrcVT = Src0.getValueType();
13962 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13963 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13964 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13965 return SDValue();
13966
13967 LLVMContext &C = *DAG.getContext();
13968  EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13969  EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13970
13971 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13972 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13973
13974 // Src0 and Src1 are zero extended, so they're always positive if signed.
13975 //
13976 // sub can produce a negative from two positive operands, so it needs sign
13977 // extended. Other nodes produce a positive from two positive operands, so
13978 // zero extend instead.
13979 unsigned OuterExtend =
13980 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13981
13982 return DAG.getNode(
13983 OuterExtend, SDLoc(N), VT,
13984 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13985}
13986
13987// Try to turn (add (xor bool, 1) -1) into (neg bool).
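// Worked check (for exposition): for a 0/1 value b, b == 0 gives
// (0^1) + (-1) = 0 == -0, and b == 1 gives (1^1) + (-1) = -1, so the result is
// exactly (neg b) in both cases.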
13988static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13989  SDValue N0 = N->getOperand(0);
13990 SDValue N1 = N->getOperand(1);
13991 EVT VT = N->getValueType(0);
13992 SDLoc DL(N);
13993
13994 // RHS should be -1.
13995 if (!isAllOnesConstant(N1))
13996 return SDValue();
13997
13998 // Look for (xor X, 1).
13999 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14000 return SDValue();
14001
14002 // First xor input should be 0 or 1.
14003  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14004  if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14005 return SDValue();
14006
14007 // Emit a negate of the setcc.
14008 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14009 N0.getOperand(0));
14010}
14011
14012static SDValue performADDCombine(SDNode *N,
14013                                 TargetLowering::DAGCombinerInfo &DCI,
14014                                 const RISCVSubtarget &Subtarget) {
14015 SelectionDAG &DAG = DCI.DAG;
14016 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14017 return V;
14018 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14019 return V;
14020 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14021 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14022 return V;
14023 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14024 return V;
14025 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14026 return V;
14027 if (SDValue V = combineBinOpOfZExt(N, DAG))
14028 return V;
14029
14030 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14031 // (select lhs, rhs, cc, x, (add x, y))
14032 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14033}
14034
14035// Try to turn a sub boolean RHS and constant LHS into an addi.
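// Illustrative example (for exposition): (sub 5, (seteq x, y)) becomes
// (add (setne x, y), 4), since 5 - b == (1 - b) + 4 for b in {0, 1}.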
14036static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14037  SDValue N0 = N->getOperand(0);
14038 SDValue N1 = N->getOperand(1);
14039 EVT VT = N->getValueType(0);
14040 SDLoc DL(N);
14041
14042 // Require a constant LHS.
14043 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14044 if (!N0C)
14045 return SDValue();
14046
14047 // All our optimizations involve subtracting 1 from the immediate and forming
14048 // an ADDI. Make sure the new immediate is valid for an ADDI.
14049 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14050 if (!ImmValMinus1.isSignedIntN(12))
14051 return SDValue();
14052
14053 SDValue NewLHS;
14054 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14055 // (sub constant, (setcc x, y, eq/neq)) ->
14056 // (add (setcc x, y, neq/eq), constant - 1)
14057 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14058 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14059 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14060 return SDValue();
14061 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14062 NewLHS =
14063 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14064 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14065 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14066 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14067 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14068 NewLHS = N1.getOperand(0);
14069 } else
14070 return SDValue();
14071
14072 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14073 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14074}
14075
14076// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14077// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14078// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14079// valid with Y=3, while 0b0000_1000_0000_0100 is not.
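// Worked 16-bit example (for exposition), Y = 3: for X = 0x0808,
// (X << 5) - (X >> 3) = 0x0100 - 0x0101 = 0xFFFF = orc.b(X); for X = 0x0800,
// 0x0000 - 0x0100 = 0xFF00 = orc.b(X).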
14080static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14081                                     const RISCVSubtarget &Subtarget) {
14082 if (!Subtarget.hasStdExtZbb())
14083 return SDValue();
14084
14085 EVT VT = N->getValueType(0);
14086
14087 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14088 return SDValue();
14089
14090 SDValue N0 = N->getOperand(0);
14091 SDValue N1 = N->getOperand(1);
14092
14093 if (N0->getOpcode() != ISD::SHL)
14094 return SDValue();
14095
14096 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14097 if (!ShAmtCLeft)
14098 return SDValue();
14099 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14100
14101 if (ShiftedAmount >= 8)
14102 return SDValue();
14103
14104 SDValue LeftShiftOperand = N0->getOperand(0);
14105 SDValue RightShiftOperand = N1;
14106
14107 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14108 if (N1->getOpcode() != ISD::SRL)
14109 return SDValue();
14110 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14111 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14112 return SDValue();
14113 RightShiftOperand = N1.getOperand(0);
14114 }
14115
14116 // At least one shift should have a single use.
14117 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14118 return SDValue();
14119
14120 if (LeftShiftOperand != RightShiftOperand)
14121 return SDValue();
14122
14123 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14124 Mask <<= ShiftedAmount;
14125 // Check that X has indeed the right shape (only the Y-th bit can be set in
14126 // every byte).
14127 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14128 return SDValue();
14129
14130 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14131}
14132
14133static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14134                                 const RISCVSubtarget &Subtarget) {
14135 if (SDValue V = combineSubOfBoolean(N, DAG))
14136 return V;
14137
14138 EVT VT = N->getValueType(0);
14139 SDValue N0 = N->getOperand(0);
14140 SDValue N1 = N->getOperand(1);
14141 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
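  // Rationale (for exposition): the setcc is 1 exactly when x is negative, so
  // 0 - setcc is -1 for negative x and 0 otherwise, which is just the sign bit
  // of x smeared across the register by the arithmetic shift.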
14142 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14143 isNullConstant(N1.getOperand(1))) {
14144 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14145 if (CCVal == ISD::SETLT) {
14146 SDLoc DL(N);
14147 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14148 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14149 DAG.getConstant(ShAmt, DL, VT));
14150 }
14151 }
14152
14153 if (SDValue V = combineBinOpOfZExt(N, DAG))
14154 return V;
14155 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14156 return V;
14157
14158 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14159 // (select lhs, rhs, cc, x, (sub x, y))
14160 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14161}
14162
14163// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14164// Legalizing setcc can introduce xors like this. Doing this transform reduces
14165// the number of xors and may allow the xor to fold into a branch condition.
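// For 0/1 values this is plain De Morgan (noted for exposition):
// (X^1) & (Y^1) == (X|Y) ^ 1 and (X^1) | (Y^1) == (X&Y) ^ 1, so the two xors
// collapse into a single xor after the inverted logic op.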
14166static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14167  SDValue N0 = N->getOperand(0);
14168 SDValue N1 = N->getOperand(1);
14169 bool IsAnd = N->getOpcode() == ISD::AND;
14170
14171 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14172 return SDValue();
14173
14174 if (!N0.hasOneUse() || !N1.hasOneUse())
14175 return SDValue();
14176
14177 SDValue N01 = N0.getOperand(1);
14178 SDValue N11 = N1.getOperand(1);
14179
14180 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14181 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14182 // operation is And, allow one of the Xors to use -1.
14183 if (isOneConstant(N01)) {
14184 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14185 return SDValue();
14186 } else if (isOneConstant(N11)) {
14187 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14188 if (!(IsAnd && isAllOnesConstant(N01)))
14189 return SDValue();
14190 } else
14191 return SDValue();
14192
14193 EVT VT = N->getValueType(0);
14194
14195 SDValue N00 = N0.getOperand(0);
14196 SDValue N10 = N1.getOperand(0);
14197
14198 // The LHS of the xors needs to be 0/1.
14199  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14200  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14201 return SDValue();
14202
14203 // Invert the opcode and insert a new xor.
14204 SDLoc DL(N);
14205 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14206 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14207 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14208}
14209
14210// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14211// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14212// value to an unsigned value. This will be lowered to vmax and a series of
14213// vnclipu instructions later. This can be extended to truncated types other
14214// than i8 by replacing 256 and 255 with the equivalent constants for the
14215// type.
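// Illustrative i8 example (for exposition): a signed i16 element of -5 clamps
// to 0, 300 clamps to 255, and 100 is left unchanged; smin(smax(X, 0), 255)
// gives the same results as the vselect pattern described above for these
// inputs.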
14216static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14217  EVT VT = N->getValueType(0);
14218 SDValue N0 = N->getOperand(0);
14219 EVT SrcVT = N0.getValueType();
14220
14221 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14222 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14223 return SDValue();
14224
14225 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14226 return SDValue();
14227
14228 SDValue Cond = N0.getOperand(0);
14229 SDValue True = N0.getOperand(1);
14230 SDValue False = N0.getOperand(2);
14231
14232 if (Cond.getOpcode() != ISD::SETCC)
14233 return SDValue();
14234
14235 // FIXME: Support the version of this pattern with the select operands
14236 // swapped.
14237 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14238 if (CCVal != ISD::SETULT)
14239 return SDValue();
14240
14241 SDValue CondLHS = Cond.getOperand(0);
14242 SDValue CondRHS = Cond.getOperand(1);
14243
14244 if (CondLHS != True)
14245 return SDValue();
14246
14247 unsigned ScalarBits = VT.getScalarSizeInBits();
14248
14249 // FIXME: Support other constants.
14250 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14251 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14252 return SDValue();
14253
14254 if (False.getOpcode() != ISD::SIGN_EXTEND)
14255 return SDValue();
14256
14257 False = False.getOperand(0);
14258
14259 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14260 return SDValue();
14261
14262 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14263 if (!FalseRHSC || !FalseRHSC->isZero())
14264 return SDValue();
14265
14266 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14267 if (CCVal2 != ISD::SETGT)
14268 return SDValue();
14269
14270 // Emit the signed to unsigned saturation pattern.
14271 SDLoc DL(N);
14272 SDValue Max =
14273 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14274 SDValue Min =
14275 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14276 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14277 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14278}
14279
14280static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14281                                      const RISCVSubtarget &Subtarget) {
14282 SDValue N0 = N->getOperand(0);
14283 EVT VT = N->getValueType(0);
14284
14285 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14286 // extending X. This is safe since we only need the LSB after the shift and
14287 // shift amounts larger than 31 would produce poison. If we wait until
14288 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14289 // to use a BEXT instruction.
14290 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14291 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14292 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14293 SDLoc DL(N0);
14294 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14295 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14296 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14297 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14298 }
14299
14300 return combineTruncSelectToSMaxUSat(N, DAG);
14301}
14302
14303// Combines two comparison operations and a logic operation into one selection
14304// operation (min, max) and a logic operation. Returns the newly constructed
14305// node if the conditions for the optimization are satisfied.
14306static SDValue performANDCombine(SDNode *N,
14307                                 TargetLowering::DAGCombinerInfo &DCI,
14308                                 const RISCVSubtarget &Subtarget) {
14309 SelectionDAG &DAG = DCI.DAG;
14310
14311 SDValue N0 = N->getOperand(0);
14312 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14313 // extending X. This is safe since we only need the LSB after the shift and
14314 // shift amounts larger than 31 would produce poison. If we wait until
14315 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14316 // to use a BEXT instruction.
14317 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14318 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14319 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14320 N0.hasOneUse()) {
14321 SDLoc DL(N);
14322 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14323 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14324 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14325 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14326 DAG.getConstant(1, DL, MVT::i64));
14327 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14328 }
14329
14330 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14331 return V;
14332 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14333 return V;
14334
14335 if (DCI.isAfterLegalizeDAG())
14336 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14337 return V;
14338
14339 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14340 // (select lhs, rhs, cc, x, (and x, y))
14341 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14342}
14343
14344// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14345// FIXME: Generalize to other binary operators with same operand.
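// Concretely (spelled out for exposition):
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// i.e. select(c, a^1, b^1) == select(c, a, b) ^ 1.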
14346static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14347                                SelectionDAG &DAG) {
14348 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14349
14350 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14351      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14352      !N0.hasOneUse() || !N1.hasOneUse())
14353 return SDValue();
14354
14355 // Should have the same condition.
14356 SDValue Cond = N0.getOperand(1);
14357 if (Cond != N1.getOperand(1))
14358 return SDValue();
14359
14360 SDValue TrueV = N0.getOperand(0);
14361 SDValue FalseV = N1.getOperand(0);
14362
14363 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14364 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14365 !isOneConstant(TrueV.getOperand(1)) ||
14366 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14367 return SDValue();
14368
14369 EVT VT = N->getValueType(0);
14370 SDLoc DL(N);
14371
14372 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14373 Cond);
14374 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14375 Cond);
14376 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14377 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14378}
14379
14380static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14381                                const RISCVSubtarget &Subtarget) {
14382 SelectionDAG &DAG = DCI.DAG;
14383
14384 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14385 return V;
14386 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14387 return V;
14388
14389 if (DCI.isAfterLegalizeDAG())
14390 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14391 return V;
14392
14393 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
14394 // We may be able to pull a common operation out of the true and false value.
14395 SDValue N0 = N->getOperand(0);
14396 SDValue N1 = N->getOperand(1);
14397 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14398 return V;
14399 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14400 return V;
14401
14402 // fold (or (select cond, 0, y), x) ->
14403 // (select cond, x, (or x, y))
14404 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14405}
14406
14407static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14408                                 const RISCVSubtarget &Subtarget) {
14409 SDValue N0 = N->getOperand(0);
14410 SDValue N1 = N->getOperand(1);
14411
14412 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14413  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14414  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14415 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14416 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14417 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14418 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14419 SDLoc DL(N);
14420 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14421 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14422 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14423 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14424 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14425 }
14426
14427 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14428 // NOTE: Assumes ROL being legal means ROLW is legal.
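  // Rationale (for exposition): ~1 has a single clear bit at position 0, and
  // rotating it left by x moves that clear bit to position x, so
  // rolw(~1, x) == ~(1 <<w x); both 32-bit ops use only the low 5 bits of x.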
14429 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14430 if (N0.getOpcode() == RISCVISD::SLLW &&
14431      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14432      TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14433 SDLoc DL(N);
14434 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14435 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14436 }
14437
14438 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
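  // Rationale (for exposition): !(constant < y) == (y <= constant)
  // == (y < constant + 1) for signed integers; constant + 1 cannot overflow
  // here because the code below requires it to be a simm12.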
14439 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14440 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14441 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14442 if (ConstN00 && CC == ISD::SETLT) {
14443 EVT VT = N0.getValueType();
14444 SDLoc DL(N0);
14445 const APInt &Imm = ConstN00->getAPIntValue();
14446 if ((Imm + 1).isSignedIntN(12))
14447 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14448 DAG.getConstant(Imm + 1, DL, VT), CC);
14449 }
14450 }
14451
14452 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14453 return V;
14454 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14455 return V;
14456
14457 // fold (xor (select cond, 0, y), x) ->
14458 // (select cond, x, (xor x, y))
14459 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14460}
14461
14462// Try to expand a scalar multiply to a faster sequence.
14463static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14464                         TargetLowering::DAGCombinerInfo &DCI,
14465                         const RISCVSubtarget &Subtarget) {
14466
14467 EVT VT = N->getValueType(0);
14468
14469 // LI + MUL is usually smaller than the alternative sequence.
14470  if (DAG.getMachineFunction().getFunction().hasMinSize())
14471    return SDValue();
14472
14473 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14474 return SDValue();
14475
14476 if (VT != Subtarget.getXLenVT())
14477 return SDValue();
14478
14479 const bool HasShlAdd =
14480 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14481
14482 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14483 if (!CNode)
14484 return SDValue();
14485 uint64_t MulAmt = CNode->getZExtValue();
14486
14487 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14488 // We're adding additional uses of X here, and in principle, we should be freezing
14489 // X before doing so. However, adding freeze here causes real regressions, and no
14490 // other target properly freezes X in these cases either.
14491 SDValue X = N->getOperand(0);
14492
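  // Illustrative decompositions produced below (for exposition): 20*X becomes
  // shl(sh2add(X, X), 2), 25*X becomes sh2add(sh2add(X, X), sh2add(X, X)), and
  // 11*X becomes sh1add(sh2add(X, X), X), where shNadd(a, b) = (a << N) + b.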
14493 if (HasShlAdd) {
14494 for (uint64_t Divisor : {3, 5, 9}) {
14495 if (MulAmt % Divisor != 0)
14496 continue;
14497 uint64_t MulAmt2 = MulAmt / Divisor;
14498 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14499 if (isPowerOf2_64(MulAmt2)) {
14500 SDLoc DL(N);
14501 SDValue X = N->getOperand(0);
14502 // Put the shift first if we can fold a zext into the
14503 // shift forming a slli.uw.
14504 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14505 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14506 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14507 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14508 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14509 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14510 Shl);
14511 }
14512        // Otherwise, put the shl second so that it can fold with following
14513 // instructions (e.g. sext or add).
14514 SDValue Mul359 =
14515 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14516 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14517 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14518 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14519 }
14520
14521 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14522 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14523 SDLoc DL(N);
14524 SDValue Mul359 =
14525 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14526 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14527 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14528 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14529 Mul359);
14530 }
14531 }
14532
14533    // If this is a power of 2 + 2/4/8, we can use a shift followed by a
14534    // single shXadd. First check if this is a sum of two powers of 2 because
14535    // that's easy. Then count the trailing zeros to get the smaller power.
14536 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14537 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14538 if (ScaleShift >= 1 && ScaleShift < 4) {
14539 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14540 SDLoc DL(N);
14541 SDValue Shift1 =
14542 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14543 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14544 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14545 }
14546 }
14547
14548 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14549    // This is the two-instruction form; there are also three-instruction
14550    // variants we could implement, e.g.
14551 // (2^(1,2,3) * 3,5,9 + 1) << C2
14552 // 2^(C1>3) * 3,5,9 +/- 1
14553 for (uint64_t Divisor : {3, 5, 9}) {
14554 uint64_t C = MulAmt - 1;
14555 if (C <= Divisor)
14556 continue;
14557 unsigned TZ = llvm::countr_zero(C);
14558 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14559 SDLoc DL(N);
14560 SDValue Mul359 =
14561 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14562 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14563 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14564 DAG.getConstant(TZ, DL, VT), X);
14565 }
14566 }
14567
14568 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14569 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14570 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14571 if (ScaleShift >= 1 && ScaleShift < 4) {
14572 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14573 SDLoc DL(N);
14574 SDValue Shift1 =
14575 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14576 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14577 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14578 DAG.getConstant(ScaleShift, DL, VT), X));
14579 }
14580 }
14581
14582 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
14583 for (uint64_t Offset : {3, 5, 9}) {
14584 if (isPowerOf2_64(MulAmt + Offset)) {
14585 SDLoc DL(N);
14586 SDValue Shift1 =
14587 DAG.getNode(ISD::SHL, DL, VT, X,
14588 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14589 SDValue Mul359 =
14590 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14591 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14592 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14593 }
14594 }
14595 }
14596
14597 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
14598 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14599 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14600 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14601 SDLoc DL(N);
14602 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14603 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14604 SDValue Shift2 =
14605 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14606 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14607 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14608 }
14609
14610 if (HasShlAdd) {
14611 for (uint64_t Divisor : {3, 5, 9}) {
14612 if (MulAmt % Divisor != 0)
14613 continue;
14614 uint64_t MulAmt2 = MulAmt / Divisor;
14615 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14616 // of 25 which happen to be quite common.
14617 for (uint64_t Divisor2 : {3, 5, 9}) {
14618 if (MulAmt2 % Divisor2 != 0)
14619 continue;
14620 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14621 if (isPowerOf2_64(MulAmt3)) {
14622 SDLoc DL(N);
14623 SDValue Mul359A =
14624 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14625 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14626 SDValue Mul359B = DAG.getNode(
14627 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14628 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14629 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14630 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14631 }
14632 }
14633 }
14634 }
14635
14636 return SDValue();
14637}
14638
14639// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14640// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14641// Same for other equivalent types with other equivalent constants.
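// Worked lane example (for exposition): for a 32-bit lane 0x80000001,
// ((lane >> 15) & 0x10001) * 0xffff = 0x10000 * 0xffff = 0xffff0000, which is
// exactly the two i16 halves 0x8000 and 0x0001 each arithmetically shifted
// right by 15.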
14642static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14643  EVT VT = N->getValueType(0);
14644 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14645
14646 // Do this for legal vectors unless they are i1 or i8 vectors.
14647 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14648 return SDValue();
14649
14650 if (N->getOperand(0).getOpcode() != ISD::AND ||
14651 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14652 return SDValue();
14653
14654 SDValue And = N->getOperand(0);
14655 SDValue Srl = And.getOperand(0);
14656
14657 APInt V1, V2, V3;
14658 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14659 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14660      !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14661    return SDValue();
14662
14663 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14664 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14665 V3 != (HalfSize - 1))
14666 return SDValue();
14667
14668 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14669 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14670 VT.getVectorElementCount() * 2);
14671 SDLoc DL(N);
14672 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14673 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14674 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14675 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14676}
14677
14678static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14679                                 TargetLowering::DAGCombinerInfo &DCI,
14680                                 const RISCVSubtarget &Subtarget) {
14681 EVT VT = N->getValueType(0);
14682 if (!VT.isVector())
14683 return expandMul(N, DAG, DCI, Subtarget);
14684
14685 SDLoc DL(N);
14686 SDValue N0 = N->getOperand(0);
14687 SDValue N1 = N->getOperand(1);
14688 SDValue MulOper;
14689 unsigned AddSubOpc;
14690
14691 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14692 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14693 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14694 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14695 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14696 AddSubOpc = V->getOpcode();
14697 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14698 SDValue Opnd = V->getOperand(1);
14699 MulOper = V->getOperand(0);
14700 if (AddSubOpc == ISD::SUB)
14701 std::swap(Opnd, MulOper);
14702 if (isOneOrOneSplat(Opnd))
14703 return true;
14704 }
14705 return false;
14706 };
14707
14708 if (IsAddSubWith1(N0)) {
14709 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14710 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14711 }
14712
14713 if (IsAddSubWith1(N1)) {
14714 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14715 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14716 }
14717
14718 if (SDValue V = combineBinOpOfZExt(N, DAG))
14719 return V;
14720
14721  if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14722    return V;
14723
14724 return SDValue();
14725}
14726
14727/// According to the property that indexed load/store instructions zero-extend
14728/// their indices, try to narrow the type of index operand.
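// Illustrative example (for exposition): a constant index build_vector such as
// <0, 8, 16, 24> with i64 elements has at most 5 active bits, so it can be
// truncated to i8 indices, which lowers register pressure at high LMUL.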
14729static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14730 if (isIndexTypeSigned(IndexType))
14731 return false;
14732
14733 if (!N->hasOneUse())
14734 return false;
14735
14736 EVT VT = N.getValueType();
14737 SDLoc DL(N);
14738
14739 // In general, what we're doing here is seeing if we can sink a truncate to
14740 // a smaller element type into the expression tree building our index.
14741 // TODO: We can generalize this and handle a bunch more cases if useful.
14742
14743 // Narrow a buildvector to the narrowest element type. This requires less
14744 // work and less register pressure at high LMUL, and creates smaller constants
14745 // which may be cheaper to materialize.
14746 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14747 KnownBits Known = DAG.computeKnownBits(N);
14748 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14749 LLVMContext &C = *DAG.getContext();
14750 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14751 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14752 N = DAG.getNode(ISD::TRUNCATE, DL,
14753 VT.changeVectorElementType(ResultVT), N);
14754 return true;
14755 }
14756 }
14757
14758 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14759 if (N.getOpcode() != ISD::SHL)
14760 return false;
14761
14762 SDValue N0 = N.getOperand(0);
14763 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14764      N0.getOpcode() != RISCVISD::VZEXT_VL)
14765    return false;
14766 if (!N0->hasOneUse())
14767 return false;
14768
14769 APInt ShAmt;
14770 SDValue N1 = N.getOperand(1);
14771 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14772 return false;
14773
14774 SDValue Src = N0.getOperand(0);
14775 EVT SrcVT = Src.getValueType();
14776 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14777 unsigned ShAmtV = ShAmt.getZExtValue();
14778 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14779 NewElen = std::max(NewElen, 8U);
14780
14781 // Skip if NewElen is not narrower than the original extended type.
14782 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14783 return false;
14784
14785 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14786 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14787
14788 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14789 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14790 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14791 return true;
14792}
14793
14794// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14795// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14796// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14797// can become a sext.w instead of a shift pair.
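// Illustrative example (for exposition): for C1 = 0xffffffff the AND form has
// to materialize a 32-bit all-ones constant, while C1 sign extended from bit
// 31 is simply -1, and the sext_inreg lowers to a single sext.w.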
14798static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14799                                   const RISCVSubtarget &Subtarget) {
14800 SDValue N0 = N->getOperand(0);
14801 SDValue N1 = N->getOperand(1);
14802 EVT VT = N->getValueType(0);
14803 EVT OpVT = N0.getValueType();
14804
14805 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14806 return SDValue();
14807
14808 // RHS needs to be a constant.
14809 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14810 if (!N1C)
14811 return SDValue();
14812
14813 // LHS needs to be (and X, 0xffffffff).
14814 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14815 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14816 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14817 return SDValue();
14818
14819 // Looking for an equality compare.
14820 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14821 if (!isIntEqualitySetCC(Cond))
14822 return SDValue();
14823
14824 // Don't do this if the sign bit is provably zero, it will be turned back into
14825 // an AND.
14826 APInt SignMask = APInt::getOneBitSet(64, 31);
14827 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14828 return SDValue();
14829
14830 const APInt &C1 = N1C->getAPIntValue();
14831
14832 SDLoc dl(N);
14833 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14834 // to be equal.
14835 if (C1.getActiveBits() > 32)
14836 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14837
14838 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14839 N0.getOperand(0), DAG.getValueType(MVT::i32));
14840 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14841 dl, OpVT), Cond);
14842}
14843
14844static SDValue
14845performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14846                                const RISCVSubtarget &Subtarget) {
14847 SDValue Src = N->getOperand(0);
14848 EVT VT = N->getValueType(0);
14849 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14850 unsigned Opc = Src.getOpcode();
14851
14852 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14853 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14854 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14855 Subtarget.hasStdExtZfhmin())
14856 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14857 Src.getOperand(0));
14858
14859 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14860 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14861 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14862 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14863 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14864 Src.getOperand(1));
14865
14866 return SDValue();
14867}
14868
14869namespace {
14870// Forward declaration of the structure holding the necessary information to
14871// apply a combine.
14872struct CombineResult;
14873
14874enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14875/// Helper class for folding sign/zero extensions.
14876/// In particular, this class is used for the following combines:
14877/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14878/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14879/// mul | mul_vl -> vwmul(u) | vwmul_su
14880/// shl | shl_vl -> vwsll
14881/// fadd -> vfwadd | vfwadd_w
14882/// fsub -> vfwsub | vfwsub_w
14883/// fmul -> vfwmul
14884/// An object of this class represents an operand of the operation we want to
14885/// combine.
14886/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14887/// NodeExtensionHelper for `a` and one for `b`.
14888///
14889/// This class abstracts away how the extension is materialized and
14890/// how its number of users affect the combines.
14891///
14892/// In particular:
14893/// - VWADD_W is conceptually == add(op0, sext(op1))
14894/// - VWADDU_W == add(op0, zext(op1))
14895/// - VWSUB_W == sub(op0, sext(op1))
14896/// - VWSUBU_W == sub(op0, zext(op1))
14897/// - VFWADD_W == fadd(op0, fpext(op1))
14898/// - VFWSUB_W == fsub(op0, fpext(op1))
14899/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14900/// zext|sext(smaller_value).
14901struct NodeExtensionHelper {
14902 /// Records if this operand is like being zero extended.
14903 bool SupportsZExt;
14904 /// Records if this operand is like being sign extended.
14905 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14906 /// instance, a splat constant (e.g., 3), would support being both sign and
14907 /// zero extended.
14908 bool SupportsSExt;
14909 /// Records if this operand is like being floating-Point extended.
14910 bool SupportsFPExt;
14911 /// This boolean captures whether we care if this operand would still be
14912 /// around after the folding happens.
14913 bool EnforceOneUse;
14914 /// Original value that this NodeExtensionHelper represents.
14915 SDValue OrigOperand;
14916
14917 /// Get the value feeding the extension or the value itself.
14918 /// E.g., for zext(a), this would return a.
14919 SDValue getSource() const {
14920 switch (OrigOperand.getOpcode()) {
14921 case ISD::ZERO_EXTEND:
14922 case ISD::SIGN_EXTEND:
14923 case RISCVISD::VSEXT_VL:
14924 case RISCVISD::VZEXT_VL:
14925    case RISCVISD::FP_EXTEND_VL:
14926      return OrigOperand.getOperand(0);
14927 default:
14928 return OrigOperand;
14929 }
14930 }
14931
14932 /// Check if this instance represents a splat.
14933 bool isSplat() const {
14934 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14935 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14936 }
14937
14938 /// Get the extended opcode.
14939 unsigned getExtOpc(ExtKind SupportsExt) const {
14940 switch (SupportsExt) {
14941 case ExtKind::SExt:
14942 return RISCVISD::VSEXT_VL;
14943 case ExtKind::ZExt:
14944 return RISCVISD::VZEXT_VL;
14945 case ExtKind::FPExt:
14946      return RISCVISD::FP_EXTEND_VL;
14947    }
14948 llvm_unreachable("Unknown ExtKind enum");
14949 }
14950
14951 /// Get or create a value that can feed \p Root with the given extension \p
14952 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
14953 /// operand. \see ::getSource().
14954 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14955 const RISCVSubtarget &Subtarget,
14956 std::optional<ExtKind> SupportsExt) const {
14957 if (!SupportsExt.has_value())
14958 return OrigOperand;
14959
14960 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14961
14962 SDValue Source = getSource();
14963 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
14964 if (Source.getValueType() == NarrowVT)
14965 return Source;
14966
14967 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
14968 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
14969 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
14970 Root->getOpcode() == RISCVISD::VFMADD_VL);
14971 return Source;
14972 }
14973
14974 unsigned ExtOpc = getExtOpc(*SupportsExt);
14975
14976 // If we need an extension, we should be changing the type.
14977 SDLoc DL(OrigOperand);
14978 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
14979 switch (OrigOperand.getOpcode()) {
14980 case ISD::ZERO_EXTEND:
14981 case ISD::SIGN_EXTEND:
14982 case RISCVISD::VSEXT_VL:
14983 case RISCVISD::VZEXT_VL:
14984    case RISCVISD::FP_EXTEND_VL:
14985      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
14986 case ISD::SPLAT_VECTOR:
14987 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14988    case RISCVISD::VMV_V_X_VL:
14989      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14990 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14991    case RISCVISD::VFMV_V_F_VL:
14992      Source = Source.getOperand(1);
14993 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
14994 Source = Source.getOperand(0);
14995 assert(Source.getValueType() == NarrowVT.getVectorElementType());
14996 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
14997 DAG.getUNDEF(NarrowVT), Source, VL);
14998 default:
14999 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15000 // and that operand should already have the right NarrowVT so no
15001 // extension should be required at this point.
15002 llvm_unreachable("Unsupported opcode");
15003 }
15004 }
15005
15006 /// Helper function to get the narrow type for \p Root.
15007 /// The narrow type is the type of \p Root where we divided the size of each
15008 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15009 /// \pre Both the narrow type and the original type should be legal.
15010 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15011 MVT VT = Root->getSimpleValueType(0);
15012
15013 // Determine the narrow size.
15014 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15015
15016 MVT EltVT = SupportsExt == ExtKind::FPExt
15017 ? MVT::getFloatingPointVT(NarrowSize)
15018 : MVT::getIntegerVT(NarrowSize);
15019
15020 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15021 "Trying to extend something we can't represent");
15022 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15023 return NarrowVT;
15024 }
15025
15026 /// Get the opcode to materialize:
15027 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15028 static unsigned getSExtOpcode(unsigned Opcode) {
15029 switch (Opcode) {
15030 case ISD::ADD:
15031 case RISCVISD::ADD_VL:
15032    case RISCVISD::VWADD_W_VL:
15033    case RISCVISD::VWADDU_W_VL:
15034    case ISD::OR:
15035 return RISCVISD::VWADD_VL;
15036 case ISD::SUB:
15037 case RISCVISD::SUB_VL:
15038    case RISCVISD::VWSUB_W_VL:
15039    case RISCVISD::VWSUBU_W_VL:
15040      return RISCVISD::VWSUB_VL;
15041 case ISD::MUL:
15042 case RISCVISD::MUL_VL:
15043 return RISCVISD::VWMUL_VL;
15044 default:
15045 llvm_unreachable("Unexpected opcode");
15046 }
15047 }
15048
15049 /// Get the opcode to materialize:
15050 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15051 static unsigned getZExtOpcode(unsigned Opcode) {
15052 switch (Opcode) {
15053 case ISD::ADD:
15054 case RISCVISD::ADD_VL:
15055    case RISCVISD::VWADD_W_VL:
15056    case RISCVISD::VWADDU_W_VL:
15057    case ISD::OR:
15058 return RISCVISD::VWADDU_VL;
15059 case ISD::SUB:
15060 case RISCVISD::SUB_VL:
15061    case RISCVISD::VWSUB_W_VL:
15062    case RISCVISD::VWSUBU_W_VL:
15063      return RISCVISD::VWSUBU_VL;
15064 case ISD::MUL:
15065 case RISCVISD::MUL_VL:
15066 return RISCVISD::VWMULU_VL;
15067 case ISD::SHL:
15068 case RISCVISD::SHL_VL:
15069 return RISCVISD::VWSLL_VL;
15070 default:
15071 llvm_unreachable("Unexpected opcode");
15072 }
15073 }
15074
15075 /// Get the opcode to materialize:
15076 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15077 static unsigned getFPExtOpcode(unsigned Opcode) {
15078 switch (Opcode) {
15079 case RISCVISD::FADD_VL:
15080    case RISCVISD::VFWADD_W_VL:
15081      return RISCVISD::VFWADD_VL;
15082 case RISCVISD::FSUB_VL:
15083    case RISCVISD::VFWSUB_W_VL:
15084      return RISCVISD::VFWSUB_VL;
15085 case RISCVISD::FMUL_VL:
15086 return RISCVISD::VFWMUL_VL;
15087    case RISCVISD::VFMADD_VL:
15088      return RISCVISD::VFWMADD_VL;
15089    case RISCVISD::VFMSUB_VL:
15090      return RISCVISD::VFWMSUB_VL;
15091    case RISCVISD::VFNMADD_VL:
15092      return RISCVISD::VFWNMADD_VL;
15093    case RISCVISD::VFNMSUB_VL:
15094      return RISCVISD::VFWNMSUB_VL;
15095 default:
15096 llvm_unreachable("Unexpected opcode");
15097 }
15098 }
15099
15100 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15101 /// newOpcode(a, b).
15102 static unsigned getSUOpcode(unsigned Opcode) {
15103 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15104 "SU is only supported for MUL");
15105 return RISCVISD::VWMULSU_VL;
15106 }
15107
15108 /// Get the opcode to materialize
15109 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15110 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15111 switch (Opcode) {
15112 case ISD::ADD:
15113 case RISCVISD::ADD_VL:
15114 case ISD::OR:
15115 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15116                                          : RISCVISD::VWADDU_W_VL;
15117    case ISD::SUB:
15118 case RISCVISD::SUB_VL:
15119 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15120                                          : RISCVISD::VWSUBU_W_VL;
15121    case RISCVISD::FADD_VL:
15122 return RISCVISD::VFWADD_W_VL;
15123 case RISCVISD::FSUB_VL:
15124 return RISCVISD::VFWSUB_W_VL;
15125 default:
15126 llvm_unreachable("Unexpected opcode");
15127 }
15128 }
15129
15130 using CombineToTry = std::function<std::optional<CombineResult>(
15131 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15132 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15133 const RISCVSubtarget &)>;
15134
15135 /// Check if this node needs to be fully folded or extended for all users.
15136 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15137
15138 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15139 const RISCVSubtarget &Subtarget) {
15140 unsigned Opc = OrigOperand.getOpcode();
15141 MVT VT = OrigOperand.getSimpleValueType();
15142
15143 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15144 "Unexpected Opcode");
15145
15146    // The passthru must be undef for tail agnostic.
15147 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15148 return;
15149
15150 // Get the scalar value.
15151 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15152 : OrigOperand.getOperand(1);
15153
15154 // See if we have enough sign bits or zero bits in the scalar to use a
15155 // widening opcode by splatting to smaller element size.
15156 unsigned EltBits = VT.getScalarSizeInBits();
15157 unsigned ScalarBits = Op.getValueSizeInBits();
15158 // If we're not getting all bits from the element, we need special handling.
15159 if (ScalarBits < EltBits) {
15160 // This should only occur on RV32.
15161 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15162 !Subtarget.is64Bit() && "Unexpected splat");
15163 // vmv.v.x sign extends narrow inputs.
15164 SupportsSExt = true;
15165
15166 // If the input is positive, then sign extend is also zero extend.
15167 if (DAG.SignBitIsZero(Op))
15168 SupportsZExt = true;
15169
15170 EnforceOneUse = false;
15171 return;
15172 }
15173
15174 unsigned NarrowSize = EltBits / 2;
15175 // If the narrow type cannot be expressed with a legal VMV,
15176 // this is not a valid candidate.
15177 if (NarrowSize < 8)
15178 return;
15179
15180 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15181 SupportsSExt = true;
15182
15183 if (DAG.MaskedValueIsZero(Op,
15184 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15185 SupportsZExt = true;
15186
15187 EnforceOneUse = false;
15188 }
15189
15190 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15191 const RISCVSubtarget &Subtarget) {
15192    // Any f16 extension will need zvfh
15193 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15194 return false;
15195 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15196 // zvfbfwma
15197 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15198 Root->getOpcode() != RISCVISD::VFMADD_VL))
15199 return false;
15200 return true;
15201 }
15202
15203 /// Helper method to set the various fields of this struct based on the
15204 /// type of \p Root.
15205 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15206 const RISCVSubtarget &Subtarget) {
15207 SupportsZExt = false;
15208 SupportsSExt = false;
15209 SupportsFPExt = false;
15210 EnforceOneUse = true;
15211 unsigned Opc = OrigOperand.getOpcode();
15212 // For the nodes we handle below, we end up using their inputs directly: see
15213 // getSource(). However since they either don't have a passthru or we check
15214 // that their passthru is undef, we can safely ignore their mask and VL.
15215 switch (Opc) {
15216 case ISD::ZERO_EXTEND:
15217 case ISD::SIGN_EXTEND: {
15218 MVT VT = OrigOperand.getSimpleValueType();
15219 if (!VT.isVector())
15220 break;
15221
15222 SDValue NarrowElt = OrigOperand.getOperand(0);
15223 MVT NarrowVT = NarrowElt.getSimpleValueType();
15224 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15225 if (NarrowVT.getVectorElementType() == MVT::i1)
15226 break;
15227
15228 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15229 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15230 break;
15231 }
15232 case RISCVISD::VZEXT_VL:
15233 SupportsZExt = true;
15234 break;
15235 case RISCVISD::VSEXT_VL:
15236 SupportsSExt = true;
15237 break;
15238    case RISCVISD::FP_EXTEND_VL: {
15239      MVT NarrowEltVT =
15240          OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15241 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15242 break;
15243 SupportsFPExt = true;
15244 break;
15245 }
15246 case ISD::SPLAT_VECTOR:
15247    case RISCVISD::VMV_V_X_VL:
15248      fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15249 break;
15250 case RISCVISD::VFMV_V_F_VL: {
15251 MVT VT = OrigOperand.getSimpleValueType();
15252
15253 if (!OrigOperand.getOperand(0).isUndef())
15254 break;
15255
15256 SDValue Op = OrigOperand.getOperand(1);
15257 if (Op.getOpcode() != ISD::FP_EXTEND)
15258 break;
15259
15260 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15261 Subtarget))
15262 break;
15263
15264 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15265 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15266 if (NarrowSize != ScalarBits)
15267 break;
15268
15269 SupportsFPExt = true;
15270 break;
15271 }
15272 default:
15273 break;
15274 }
15275 }
15276
15277 /// Check if \p Root supports any extension folding combines.
15278 static bool isSupportedRoot(const SDNode *Root,
15279 const RISCVSubtarget &Subtarget) {
15280 switch (Root->getOpcode()) {
15281 case ISD::ADD:
15282 case ISD::SUB:
15283 case ISD::MUL: {
15284 return Root->getValueType(0).isScalableVector();
15285 }
15286 case ISD::OR: {
15287 return Root->getValueType(0).isScalableVector() &&
15288 Root->getFlags().hasDisjoint();
15289 }
15290 // Vector Widening Integer Add/Sub/Mul Instructions
15291 case RISCVISD::ADD_VL:
15292 case RISCVISD::MUL_VL:
15293    case RISCVISD::VWADD_W_VL:
15294    case RISCVISD::VWADDU_W_VL:
15295    case RISCVISD::SUB_VL:
15296    case RISCVISD::VWSUB_W_VL:
15297    case RISCVISD::VWSUBU_W_VL:
15298    // Vector Widening Floating-Point Add/Sub/Mul Instructions
15299    case RISCVISD::FADD_VL:
15300    case RISCVISD::FSUB_VL:
15301    case RISCVISD::FMUL_VL:
15302    case RISCVISD::VFWADD_W_VL:
15303    case RISCVISD::VFWSUB_W_VL:
15304      return true;
15305 case ISD::SHL:
15306 return Root->getValueType(0).isScalableVector() &&
15307 Subtarget.hasStdExtZvbb();
15308 case RISCVISD::SHL_VL:
15309 return Subtarget.hasStdExtZvbb();
15310    case RISCVISD::VFMADD_VL:
15311    case RISCVISD::VFNMADD_VL:
15312    case RISCVISD::VFMSUB_VL:
15313    case RISCVISD::VFNMSUB_VL:
15314      return true;
15315 default:
15316 return false;
15317 }
15318 }
15319
15320 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15321 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15322 const RISCVSubtarget &Subtarget) {
15323 assert(isSupportedRoot(Root, Subtarget) &&
15324           "Trying to build a helper with an "
15325 "unsupported root");
15326 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15328 OrigOperand = Root->getOperand(OperandIdx);
15329
15330 unsigned Opc = Root->getOpcode();
15331 switch (Opc) {
15332 // We consider
15333 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15334 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15335 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15336    case RISCVISD::VWADD_W_VL:
15337    case RISCVISD::VWADDU_W_VL:
15338    case RISCVISD::VWSUB_W_VL:
15339    case RISCVISD::VWSUBU_W_VL:
15340    case RISCVISD::VFWADD_W_VL:
15341    case RISCVISD::VFWSUB_W_VL:
15342      if (OperandIdx == 1) {
15343        SupportsZExt =
15344            Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
15345        SupportsSExt =
15346            Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
15347        SupportsFPExt =
15348            Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
15349 // There's no existing extension here, so we don't have to worry about
15350 // making sure it gets removed.
15351 EnforceOneUse = false;
15352 break;
15353 }
15354 [[fallthrough]];
15355 default:
15356 fillUpExtensionSupport(Root, DAG, Subtarget);
15357 break;
15358 }
15359 }
15360
15361 /// Helper function to get the Mask and VL from \p Root.
15362 static std::pair<SDValue, SDValue>
15363 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15364 const RISCVSubtarget &Subtarget) {
15365 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15366 switch (Root->getOpcode()) {
15367 case ISD::ADD:
15368 case ISD::SUB:
15369 case ISD::MUL:
15370 case ISD::OR:
15371 case ISD::SHL: {
15372 SDLoc DL(Root);
15373 MVT VT = Root->getSimpleValueType(0);
15374 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15375 }
15376 default:
15377 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15378 }
15379 }
15380
15381 /// Helper function to check if \p N is commutative with respect to the
15382 /// foldings that are supported by this class.
15383 static bool isCommutative(const SDNode *N) {
15384 switch (N->getOpcode()) {
15385 case ISD::ADD:
15386 case ISD::MUL:
15387 case ISD::OR:
15388 case RISCVISD::ADD_VL:
15389 case RISCVISD::MUL_VL:
15390    case RISCVISD::VWADD_W_VL:
15391    case RISCVISD::VWADDU_W_VL:
15392    case RISCVISD::FADD_VL:
15393    case RISCVISD::FMUL_VL:
15394    case RISCVISD::VFWADD_W_VL:
15395    case RISCVISD::VFMADD_VL:
15396    case RISCVISD::VFNMADD_VL:
15397    case RISCVISD::VFMSUB_VL:
15398    case RISCVISD::VFNMSUB_VL:
15399      return true;
15400    case ISD::SUB:
15401    case RISCVISD::SUB_VL:
15402    case RISCVISD::VWSUB_W_VL:
15403    case RISCVISD::VWSUBU_W_VL:
15404    case RISCVISD::FSUB_VL:
15405    case RISCVISD::VFWSUB_W_VL:
15406    case ISD::SHL:
15407 case RISCVISD::SHL_VL:
15408 return false;
15409 default:
15410 llvm_unreachable("Unexpected opcode");
15411 }
15412 }
15413
15414 /// Get a list of combine to try for folding extensions in \p Root.
15415 /// Note that each returned CombineToTry function doesn't actually modify
15416 /// anything. Instead they produce an optional CombineResult that if not None,
15417 /// need to be materialized for the combine to be applied.
15418 /// \see CombineResult::materialize.
15419 /// If the related CombineToTry function returns std::nullopt, that means the
15420 /// combine didn't match.
15421 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15422};
15423
15424/// Helper structure that holds all the necessary information to materialize a
15425/// combine that does some extension folding.
15426struct CombineResult {
15427 /// Opcode to be generated when materializing the combine.
15428 unsigned TargetOpcode;
15429 // No value means no extension is needed.
15430 std::optional<ExtKind> LHSExt;
15431 std::optional<ExtKind> RHSExt;
15432 /// Root of the combine.
15433 SDNode *Root;
15434 /// LHS of the TargetOpcode.
15435 NodeExtensionHelper LHS;
15436 /// RHS of the TargetOpcode.
15437 NodeExtensionHelper RHS;
15438
15439 CombineResult(unsigned TargetOpcode, SDNode *Root,
15440 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15441 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15442 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15443 LHS(LHS), RHS(RHS) {}
15444
15445 /// Return a value that uses TargetOpcode and that can be used to replace
15446 /// Root.
15447 /// The actual replacement is *not* done in that method.
15448 SDValue materialize(SelectionDAG &DAG,
15449 const RISCVSubtarget &Subtarget) const {
15450 SDValue Mask, VL, Passthru;
15451 std::tie(Mask, VL) =
15452 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15453 switch (Root->getOpcode()) {
15454 default:
15455 Passthru = Root->getOperand(2);
15456 break;
15457 case ISD::ADD:
15458 case ISD::SUB:
15459 case ISD::MUL:
15460 case ISD::OR:
15461 case ISD::SHL:
15462 Passthru = DAG.getUNDEF(Root->getValueType(0));
15463 break;
15464 }
15465 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15466 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15467 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15468 Passthru, Mask, VL);
15469 }
15470};
15471
15472/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15473/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15474/// are zext) and LHS and RHS can be folded into Root.
15475/// AllowExtMask define which form `ext` can take in this pattern.
15476///
15477/// \note If the pattern can match with both zext and sext, the returned
15478/// CombineResult will feature the zext result.
15479///
15480/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15481/// can be used to apply the pattern.
15482static std::optional<CombineResult>
15483canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15484 const NodeExtensionHelper &RHS,
15485 uint8_t AllowExtMask, SelectionDAG &DAG,
15486 const RISCVSubtarget &Subtarget) {
15487 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15488 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15489 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15490 /*RHSExt=*/{ExtKind::ZExt});
15491 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15492 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15493 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15494 /*RHSExt=*/{ExtKind::SExt});
15495 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15496 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15497 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15498 /*RHSExt=*/{ExtKind::FPExt});
15499 return std::nullopt;
15500}
15501
15502/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15503/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15504/// are zext) and LHS and RHS can be folded into Root.
15505///
15506/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15507/// can be used to apply the pattern.
15508static std::optional<CombineResult>
15509canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15510 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15511 const RISCVSubtarget &Subtarget) {
15512 return canFoldToVWWithSameExtensionImpl(
15513 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15514 Subtarget);
15515}
15516
15517/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15518///
15519/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15520/// can be used to apply the pattern.
15521static std::optional<CombineResult>
15522canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15523 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15524 const RISCVSubtarget &Subtarget) {
15525 if (RHS.SupportsFPExt)
15526 return CombineResult(
15527 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15528 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15529
15530 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15531 // sext/zext?
15532 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15533 // purposes.
15534 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15535 return CombineResult(
15536 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15537 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15538 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15539 return CombineResult(
15540 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15541 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15542 return std::nullopt;
15543}
15544
15545/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15546///
15547/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15548/// can be used to apply the pattern.
15549static std::optional<CombineResult>
15550canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15551 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15552 const RISCVSubtarget &Subtarget) {
15553 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15554 Subtarget);
15555}
15556
15557/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15558///
15559/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15560/// can be used to apply the pattern.
15561static std::optional<CombineResult>
15562canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15563 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15564 const RISCVSubtarget &Subtarget) {
15565 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15566 Subtarget);
15567}
15568
15569/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15570///
15571/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15572/// can be used to apply the pattern.
15573static std::optional<CombineResult>
15574canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15575 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15576 const RISCVSubtarget &Subtarget) {
15577 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15578 Subtarget);
15579}
15580
15581/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15582///
15583/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15584/// can be used to apply the pattern.
15585static std::optional<CombineResult>
15586canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15587 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15588 const RISCVSubtarget &Subtarget) {
15589
15590 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15591 return std::nullopt;
15592 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15593 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15594 /*RHSExt=*/{ExtKind::ZExt});
15595}
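// Illustrative example for the fold above: (mul (sext X), (zext Y)) with
// narrow integer sources maps to vwmulsu.vv, which multiplies a signed by an
// unsigned SEW-wide element into a 2*SEW-wide product.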
15596
15597SmallVector<NodeExtensionHelper::CombineToTry>
15598NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15599 SmallVector<CombineToTry> Strategies;
15600 switch (Root->getOpcode()) {
15601 case ISD::ADD:
15602 case ISD::SUB:
15603 case ISD::OR:
15604 case RISCVISD::ADD_VL:
15605 case RISCVISD::SUB_VL:
15606 case RISCVISD::FADD_VL:
15607 case RISCVISD::FSUB_VL:
15608 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15609 Strategies.push_back(canFoldToVWWithSameExtension);
15610 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15611 Strategies.push_back(canFoldToVW_W);
15612 break;
15613 case RISCVISD::FMUL_VL:
15614 case RISCVISD::VFMADD_VL:
15615 case RISCVISD::VFMSUB_VL:
15616 case RISCVISD::VFNMADD_VL:
15617 case RISCVISD::VFNMSUB_VL:
15618 Strategies.push_back(canFoldToVWWithSameExtension);
15619 break;
15620 case ISD::MUL:
15621 case RISCVISD::MUL_VL:
15622 // mul -> vwmul(u)
15623 Strategies.push_back(canFoldToVWWithSameExtension);
15624 // mul -> vwmulsu
15625 Strategies.push_back(canFoldToVW_SU);
15626 break;
15627 case ISD::SHL:
15628 case RISCVISD::SHL_VL:
15629 // shl -> vwsll
15630 Strategies.push_back(canFoldToVWWithZEXT);
15631 break;
15632 case RISCVISD::VWADD_W_VL:
15633 case RISCVISD::VWSUB_W_VL:
15634 // vwadd_w|vwsub_w -> vwadd|vwsub
15635 Strategies.push_back(canFoldToVWWithSEXT);
15636 break;
15637 case RISCVISD::VWADDU_W_VL:
15638 case RISCVISD::VWSUBU_W_VL:
15639 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15640 Strategies.push_back(canFoldToVWWithZEXT);
15641 break;
15642 case RISCVISD::VFWADD_W_VL:
15643 case RISCVISD::VFWSUB_W_VL:
15644 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15645 Strategies.push_back(canFoldToVWWithFPEXT);
15646 break;
15647 default:
15648 llvm_unreachable("Unexpected opcode");
15649 }
15650 return Strategies;
15651}
15652} // End anonymous namespace.
15653
15654/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15655/// The supported combines are:
15656/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15657/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15658/// mul | mul_vl -> vwmul(u) | vwmul_su
15659/// shl | shl_vl -> vwsll
15660/// fadd_vl -> vfwadd | vfwadd_w
15661/// fsub_vl -> vfwsub | vfwsub_w
15662/// fmul_vl -> vfwmul
15663/// vwadd_w(u) -> vwadd(u)
15664/// vwsub_w(u) -> vwsub(u)
15665/// vfwadd_w -> vfwadd
15666/// vfwsub_w -> vfwsub
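///
/// Illustrative example: (add_vl (sext nxv2i32 A), (sext nxv2i32 B)) producing
/// an nxv2i64 result is rewritten to the VWADD_VL form and later selected as
/// vwadd.vv, removing both explicit sign extensions.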
15667static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15668 TargetLowering::DAGCombinerInfo &DCI,
15669 const RISCVSubtarget &Subtarget) {
15670 SelectionDAG &DAG = DCI.DAG;
15671 if (DCI.isBeforeLegalize())
15672 return SDValue();
15673
15674 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15675 return SDValue();
15676
15677 SmallVector<SDNode *> Worklist;
15678 SmallSet<SDNode *, 8> Inserted;
15679 Worklist.push_back(N);
15680 Inserted.insert(N);
15681 SmallVector<CombineResult> CombinesToApply;
15682
15683 while (!Worklist.empty()) {
15684 SDNode *Root = Worklist.pop_back_val();
15685
15686 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15687 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15688 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15689 &Inserted](const NodeExtensionHelper &Op) {
15690 if (Op.needToPromoteOtherUsers()) {
15691 for (SDUse &Use : Op.OrigOperand->uses()) {
15692 SDNode *TheUser = Use.getUser();
15693 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15694 return false;
15695 // We only support the first 2 operands of FMA.
15696 if (Use.getOperandNo() >= 2)
15697 return false;
15698 if (Inserted.insert(TheUser).second)
15699 Worklist.push_back(TheUser);
15700 }
15701 }
15702 return true;
15703 };
15704
15705 // Control the compile time by limiting the number of nodes we look at in
15706 // total.
15707 if (Inserted.size() > ExtensionMaxWebSize)
15708 return SDValue();
15709
15710 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15711 NodeExtensionHelper::getSupportedFoldings(Root);
15712
15713 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15714 bool Matched = false;
15715 for (int Attempt = 0;
15716 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15717 ++Attempt) {
15718
15719 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15720 FoldingStrategies) {
15721 std::optional<CombineResult> Res =
15722 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15723 if (Res) {
15724 Matched = true;
15725 CombinesToApply.push_back(*Res);
15726 // All the inputs that are extended need to be folded, otherwise
15727 // we would be leaving the old input (since it may still be used),
15728 // and the new one.
15729 if (Res->LHSExt.has_value())
15730 if (!AppendUsersIfNeeded(LHS))
15731 return SDValue();
15732 if (Res->RHSExt.has_value())
15733 if (!AppendUsersIfNeeded(RHS))
15734 return SDValue();
15735 break;
15736 }
15737 }
15738 std::swap(LHS, RHS);
15739 }
15740 // Right now we do an all or nothing approach.
15741 if (!Matched)
15742 return SDValue();
15743 }
15744 // Store the value for the replacement of the input node separately.
15745 SDValue InputRootReplacement;
15746 // We do the RAUW after we materialize all the combines, because some replaced
15747 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15748 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15749 // yet-to-be-visited CombinesToApply roots.
15750 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15751 ValuesToReplace.reserve(CombinesToApply.size());
15752 for (CombineResult Res : CombinesToApply) {
15753 SDValue NewValue = Res.materialize(DAG, Subtarget);
15754 if (!InputRootReplacement) {
15755 assert(Res.Root == N &&
15756 "First element is expected to be the current node");
15757 InputRootReplacement = NewValue;
15758 } else {
15759 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15760 }
15761 }
15762 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15763 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15764 DCI.AddToWorklist(OldNewValues.second.getNode());
15765 }
15766 return InputRootReplacement;
15767}
15768
15769// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15770// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15771// y will be the Passthru and cond will be the Mask.
15772static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15773 unsigned Opc = N->getOpcode();
15774 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15775 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15776
15777 SDValue Y = N->getOperand(0);
15778 SDValue MergeOp = N->getOperand(1);
15779 unsigned MergeOpc = MergeOp.getOpcode();
15780
15781 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15782 return SDValue();
15783
15784 SDValue X = MergeOp->getOperand(1);
15785
15786 if (!MergeOp.hasOneUse())
15787 return SDValue();
15788
15789 // Passthru should be undef
15790 SDValue Passthru = N->getOperand(2);
15791 if (!Passthru.isUndef())
15792 return SDValue();
15793
15794 // Mask should be all ones
15795 SDValue Mask = N->getOperand(3);
15796 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15797 return SDValue();
15798
15799 // False value of MergeOp should be all zeros
15800 SDValue Z = MergeOp->getOperand(2);
15801
15802 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15803 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15804 Z = Z.getOperand(1);
15805
15806 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15807 return SDValue();
15808
15809 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15810 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15811 N->getFlags());
15812}
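// Illustrative example for the fold above: in (vwadd.wv Y, (vmerge M, X, 0)),
// the inactive lanes contribute Y + 0 == Y, which is exactly what the masked
// vwadd.wv with passthru Y produces, so the vmerge can be folded away.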
15813
15814static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15815 TargetLowering::DAGCombinerInfo &DCI,
15816 const RISCVSubtarget &Subtarget) {
15817 [[maybe_unused]] unsigned Opc = N->getOpcode();
15818 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15819 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15820
15821 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15822 return V;
15823
15824 return combineVWADDSUBWSelect(N, DCI.DAG);
15825}
15826
15827// Helper function for performMemPairCombine.
15828// Try to combine the memory loads/stores LSNode1 and LSNode2
15829// into a single memory pair operation.
15830static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15831 LSBaseSDNode *LSNode2, SDValue BasePtr,
15832 uint64_t Imm) {
15833 SmallPtrSet<const SDNode *, 32> Visited;
15834 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15835
15836 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15837 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15838 return SDValue();
15839
15841 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15842
15843 // The new operation has twice the width.
15844 MVT XLenVT = Subtarget.getXLenVT();
15845 EVT MemVT = LSNode1->getMemoryVT();
15846 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15847 MachineMemOperand *MMO = LSNode1->getMemOperand();
15848 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15849 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15850
15851 if (LSNode1->getOpcode() == ISD::LOAD) {
15852 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15853 unsigned Opcode;
15854 if (MemVT == MVT::i32)
15855 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15856 else
15857 Opcode = RISCVISD::TH_LDD;
15858
15859 SDValue Res = DAG.getMemIntrinsicNode(
15860 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15861 {LSNode1->getChain(), BasePtr,
15862 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15863 NewMemVT, NewMMO);
15864
15865 SDValue Node1 =
15866 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15867 SDValue Node2 =
15868 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15869
15870 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15871 return Node1;
15872 } else {
15873 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15874
15875 SDValue Res = DAG.getMemIntrinsicNode(
15876 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15877 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15878 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15879 NewMemVT, NewMMO);
15880
15881 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15882 return Res;
15883 }
15884}
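// Illustrative example: two simple i64 loads from base+16 and base+24 can be
// merged into one TH_LDD node, i.e. a single th.ldd instruction that fills
// both destination registers.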
15885
15886// Try to combine two adjacent loads/stores to a single pair instruction from
15887// the XTHeadMemPair vendor extension.
15888static SDValue performMemPairCombine(SDNode *N,
15889 TargetLowering::DAGCombinerInfo &DCI) {
15890 SelectionDAG &DAG = DCI.DAG;
15891 MachineFunction &MF = DAG.getMachineFunction();
15892 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15893
15894 // Target does not support load/store pair.
15895 if (!Subtarget.hasVendorXTHeadMemPair())
15896 return SDValue();
15897
15898 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15899 EVT MemVT = LSNode1->getMemoryVT();
15900 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15901
15902 // No volatile, indexed or atomic loads/stores.
15903 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15904 return SDValue();
15905
15906 // Function to get a base + constant representation from a memory value.
15907 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15908 if (Ptr->getOpcode() == ISD::ADD)
15909 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15910 return {Ptr->getOperand(0), C1->getZExtValue()};
15911 return {Ptr, 0};
15912 };
15913
15914 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15915
15916 SDValue Chain = N->getOperand(0);
15917 for (SDUse &Use : Chain->uses()) {
15918 if (Use.getUser() != N && Use.getResNo() == 0 &&
15919 Use.getUser()->getOpcode() == N->getOpcode()) {
15920 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
15921
15922 // No volatile, indexed or atomic loads/stores.
15923 if (!LSNode2->isSimple() || LSNode2->isIndexed())
15924 continue;
15925
15926 // Check if LSNode1 and LSNode2 have the same type and extension.
15927 if (LSNode1->getOpcode() == ISD::LOAD)
15928 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
15929 cast<LoadSDNode>(LSNode1)->getExtensionType())
15930 continue;
15931
15932 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
15933 continue;
15934
15935 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
15936
15937 // Check if the base pointer is the same for both instructions.
15938 if (Base1 != Base2)
15939 continue;
15940
15941 // Check if the offsets match the XTHeadMemPair encoding constraints.
15942 bool Valid = false;
15943 if (MemVT == MVT::i32) {
15944 // Check for adjacent i32 values and a 2-bit index.
15945 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
15946 Valid = true;
15947 } else if (MemVT == MVT::i64) {
15948 // Check for adjacent i64 values and a 2-bit index.
15949 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
15950 Valid = true;
15951 }
15952
15953 if (!Valid)
15954 continue;
15955
15956 // Try to combine.
15957 if (SDValue Res =
15958 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
15959 return Res;
15960 }
15961 }
15962
15963 return SDValue();
15964}
15965
15966// Fold
15967// (fp_to_int (froundeven X)) -> fcvt X, rne
15968// (fp_to_int (ftrunc X)) -> fcvt X, rtz
15969// (fp_to_int (ffloor X)) -> fcvt X, rdn
15970// (fp_to_int (fceil X)) -> fcvt X, rup
15971// (fp_to_int (fround X)) -> fcvt X, rmm
15972// (fp_to_int (frint X)) -> fcvt X
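// Illustrative example: on RV64 with D, (fp_to_sint i64 (ffloor f64 X))
// becomes a single fcvt.l.d with the static rdn rounding mode, so no separate
// floor is emitted.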
15973static SDValue performFP_TO_INTCombine(SDNode *N,
15974 TargetLowering::DAGCombinerInfo &DCI,
15975 const RISCVSubtarget &Subtarget) {
15976 SelectionDAG &DAG = DCI.DAG;
15977 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15978 MVT XLenVT = Subtarget.getXLenVT();
15979
15980 SDValue Src = N->getOperand(0);
15981
15982 // Don't do this for strict-fp Src.
15983 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15984 return SDValue();
15985
15986 // Ensure the FP type is legal.
15987 if (!TLI.isTypeLegal(Src.getValueType()))
15988 return SDValue();
15989
15990 // Don't do this for f16 with Zfhmin and not Zfh.
15991 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15992 return SDValue();
15993
15994 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15995 // If the result is invalid, we didn't find a foldable instruction.
15996 if (FRM == RISCVFPRndMode::Invalid)
15997 return SDValue();
15998
15999 SDLoc DL(N);
16000 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16001 EVT VT = N->getValueType(0);
16002
16003 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16004 MVT SrcVT = Src.getSimpleValueType();
16005 MVT SrcContainerVT = SrcVT;
16006 MVT ContainerVT = VT.getSimpleVT();
16007 SDValue XVal = Src.getOperand(0);
16008
16009 // For widening and narrowing conversions we just combine it into a
16010 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16011 // end up getting lowered to their appropriate pseudo instructions based on
16012 // their operand types
16013 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16014 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16015 return SDValue();
16016
16017 // Make fixed-length vectors scalable first
16018 if (SrcVT.isFixedLengthVector()) {
16019 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16020 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16021 ContainerVT =
16022 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16023 }
16024
16025 auto [Mask, VL] =
16026 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16027
16028 SDValue FpToInt;
16029 if (FRM == RISCVFPRndMode::RTZ) {
16030 // Use the dedicated trunc static rounding mode if we're truncating so we
16031 // don't need to generate calls to fsrmi/fsrm
16032 unsigned Opc =
16033 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16034 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16035 } else {
16036 unsigned Opc =
16037 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16038 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16039 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16040 }
16041
16042 // If converted from fixed-length to scalable, convert back
16043 if (VT.isFixedLengthVector())
16044 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16045
16046 return FpToInt;
16047 }
16048
16049 // Only handle XLen or i32 types. Other types narrower than XLen will
16050 // eventually be legalized to XLenVT.
16051 if (VT != MVT::i32 && VT != XLenVT)
16052 return SDValue();
16053
16054 unsigned Opc;
16055 if (VT == XLenVT)
16056 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16057 else
16058 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16059
16060 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16061 DAG.getTargetConstant(FRM, DL, XLenVT));
16062 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16063}
16064
16065// Fold
16066// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16067// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16068// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16069// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16070// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16071// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16072static SDValue performFP_TO_INT_SATCombine(SDNode *N,
16073 TargetLowering::DAGCombinerInfo &DCI,
16074 const RISCVSubtarget &Subtarget) {
16075 SelectionDAG &DAG = DCI.DAG;
16076 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16077 MVT XLenVT = Subtarget.getXLenVT();
16078
16079 // Only handle XLen types. Other types narrower than XLen will eventually be
16080 // legalized to XLenVT.
16081 EVT DstVT = N->getValueType(0);
16082 if (DstVT != XLenVT)
16083 return SDValue();
16084
16085 SDValue Src = N->getOperand(0);
16086
16087 // Don't do this for strict-fp Src.
16088 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
16089 return SDValue();
16090
16091 // Ensure the FP type is also legal.
16092 if (!TLI.isTypeLegal(Src.getValueType()))
16093 return SDValue();
16094
16095 // Don't do this for f16 with Zfhmin and not Zfh.
16096 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16097 return SDValue();
16098
16099 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16100
16101 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16102 if (FRM == RISCVFPRndMode::Invalid)
16103 return SDValue();
16104
16105 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16106
16107 unsigned Opc;
16108 if (SatVT == DstVT)
16109 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16110 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16111 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16112 else
16113 return SDValue();
16114 // FIXME: Support other SatVTs by clamping before or after the conversion.
16115
16116 Src = Src.getOperand(0);
16117
16118 SDLoc DL(N);
16119 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16120 DAG.getTargetConstant(FRM, DL, XLenVT));
16121
16122 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16123 // extend.
16124 if (Opc == RISCVISD::FCVT_WU_RV64)
16125 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16126
16127 // RISC-V FP-to-int conversions saturate to the destination register size, but
16128 // don't produce 0 for nan.
16129 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16130 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16131}
16132
16133// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16134// smaller than XLenVT.
16135static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
16136 const RISCVSubtarget &Subtarget) {
16137 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16138
16139 SDValue Src = N->getOperand(0);
16140 if (Src.getOpcode() != ISD::BSWAP)
16141 return SDValue();
16142
16143 EVT VT = N->getValueType(0);
16144 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16145 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16146 return SDValue();
16147
16148 SDLoc DL(N);
16149 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16150}
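// Illustrative example: for an i16 value, bswap swaps the two bytes and
// bitreverse then reverses all 16 bits; together they reverse the bits within
// each byte while keeping the byte order, which is exactly what brev8 computes.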
16151
16152// Convert from one FMA opcode to another based on whether we are negating the
16153// multiply result and/or the accumulator.
16154// NOTE: Only supports RVV operations with VL.
16155static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16156 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16157 if (NegMul) {
16158 // clang-format off
16159 switch (Opcode) {
16160 default: llvm_unreachable("Unexpected opcode");
16161 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16162 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16163 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16164 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16165 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16166 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16167 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16168 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16169 }
16170 // clang-format on
16171 }
16172
16173 // Negating the accumulator changes ADD<->SUB.
16174 if (NegAcc) {
16175 // clang-format off
16176 switch (Opcode) {
16177 default: llvm_unreachable("Unexpected opcode");
16178 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16179 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16180 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16181 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16182 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16183 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16184 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16185 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16186 }
16187 // clang-format on
16188 }
16189
16190 return Opcode;
16191}
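// Illustrative example: (vfmadd (fneg A), B, C) computes -(A*B) + C, which is
// the VFNMSUB form, so negating the multiply result maps VFMADD to VFNMSUB as
// in the table above.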
16192
16193static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
16194 // Fold FNEG_VL into FMA opcodes.
16195 // The first operand of strict-fp is chain.
16196 unsigned Offset = N->isTargetStrictFPOpcode();
16197 SDValue A = N->getOperand(0 + Offset);
16198 SDValue B = N->getOperand(1 + Offset);
16199 SDValue C = N->getOperand(2 + Offset);
16200 SDValue Mask = N->getOperand(3 + Offset);
16201 SDValue VL = N->getOperand(4 + Offset);
16202
16203 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16204 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16205 V.getOperand(2) == VL) {
16206 // Return the negated input.
16207 V = V.getOperand(0);
16208 return true;
16209 }
16210
16211 return false;
16212 };
16213
16214 bool NegA = invertIfNegative(A);
16215 bool NegB = invertIfNegative(B);
16216 bool NegC = invertIfNegative(C);
16217
16218 // If no operands are negated, we're done.
16219 if (!NegA && !NegB && !NegC)
16220 return SDValue();
16221
16222 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16223 if (N->isTargetStrictFPOpcode())
16224 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16225 {N->getOperand(0), A, B, C, Mask, VL});
16226 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16227 VL);
16228}
16229
16230static SDValue performVFMADD_VLCombine(SDNode *N,
16231 TargetLowering::DAGCombinerInfo &DCI,
16232 const RISCVSubtarget &Subtarget) {
16233 SelectionDAG &DAG = DCI.DAG;
16234
16235 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
16236 return V;
16237
16238 // FIXME: Ignore strict opcodes for now.
16239 if (N->isTargetStrictFPOpcode())
16240 return SDValue();
16241
16242 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16243}
16244
16245static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
16246 const RISCVSubtarget &Subtarget) {
16247 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16248
16249 EVT VT = N->getValueType(0);
16250
16251 if (VT != Subtarget.getXLenVT())
16252 return SDValue();
16253
16254 if (!isa<ConstantSDNode>(N->getOperand(1)))
16255 return SDValue();
16256 uint64_t ShAmt = N->getConstantOperandVal(1);
16257
16258 SDValue N0 = N->getOperand(0);
16259
16260 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16261 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16262 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16263 unsigned ExtSize =
16264 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16265 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16266 N0.getOperand(0).hasOneUse() &&
16267 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16268 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16269 if (LShAmt < ExtSize) {
16270 unsigned Size = VT.getSizeInBits();
16271 SDLoc ShlDL(N0.getOperand(0));
16272 SDValue Shl =
16273 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16274 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16275 SDLoc DL(N);
16276 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16277 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16278 }
16279 }
16280 }
16281
16282 if (ShAmt > 32 || VT != MVT::i64)
16283 return SDValue();
16284
16285 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16286 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16287 //
16288 // Also try these folds where an add or sub is in the middle.
16289 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16290 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
16291 SDValue Shl;
16292 ConstantSDNode *AddC = nullptr;
16293
16294 // We might have an ADD or SUB between the SRA and SHL.
16295 bool IsAdd = N0.getOpcode() == ISD::ADD;
16296 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16297 // Other operand needs to be a constant we can modify.
16298 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16299 if (!AddC)
16300 return SDValue();
16301
16302 // AddC needs to have at least 32 trailing zeros.
16303 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16304 return SDValue();
16305
16306 // All users should be a shift by a constant less than or equal to 32. This
16307 // ensures we'll do this optimization for each of them to produce an
16308 // add/sub+sext_inreg they can all share.
16309 for (SDNode *U : N0->users()) {
16310 if (U->getOpcode() != ISD::SRA ||
16311 !isa<ConstantSDNode>(U->getOperand(1)) ||
16312 U->getConstantOperandVal(1) > 32)
16313 return SDValue();
16314 }
16315
16316 Shl = N0.getOperand(IsAdd ? 0 : 1);
16317 } else {
16318 // Not an ADD or SUB.
16319 Shl = N0;
16320 }
16321
16322 // Look for a shift left by 32.
16323 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16324 Shl.getConstantOperandVal(1) != 32)
16325 return SDValue();
16326
16327 // If we didn't look through an add/sub, then the shl should have one use.
16328 // If we did look through an add/sub, the sext_inreg we create is free so
16329 // we're only creating 2 new instructions. It's enough to only remove the
16330 // original sra+add/sub.
16331 if (!AddC && !Shl.hasOneUse())
16332 return SDValue();
16333
16334 SDLoc DL(N);
16335 SDValue In = Shl.getOperand(0);
16336
16337 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16338 // constant.
16339 if (AddC) {
16340 SDValue ShiftedAddC =
16341 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16342 if (IsAdd)
16343 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16344 else
16345 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16346 }
16347
16348 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16349 DAG.getValueType(MVT::i32));
16350 if (ShAmt == 32)
16351 return SExt;
16352
16353 return DAG.getNode(
16354 ISD::SHL, DL, MVT::i64, SExt,
16355 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16356}
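// Illustrative example for the combine above: with ShAmt == 24,
//   (sra (shl X, 32), 24)  ->  (shl (sext_inreg X, i32), 8)
// i.e. the 32 - C == 24 case with C == 8.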
16357
16358// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
16359// the result is used as the condition of a br_cc or select_cc we can invert,
16360// inverting the setcc is free, and Z is 0/1. Caller will invert the
16361// br_cc/select_cc.
16362static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16363 bool IsAnd = Cond.getOpcode() == ISD::AND;
16364 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16365 return SDValue();
16366
16367 if (!Cond.hasOneUse())
16368 return SDValue();
16369
16370 SDValue Setcc = Cond.getOperand(0);
16371 SDValue Xor = Cond.getOperand(1);
16372 // Canonicalize setcc to LHS.
16373 if (Setcc.getOpcode() != ISD::SETCC)
16374 std::swap(Setcc, Xor);
16375 // LHS should be a setcc and RHS should be an xor.
16376 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16377 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16378 return SDValue();
16379
16380 // If the condition is an And, SimplifyDemandedBits may have changed
16381 // (xor Z, 1) to (not Z).
16382 SDValue Xor1 = Xor.getOperand(1);
16383 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16384 return SDValue();
16385
16386 EVT VT = Cond.getValueType();
16387 SDValue Xor0 = Xor.getOperand(0);
16388
16389 // The LHS of the xor needs to be 0/1.
16390 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16391 if (!DAG.MaskedValueIsZero(Xor0, Mask))
16392 return SDValue();
16393
16394 // We can only invert integer setccs.
16395 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16396 if (!SetCCOpVT.isScalarInteger())
16397 return SDValue();
16398
16399 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16400 if (ISD::isIntEqualitySetCC(CCVal)) {
16401 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16402 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16403 Setcc.getOperand(1), CCVal);
16404 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16405 // Invert (setlt 0, X) by converting to (setlt X, 1).
16406 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16407 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16408 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16409 // Invert (setlt X, 1) by converting to (setlt 0, X).
16410 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16411 DAG.getConstant(0, SDLoc(Setcc), VT),
16412 Setcc.getOperand(0), CCVal);
16413 } else
16414 return SDValue();
16415
16416 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16417 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16418}
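// Illustrative example: a branch on (and (seteq A, B), (xor Z, 1)) becomes a
// branch on (or (setne A, B), Z) with the branch condition inverted by the
// caller; the xor with 1 disappears.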
16419
16420// Perform common combines for BR_CC and SELECT_CC conditions.
16421static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16422 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16423 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16424
16425 // Since an arithmetic right shift always preserves the sign bit,
16426 // the shift can be omitted.
16427 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16428 // setge (sra X, N), 0 -> setge X, 0
16429 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16430 LHS.getOpcode() == ISD::SRA) {
16431 LHS = LHS.getOperand(0);
16432 return true;
16433 }
16434
16435 if (!ISD::isIntEqualitySetCC(CCVal))
16436 return false;
16437
16438 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16439 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16440 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16441 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16442 // If we're looking for eq 0 instead of ne 0, we need to invert the
16443 // condition.
16444 bool Invert = CCVal == ISD::SETEQ;
16445 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16446 if (Invert)
16447 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16448
16449 RHS = LHS.getOperand(1);
16450 LHS = LHS.getOperand(0);
16451 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16452
16453 CC = DAG.getCondCode(CCVal);
16454 return true;
16455 }
16456
16457 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16458 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16459 RHS = LHS.getOperand(1);
16460 LHS = LHS.getOperand(0);
16461 return true;
16462 }
16463
16464 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16465 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16466 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16467 SDValue LHS0 = LHS.getOperand(0);
16468 if (LHS0.getOpcode() == ISD::AND &&
16469 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16470 uint64_t Mask = LHS0.getConstantOperandVal(1);
16471 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16472 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16473 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16474 CC = DAG.getCondCode(CCVal);
16475
16476 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16477 LHS = LHS0.getOperand(0);
16478 if (ShAmt != 0)
16479 LHS =
16480 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16481 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16482 return true;
16483 }
16484 }
16485 }
16486
16487 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16488 // This can occur when legalizing some floating point comparisons.
16489 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16490 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16491 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16492 CC = DAG.getCondCode(CCVal);
16493 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16494 return true;
16495 }
16496
16497 if (isNullConstant(RHS)) {
16498 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16499 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16500 CC = DAG.getCondCode(CCVal);
16501 LHS = NewCond;
16502 return true;
16503 }
16504 }
16505
16506 return false;
16507}
16508
16509// Fold
16510// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16511// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16512// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16513// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
16514static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16515 SDValue TrueVal, SDValue FalseVal,
16516 bool Swapped) {
16517 bool Commutative = true;
16518 unsigned Opc = TrueVal.getOpcode();
16519 switch (Opc) {
16520 default:
16521 return SDValue();
16522 case ISD::SHL:
16523 case ISD::SRA:
16524 case ISD::SRL:
16525 case ISD::SUB:
16526 Commutative = false;
16527 break;
16528 case ISD::ADD:
16529 case ISD::OR:
16530 case ISD::XOR:
16531 break;
16532 }
16533
16534 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16535 return SDValue();
16536
16537 unsigned OpToFold;
16538 if (FalseVal == TrueVal.getOperand(0))
16539 OpToFold = 0;
16540 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16541 OpToFold = 1;
16542 else
16543 return SDValue();
16544
16545 EVT VT = N->getValueType(0);
16546 SDLoc DL(N);
16547 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16548 EVT OtherOpVT = OtherOp.getValueType();
16549 SDValue IdentityOperand =
16550 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16551 if (!Commutative)
16552 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16553 assert(IdentityOperand && "No identity operand!");
16554
16555 if (Swapped)
16556 std::swap(OtherOp, IdentityOperand);
16557 SDValue NewSel =
16558 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16559 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16560}
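// Illustrative example: (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)),
// where the remaining select of X against 0 can typically be lowered without a
// branch (e.g. with Zicond's czero instructions).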
16561
16562// This tries to get rid of `select` and `icmp` that are being used to handle
16563// targets that do not support `cttz(0)`/`ctlz(0)`.
16564static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16565 SDValue Cond = N->getOperand(0);
16566
16567 // This represents either CTTZ or CTLZ instruction.
16568 SDValue CountZeroes;
16569
16570 SDValue ValOnZero;
16571
16572 if (Cond.getOpcode() != ISD::SETCC)
16573 return SDValue();
16574
16575 if (!isNullConstant(Cond->getOperand(1)))
16576 return SDValue();
16577
16578 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16579 if (CCVal == ISD::CondCode::SETEQ) {
16580 CountZeroes = N->getOperand(2);
16581 ValOnZero = N->getOperand(1);
16582 } else if (CCVal == ISD::CondCode::SETNE) {
16583 CountZeroes = N->getOperand(1);
16584 ValOnZero = N->getOperand(2);
16585 } else {
16586 return SDValue();
16587 }
16588
16589 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16590 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16591 CountZeroes = CountZeroes.getOperand(0);
16592
16593 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16594 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16595 CountZeroes.getOpcode() != ISD::CTLZ &&
16596 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16597 return SDValue();
16598
16599 if (!isNullConstant(ValOnZero))
16600 return SDValue();
16601
16602 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16603 if (Cond->getOperand(0) != CountZeroesArgument)
16604 return SDValue();
16605
16606 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16607 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16608 CountZeroes.getValueType(), CountZeroesArgument);
16609 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16610 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16611 CountZeroes.getValueType(), CountZeroesArgument);
16612 }
16613
16614 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16615 SDValue BitWidthMinusOne =
16616 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16617
16618 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16619 CountZeroes, BitWidthMinusOne);
16620 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16621}
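// Illustrative example: for i32 X, (select (X == 0), 0, (cttz X)) becomes
// (and (cttz X), 31); cttz yields 32 for a zero input, and the mask turns that
// into the required 0 without a branch.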
16622
16623static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16624 const RISCVSubtarget &Subtarget) {
16625 SDValue Cond = N->getOperand(0);
16626 SDValue True = N->getOperand(1);
16627 SDValue False = N->getOperand(2);
16628 SDLoc DL(N);
16629 EVT VT = N->getValueType(0);
16630 EVT CondVT = Cond.getValueType();
16631
16632 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16633 return SDValue();
16634
16635 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
16636 // BEXTI, where C is power of 2.
16637 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16638 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16639 SDValue LHS = Cond.getOperand(0);
16640 SDValue RHS = Cond.getOperand(1);
16641 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16642 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16643 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16644 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16645 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16646 return DAG.getSelect(DL, VT,
16647 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16648 False, True);
16649 }
16650 }
16651 return SDValue();
16652}
16653
16654static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16655 const RISCVSubtarget &Subtarget) {
16656 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16657 return Folded;
16658
16659 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16660 return V;
16661
16662 if (Subtarget.hasConditionalMoveFusion())
16663 return SDValue();
16664
16665 SDValue TrueVal = N->getOperand(1);
16666 SDValue FalseVal = N->getOperand(2);
16667 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16668 return V;
16669 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16670}
16671
16672/// If we have a build_vector where each lane is binop X, C, where C
16673/// is a constant (but not necessarily the same constant on all lanes),
16674/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16675/// We assume that materializing a constant build vector will be no more
16676/// expensive than performing O(n) binops.
16677static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16678 const RISCVSubtarget &Subtarget,
16679 const RISCVTargetLowering &TLI) {
16680 SDLoc DL(N);
16681 EVT VT = N->getValueType(0);
16682
16683 assert(!VT.isScalableVector() && "unexpected build vector");
16684
16685 if (VT.getVectorNumElements() == 1)
16686 return SDValue();
16687
16688 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16689 if (!TLI.isBinOp(Opcode))
16690 return SDValue();
16691
16692 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16693 return SDValue();
16694
16695 // This BUILD_VECTOR involves an implicit truncation, and sinking
16696 // truncates through binops is non-trivial.
16697 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16698 return SDValue();
16699
16700 SmallVector<SDValue> LHSOps;
16701 SmallVector<SDValue> RHSOps;
16702 for (SDValue Op : N->ops()) {
16703 if (Op.isUndef()) {
16704 // We can't form a divide or remainder from undef.
16705 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16706 return SDValue();
16707
16708 LHSOps.push_back(Op);
16709 RHSOps.push_back(Op);
16710 continue;
16711 }
16712
16713 // TODO: We can handle operations which have a neutral rhs value
16714 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16715 // of profit in a more explicit manner.
16716 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16717 return SDValue();
16718
16719 LHSOps.push_back(Op.getOperand(0));
16720 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16721 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16722 return SDValue();
16723 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16724 // have different LHS and RHS types.
16725 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16726 return SDValue();
16727
16728 RHSOps.push_back(Op.getOperand(1));
16729 }
16730
16731 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16732 DAG.getBuildVector(VT, DL, RHSOps));
16733}
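// Illustrative example: build_vector (add a, 1), (add b, 2), (add c, 3),
// (add d, 4) is rewritten to
//   (add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4))
// so a single vector add replaces four scalar ones.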
16734
16735static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
16736 const RISCVSubtarget &Subtarget,
16737 const RISCVTargetLowering &TLI) {
16738 SDValue InVec = N->getOperand(0);
16739 SDValue InVal = N->getOperand(1);
16740 SDValue EltNo = N->getOperand(2);
16741 SDLoc DL(N);
16742
16743 EVT VT = InVec.getValueType();
16744 if (VT.isScalableVector())
16745 return SDValue();
16746
16747 if (!InVec.hasOneUse())
16748 return SDValue();
16749
16750 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16751 // move the insert_vector_elts into the arms of the binop. Note that
16752 // the new RHS must be a constant.
16753 const unsigned InVecOpcode = InVec->getOpcode();
16754 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16755 InVal.hasOneUse()) {
16756 SDValue InVecLHS = InVec->getOperand(0);
16757 SDValue InVecRHS = InVec->getOperand(1);
16758 SDValue InValLHS = InVal->getOperand(0);
16759 SDValue InValRHS = InVal->getOperand(1);
16760
16761 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
16762 return SDValue();
16763 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16764 return SDValue();
16765 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16766 // have different LHS and RHS types.
16767 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16768 return SDValue();
16769 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16770 InVecLHS, InValLHS, EltNo);
16771 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16772 InVecRHS, InValRHS, EltNo);
16773 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16774 }
16775
16776 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16777 // move the insert_vector_elt to the source operand of the concat_vector.
16778 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16779 return SDValue();
16780
16781 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16782 if (!IndexC)
16783 return SDValue();
16784 unsigned Elt = IndexC->getZExtValue();
16785
16786 EVT ConcatVT = InVec.getOperand(0).getValueType();
16787 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16788 return SDValue();
16789 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16790 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16791
16792 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16793 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16794 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16795 ConcatOp, InVal, NewIdx);
16796
16797 SmallVector<SDValue> ConcatOps;
16798 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16799 ConcatOps[ConcatOpIdx] = ConcatOp;
16800 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16801}
16802
16803// If we're concatenating a series of vector loads like
16804// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16805// Then we can turn this into a strided load by widening the vector elements
16806// vlse32 p, stride=n
16807static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
16808 const RISCVSubtarget &Subtarget,
16809 const RISCVTargetLowering &TLI) {
16810 SDLoc DL(N);
16811 EVT VT = N->getValueType(0);
16812
16813 // Only perform this combine on legal MVTs.
16814 if (!TLI.isTypeLegal(VT))
16815 return SDValue();
16816
16817 // TODO: Potentially extend this to scalable vectors
16818 if (VT.isScalableVector())
16819 return SDValue();
16820
16821 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16822 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16823 !SDValue(BaseLd, 0).hasOneUse())
16824 return SDValue();
16825
16826 EVT BaseLdVT = BaseLd->getValueType(0);
16827
16828 // Go through the loads and check that they're strided
16829 SmallVector<LoadSDNode *> Lds;
16830 Lds.push_back(BaseLd);
16831 Align Align = BaseLd->getAlign();
16832 for (SDValue Op : N->ops().drop_front()) {
16833 auto *Ld = dyn_cast<LoadSDNode>(Op);
16834 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16835 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16836 Ld->getValueType(0) != BaseLdVT)
16837 return SDValue();
16838
16839 Lds.push_back(Ld);
16840
16841 // The common alignment is the most restrictive (smallest) of all the loads
16842 Align = std::min(Align, Ld->getAlign());
16843 }
16844
16845 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16846 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16847 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16848 // If the load ptrs can be decomposed into a common (Base + Index) with a
16849 // common constant stride, then return the constant stride.
16850 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16851 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16852 if (BIO1.equalBaseIndex(BIO2, DAG))
16853 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16854
16855 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16856 SDValue P1 = Ld1->getBasePtr();
16857 SDValue P2 = Ld2->getBasePtr();
16858 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16859 return {{P2.getOperand(1), false}};
16860 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16861 return {{P1.getOperand(1), true}};
16862
16863 return std::nullopt;
16864 };
16865
16866 // Get the distance between the first and second loads
16867 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16868 if (!BaseDiff)
16869 return SDValue();
16870
16871 // Check all the loads are the same distance apart
16872 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16873 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16874 return SDValue();
16875
16876 // TODO: At this point, we've successfully matched a generalized gather
16877 // load. Maybe we should emit that, and then move the specialized
16878 // matchers above and below into a DAG combine?
16879
16880 // Get the widened scalar type, e.g. v4i8 -> i64
16881 unsigned WideScalarBitWidth =
16882 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16883 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16884
16885 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
16886 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16887 if (!TLI.isTypeLegal(WideVecVT))
16888 return SDValue();
16889
16890 // Check that the operation is legal
16891 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16892 return SDValue();
16893
16894 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16895 SDValue Stride =
16896 std::holds_alternative<SDValue>(StrideVariant)
16897 ? std::get<SDValue>(StrideVariant)
16898 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
16899 Lds[0]->getOffset().getValueType());
16900 if (MustNegateStride)
16901 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16902
16903 SDValue AllOneMask =
16904 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16905 DAG.getConstant(1, DL, MVT::i1));
16906
16907 uint64_t MemSize;
16908 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16909 ConstStride && ConstStride->getSExtValue() >= 0)
16910 // total size = (elsize * n) + (stride - elsize) * (n-1)
16911 // = elsize + stride * (n-1)
16912 MemSize = WideScalarVT.getSizeInBits() +
16913 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16914 else
16915 // If Stride isn't constant, then we can't know how much it will load
16916 MemSize = MemoryLocation::UnknownSize;
16917
16918 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
16919 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
16920 Align);
16921
16922 SDValue StridedLoad = DAG.getStridedLoadVP(
16923 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
16924 AllOneMask,
16925 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
16926
16927 for (SDValue Ld : N->ops())
16928 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
16929
16930 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
16931}
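// Illustrative example: concat_vectors of four v4i8 loads from p, p+64,
// p+128 and p+192 becomes a v4i32 strided load (vlse32 with stride 64) whose
// result is bitcast back to v16i8.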
16932
16933static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
16934 const RISCVSubtarget &Subtarget) {
16935
16936 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16937
16938 if (N->getValueType(0).isFixedLengthVector())
16939 return SDValue();
16940
16941 SDValue Addend = N->getOperand(0);
16942 SDValue MulOp = N->getOperand(1);
16943
16944 if (N->getOpcode() == RISCVISD::ADD_VL) {
16945 SDValue AddPassthruOp = N->getOperand(2);
16946 if (!AddPassthruOp.isUndef())
16947 return SDValue();
16948 }
16949
16950 auto IsVWMulOpc = [](unsigned Opc) {
16951 switch (Opc) {
16952 case RISCVISD::VWMUL_VL:
16953 case RISCVISD::VWMULU_VL:
16954 case RISCVISD::VWMULSU_VL:
16955 return true;
16956 default:
16957 return false;
16958 }
16959 };
16960
16961 if (!IsVWMulOpc(MulOp.getOpcode()))
16962 std::swap(Addend, MulOp);
16963
16964 if (!IsVWMulOpc(MulOp.getOpcode()))
16965 return SDValue();
16966
16967 SDValue MulPassthruOp = MulOp.getOperand(2);
16968
16969 if (!MulPassthruOp.isUndef())
16970 return SDValue();
16971
16972 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
16973 const RISCVSubtarget &Subtarget) {
16974 if (N->getOpcode() == ISD::ADD) {
16975 SDLoc DL(N);
16976 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
16977 Subtarget);
16978 }
16979 return std::make_pair(N->getOperand(3), N->getOperand(4));
16980 }(N, DAG, Subtarget);
16981
16982 SDValue MulMask = MulOp.getOperand(3);
16983 SDValue MulVL = MulOp.getOperand(4);
16984
16985 if (AddMask != MulMask || AddVL != MulVL)
16986 return SDValue();
16987
16988 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
16989 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
16990 "Unexpected opcode after VWMACC_VL");
16991 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
16992 "Unexpected opcode after VWMACC_VL!");
16993 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
16994 "Unexpected opcode after VWMUL_VL!");
16995 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
16996 "Unexpected opcode after VWMUL_VL!");
16997
16998 SDLoc DL(N);
16999 EVT VT = N->getValueType(0);
17000 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17001 AddVL};
17002 return DAG.getNode(Opc, DL, VT, Ops);
17003}
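// Illustrative example: (add_vl Acc, (vwmul_vl X, Y)) with matching mask and
// VL becomes VWMACC_VL, i.e. a single vwmacc.vv multiply-accumulate into the
// wide accumulator.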
17004
17005static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17006 ISD::MemIndexType &IndexType,
17007 RISCVTargetLowering::DAGCombinerInfo &DCI) {
17008 if (!DCI.isBeforeLegalize())
17009 return false;
17010
17011 SelectionDAG &DAG = DCI.DAG;
17012 const MVT XLenVT =
17013 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17014
17015 const EVT IndexVT = Index.getValueType();
17016
17017 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17018 // mode, so anything else must be manually legalized.
17019 if (!isIndexTypeSigned(IndexType))
17020 return false;
17021
17022 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17023 // Any index legalization should first promote to XLenVT, so we don't lose
17024 // bits when scaling. This may create an illegal index type so we let
17025 // LLVM's legalization take care of the splitting.
17026 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17027 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17028 IndexVT.changeVectorElementType(XLenVT), Index);
17029 }
17030 IndexType = ISD::UNSIGNED_SCALED;
17031 return true;
17032}
17033
17034/// Match the index vector of a scatter or gather node as the shuffle mask
17035/// which performs the rearrangement if possible. Will only match if
17036/// all lanes are touched, and thus replacing the scatter or gather with
17037/// a unit strided access and shuffle is legal.
17038static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17039 SmallVector<int> &ShuffleMask) {
17040 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17041 return false;
17042 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17043 return false;
17044
17045 const unsigned ElementSize = VT.getScalarStoreSize();
17046 const unsigned NumElems = VT.getVectorNumElements();
17047
17048 // Create the shuffle mask and check all bits active
17049 assert(ShuffleMask.empty());
17050 BitVector ActiveLanes(NumElems);
17051 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17052 // TODO: We've found an active bit of UB, and could be
17053 // more aggressive here if desired.
17054 if (Index->getOperand(i)->isUndef())
17055 return false;
17056 uint64_t C = Index->getConstantOperandVal(i);
17057 if (C % ElementSize != 0)
17058 return false;
17059 C = C / ElementSize;
17060 if (C >= NumElems)
17061 return false;
17062 ShuffleMask.push_back(C);
17063 ActiveLanes.set(C);
17064 }
17065 return ActiveLanes.all();
17066}
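// Illustrative example: a gather of v4i32 with constant byte offsets
// <4, 0, 12, 8> touches elements 1, 0, 3 and 2 exactly once, so it can be
// rewritten as a unit-strided load followed by a shuffle with mask <1,0,3,2>.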
17067
17068/// Match the index of a gather or scatter operation as an operation
17069/// with twice the element width and half the number of elements. This is
17070/// generally profitable (if legal) because these operations are linear
17071/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17072/// come out ahead.
17073static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17074 Align BaseAlign, const RISCVSubtarget &ST) {
17075 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17076 return false;
17077 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17078 return false;
17079
17080 // Attempt a doubling. If we can use an element type 4x or 8x in
17081 // size, this will happen via multiple iterations of the transform.
17082 const unsigned NumElems = VT.getVectorNumElements();
17083 if (NumElems % 2 != 0)
17084 return false;
17085
17086 const unsigned ElementSize = VT.getScalarStoreSize();
17087 const unsigned WiderElementSize = ElementSize * 2;
17088 if (WiderElementSize > ST.getELen()/8)
17089 return false;
17090
17091 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17092 return false;
17093
17094 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17095 // TODO: We've found an active bit of UB, and could be
17096 // more aggressive here if desired.
17097 if (Index->getOperand(i)->isUndef())
17098 return false;
17099 // TODO: This offset check is too strict if we support fully
17100 // misaligned memory operations.
17101 uint64_t C = Index->getConstantOperandVal(i);
17102 if (i % 2 == 0) {
17103 if (C % WiderElementSize != 0)
17104 return false;
17105 continue;
17106 }
17107 uint64_t Last = Index->getConstantOperandVal(i-1);
17108 if (C != Last + ElementSize)
17109 return false;
17110 }
17111 return true;
17112}
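// Illustrative example: a v8i16 access with byte offsets
// <0, 2, 8, 10, 16, 18, 24, 26> touches adjacent i16 pairs, so it can be
// treated as a v4i32 access with offsets <0, 8, 16, 24> instead.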
17113
17114// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17115// This benefits cases where X and Y are both the same low-precision vector
17116// value type. Since the truncate would be lowered into
17117// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17118// restriction, such pattern would be expanded into a series of "vsetvli"
17119// and "vnsrl" instructions later to reach this point.
17120static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17121 SDValue Mask = N->getOperand(1);
17122 SDValue VL = N->getOperand(2);
17123
17124 bool IsVLMAX = isAllOnesConstant(VL) ||
17125 (isa<RegisterSDNode>(VL) &&
17126 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17127 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17128 Mask.getOperand(0) != VL)
17129 return SDValue();
17130
17131 auto IsTruncNode = [&](SDValue V) {
17132 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17133 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17134 };
17135
17136 SDValue Op = N->getOperand(0);
17137
17138 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17139 // to distinguish such pattern.
17140 while (IsTruncNode(Op)) {
17141 if (!Op.hasOneUse())
17142 return SDValue();
17143 Op = Op.getOperand(0);
17144 }
17145
17146 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17147 return SDValue();
17148
17149 SDValue N0 = Op.getOperand(0);
17150 SDValue N1 = Op.getOperand(1);
17151 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17152 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17153 return SDValue();
17154
17155 SDValue N00 = N0.getOperand(0);
17156 SDValue N10 = N1.getOperand(0);
17157 if (!N00.getValueType().isVector() ||
17158 N00.getValueType() != N10.getValueType() ||
17159 N->getValueType(0) != N10.getValueType())
17160 return SDValue();
17161
17162 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17163 SDValue SMin =
17164 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17165 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17166 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17167}
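// Example for combineTruncOfSraSext: if X and Y are vectors of i8 elements,
// (trunc (sra (sext X to i32), (zext Y to i32))) reaches this point as two
// levels of TRUNCATE_VECTOR_VL wrapping the shift; the combine instead emits
// (sra X, (smin Y, 7)) directly on the i8 type, since any shift amount of 7
// or more leaves only sign bits in an i8 element.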
17168
17169// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17170// maximum value for the truncated type.
17171// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17172// is the signed maximum value for the truncated type and C2 is the signed
17173// minimum value.
17174static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17175 const RISCVSubtarget &Subtarget) {
17176 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17177
17178 MVT VT = N->getSimpleValueType(0);
17179
17180 SDValue Mask = N->getOperand(1);
17181 SDValue VL = N->getOperand(2);
17182
17183 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17184 APInt &SplatVal) {
17185 if (V.getOpcode() != Opc &&
17186 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17187 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17188 return SDValue();
17189
17190 SDValue Op = V.getOperand(1);
17191
17192 // Peek through conversion between fixed and scalable vectors.
17193 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17194 isNullConstant(Op.getOperand(2)) &&
17195 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17196 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17197 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17198 isNullConstant(Op.getOperand(1).getOperand(1)))
17199 Op = Op.getOperand(1).getOperand(0);
17200
17201 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17202 return V.getOperand(0);
17203
17204 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17205 Op.getOperand(2) == VL) {
17206 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17207 SplatVal =
17208 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17209 return V.getOperand(0);
17210 }
17211 }
17212
17213 return SDValue();
17214 };
17215
17216 SDLoc DL(N);
17217
17218 auto DetectUSatPattern = [&](SDValue V) {
17219 APInt LoC, HiC;
17220
17221 // Simple case, V is a UMIN.
17222 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17223 if (HiC.isMask(VT.getScalarSizeInBits()))
17224 return UMinOp;
17225
17226 // If we have an SMAX that removes negative numbers first, then we can match
17227 // SMIN instead of UMIN.
17228 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17229 if (SDValue SMaxOp =
17230 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17231 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17232 return SMinOp;
17233
17234 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17235 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17236 // first.
17237 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17238 if (SDValue SMinOp =
17239 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17240 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17241 HiC.uge(LoC))
17242 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17243 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17244 Mask, VL);
17245
17246 return SDValue();
17247 };
17248
17249 auto DetectSSatPattern = [&](SDValue V) {
17250 unsigned NumDstBits = VT.getScalarSizeInBits();
17251 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17252 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17253 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17254
17255 APInt HiC, LoC;
17256 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17257 if (SDValue SMaxOp =
17258 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17259 if (HiC == SignedMax && LoC == SignedMin)
17260 return SMaxOp;
17261
17262 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17263 if (SDValue SMinOp =
17264 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17265 if (HiC == SignedMax && LoC == SignedMin)
17266 return SMinOp;
17267
17268 return SDValue();
17269 };
17270
17271 SDValue Src = N->getOperand(0);
17272
17273 // Look through multiple layers of truncates.
17274 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17275 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17276 Src.hasOneUse())
17277 Src = Src.getOperand(0);
17278
17279 SDValue Val;
17280 unsigned ClipOpc;
17281 if ((Val = DetectUSatPattern(Src)))
17282 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17283 else if ((Val = DetectSSatPattern(Src)))
17284 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17285 else
17286 return SDValue();
17287
17288 MVT ValVT = Val.getSimpleValueType();
17289
17290 do {
17291 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17292 ValVT = ValVT.changeVectorElementType(ValEltVT);
17293 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17294 } while (ValVT != VT);
17295
17296 return Val;
17297}
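// Example for combineTruncToVnclip: truncating (umin X, 255) from i32
// elements down to i8 elements matches the unsigned saturation pattern, so
// the min is dropped and the value is narrowed with saturating truncates
// instead, one SEW-halving step at a time (i32 -> i16 -> i8).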
17298
17299// Convert
17300// (iX ctpop (bitcast (vXi1 A)))
17301// ->
17302// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17303// FIXME: It's complicated to match all the variations of this after type
17304// legalization so we only handle the pre-type legalization pattern, but that
17305// requires the fixed vector type to be legal.
17306static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17307 const RISCVSubtarget &Subtarget) {
17308 EVT VT = N->getValueType(0);
17309 if (!VT.isScalarInteger())
17310 return SDValue();
17311
17312 SDValue Src = N->getOperand(0);
17313
17314 // Peek through zero_extend. It doesn't change the count.
17315 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17316 Src = Src.getOperand(0);
17317
17318 if (Src.getOpcode() != ISD::BITCAST)
17319 return SDValue();
17320
17321 Src = Src.getOperand(0);
17322 EVT SrcEVT = Src.getValueType();
17323 if (!SrcEVT.isSimple())
17324 return SDValue();
17325
17326 MVT SrcMVT = SrcEVT.getSimpleVT();
17327 // Make sure the input is an i1 vector.
17328 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17329 return SDValue();
17330
17331 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17332 return SDValue();
17333
17334 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17335 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17336
17337 SDLoc DL(N);
17338 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17339
17340 MVT XLenVT = Subtarget.getXLenVT();
17341 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17342 return DAG.getZExtOrTrunc(Pop, DL, VT);
17343}
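// Example for combineScalarCTPOPToVCPOP: (i32 ctpop (zext (bitcast (v8i1 A)
// to i8))) counts the set bits of a fixed-length mask, so it becomes a
// vcpop.m on the corresponding scalable container mask, zero-extended or
// truncated to the scalar result type.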
17344
17345SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17346 DAGCombinerInfo &DCI) const {
17347 SelectionDAG &DAG = DCI.DAG;
17348 const MVT XLenVT = Subtarget.getXLenVT();
17349 SDLoc DL(N);
17350
17351 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17352 // bits are demanded. N will be added to the Worklist if it was not deleted.
17353 // Caller should return SDValue(N, 0) if this returns true.
17354 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17355 SDValue Op = N->getOperand(OpNo);
17356 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17357 if (!SimplifyDemandedBits(Op, Mask, DCI))
17358 return false;
17359
17360 if (N->getOpcode() != ISD::DELETED_NODE)
17361 DCI.AddToWorklist(N);
17362 return true;
17363 };
17364
17365 switch (N->getOpcode()) {
17366 default:
17367 break;
17368 case RISCVISD::SplitF64: {
17369 SDValue Op0 = N->getOperand(0);
17370 // If the input to SplitF64 is just BuildPairF64 then the operation is
17371 // redundant. Instead, use BuildPairF64's operands directly.
17372 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17373 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17374
17375 if (Op0->isUndef()) {
17376 SDValue Lo = DAG.getUNDEF(MVT::i32);
17377 SDValue Hi = DAG.getUNDEF(MVT::i32);
17378 return DCI.CombineTo(N, Lo, Hi);
17379 }
17380
17381 // It's cheaper to materialise two 32-bit integers than to load a double
17382 // from the constant pool and transfer it to integer registers through the
17383 // stack.
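// For example, the f64 constant 1.0 has the bit pattern 0x3FF0000000000000,
// which splits into Lo = 0x00000000 and Hi = 0x3FF00000, both cheap to
// materialise in integer registers.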
17384 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17385 APInt V = C->getValueAPF().bitcastToAPInt();
17386 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17387 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17388 return DCI.CombineTo(N, Lo, Hi);
17389 }
17390
17391 // This is a target-specific version of a DAGCombine performed in
17392 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17393 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17394 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17395 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17396 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17397 break;
17398 SDValue NewSplitF64 =
17399 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17400 Op0.getOperand(0));
17401 SDValue Lo = NewSplitF64.getValue(0);
17402 SDValue Hi = NewSplitF64.getValue(1);
17403 APInt SignBit = APInt::getSignMask(32);
17404 if (Op0.getOpcode() == ISD::FNEG) {
17405 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17406 DAG.getConstant(SignBit, DL, MVT::i32));
17407 return DCI.CombineTo(N, Lo, NewHi);
17408 }
17409 assert(Op0.getOpcode() == ISD::FABS);
17410 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17411 DAG.getConstant(~SignBit, DL, MVT::i32));
17412 return DCI.CombineTo(N, Lo, NewHi);
17413 }
17414 case RISCVISD::SLLW:
17415 case RISCVISD::SRAW:
17416 case RISCVISD::SRLW:
17417 case RISCVISD::RORW:
17418 case RISCVISD::ROLW: {
17419 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17420 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17421 SimplifyDemandedLowBitsHelper(1, 5))
17422 return SDValue(N, 0);
17423
17424 break;
17425 }
17426 case RISCVISD::CLZW:
17427 case RISCVISD::CTZW: {
17428 // Only the lower 32 bits of the first operand are read
17429 if (SimplifyDemandedLowBitsHelper(0, 32))
17430 return SDValue(N, 0);
17431 break;
17432 }
17433 case RISCVISD::FMV_W_X_RV64: {
17434 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17435 // conversion is unnecessary and can be replaced with the
17436 // FMV_X_ANYEXTW_RV64 operand.
17437 SDValue Op0 = N->getOperand(0);
17438 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17439 return Op0.getOperand(0);
17440 break;
17441 }
17442 case RISCVISD::FMV_X_ANYEXTH:
17443 case RISCVISD::FMV_X_ANYEXTW_RV64: {
17444 SDLoc DL(N);
17445 SDValue Op0 = N->getOperand(0);
17446 MVT VT = N->getSimpleValueType(0);
17447
17448 // Constant fold.
17449 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17450 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17451 return DAG.getConstant(Val, DL, VT);
17452 }
17453
17454 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17455 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17456 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17457 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17458 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17459 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17460 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17461 assert(Op0.getOperand(0).getValueType() == VT &&
17462 "Unexpected value type!");
17463 return Op0.getOperand(0);
17464 }
17465
17466 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17467 cast<LoadSDNode>(Op0)->isSimple()) {
17468 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17469 auto *LN0 = cast<LoadSDNode>(Op0);
17470 SDValue Load =
17471 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17472 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17473 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17474 return Load;
17475 }
17476
17477 // This is a target-specific version of a DAGCombine performed in
17478 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17479 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17480 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17481 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17482 !Op0.getNode()->hasOneUse())
17483 break;
17484 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17485 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17486 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17487 if (Op0.getOpcode() == ISD::FNEG)
17488 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17489 DAG.getConstant(SignBit, DL, VT));
17490
17491 assert(Op0.getOpcode() == ISD::FABS);
17492 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17493 DAG.getConstant(~SignBit, DL, VT));
17494 }
17495 case ISD::ABS: {
17496 EVT VT = N->getValueType(0);
17497 SDValue N0 = N->getOperand(0);
17498 // abs (sext) -> zext (abs)
17499 // abs (zext) -> zext (handled elsewhere)
17500 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17501 SDValue Src = N0.getOperand(0);
17502 SDLoc DL(N);
17503 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17504 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17505 }
17506 break;
17507 }
17508 case ISD::ADD: {
17509 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17510 return V;
17511 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17512 return V;
17513 return performADDCombine(N, DCI, Subtarget);
17514 }
17515 case ISD::SUB: {
17516 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17517 return V;
17518 return performSUBCombine(N, DAG, Subtarget);
17519 }
17520 case ISD::AND:
17521 return performANDCombine(N, DCI, Subtarget);
17522 case ISD::OR: {
17523 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17524 return V;
17525 return performORCombine(N, DCI, Subtarget);
17526 }
17527 case ISD::XOR:
17528 return performXORCombine(N, DAG, Subtarget);
17529 case ISD::MUL:
17530 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17531 return V;
17532 return performMULCombine(N, DAG, DCI, Subtarget);
17533 case ISD::SDIV:
17534 case ISD::UDIV:
17535 case ISD::SREM:
17536 case ISD::UREM:
17537 if (SDValue V = combineBinOpOfZExt(N, DAG))
17538 return V;
17539 break;
17540 case ISD::FMUL: {
17541 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17542 SDValue N0 = N->getOperand(0);
17543 SDValue N1 = N->getOperand(1);
17544 if (N0->getOpcode() != ISD::FCOPYSIGN)
17545 std::swap(N0, N1);
17546 if (N0->getOpcode() != ISD::FCOPYSIGN)
17547 return SDValue();
17548 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17549 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17550 return SDValue();
17551 EVT VT = N->getValueType(0);
17552 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17553 return SDValue();
17554 SDValue Sign = N0->getOperand(1);
17555 if (Sign.getValueType() != VT)
17556 return SDValue();
17557 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17558 }
17559 case ISD::FADD:
17560 case ISD::UMAX:
17561 case ISD::UMIN:
17562 case ISD::SMAX:
17563 case ISD::SMIN:
17564 case ISD::FMAXNUM:
17565 case ISD::FMINNUM: {
17566 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17567 return V;
17568 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17569 return V;
17570 return SDValue();
17571 }
17572 case ISD::SETCC:
17573 return performSETCCCombine(N, DAG, Subtarget);
17574 case ISD::SIGN_EXTEND_INREG:
17575 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17576 case ISD::ZERO_EXTEND:
17577 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17578 // type legalization. This is safe because fp_to_uint produces poison if
17579 // it overflows.
17580 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17581 SDValue Src = N->getOperand(0);
17582 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17583 isTypeLegal(Src.getOperand(0).getValueType()))
17584 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17585 Src.getOperand(0));
17586 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17587 isTypeLegal(Src.getOperand(1).getValueType())) {
17588 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17589 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17590 Src.getOperand(0), Src.getOperand(1));
17591 DCI.CombineTo(N, Res);
17592 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17593 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17594 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17595 }
17596 }
17597 return SDValue();
17598 case RISCVISD::TRUNCATE_VECTOR_VL:
17599 if (SDValue V = combineTruncOfSraSext(N, DAG))
17600 return V;
17601 return combineTruncToVnclip(N, DAG, Subtarget);
17602 case ISD::TRUNCATE:
17603 return performTRUNCATECombine(N, DAG, Subtarget);
17604 case ISD::SELECT:
17605 return performSELECTCombine(N, DAG, Subtarget);
17606 case RISCVISD::CZERO_EQZ:
17607 case RISCVISD::CZERO_NEZ: {
17608 SDValue Val = N->getOperand(0);
17609 SDValue Cond = N->getOperand(1);
17610
17611 unsigned Opc = N->getOpcode();
17612
17613 // czero_eqz x, x -> x
17614 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17615 return Val;
17616
17617 unsigned InvOpc =
17618 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17619
17620 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17621 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17622 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17623 SDValue NewCond = Cond.getOperand(0);
17624 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17625 if (DAG.MaskedValueIsZero(NewCond, Mask))
17626 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17627 }
17628 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17629 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17630 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17631 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17632 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17633 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17634 if (ISD::isIntEqualitySetCC(CCVal))
17635 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17636 N->getValueType(0), Val, Cond.getOperand(0));
17637 }
17638 return SDValue();
17639 }
17640 case RISCVISD::SELECT_CC: {
17641 // Transform
17642 SDValue LHS = N->getOperand(0);
17643 SDValue RHS = N->getOperand(1);
17644 SDValue CC = N->getOperand(2);
17645 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17646 SDValue TrueV = N->getOperand(3);
17647 SDValue FalseV = N->getOperand(4);
17648 SDLoc DL(N);
17649 EVT VT = N->getValueType(0);
17650
17651 // If the True and False values are the same, we don't need a select_cc.
17652 if (TrueV == FalseV)
17653 return TrueV;
17654
17655 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17656 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
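// For example, on RV64 (select (x < 0), 5, 3) becomes ((x >> 63) & 2) + 3:
// the arithmetic shift produces -1 or 0, so the result is 5 when x is
// negative and 3 otherwise.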
17657 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17658 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17659 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17660 if (CCVal == ISD::CondCode::SETGE)
17661 std::swap(TrueV, FalseV);
17662
17663 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17664 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17666 // Only handle simm12; if a constant is not in this range, it can be
17667 // treated as a register operand instead.
17667 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17668 isInt<12>(TrueSImm - FalseSImm)) {
17669 SDValue SRA =
17670 DAG.getNode(ISD::SRA, DL, VT, LHS,
17671 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17672 SDValue AND =
17673 DAG.getNode(ISD::AND, DL, VT, SRA,
17674 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17675 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17676 }
17677
17678 if (CCVal == ISD::CondCode::SETGE)
17679 std::swap(TrueV, FalseV);
17680 }
17681
17682 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17683 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17684 {LHS, RHS, CC, TrueV, FalseV});
17685
17686 if (!Subtarget.hasConditionalMoveFusion()) {
17687 // (select c, -1, y) -> -c | y
17688 if (isAllOnesConstant(TrueV)) {
17689 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17690 SDValue Neg = DAG.getNegative(C, DL, VT);
17691 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17692 }
17693 // (select c, y, -1) -> -!c | y
17694 if (isAllOnesConstant(FalseV)) {
17695 SDValue C =
17696 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17697 SDValue Neg = DAG.getNegative(C, DL, VT);
17698 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17699 }
17700
17701 // (select c, 0, y) -> -!c & y
17702 if (isNullConstant(TrueV)) {
17703 SDValue C =
17704 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17705 SDValue Neg = DAG.getNegative(C, DL, VT);
17706 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17707 }
17708 // (select c, y, 0) -> -c & y
17709 if (isNullConstant(FalseV)) {
17710 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17711 SDValue Neg = DAG.getNegative(C, DL, VT);
17712 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17713 }
17714 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17715 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
17716 if (((isOneConstant(FalseV) && LHS == TrueV &&
17717 CCVal == ISD::CondCode::SETNE) ||
17718 (isOneConstant(TrueV) && LHS == FalseV &&
17719 CCVal == ISD::CondCode::SETEQ)) &&
17720 isNullConstant(RHS)) {
17721 // freeze it to be safe.
17722 LHS = DAG.getFreeze(LHS);
17723 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17724 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17725 }
17726 }
17727
17728 // If both true/false are an xor with 1, pull through the select.
17729 // This can occur after op legalization if both operands are setccs that
17730 // require an xor to invert.
17731 // FIXME: Generalize to other binary ops with identical operand?
17732 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17733 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17734 isOneConstant(TrueV.getOperand(1)) &&
17735 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17736 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17737 TrueV.getOperand(0), FalseV.getOperand(0));
17738 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17739 }
17740
17741 return SDValue();
17742 }
17743 case RISCVISD::BR_CC: {
17744 SDValue LHS = N->getOperand(1);
17745 SDValue RHS = N->getOperand(2);
17746 SDValue CC = N->getOperand(3);
17747 SDLoc DL(N);
17748
17749 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17750 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17751 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17752
17753 return SDValue();
17754 }
17755 case ISD::BITREVERSE:
17756 return performBITREVERSECombine(N, DAG, Subtarget);
17757 case ISD::FP_TO_SINT:
17758 case ISD::FP_TO_UINT:
17759 return performFP_TO_INTCombine(N, DCI, Subtarget);
17760 case ISD::FP_TO_SINT_SAT:
17761 case ISD::FP_TO_UINT_SAT:
17762 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17763 case ISD::FCOPYSIGN: {
17764 EVT VT = N->getValueType(0);
17765 if (!VT.isVector())
17766 break;
17767 // There is a form of VFSGNJ which injects the negated sign of its second
17768 // operand. Try and bubble any FNEG up after the extend/round to produce
17769 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
17770 // its TRUNC flag set to 1.
17771 SDValue In2 = N->getOperand(1);
17772 // Avoid cases where the extend/round has multiple uses, as duplicating
17773 // those is typically more expensive than removing a fneg.
17774 if (!In2.hasOneUse())
17775 break;
17776 if (In2.getOpcode() != ISD::FP_EXTEND &&
17777 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17778 break;
17779 In2 = In2.getOperand(0);
17780 if (In2.getOpcode() != ISD::FNEG)
17781 break;
17782 SDLoc DL(N);
17783 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17784 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17785 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17786 }
17787 case ISD::MGATHER: {
17788 const auto *MGN = cast<MaskedGatherSDNode>(N);
17789 const EVT VT = N->getValueType(0);
17790 SDValue Index = MGN->getIndex();
17791 SDValue ScaleOp = MGN->getScale();
17792 ISD::MemIndexType IndexType = MGN->getIndexType();
17793 assert(!MGN->isIndexScaled() &&
17794 "Scaled gather/scatter should not be formed");
17795
17796 SDLoc DL(N);
17797 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17798 return DAG.getMaskedGather(
17799 N->getVTList(), MGN->getMemoryVT(), DL,
17800 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17801 MGN->getBasePtr(), Index, ScaleOp},
17802 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17803
17804 if (narrowIndex(Index, IndexType, DAG))
17805 return DAG.getMaskedGather(
17806 N->getVTList(), MGN->getMemoryVT(), DL,
17807 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17808 MGN->getBasePtr(), Index, ScaleOp},
17809 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17810
17811 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17812 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17813 // The sequence will be XLenVT, not the type of Index. Tell
17814 // isSimpleVIDSequence this so we avoid overflow.
17815 if (std::optional<VIDSequence> SimpleVID =
17816 isSimpleVIDSequence(Index, Subtarget.getXLen());
17817 SimpleVID && SimpleVID->StepDenominator == 1) {
17818 const int64_t StepNumerator = SimpleVID->StepNumerator;
17819 const int64_t Addend = SimpleVID->Addend;
17820
17821 // Note: We don't need to check alignment here since (by assumption
17822 // from the existence of the gather), our offsets must be sufficiently
17823 // aligned.
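// For example, constant indices {0, 8, 16, 24} form a VID sequence with
// Addend 0 and StepNumerator 8, so the gather becomes a strided load with
// stride 8 from the gather's base pointer, selected against the passthru
// under the original mask.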
17824
17825 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17826 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17827 assert(IndexType == ISD::UNSIGNED_SCALED);
17828 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17829 DAG.getSignedConstant(Addend, DL, PtrVT));
17830
17831 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17832 VT.getVectorElementCount());
17833 SDValue StridedLoad = DAG.getStridedLoadVP(
17834 VT, DL, MGN->getChain(), BasePtr,
17835 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
17836 EVL, MGN->getMemOperand());
17837 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17838 StridedLoad, MGN->getPassThru(), EVL);
17839 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17840 DL);
17841 }
17842 }
17843
17844 SmallVector<int> ShuffleMask;
17845 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17846 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17847 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17848 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17849 MGN->getMask(), DAG.getUNDEF(VT),
17850 MGN->getMemoryVT(), MGN->getMemOperand(),
17851 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17852 SDValue Shuffle =
17853 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17854 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17855 }
17856
17857 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17858 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17859 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
17860 SmallVector<SDValue> NewIndices;
17861 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17862 NewIndices.push_back(Index.getOperand(i));
17863 EVT IndexVT = Index.getValueType()
17864 .getHalfNumVectorElementsVT(*DAG.getContext());
17865 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17866
17867 unsigned ElementSize = VT.getScalarStoreSize();
17868 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17869 auto EltCnt = VT.getVectorElementCount();
17870 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17871 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17872 EltCnt.divideCoefficientBy(2));
17873 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17874 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17875 EltCnt.divideCoefficientBy(2));
17876 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17877
17878 SDValue Gather =
17879 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17880 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17881 Index, ScaleOp},
17882 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17883 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17884 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17885 }
17886 break;
17887 }
17888 case ISD::MSCATTER:{
17889 const auto *MSN = cast<MaskedScatterSDNode>(N);
17890 SDValue Index = MSN->getIndex();
17891 SDValue ScaleOp = MSN->getScale();
17892 ISD::MemIndexType IndexType = MSN->getIndexType();
17893 assert(!MSN->isIndexScaled() &&
17894 "Scaled gather/scatter should not be formed");
17895
17896 SDLoc DL(N);
17897 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17898 return DAG.getMaskedScatter(
17899 N->getVTList(), MSN->getMemoryVT(), DL,
17900 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17901 Index, ScaleOp},
17902 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17903
17904 if (narrowIndex(Index, IndexType, DAG))
17905 return DAG.getMaskedScatter(
17906 N->getVTList(), MSN->getMemoryVT(), DL,
17907 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17908 Index, ScaleOp},
17909 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17910
17911 EVT VT = MSN->getValue()->getValueType(0);
17912 SmallVector<int> ShuffleMask;
17913 if (!MSN->isTruncatingStore() &&
17914 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17915 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17916 DAG.getUNDEF(VT), ShuffleMask);
17917 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17918 DAG.getUNDEF(XLenVT), MSN->getMask(),
17919 MSN->getMemoryVT(), MSN->getMemOperand(),
17920 ISD::UNINDEXED, false);
17921 }
17922 break;
17923 }
17924 case ISD::VP_GATHER: {
17925 const auto *VPGN = cast<VPGatherSDNode>(N);
17926 SDValue Index = VPGN->getIndex();
17927 SDValue ScaleOp = VPGN->getScale();
17928 ISD::MemIndexType IndexType = VPGN->getIndexType();
17929 assert(!VPGN->isIndexScaled() &&
17930 "Scaled gather/scatter should not be formed");
17931
17932 SDLoc DL(N);
17933 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17934 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17935 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17936 ScaleOp, VPGN->getMask(),
17937 VPGN->getVectorLength()},
17938 VPGN->getMemOperand(), IndexType);
17939
17940 if (narrowIndex(Index, IndexType, DAG))
17941 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17942 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17943 ScaleOp, VPGN->getMask(),
17944 VPGN->getVectorLength()},
17945 VPGN->getMemOperand(), IndexType);
17946
17947 break;
17948 }
17949 case ISD::VP_SCATTER: {
17950 const auto *VPSN = cast<VPScatterSDNode>(N);
17951 SDValue Index = VPSN->getIndex();
17952 SDValue ScaleOp = VPSN->getScale();
17953 ISD::MemIndexType IndexType = VPSN->getIndexType();
17954 assert(!VPSN->isIndexScaled() &&
17955 "Scaled gather/scatter should not be formed");
17956
17957 SDLoc DL(N);
17958 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17959 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17960 {VPSN->getChain(), VPSN->getValue(),
17961 VPSN->getBasePtr(), Index, ScaleOp,
17962 VPSN->getMask(), VPSN->getVectorLength()},
17963 VPSN->getMemOperand(), IndexType);
17964
17965 if (narrowIndex(Index, IndexType, DAG))
17966 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17967 {VPSN->getChain(), VPSN->getValue(),
17968 VPSN->getBasePtr(), Index, ScaleOp,
17969 VPSN->getMask(), VPSN->getVectorLength()},
17970 VPSN->getMemOperand(), IndexType);
17971 break;
17972 }
17973 case RISCVISD::SHL_VL:
17974 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17975 return V;
17976 [[fallthrough]];
17977 case RISCVISD::SRA_VL:
17978 case RISCVISD::SRL_VL: {
17979 SDValue ShAmt = N->getOperand(1);
17980 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
17981 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17982 SDLoc DL(N);
17983 SDValue VL = N->getOperand(4);
17984 EVT VT = N->getValueType(0);
17985 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17986 ShAmt.getOperand(1), VL);
17987 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
17988 N->getOperand(2), N->getOperand(3), N->getOperand(4));
17989 }
17990 break;
17991 }
17992 case ISD::SRA:
17993 if (SDValue V = performSRACombine(N, DAG, Subtarget))
17994 return V;
17995 [[fallthrough]];
17996 case ISD::SRL:
17997 case ISD::SHL: {
17998 if (N->getOpcode() == ISD::SHL) {
17999 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18000 return V;
18001 }
18002 SDValue ShAmt = N->getOperand(1);
18003 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18004 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18005 SDLoc DL(N);
18006 EVT VT = N->getValueType(0);
18007 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18008 ShAmt.getOperand(1),
18009 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18010 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18011 }
18012 break;
18013 }
18014 case RISCVISD::ADD_VL:
18015 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18016 return V;
18017 return combineToVWMACC(N, DAG, Subtarget);
18018 case RISCVISD::VWADD_W_VL:
18019 case RISCVISD::VWADDU_W_VL:
18020 case RISCVISD::VWSUB_W_VL:
18021 case RISCVISD::VWSUBU_W_VL:
18022 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18023 case RISCVISD::SUB_VL:
18024 case RISCVISD::MUL_VL:
18025 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18026 case RISCVISD::VFMADD_VL:
18027 case RISCVISD::VFNMADD_VL:
18028 case RISCVISD::VFMSUB_VL:
18029 case RISCVISD::VFNMSUB_VL:
18030 case RISCVISD::STRICT_VFMADD_VL:
18031 case RISCVISD::STRICT_VFNMADD_VL:
18032 case RISCVISD::STRICT_VFMSUB_VL:
18033 case RISCVISD::STRICT_VFNMSUB_VL:
18034 return performVFMADD_VLCombine(N, DCI, Subtarget);
18035 case RISCVISD::FADD_VL:
18036 case RISCVISD::FSUB_VL:
18037 case RISCVISD::FMUL_VL:
18038 case RISCVISD::VFWADD_W_VL:
18039 case RISCVISD::VFWSUB_W_VL:
18040 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18041 case ISD::LOAD:
18042 case ISD::STORE: {
18043 if (DCI.isAfterLegalizeDAG())
18044 if (SDValue V = performMemPairCombine(N, DCI))
18045 return V;
18046
18047 if (N->getOpcode() != ISD::STORE)
18048 break;
18049
18050 auto *Store = cast<StoreSDNode>(N);
18051 SDValue Chain = Store->getChain();
18052 EVT MemVT = Store->getMemoryVT();
18053 SDValue Val = Store->getValue();
18054 SDLoc DL(N);
18055
18056 bool IsScalarizable =
18057 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18058 Store->isSimple() &&
18059 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18060 isPowerOf2_64(MemVT.getSizeInBits()) &&
18061 MemVT.getSizeInBits() <= Subtarget.getXLen();
18062
18063 // If sufficiently aligned we can scalarize stores of constant vectors of
18064 // any power-of-two size up to XLen bits, provided that they aren't too
18065 // expensive to materialize.
18066 // vsetivli zero, 2, e8, m1, ta, ma
18067 // vmv.v.i v8, 4
18068 // vse64.v v8, (a0)
18069 // ->
18070 // li a1, 1028
18071 // sh a1, 0(a0)
18072 if (DCI.isBeforeLegalize() && IsScalarizable &&
18073 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18074 // Get the constant vector bits
18075 APInt NewC(Val.getValueSizeInBits(), 0);
18076 uint64_t EltSize = Val.getScalarValueSizeInBits();
18077 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18078 if (Val.getOperand(i).isUndef())
18079 continue;
18080 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18081 i * EltSize);
18082 }
18083 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18084
18085 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18086 true) <= 2 &&
18087 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18088 NewVT, *Store->getMemOperand())) {
18089 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18090 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18091 Store->getPointerInfo(), Store->getOriginalAlign(),
18092 Store->getMemOperand()->getFlags());
18093 }
18094 }
18095
18096 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18097 // vsetivli zero, 2, e16, m1, ta, ma
18098 // vle16.v v8, (a0)
18099 // vse16.v v8, (a1)
18100 if (auto *L = dyn_cast<LoadSDNode>(Val);
18101 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18102 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18103 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18104 L->getMemoryVT() == MemVT) {
18105 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18106 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18107 NewVT, *Store->getMemOperand()) &&
18108 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18109 NewVT, *L->getMemOperand())) {
18110 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18111 L->getPointerInfo(), L->getOriginalAlign(),
18112 L->getMemOperand()->getFlags());
18113 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18114 Store->getPointerInfo(), Store->getOriginalAlign(),
18115 Store->getMemOperand()->getFlags());
18116 }
18117 }
18118
18119 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18120 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18121 // any illegal types.
18122 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18123 (DCI.isAfterLegalizeDAG() &&
18124 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18125 isNullConstant(Val.getOperand(1)))) {
18126 SDValue Src = Val.getOperand(0);
18127 MVT VecVT = Src.getSimpleValueType();
18128 // VecVT should be scalable and memory VT should match the element type.
18129 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18130 MemVT == VecVT.getVectorElementType()) {
18131 SDLoc DL(N);
18132 MVT MaskVT = getMaskTypeFor(VecVT);
18133 return DAG.getStoreVP(
18134 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18135 DAG.getConstant(1, DL, MaskVT),
18136 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18137 Store->getMemOperand(), Store->getAddressingMode(),
18138 Store->isTruncatingStore(), /*IsCompress*/ false);
18139 }
18140 }
18141
18142 break;
18143 }
18144 case ISD::SPLAT_VECTOR: {
18145 EVT VT = N->getValueType(0);
18146 // Only perform this combine on legal MVT types.
18147 if (!isTypeLegal(VT))
18148 break;
18149 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18150 DAG, Subtarget))
18151 return Gather;
18152 break;
18153 }
18154 case ISD::BUILD_VECTOR:
18155 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18156 return V;
18157 break;
18158 case ISD::CONCAT_VECTORS:
18159 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18160 return V;
18161 break;
18162 case ISD::INSERT_VECTOR_ELT:
18163 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18164 return V;
18165 break;
18166 case RISCVISD::VFMV_V_F_VL: {
18167 const MVT VT = N->getSimpleValueType(0);
18168 SDValue Passthru = N->getOperand(0);
18169 SDValue Scalar = N->getOperand(1);
18170 SDValue VL = N->getOperand(2);
18171
18172 // If VL is 1, we can use vfmv.s.f.
18173 if (isOneConstant(VL))
18174 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18175 break;
18176 }
18177 case RISCVISD::VMV_V_X_VL: {
18178 const MVT VT = N->getSimpleValueType(0);
18179 SDValue Passthru = N->getOperand(0);
18180 SDValue Scalar = N->getOperand(1);
18181 SDValue VL = N->getOperand(2);
18182
18183 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18184 // scalar input.
18185 unsigned ScalarSize = Scalar.getValueSizeInBits();
18186 unsigned EltWidth = VT.getScalarSizeInBits();
18187 if (ScalarSize > EltWidth && Passthru.isUndef())
18188 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18189 return SDValue(N, 0);
18190
18191 // If VL is 1 and the scalar value won't benefit from immediate, we can
18192 // use vmv.s.x.
18193 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18194 if (isOneConstant(VL) &&
18195 (!Const || Const->isZero() ||
18196 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18197 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18198
18199 break;
18200 }
18201 case RISCVISD::VFMV_S_F_VL: {
18202 SDValue Src = N->getOperand(1);
18203 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18204 // into an undef vector.
18205 // TODO: Could use a vslide or vmv.v.v for non-undef.
18206 if (N->getOperand(0).isUndef() &&
18207 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18208 isNullConstant(Src.getOperand(1)) &&
18209 Src.getOperand(0).getValueType().isScalableVector()) {
18210 EVT VT = N->getValueType(0);
18211 EVT SrcVT = Src.getOperand(0).getValueType();
18213 // Widths match, just return the original vector.
18214 if (SrcVT == VT)
18215 return Src.getOperand(0);
18216 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18217 }
18218 [[fallthrough]];
18219 }
18220 case RISCVISD::VMV_S_X_VL: {
18221 const MVT VT = N->getSimpleValueType(0);
18222 SDValue Passthru = N->getOperand(0);
18223 SDValue Scalar = N->getOperand(1);
18224 SDValue VL = N->getOperand(2);
18225
18226 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18227 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18228 return Scalar.getOperand(0);
18229
18230 // Use M1 or smaller to avoid over-constraining register allocation.
18231 const MVT M1VT = getLMUL1VT(VT);
18232 if (M1VT.bitsLT(VT)) {
18233 SDValue M1Passthru =
18234 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18235 DAG.getVectorIdxConstant(0, DL));
18236 SDValue Result =
18237 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18238 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18239 DAG.getVectorIdxConstant(0, DL));
18240 return Result;
18241 }
18242
18243 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18244 // higher would involve overly constraining the register allocator for
18245 // no purpose.
18246 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18247 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18248 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18249 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18250
18251 break;
18252 }
18253 case RISCVISD::VMV_X_S: {
18254 SDValue Vec = N->getOperand(0);
18255 MVT VecVT = N->getOperand(0).getSimpleValueType();
18256 const MVT M1VT = getLMUL1VT(VecVT);
18257 if (M1VT.bitsLT(VecVT)) {
18258 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18259 DAG.getVectorIdxConstant(0, DL));
18260 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18261 }
18262 break;
18263 }
18264 case ISD::INTRINSIC_VOID:
18265 case ISD::INTRINSIC_W_CHAIN:
18266 case ISD::INTRINSIC_WO_CHAIN: {
18267 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18268 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18269 switch (IntNo) {
18270 // By default we do not combine any intrinsic.
18271 default:
18272 return SDValue();
18273 case Intrinsic::riscv_vcpop:
18274 case Intrinsic::riscv_vcpop_mask:
18275 case Intrinsic::riscv_vfirst:
18276 case Intrinsic::riscv_vfirst_mask: {
18277 SDValue VL = N->getOperand(2);
18278 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18279 IntNo == Intrinsic::riscv_vfirst_mask)
18280 VL = N->getOperand(3);
18281 if (!isNullConstant(VL))
18282 return SDValue();
18283 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18284 SDLoc DL(N);
18285 EVT VT = N->getValueType(0);
18286 if (IntNo == Intrinsic::riscv_vfirst ||
18287 IntNo == Intrinsic::riscv_vfirst_mask)
18288 return DAG.getAllOnesConstant(DL, VT);
18289 return DAG.getConstant(0, DL, VT);
18290 }
18291 }
18292 }
18293 case ISD::BITCAST: {
18294 assert(Subtarget.useRVVForFixedLengthVectors());
18295 SDValue N0 = N->getOperand(0);
18296 EVT VT = N->getValueType(0);
18297 EVT SrcVT = N0.getValueType();
18298 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18299 unsigned NF = VT.getRISCVVectorTupleNumFields();
18300 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18301 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18302 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18303
18304 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18305
18306 SDValue Result = DAG.getUNDEF(VT);
18307 for (unsigned i = 0; i < NF; ++i)
18308 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18309 DAG.getVectorIdxConstant(i, DL));
18310 return Result;
18311 }
18312 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18313 // type, widen both sides to avoid a trip through memory.
18314 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18315 VT.isScalarInteger()) {
18316 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18317 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18318 Ops[0] = N0;
18319 SDLoc DL(N);
18320 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18321 N0 = DAG.getBitcast(MVT::i8, N0);
18322 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18323 }
18324
18325 return SDValue();
18326 }
18327 case ISD::CTPOP:
18328 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18329 return V;
18330 break;
18331 }
18332
18333 return SDValue();
18334}
18335
18336bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18337 EVT XVT, unsigned KeptBits) const {
18338 // For vectors, we don't have a preference.
18339 if (XVT.isVector())
18340 return false;
18341
18342 if (XVT != MVT::i32 && XVT != MVT::i64)
18343 return false;
18344
18345 // We can use sext.w for RV64 or an srai 31 on RV32.
18346 if (KeptBits == 32 || KeptBits == 64)
18347 return true;
18348
18349 // With Zbb we can use sext.h/sext.b.
18350 return Subtarget.hasStdExtZbb() &&
18351 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18352 KeptBits == 16);
18353}
18354
18355bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18356 const SDNode *N, CombineLevel Level) const {
18357 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18358 N->getOpcode() == ISD::SRL) &&
18359 "Expected shift op");
18360
18361 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18362 // materialised in fewer instructions than `(OP _, c1)`:
18363 //
18364 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18365 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
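// For example, (shl (add x, 256), 2) -> (add (shl x, 2), 1024): both 256 and
// 1024 fit in an ADDI immediate, so the shifted constant is no more expensive
// to materialise and the fold is allowed.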
18366 SDValue N0 = N->getOperand(0);
18367 EVT Ty = N0.getValueType();
18368
18369 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
18370 // LD/ST, it can still complete the folding optimization operation performed
18371 // above.
18372 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18373 for (SDNode *Use : X->users()) {
18374 // This use is the one we're on right now. Skip it
18375 if (Use == User || Use->getOpcode() == ISD::SELECT)
18376 continue;
18377 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18378 return false;
18379 }
18380 return true;
18381 };
18382
18383 if (Ty.isScalarInteger() &&
18384 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18385 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18386 return isUsedByLdSt(N0.getNode(), N);
18387
18388 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18389 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18390 if (C1 && C2) {
18391 const APInt &C1Int = C1->getAPIntValue();
18392 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18393
18394 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18395 // and the combine should happen, to potentially allow further combines
18396 // later.
18397 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18398 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18399 return true;
18400
18401 // We can materialise `c1` in an add immediate, so it's "free", and the
18402 // combine should be prevented.
18403 if (C1Int.getSignificantBits() <= 64 &&
18404 isLegalAddImmediate(C1Int.getSExtValue()))
18405 return false;
18406
18407 // Neither constant will fit into an immediate, so find materialisation
18408 // costs.
18409 int C1Cost =
18410 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18411 /*CompressionCost*/ true);
18412 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18413 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18414 /*CompressionCost*/ true);
18415
18416 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18417 // combine should be prevented.
18418 if (C1Cost < ShiftedC1Cost)
18419 return false;
18420 }
18421 }
18422
18423 if (!N0->hasOneUse())
18424 return false;
18425
18426 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18427 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18428 !N0->getOperand(0)->hasOneUse())
18429 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18430
18431 return true;
18432}
18433
18434bool RISCVTargetLowering::targetShrinkDemandedConstant(
18435 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18436 TargetLoweringOpt &TLO) const {
18437 // Delay this optimization as late as possible.
18438 if (!TLO.LegalOps)
18439 return false;
18440
18441 EVT VT = Op.getValueType();
18442 if (VT.isVector())
18443 return false;
18444
18445 unsigned Opcode = Op.getOpcode();
18446 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18447 return false;
18448
18449 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18450 if (!C)
18451 return false;
18452
18453 const APInt &Mask = C->getAPIntValue();
18454
18455 // Clear all non-demanded bits initially.
18456 APInt ShrunkMask = Mask & DemandedBits;
18457
18458 // Try to make a smaller immediate by setting undemanded bits.
18459
18460 APInt ExpandedMask = Mask | ~DemandedBits;
18461
18462 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18463 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18464 };
18465 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18466 if (NewMask == Mask)
18467 return true;
18468 SDLoc DL(Op);
18469 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18470 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18471 Op.getOperand(0), NewC);
18472 return TLO.CombineTo(Op, NewOp);
18473 };
18474
18475 // If the shrunk mask fits in sign extended 12 bits, let the target
18476 // independent code apply it.
18477 if (ShrunkMask.isSignedIntN(12))
18478 return false;
18479
18480 // And has a few special cases for zext.
18481 if (Opcode == ISD::AND) {
18482 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18483 // otherwise use SLLI + SRLI.
18484 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18485 if (IsLegalMask(NewMask))
18486 return UseMask(NewMask);
18487
18488 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18489 if (VT == MVT::i64) {
18490 APInt NewMask = APInt(64, 0xffffffff);
18491 if (IsLegalMask(NewMask))
18492 return UseMask(NewMask);
18493 }
18494 }
18495
18496 // For the remaining optimizations, we need to be able to make a negative
18497 // number through a combination of mask and undemanded bits.
18498 if (!ExpandedMask.isNegative())
18499 return false;
18500
18501 // What is the fewest number of bits we need to represent the negative number?
18502 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18503
18504 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18505 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18506 // If we can't create a simm12, we shouldn't change opaque constants.
18507 APInt NewMask = ShrunkMask;
18508 if (MinSignedBits <= 12)
18509 NewMask.setBitsFrom(11);
18510 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18511 NewMask.setBitsFrom(31);
18512 else
18513 return false;
18514
18515 // Check that our new mask is a subset of the demanded mask.
18516 assert(IsLegalMask(NewMask));
18517 return UseMask(NewMask);
18518}
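// For instance, (and X, 0xFFE) where only bits 0..11 of the result are
// demanded can use the mask -2 instead: the two masks agree on the demanded
// bits, and -2 fits in a single ANDI whereas 0xFFE needs LUI+ADDI.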
18519
18520static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18521 static const uint64_t GREVMasks[] = {
18522 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18523 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18524
18525 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18526 unsigned Shift = 1 << Stage;
18527 if (ShAmt & Shift) {
18528 uint64_t Mask = GREVMasks[Stage];
18529 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18530 if (IsGORC)
18531 Res |= x;
18532 x = Res;
18533 }
18534 }
18535
18536 return x;
18537}
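// For instance, computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80 (brev8
// reverses the bits within each byte), and computeGREVOrGORC(0x00F0, 7,
// /*IsGORC=*/true) == 0x00FF (orc.b ORs every bit of a byte into all bits of
// that byte, so each nonzero byte becomes 0xFF).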
18538
18539void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18540 KnownBits &Known,
18541 const APInt &DemandedElts,
18542 const SelectionDAG &DAG,
18543 unsigned Depth) const {
18544 unsigned BitWidth = Known.getBitWidth();
18545 unsigned Opc = Op.getOpcode();
18546 assert((Opc >= ISD::BUILTIN_OP_END ||
18547 Opc == ISD::INTRINSIC_WO_CHAIN ||
18548 Opc == ISD::INTRINSIC_W_CHAIN ||
18549 Opc == ISD::INTRINSIC_VOID) &&
18550 "Should use MaskedValueIsZero if you don't know whether Op"
18551 " is a target node!");
18552
18553 Known.resetAll();
18554 switch (Opc) {
18555 default: break;
18556 case RISCVISD::SELECT_CC: {
18557 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18558 // If we don't know any bits, early out.
18559 if (Known.isUnknown())
18560 break;
18561 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18562
18563 // Only known if known in both the LHS and RHS.
18564 Known = Known.intersectWith(Known2);
18565 break;
18566 }
18567 case RISCVISD::CZERO_EQZ:
18568 case RISCVISD::CZERO_NEZ:
18569 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18570 // Result is either all zero or operand 0. We can propagate zeros, but not
18571 // ones.
18572 Known.One.clearAllBits();
18573 break;
18574 case RISCVISD::REMUW: {
18575 KnownBits Known2;
18576 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18577 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18578 // We only care about the lower 32 bits.
18579 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18580 // Restore the original width by sign extending.
18581 Known = Known.sext(BitWidth);
18582 break;
18583 }
18584 case RISCVISD::DIVUW: {
18585 KnownBits Known2;
18586 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18587 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18588 // We only care about the lower 32 bits.
18589 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18590 // Restore the original width by sign extending.
18591 Known = Known.sext(BitWidth);
18592 break;
18593 }
18594 case RISCVISD::SLLW: {
18595 KnownBits Known2;
18596 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18597 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18598 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18599 // Restore the original width by sign extending.
18600 Known = Known.sext(BitWidth);
18601 break;
18602 }
18603 case RISCVISD::CTZW: {
18604 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18605 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18606 unsigned LowBits = llvm::bit_width(PossibleTZ);
18607 Known.Zero.setBitsFrom(LowBits);
18608 break;
18609 }
18610 case RISCVISD::CLZW: {
18611 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18612 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18613 unsigned LowBits = llvm::bit_width(PossibleLZ);
18614 Known.Zero.setBitsFrom(LowBits);
18615 break;
18616 }
18617 case RISCVISD::BREV8:
18618 case RISCVISD::ORC_B: {
18619 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18620 // control value of 7 is equivalent to brev8 and orc.b.
18621 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18622 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18623 // To compute zeros, we need to invert the value and invert it back after.
18624 Known.Zero =
18625 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18626 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18627 break;
18628 }
18629 case RISCVISD::READ_VLENB: {
18630 // We can use the minimum and maximum VLEN values to bound VLENB. We
18631 // know VLEN must be a power of two.
18632 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18633 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18634 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18635 Known.Zero.setLowBits(Log2_32(MinVLenB));
18636 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18637 if (MaxVLenB == MinVLenB)
18638 Known.One.setBit(Log2_32(MinVLenB));
18639 break;
18640 }
18641 case RISCVISD::FCLASS: {
18642 // fclass will only set one of the low 10 bits.
18643 Known.Zero.setBitsFrom(10);
18644 break;
18645 }
18646 case ISD::INTRINSIC_W_CHAIN:
18647 case ISD::INTRINSIC_WO_CHAIN: {
18648 unsigned IntNo =
18649 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18650 switch (IntNo) {
18651 default:
18652 // We can't do anything for most intrinsics.
18653 break;
18654 case Intrinsic::riscv_vsetvli:
18655 case Intrinsic::riscv_vsetvlimax: {
18656 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18657 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18658 RISCVII::VLMUL VLMUL =
18659 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18660 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18661 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18662 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18663 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
18664
18665      // The result of vsetvli must not be larger than AVL.
18666 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18667 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18668
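      // Worked example (assumed configuration): with a maximum VLEN of 512,
      // SEW = 32 and LMUL = 2, MaxVL = (512 / 32) * 2 = 32, so
      // KnownZeroFirstBit = Log2_32(32) + 1 = 6 and bits [6, XLEN) of the
      // result are known zero.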
18669 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18670 if (BitWidth > KnownZeroFirstBit)
18671 Known.Zero.setBitsFrom(KnownZeroFirstBit);
18672 break;
18673 }
18674 }
18675 break;
18676 }
18677 }
18678}
18679
18680unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18681 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18682 unsigned Depth) const {
18683 switch (Op.getOpcode()) {
18684 default:
18685 break;
18686 case RISCVISD::SELECT_CC: {
18687 unsigned Tmp =
18688 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18689 if (Tmp == 1) return 1; // Early out.
18690 unsigned Tmp2 =
18691 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18692 return std::min(Tmp, Tmp2);
18693  }
18694  case RISCVISD::CZERO_EQZ:
18695  case RISCVISD::CZERO_NEZ:
18696 // Output is either all zero or operand 0. We can propagate sign bit count
18697 // from operand 0.
18698 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18699 case RISCVISD::ABSW: {
18700 // We expand this at isel to negw+max. The result will have 33 sign bits
18701 // if the input has at least 33 sign bits.
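    // For instance, if the input is sign-extended from i32 (at least 33 sign
    // bits), both the negated value and the original remain sign-extended
    // 32-bit quantities, so their max still has at least 33 identical top bits.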
18702 unsigned Tmp =
18703 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18704 if (Tmp < 33) return 1;
18705 return 33;
18706 }
18707 case RISCVISD::SLLW:
18708 case RISCVISD::SRAW:
18709 case RISCVISD::SRLW:
18710 case RISCVISD::DIVW:
18711 case RISCVISD::DIVUW:
18712 case RISCVISD::REMUW:
18713 case RISCVISD::ROLW:
18714  case RISCVISD::RORW:
18715  case RISCVISD::FCVT_W_RV64:
18716  case RISCVISD::FCVT_WU_RV64:
18717  case RISCVISD::STRICT_FCVT_W_RV64:
18718  case RISCVISD::STRICT_FCVT_WU_RV64:
18719 // TODO: As the result is sign-extended, this is conservatively correct. A
18720 // more precise answer could be calculated for SRAW depending on known
18721 // bits in the shift amount.
18722 return 33;
18723 case RISCVISD::VMV_X_S: {
18724 // The number of sign bits of the scalar result is computed by obtaining the
18725 // element type of the input vector operand, subtracting its width from the
18726 // XLEN, and then adding one (sign bit within the element type). If the
18727 // element type is wider than XLen, the least-significant XLEN bits are
18728 // taken.
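    // Worked example (assumed types): extracting the first element of a vector
    // of i16 on RV64 yields XLen - EltBits + 1 = 64 - 16 + 1 = 49 sign bits.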
18729 unsigned XLen = Subtarget.getXLen();
18730 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
18731 if (EltBits <= XLen)
18732 return XLen - EltBits + 1;
18733 break;
18734  }
18735  case ISD::INTRINSIC_W_CHAIN: {
18736 unsigned IntNo = Op.getConstantOperandVal(1);
18737 switch (IntNo) {
18738 default:
18739 break;
18740 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
18741 case Intrinsic::riscv_masked_atomicrmw_add_i64:
18742 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
18743 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
18744 case Intrinsic::riscv_masked_atomicrmw_max_i64:
18745 case Intrinsic::riscv_masked_atomicrmw_min_i64:
18746 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
18747 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
18748 case Intrinsic::riscv_masked_cmpxchg_i64:
18749 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
18750 // narrow atomic operation. These are implemented using atomic
18751 // operations at the minimum supported atomicrmw/cmpxchg width whose
18752 // result is then sign extended to XLEN. With +A, the minimum width is
18753      // 32 for both RV64 and RV32.
18754 assert(Subtarget.getXLen() == 64);
18756 assert(Subtarget.hasStdExtA());
18757 return 33;
18758 }
18759 break;
18760 }
18761 }
18762
18763 return 1;
18764}
18765
18766bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
18767 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18768 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
18769
18770 // TODO: Add more target nodes.
18771  switch (Op.getOpcode()) {
18772  case RISCVISD::SELECT_CC:
18773 // Integer select_cc cannot create poison.
18774 // TODO: What are the FP poison semantics?
18775    // TODO: This instruction blocks poison from the unselected operand; can
18776 // we do anything with that?
18777 return !Op.getValueType().isInteger();
18778  }
18779  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
18780 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
18781}
18782
18783const Constant *
18784RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
18785 assert(Ld && "Unexpected null LoadSDNode");
18786 if (!ISD::isNormalLoad(Ld))
18787 return nullptr;
18788
18789 SDValue Ptr = Ld->getBasePtr();
18790
18791 // Only constant pools with no offset are supported.
18792 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
18793 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
18794 if (!CNode || CNode->isMachineConstantPoolEntry() ||
18795 CNode->getOffset() != 0)
18796 return nullptr;
18797
18798 return CNode;
18799 };
18800
18801 // Simple case, LLA.
18802 if (Ptr.getOpcode() == RISCVISD::LLA) {
18803 auto *CNode = GetSupportedConstantPool(Ptr);
18804 if (!CNode || CNode->getTargetFlags() != 0)
18805 return nullptr;
18806
18807 return CNode->getConstVal();
18808 }
18809
18810 // Look for a HI and ADD_LO pair.
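  // RISCVISD::HI and RISCVISD::ADD_LO correspond to the lui %hi / addi %lo
  // pair emitted for constant pool addresses, e.g. (illustrative label):
  //   lui  a0, %hi(.LCPI0_0)
  //   addi a0, a0, %lo(.LCPI0_0)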
18811 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
18812 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
18813 return nullptr;
18814
18815 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
18816 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
18817
18818 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
18819 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
18820 return nullptr;
18821
18822 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
18823 return nullptr;
18824
18825 return CNodeLo->getConstVal();
18826}
18827
18828static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
18829 MachineBasicBlock *BB) {
18830 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
18831
18832 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
18833 // Should the count have wrapped while it was being read, we need to try
18834 // again.
18835 // For example:
18836 // ```
18837 // read:
18838 // csrrs x3, counterh # load high word of counter
18839 // csrrs x2, counter # load low word of counter
18840 // csrrs x4, counterh # load high word of counter
18841 // bne x3, x4, read # check if high word reads match, otherwise try again
18842 // ```
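  // For example, if the low word wraps from 0xFFFFFFFF to 0 between the two
  // reads of counterh, the two high-word reads differ and the loop retries, so
  // the returned {high, low} pair is always consistent.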
18843
18844 MachineFunction &MF = *BB->getParent();
18845 const BasicBlock *LLVMBB = BB->getBasicBlock();
18847
18848 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
18849 MF.insert(It, LoopMBB);
18850
18851 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
18852 MF.insert(It, DoneMBB);
18853
18854 // Transfer the remainder of BB and its successor edges to DoneMBB.
18855 DoneMBB->splice(DoneMBB->begin(), BB,
18856 std::next(MachineBasicBlock::iterator(MI)), BB->end());
18858
18859 BB->addSuccessor(LoopMBB);
18860
18862 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18863 Register LoReg = MI.getOperand(0).getReg();
18864 Register HiReg = MI.getOperand(1).getReg();
18865 int64_t LoCounter = MI.getOperand(2).getImm();
18866 int64_t HiCounter = MI.getOperand(3).getImm();
18867 DebugLoc DL = MI.getDebugLoc();
18868
18870 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
18871 .addImm(HiCounter)
18872 .addReg(RISCV::X0);
18873 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
18874 .addImm(LoCounter)
18875 .addReg(RISCV::X0);
18876 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
18877 .addImm(HiCounter)
18878 .addReg(RISCV::X0);
18879
18880 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
18881 .addReg(HiReg)
18882 .addReg(ReadAgainReg)
18883 .addMBB(LoopMBB);
18884
18885 LoopMBB->addSuccessor(LoopMBB);
18886 LoopMBB->addSuccessor(DoneMBB);
18887
18888 MI.eraseFromParent();
18889
18890 return DoneMBB;
18891}
18892
18893static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
18894                                             MachineBasicBlock *BB,
18895 const RISCVSubtarget &Subtarget) {
18896 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18897
18898 MachineFunction &MF = *BB->getParent();
18899 DebugLoc DL = MI.getDebugLoc();
18902 Register LoReg = MI.getOperand(0).getReg();
18903 Register HiReg = MI.getOperand(1).getReg();
18904 Register SrcReg = MI.getOperand(2).getReg();
18905
18906 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18907 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18908
18909 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
18910 RI, Register());
18912 MachineMemOperand *MMOLo =
18916 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
18917 .addFrameIndex(FI)
18918 .addImm(0)
18919 .addMemOperand(MMOLo);
18920 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
18921 .addFrameIndex(FI)
18922 .addImm(4)
18923 .addMemOperand(MMOHi);
18924 MI.eraseFromParent(); // The pseudo instruction is gone now.
18925 return BB;
18926}
18927
18928static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
18929                                                 MachineBasicBlock *BB,
18930 const RISCVSubtarget &Subtarget) {
18931 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
18932 "Unexpected instruction");
18933
18934 MachineFunction &MF = *BB->getParent();
18935 DebugLoc DL = MI.getDebugLoc();
18938 Register DstReg = MI.getOperand(0).getReg();
18939 Register LoReg = MI.getOperand(1).getReg();
18940 Register HiReg = MI.getOperand(2).getReg();
18941
18942 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
18943 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18944
18946 MachineMemOperand *MMOLo =
18950 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18951 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
18952 .addFrameIndex(FI)
18953 .addImm(0)
18954 .addMemOperand(MMOLo);
18955 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18956 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
18957 .addFrameIndex(FI)
18958 .addImm(4)
18959 .addMemOperand(MMOHi);
18960 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
18961 MI.eraseFromParent(); // The pseudo instruction is gone now.
18962 return BB;
18963}
18964
18965static bool isSelectPseudo(MachineInstr &MI) {
18966 switch (MI.getOpcode()) {
18967 default:
18968 return false;
18969 case RISCV::Select_GPR_Using_CC_GPR:
18970 case RISCV::Select_GPR_Using_CC_Imm:
18971 case RISCV::Select_FPR16_Using_CC_GPR:
18972 case RISCV::Select_FPR16INX_Using_CC_GPR:
18973 case RISCV::Select_FPR32_Using_CC_GPR:
18974 case RISCV::Select_FPR32INX_Using_CC_GPR:
18975 case RISCV::Select_FPR64_Using_CC_GPR:
18976 case RISCV::Select_FPR64INX_Using_CC_GPR:
18977 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18978 return true;
18979 }
18980}
18981
18982static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
18983 unsigned RelOpcode, unsigned EqOpcode,
18984 const RISCVSubtarget &Subtarget) {
18985 DebugLoc DL = MI.getDebugLoc();
18986 Register DstReg = MI.getOperand(0).getReg();
18987 Register Src1Reg = MI.getOperand(1).getReg();
18988 Register Src2Reg = MI.getOperand(2).getReg();
18990 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18992
18993 // Save the current FFLAGS.
18994 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
18995
18996 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
18997 .addReg(Src1Reg)
18998 .addReg(Src2Reg);
19001
19002 // Restore the FFLAGS.
19003 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19004 .addReg(SavedFFlags, RegState::Kill);
19005
19006 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
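  // (FLT/FLE are signaling comparisons and raise the invalid flag for any NaN
  // input, but a quiet compare must raise it only for signaling NaNs; FEQ has
  // exactly that behaviour, which is why it is used here.)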
19007 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19008 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19009 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19012
19013 // Erase the pseudoinstruction.
19014 MI.eraseFromParent();
19015 return BB;
19016}
19017
19018static MachineBasicBlock *
19019EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19020 MachineBasicBlock *ThisMBB,
19021 const RISCVSubtarget &Subtarget) {
19022  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19023  // Without this, the custom inserter would have generated:
19024 //
19025 // A
19026 // | \
19027 // | B
19028 // | /
19029 // C
19030 // | \
19031 // | D
19032 // | /
19033 // E
19034 //
19035 // A: X = ...; Y = ...
19036 // B: empty
19037 // C: Z = PHI [X, A], [Y, B]
19038 // D: empty
19039 // E: PHI [X, C], [Z, D]
19040 //
19041 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19042 //
19043 // A
19044 // | \
19045 // | C
19046 // | /|
19047 // |/ |
19048 // | |
19049 // | D
19050 // | /
19051 // E
19052 //
19053 // A: X = ...; Y = ...
19054 // D: empty
19055 // E: PHI [X, A], [X, C], [Y, D]
19056
19057 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19058 const DebugLoc &DL = First.getDebugLoc();
19059 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19060 MachineFunction *F = ThisMBB->getParent();
19061 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19062 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19063 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19064 MachineFunction::iterator It = ++ThisMBB->getIterator();
19065 F->insert(It, FirstMBB);
19066 F->insert(It, SecondMBB);
19067 F->insert(It, SinkMBB);
19068
19069 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19070 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19072 ThisMBB->end());
19073 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19074
19075 // Fallthrough block for ThisMBB.
19076 ThisMBB->addSuccessor(FirstMBB);
19077 // Fallthrough block for FirstMBB.
19078 FirstMBB->addSuccessor(SecondMBB);
19079 ThisMBB->addSuccessor(SinkMBB);
19080 FirstMBB->addSuccessor(SinkMBB);
19081 // This is fallthrough.
19082 SecondMBB->addSuccessor(SinkMBB);
19083
19084 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19085 Register FLHS = First.getOperand(1).getReg();
19086 Register FRHS = First.getOperand(2).getReg();
19087 // Insert appropriate branch.
19088 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19089 .addReg(FLHS)
19090 .addReg(FRHS)
19091 .addMBB(SinkMBB);
19092
19093 Register SLHS = Second.getOperand(1).getReg();
19094 Register SRHS = Second.getOperand(2).getReg();
19095 Register Op1Reg4 = First.getOperand(4).getReg();
19096 Register Op1Reg5 = First.getOperand(5).getReg();
19097
19098 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19099 // Insert appropriate branch.
19100 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19101 .addReg(SLHS)
19102 .addReg(SRHS)
19103 .addMBB(SinkMBB);
19104
19105 Register DestReg = Second.getOperand(0).getReg();
19106 Register Op2Reg4 = Second.getOperand(4).getReg();
19107 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19108 .addReg(Op2Reg4)
19109 .addMBB(ThisMBB)
19110 .addReg(Op1Reg4)
19111 .addMBB(FirstMBB)
19112 .addReg(Op1Reg5)
19113 .addMBB(SecondMBB);
19114
19115 // Now remove the Select_FPRX_s.
19116 First.eraseFromParent();
19117 Second.eraseFromParent();
19118 return SinkMBB;
19119}
19120
19121static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19122                                           MachineBasicBlock *BB,
19123 const RISCVSubtarget &Subtarget) {
19124 // To "insert" Select_* instructions, we actually have to insert the triangle
19125 // control-flow pattern. The incoming instructions know the destination vreg
19126  // to set, the registers to compare, the true/false values to select
19127  // between, and the condition code to use for the branch.
19128 //
19129 // We produce the following control flow:
19130 // HeadMBB
19131 // | \
19132 // | IfFalseMBB
19133 // | /
19134 // TailMBB
19135 //
19136 // When we find a sequence of selects we attempt to optimize their emission
19137 // by sharing the control flow. Currently we only handle cases where we have
19138 // multiple selects with the exact same condition (same LHS, RHS and CC).
19139 // The selects may be interleaved with other instructions if the other
19140 // instructions meet some requirements we deem safe:
19141 // - They are not pseudo instructions.
19142  // - They are debug instructions (these are simply skipped). Otherwise,
19143  // - They do not have side-effects, do not access memory, and their inputs do
19144 // not depend on the results of the select pseudo-instructions.
19145 // The TrueV/FalseV operands of the selects cannot depend on the result of
19146 // previous selects in the sequence.
19147 // These conditions could be further relaxed. See the X86 target for a
19148 // related approach and more information.
19149 //
19150 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19151 // is checked here and handled by a separate function -
19152 // EmitLoweredCascadedSelect.
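  //
  // Illustrative shape of a shareable sequence (operand names are made up):
  //   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
  //   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t2, %f2
  // Both selects reuse one conditional branch and become two PHIs in TailMBB.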
19153
19154 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19155 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19156 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19157 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19158 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19159 Next->getOperand(5).isKill())
19160 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19161
19162 Register LHS = MI.getOperand(1).getReg();
19163 Register RHS;
19164 if (MI.getOperand(2).isReg())
19165 RHS = MI.getOperand(2).getReg();
19166 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19167
19168 SmallVector<MachineInstr *, 4> SelectDebugValues;
19169 SmallSet<Register, 4> SelectDests;
19170 SelectDests.insert(MI.getOperand(0).getReg());
19171
19172 MachineInstr *LastSelectPseudo = &MI;
19173 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19174 SequenceMBBI != E; ++SequenceMBBI) {
19175 if (SequenceMBBI->isDebugInstr())
19176 continue;
19177 if (isSelectPseudo(*SequenceMBBI)) {
19178 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19179 !SequenceMBBI->getOperand(2).isReg() ||
19180 SequenceMBBI->getOperand(2).getReg() != RHS ||
19181 SequenceMBBI->getOperand(3).getImm() != CC ||
19182 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19183 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19184 break;
19185 LastSelectPseudo = &*SequenceMBBI;
19186 SequenceMBBI->collectDebugValues(SelectDebugValues);
19187 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19188 continue;
19189 }
19190 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19191 SequenceMBBI->mayLoadOrStore() ||
19192 SequenceMBBI->usesCustomInsertionHook())
19193 break;
19194 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19195 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19196 }))
19197 break;
19198 }
19199
19200 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19201 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19202 DebugLoc DL = MI.getDebugLoc();
19204
19205 MachineBasicBlock *HeadMBB = BB;
19206 MachineFunction *F = BB->getParent();
19207 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19208 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19209
19210 F->insert(I, IfFalseMBB);
19211 F->insert(I, TailMBB);
19212
19213 // Set the call frame size on entry to the new basic blocks.
19214 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19215 IfFalseMBB->setCallFrameSize(CallFrameSize);
19216 TailMBB->setCallFrameSize(CallFrameSize);
19217
19218 // Transfer debug instructions associated with the selects to TailMBB.
19219 for (MachineInstr *DebugInstr : SelectDebugValues) {
19220 TailMBB->push_back(DebugInstr->removeFromParent());
19221 }
19222
19223 // Move all instructions after the sequence to TailMBB.
19224 TailMBB->splice(TailMBB->end(), HeadMBB,
19225 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19226 // Update machine-CFG edges by transferring all successors of the current
19227 // block to the new block which will contain the Phi nodes for the selects.
19228 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19229 // Set the successors for HeadMBB.
19230 HeadMBB->addSuccessor(IfFalseMBB);
19231 HeadMBB->addSuccessor(TailMBB);
19232
19233 // Insert appropriate branch.
19234 if (MI.getOperand(2).isImm())
19235 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19236 .addReg(LHS)
19237 .addImm(MI.getOperand(2).getImm())
19238 .addMBB(TailMBB);
19239 else
19240 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19241 .addReg(LHS)
19242 .addReg(RHS)
19243 .addMBB(TailMBB);
19244
19245 // IfFalseMBB just falls through to TailMBB.
19246 IfFalseMBB->addSuccessor(TailMBB);
19247
19248 // Create PHIs for all of the select pseudo-instructions.
19249 auto SelectMBBI = MI.getIterator();
19250 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19251 auto InsertionPoint = TailMBB->begin();
19252 while (SelectMBBI != SelectEnd) {
19253 auto Next = std::next(SelectMBBI);
19254 if (isSelectPseudo(*SelectMBBI)) {
19255 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19256 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19257 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19258 .addReg(SelectMBBI->getOperand(4).getReg())
19259 .addMBB(HeadMBB)
19260 .addReg(SelectMBBI->getOperand(5).getReg())
19261 .addMBB(IfFalseMBB);
19262 SelectMBBI->eraseFromParent();
19263 }
19264 SelectMBBI = Next;
19265 }
19266
19267 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19268 return TailMBB;
19269}
19270
19271// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19272static const RISCV::RISCVMaskedPseudoInfo *
19273lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19275 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19276 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19278 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19279 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19280 return Masked;
19281}
19282
19283static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19284                                                    MachineBasicBlock *BB,
19285 unsigned CVTXOpc) {
19286 DebugLoc DL = MI.getDebugLoc();
19287
19289
19291 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19292
19293 // Save the old value of FFLAGS.
19294 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19295
19296 assert(MI.getNumOperands() == 7);
19297
19298 // Emit a VFCVT_X_F
19299 const TargetRegisterInfo *TRI =
19301 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19302 Register Tmp = MRI.createVirtualRegister(RC);
19303 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19304 .add(MI.getOperand(1))
19305 .add(MI.getOperand(2))
19306 .add(MI.getOperand(3))
19307 .add(MachineOperand::CreateImm(7)) // frm = DYN
19308 .add(MI.getOperand(4))
19309 .add(MI.getOperand(5))
19310 .add(MI.getOperand(6))
19311 .add(MachineOperand::CreateReg(RISCV::FRM,
19312 /*IsDef*/ false,
19313 /*IsImp*/ true));
19314
19315 // Emit a VFCVT_F_X
19316 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19317 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19318 // There is no E8 variant for VFCVT_F_X.
19319 assert(Log2SEW >= 4);
19320 unsigned CVTFOpc =
19321 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19322 ->MaskedPseudo;
19323
19324 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19325 .add(MI.getOperand(0))
19326 .add(MI.getOperand(1))
19327 .addReg(Tmp)
19328 .add(MI.getOperand(3))
19329 .add(MachineOperand::CreateImm(7)) // frm = DYN
19330 .add(MI.getOperand(4))
19331 .add(MI.getOperand(5))
19332 .add(MI.getOperand(6))
19333 .add(MachineOperand::CreateReg(RISCV::FRM,
19334 /*IsDef*/ false,
19335 /*IsImp*/ true));
19336
19337 // Restore FFLAGS.
19338 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19339 .addReg(SavedFFLAGS, RegState::Kill);
19340
19341 // Erase the pseudoinstruction.
19342 MI.eraseFromParent();
19343 return BB;
19344}
19345
19346static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19347 const RISCVSubtarget &Subtarget) {
19348 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19349 const TargetRegisterClass *RC;
19350 switch (MI.getOpcode()) {
19351 default:
19352 llvm_unreachable("Unexpected opcode");
19353 case RISCV::PseudoFROUND_H:
19354 CmpOpc = RISCV::FLT_H;
19355 F2IOpc = RISCV::FCVT_W_H;
19356 I2FOpc = RISCV::FCVT_H_W;
19357 FSGNJOpc = RISCV::FSGNJ_H;
19358 FSGNJXOpc = RISCV::FSGNJX_H;
19359 RC = &RISCV::FPR16RegClass;
19360 break;
19361 case RISCV::PseudoFROUND_H_INX:
19362 CmpOpc = RISCV::FLT_H_INX;
19363 F2IOpc = RISCV::FCVT_W_H_INX;
19364 I2FOpc = RISCV::FCVT_H_W_INX;
19365 FSGNJOpc = RISCV::FSGNJ_H_INX;
19366 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19367 RC = &RISCV::GPRF16RegClass;
19368 break;
19369 case RISCV::PseudoFROUND_S:
19370 CmpOpc = RISCV::FLT_S;
19371 F2IOpc = RISCV::FCVT_W_S;
19372 I2FOpc = RISCV::FCVT_S_W;
19373 FSGNJOpc = RISCV::FSGNJ_S;
19374 FSGNJXOpc = RISCV::FSGNJX_S;
19375 RC = &RISCV::FPR32RegClass;
19376 break;
19377 case RISCV::PseudoFROUND_S_INX:
19378 CmpOpc = RISCV::FLT_S_INX;
19379 F2IOpc = RISCV::FCVT_W_S_INX;
19380 I2FOpc = RISCV::FCVT_S_W_INX;
19381 FSGNJOpc = RISCV::FSGNJ_S_INX;
19382 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19383 RC = &RISCV::GPRF32RegClass;
19384 break;
19385 case RISCV::PseudoFROUND_D:
19386 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19387 CmpOpc = RISCV::FLT_D;
19388 F2IOpc = RISCV::FCVT_L_D;
19389 I2FOpc = RISCV::FCVT_D_L;
19390 FSGNJOpc = RISCV::FSGNJ_D;
19391 FSGNJXOpc = RISCV::FSGNJX_D;
19392 RC = &RISCV::FPR64RegClass;
19393 break;
19394 case RISCV::PseudoFROUND_D_INX:
19395 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19396 CmpOpc = RISCV::FLT_D_INX;
19397 F2IOpc = RISCV::FCVT_L_D_INX;
19398 I2FOpc = RISCV::FCVT_D_L_INX;
19399 FSGNJOpc = RISCV::FSGNJ_D_INX;
19400 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19401 RC = &RISCV::GPRRegClass;
19402 break;
19403 }
19404
19405 const BasicBlock *BB = MBB->getBasicBlock();
19406 DebugLoc DL = MI.getDebugLoc();
19408
19410 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19411 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19412
19413 F->insert(I, CvtMBB);
19414 F->insert(I, DoneMBB);
19415 // Move all instructions after the sequence to DoneMBB.
19416 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19417 MBB->end());
19418 // Update machine-CFG edges by transferring all successors of the current
19419  // block to DoneMBB, which will contain the PHI node that merges the results.
19420  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
19421 // Set the successors for MBB.
19422 MBB->addSuccessor(CvtMBB);
19423 MBB->addSuccessor(DoneMBB);
19424
19425 Register DstReg = MI.getOperand(0).getReg();
19426 Register SrcReg = MI.getOperand(1).getReg();
19427 Register MaxReg = MI.getOperand(2).getReg();
19428 int64_t FRM = MI.getOperand(3).getImm();
19429
19430 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19432
19433 Register FabsReg = MRI.createVirtualRegister(RC);
19434 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19435
19436 // Compare the FP value to the max value.
19437 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19438 auto MIB =
19439 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19442
19443 // Insert branch.
19444 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19445 .addReg(CmpReg)
19446 .addReg(RISCV::X0)
19447 .addMBB(DoneMBB);
19448
19449 CvtMBB->addSuccessor(DoneMBB);
19450
19451 // Convert to integer.
19452 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19453 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19456
19457 // Convert back to FP.
19458 Register I2FReg = MRI.createVirtualRegister(RC);
19459 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19462
19463 // Restore the sign bit.
19464 Register CvtReg = MRI.createVirtualRegister(RC);
19465 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19466
19467 // Merge the results.
19468 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19469 .addReg(SrcReg)
19470 .addMBB(MBB)
19471 .addReg(CvtReg)
19472 .addMBB(CvtMBB);
19473
19474 MI.eraseFromParent();
19475 return DoneMBB;
19476}
19477
19478MachineBasicBlock *
19479RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19480 MachineBasicBlock *BB) const {
19481 switch (MI.getOpcode()) {
19482 default:
19483 llvm_unreachable("Unexpected instr type to insert");
19484 case RISCV::ReadCounterWide:
19485 assert(!Subtarget.is64Bit() &&
19486 "ReadCounterWide is only to be used on riscv32");
19487 return emitReadCounterWidePseudo(MI, BB);
19488 case RISCV::Select_GPR_Using_CC_GPR:
19489 case RISCV::Select_GPR_Using_CC_Imm:
19490 case RISCV::Select_FPR16_Using_CC_GPR:
19491 case RISCV::Select_FPR16INX_Using_CC_GPR:
19492 case RISCV::Select_FPR32_Using_CC_GPR:
19493 case RISCV::Select_FPR32INX_Using_CC_GPR:
19494 case RISCV::Select_FPR64_Using_CC_GPR:
19495 case RISCV::Select_FPR64INX_Using_CC_GPR:
19496 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19497 return emitSelectPseudo(MI, BB, Subtarget);
19498 case RISCV::BuildPairF64Pseudo:
19499 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19500 case RISCV::SplitF64Pseudo:
19501 return emitSplitF64Pseudo(MI, BB, Subtarget);
19502 case RISCV::PseudoQuietFLE_H:
19503 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19504 case RISCV::PseudoQuietFLE_H_INX:
19505 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19506 case RISCV::PseudoQuietFLT_H:
19507 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19508 case RISCV::PseudoQuietFLT_H_INX:
19509 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19510 case RISCV::PseudoQuietFLE_S:
19511 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19512 case RISCV::PseudoQuietFLE_S_INX:
19513 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19514 case RISCV::PseudoQuietFLT_S:
19515 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19516 case RISCV::PseudoQuietFLT_S_INX:
19517 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19518 case RISCV::PseudoQuietFLE_D:
19519 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19520 case RISCV::PseudoQuietFLE_D_INX:
19521 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19522 case RISCV::PseudoQuietFLE_D_IN32X:
19523 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19524 Subtarget);
19525 case RISCV::PseudoQuietFLT_D:
19526 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19527 case RISCV::PseudoQuietFLT_D_INX:
19528 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19529 case RISCV::PseudoQuietFLT_D_IN32X:
19530 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19531 Subtarget);
19532
19533 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19534 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19535 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19536 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19537 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19538 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19539 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19540 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19541 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19542 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19543 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19544 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19545 case RISCV::PseudoFROUND_H:
19546 case RISCV::PseudoFROUND_H_INX:
19547 case RISCV::PseudoFROUND_S:
19548 case RISCV::PseudoFROUND_S_INX:
19549 case RISCV::PseudoFROUND_D:
19550 case RISCV::PseudoFROUND_D_INX:
19551 case RISCV::PseudoFROUND_D_IN32X:
19552 return emitFROUND(MI, BB, Subtarget);
19553 case TargetOpcode::STATEPOINT:
19554    // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
19555    // while the jal call instruction (to which the statepoint is lowered at
19556    // the end) has an implicit def. This def is early-clobber as it is set at
19557    // the moment of the call, earlier than any use is read.
19558 // Add this implicit dead def here as a workaround.
19559 MI.addOperand(*MI.getMF(),
19561 RISCV::X1, /*isDef*/ true,
19562 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19563 /*isUndef*/ false, /*isEarlyClobber*/ true));
19564 [[fallthrough]];
19565 case TargetOpcode::STACKMAP:
19566 case TargetOpcode::PATCHPOINT:
19567 if (!Subtarget.is64Bit())
19568 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19569 "supported on 64-bit targets");
19570 return emitPatchPoint(MI, BB);
19571 }
19572}
19573
19574void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19575 SDNode *Node) const {
19576 // Add FRM dependency to any instructions with dynamic rounding mode.
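  // For example, a floating-point add whose frm operand is DYN rounds
  // according to the current contents of the FRM CSR, so the implicit use
  // keeps later passes from moving it across writes to FRM.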
19577 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19578 if (Idx < 0) {
19579 // Vector pseudos have FRM index indicated by TSFlags.
19580 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19581 if (Idx < 0)
19582 return;
19583 }
19584 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19585 return;
19586 // If the instruction already reads FRM, don't add another read.
19587 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19588 return;
19589 MI.addOperand(
19590 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19591}
19592
19593void RISCVTargetLowering::analyzeInputArgs(
19594 MachineFunction &MF, CCState &CCInfo,
19595 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19596 RISCVCCAssignFn Fn) const {
19597 unsigned NumArgs = Ins.size();
19598  FunctionType *FType = MF.getFunction().getFunctionType();
19599
19600 for (unsigned i = 0; i != NumArgs; ++i) {
19601 MVT ArgVT = Ins[i].VT;
19602 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19603
19604 Type *ArgTy = nullptr;
19605 if (IsRet)
19606 ArgTy = FType->getReturnType();
19607 else if (Ins[i].isOrigArg())
19608 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19609
19610 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19611 /*IsFixed=*/true, IsRet, ArgTy)) {
19612 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19613 << ArgVT << '\n');
19614 llvm_unreachable(nullptr);
19615 }
19616 }
19617}
19618
19619void RISCVTargetLowering::analyzeOutputArgs(
19620 MachineFunction &MF, CCState &CCInfo,
19621 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19622 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19623 unsigned NumArgs = Outs.size();
19624
19625 for (unsigned i = 0; i != NumArgs; i++) {
19626 MVT ArgVT = Outs[i].VT;
19627 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19628 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19629
19630 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19631 Outs[i].IsFixed, IsRet, OrigTy)) {
19632 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19633 << ArgVT << "\n");
19634 llvm_unreachable(nullptr);
19635 }
19636 }
19637}
19638
19639// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19640// values.
19641static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19642 const CCValAssign &VA, const SDLoc &DL,
19643 const RISCVSubtarget &Subtarget) {
19644 if (VA.needsCustom()) {
19645 if (VA.getLocVT().isInteger() &&
19646 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19647 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19648 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19649 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19651 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19652 llvm_unreachable("Unexpected Custom handling.");
19653 }
19654
19655 switch (VA.getLocInfo()) {
19656 default:
19657 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19658 case CCValAssign::Full:
19659 break;
19660 case CCValAssign::BCvt:
19661 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19662 break;
19663 }
19664 return Val;
19665}
19666
19667// The caller is responsible for loading the full value if the argument is
19668// passed with CCValAssign::Indirect.
19669static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19670 const CCValAssign &VA, const SDLoc &DL,
19671 const ISD::InputArg &In,
19672 const RISCVTargetLowering &TLI) {
19675 EVT LocVT = VA.getLocVT();
19676 SDValue Val;
19677 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19678 Register VReg = RegInfo.createVirtualRegister(RC);
19679 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19680 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19681
19682 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19683 if (In.isOrigArg()) {
19684 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19685 if (OrigArg->getType()->isIntegerTy()) {
19686 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19687 // An input zero extended from i31 can also be considered sign extended.
19688 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19689 (BitWidth < 32 && In.Flags.isZExt())) {
19691 RVFI->addSExt32Register(VReg);
19692 }
19693 }
19694 }
19695
19696  if (VA.getLocInfo() == CCValAssign::Indirect)
19697    return Val;
19698
19699 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19700}
19701
19702static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19703 const CCValAssign &VA, const SDLoc &DL,
19704 const RISCVSubtarget &Subtarget) {
19705 EVT LocVT = VA.getLocVT();
19706
19707 if (VA.needsCustom()) {
19708 if (LocVT.isInteger() &&
19709 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19710 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19711 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19712 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19713 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19714 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19715 llvm_unreachable("Unexpected Custom handling.");
19716 }
19717
19718 switch (VA.getLocInfo()) {
19719 default:
19720 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19721 case CCValAssign::Full:
19722 break;
19723 case CCValAssign::BCvt:
19724 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19725 break;
19726 }
19727 return Val;
19728}
19729
19730// The caller is responsible for loading the full value if the argument is
19731// passed with CCValAssign::Indirect.
19732static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19733 const CCValAssign &VA, const SDLoc &DL) {
19735 MachineFrameInfo &MFI = MF.getFrameInfo();
19736 EVT LocVT = VA.getLocVT();
19737 EVT ValVT = VA.getValVT();
19739 if (VA.getLocInfo() == CCValAssign::Indirect) {
19740    // When the value is a scalable vector, what is saved on the stack is a
19741    // pointer to the scalable vector value, so ValVT here is the pointer
19742    // type rather than the scalable vector type.
19743 ValVT = LocVT;
19744 }
19745 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19746 /*IsImmutable=*/true);
19747 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19748 SDValue Val;
19749
19751 switch (VA.getLocInfo()) {
19752 default:
19753 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19754 case CCValAssign::Full:
19756 case CCValAssign::BCvt:
19757 break;
19758 }
19759 Val = DAG.getExtLoad(
19760 ExtType, DL, LocVT, Chain, FIN,
19762 return Val;
19763}
19764
19765static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19766 const CCValAssign &VA,
19767 const CCValAssign &HiVA,
19768 const SDLoc &DL) {
19769 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19770 "Unexpected VA");
19772 MachineFrameInfo &MFI = MF.getFrameInfo();
19774
19775 assert(VA.isRegLoc() && "Expected register VA assignment");
19776
19777 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19778 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19779 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19780 SDValue Hi;
19781 if (HiVA.isMemLoc()) {
19782 // Second half of f64 is passed on the stack.
19783 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19784 /*IsImmutable=*/true);
19785 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19786 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19788 } else {
19789 // Second half of f64 is passed in another GPR.
19790 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19791 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19792 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19793 }
19794 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19795}
19796
19797// Transform physical registers into virtual registers.
19798SDValue RISCVTargetLowering::LowerFormalArguments(
19799 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19800 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19801 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19802
19804
19805 switch (CallConv) {
19806 default:
19807 report_fatal_error("Unsupported calling convention");
19808 case CallingConv::C:
19809 case CallingConv::Fast:
19811 case CallingConv::GRAAL:
19813 break;
19814 case CallingConv::GHC:
19815 if (Subtarget.hasStdExtE())
19816 report_fatal_error("GHC calling convention is not supported on RVE!");
19817 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19818 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19819 "(Zdinx/D) instruction set extensions");
19820 }
19821
19822 const Function &Func = MF.getFunction();
19823 if (Func.hasFnAttribute("interrupt")) {
19824 if (!Func.arg_empty())
19826 "Functions with the interrupt attribute cannot have arguments!");
19827
19828 StringRef Kind =
19829 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19830
19831 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19833 "Function interrupt attribute argument not supported!");
19834 }
19835
19836 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19837 MVT XLenVT = Subtarget.getXLenVT();
19838 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19839  // Used with varargs to accumulate store chains.
19840 std::vector<SDValue> OutChains;
19841
19842 // Assign locations to all of the incoming arguments.
19844 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19845
19846 if (CallConv == CallingConv::GHC)
19848 else
19849 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19851 : CC_RISCV);
19852
19853 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19854 CCValAssign &VA = ArgLocs[i];
19855 SDValue ArgValue;
19856 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19857 // case.
19858 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19859 assert(VA.needsCustom());
19860 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19861 } else if (VA.isRegLoc())
19862 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19863 else
19864 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19865
19866 if (VA.getLocInfo() == CCValAssign::Indirect) {
19867 // If the original argument was split and passed by reference (e.g. i128
19868 // on RV32), we need to load all parts of it here (using the same
19869 // address). Vectors may be partly split to registers and partly to the
19870 // stack, in which case the base address is partly offset and subsequent
19871      // accesses are relative to that.
19872 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19874 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19875 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19876 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19877 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19878 CCValAssign &PartVA = ArgLocs[i + 1];
19879 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19880 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19881 if (PartVA.getValVT().isScalableVector())
19882 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19883 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19884 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19886 ++i;
19887 ++InsIdx;
19888 }
19889 continue;
19890 }
19891 InVals.push_back(ArgValue);
19892 }
19893
19894 if (any_of(ArgLocs,
19895 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19896 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19897
19898 if (IsVarArg) {
19899 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19900 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19901 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19902 MachineFrameInfo &MFI = MF.getFrameInfo();
19903 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19905
19906 // Size of the vararg save area. For now, the varargs save area is either
19907 // zero or large enough to hold a0-a7.
19908 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19909 int FI;
19910
19911 // If all registers are allocated, then all varargs must be passed on the
19912 // stack and we don't need to save any argregs.
19913 if (VarArgsSaveSize == 0) {
19914 int VaArgOffset = CCInfo.getStackSize();
19915 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19916 } else {
19917 int VaArgOffset = -VarArgsSaveSize;
19918 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19919
19920      // If saving an odd number of registers, create an extra stack slot to
19921      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19922      // that offsets to even-numbered registers remain 2*XLEN-aligned.
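      // Worked example (illustrative RV64 case): with a0-a2 holding fixed
      // arguments, Idx = 3 and a3-a7 are saved (5 * 8 = 40 bytes); Idx is odd,
      // so an extra 8-byte slot grows the save area to 48 bytes, keeping the
      // slots of the even-numbered registers 16-byte aligned.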
19923 if (Idx % 2) {
19925 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19926 VarArgsSaveSize += XLenInBytes;
19927 }
19928
19929 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19930
19931 // Copy the integer registers that may have been used for passing varargs
19932 // to the vararg save area.
19933 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19934 const Register Reg = RegInfo.createVirtualRegister(RC);
19935 RegInfo.addLiveIn(ArgRegs[I], Reg);
19936 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19937 SDValue Store = DAG.getStore(
19938 Chain, DL, ArgValue, FIN,
19939 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19940 OutChains.push_back(Store);
19941 FIN =
19942 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19943 }
19944 }
19945
19946 // Record the frame index of the first variable argument
19947    // which is needed when lowering VASTART.
19948 RVFI->setVarArgsFrameIndex(FI);
19949 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19950 }
19951
19952 // All stores are grouped in one node to allow the matching between
19953 // the size of Ins and InVals. This only happens for vararg functions.
19954 if (!OutChains.empty()) {
19955 OutChains.push_back(Chain);
19956 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19957 }
19958
19959 return Chain;
19960}
19961
19962/// isEligibleForTailCallOptimization - Check whether the call is eligible
19963/// for tail call optimization.
19964/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19965bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19966 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19967 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19968
19969 auto CalleeCC = CLI.CallConv;
19970 auto &Outs = CLI.Outs;
19971 auto &Caller = MF.getFunction();
19972 auto CallerCC = Caller.getCallingConv();
19973
19974 // Exception-handling functions need a special set of instructions to
19975 // indicate a return to the hardware. Tail-calling another function would
19976 // probably break this.
19977 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19978 // should be expanded as new function attributes are introduced.
19979 if (Caller.hasFnAttribute("interrupt"))
19980 return false;
19981
19982 // Do not tail call opt if the stack is used to pass parameters.
19983 if (CCInfo.getStackSize() != 0)
19984 return false;
19985
19986 // Do not tail call opt if any parameters need to be passed indirectly.
19987 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19988 // passed indirectly. So the address of the value will be passed in a
19989 // register, or if not available, then the address is put on the stack. In
19990 // order to pass indirectly, space on the stack often needs to be allocated
19991  // in order to store the value. In this case the CCInfo.getStackSize() != 0
19992  // check is not enough and we also need to check whether any CCValAssign in
19993  // ArgLocs is passed CCValAssign::Indirect.
19994 for (auto &VA : ArgLocs)
19995 if (VA.getLocInfo() == CCValAssign::Indirect)
19996 return false;
19997
19998 // Do not tail call opt if either caller or callee uses struct return
19999 // semantics.
20000 auto IsCallerStructRet = Caller.hasStructRetAttr();
20001 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20002 if (IsCallerStructRet || IsCalleeStructRet)
20003 return false;
20004
20005 // The callee has to preserve all registers the caller needs to preserve.
20006 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20007 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20008 if (CalleeCC != CallerCC) {
20009 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20010 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20011 return false;
20012 }
20013
20014 // Byval parameters hand the function a pointer directly into the stack area
20015 // we want to reuse during a tail call. Working around this *is* possible
20016 // but less efficient and uglier in LowerCall.
20017 for (auto &Arg : Outs)
20018 if (Arg.Flags.isByVal())
20019 return false;
20020
20021 return true;
20022}
20023
20024static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20025 return DAG.getDataLayout().getPrefTypeAlign(
20026 VT.getTypeForEVT(*DAG.getContext()));
20027}
20028
20029// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20030// and output parameter nodes.
20031SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20032 SmallVectorImpl<SDValue> &InVals) const {
20033 SelectionDAG &DAG = CLI.DAG;
20034 SDLoc &DL = CLI.DL;
20036 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20038 SDValue Chain = CLI.Chain;
20039 SDValue Callee = CLI.Callee;
20040 bool &IsTailCall = CLI.IsTailCall;
20041 CallingConv::ID CallConv = CLI.CallConv;
20042 bool IsVarArg = CLI.IsVarArg;
20043 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20044 MVT XLenVT = Subtarget.getXLenVT();
20045
20047
20048 // Analyze the operands of the call, assigning locations to each operand.
20050 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20051
20052 if (CallConv == CallingConv::GHC) {
20053 if (Subtarget.hasStdExtE())
20054 report_fatal_error("GHC calling convention is not supported on RVE!");
20055 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20056 } else
20057 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20059 : CC_RISCV);
20060
20061 // Check if it's really possible to do a tail call.
20062 if (IsTailCall)
20063 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20064
20065 if (IsTailCall)
20066 ++NumTailCalls;
20067 else if (CLI.CB && CLI.CB->isMustTailCall())
20068 report_fatal_error("failed to perform tail call elimination on a call "
20069 "site marked musttail");
20070
20071 // Get a count of how many bytes are to be pushed on the stack.
20072 unsigned NumBytes = ArgCCInfo.getStackSize();
20073
20074 // Create local copies for byval args
20075 SmallVector<SDValue, 8> ByValArgs;
20076 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20077 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20078 if (!Flags.isByVal())
20079 continue;
20080
20081 SDValue Arg = OutVals[i];
20082 unsigned Size = Flags.getByValSize();
20083 Align Alignment = Flags.getNonZeroByValAlign();
20084
20085 int FI =
20086 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20087 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20088 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20089
20090 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20091 /*IsVolatile=*/false,
20092 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20094 ByValArgs.push_back(FIPtr);
20095 }
20096
20097 if (!IsTailCall)
20098 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20099
20100 // Copy argument values to their designated locations.
20102 SmallVector<SDValue, 8> MemOpChains;
20103 SDValue StackPtr;
20104 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20105 ++i, ++OutIdx) {
20106 CCValAssign &VA = ArgLocs[i];
20107 SDValue ArgValue = OutVals[OutIdx];
20108 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20109
20110 // Handle passing f64 on RV32D with a soft float ABI as a special case.
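    // For example (illustrative, ilp32): the low word goes in the assigned
    // GPR (say a0) and the high word goes in the next GPR (a1), or onto the
    // stack if no GPR is left for it.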
20111 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20112 assert(VA.isRegLoc() && "Expected register VA assignment");
20113 assert(VA.needsCustom());
20114 SDValue SplitF64 = DAG.getNode(
20115 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20116 SDValue Lo = SplitF64.getValue(0);
20117 SDValue Hi = SplitF64.getValue(1);
20118
20119 Register RegLo = VA.getLocReg();
20120 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20121
20122 // Get the CCValAssign for the Hi part.
20123 CCValAssign &HiVA = ArgLocs[++i];
20124
20125 if (HiVA.isMemLoc()) {
20126 // Second half of f64 is passed on the stack.
20127 if (!StackPtr.getNode())
20128 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20130 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20131 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20132 // Emit the store.
20133 MemOpChains.push_back(DAG.getStore(
20134 Chain, DL, Hi, Address,
20136 } else {
20137 // Second half of f64 is passed in another GPR.
20138 Register RegHigh = HiVA.getLocReg();
20139 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20140 }
20141 continue;
20142 }
20143
20144 // Promote the value if needed.
20145 // For now, only handle fully promoted and indirect arguments.
20146 if (VA.getLocInfo() == CCValAssign::Indirect) {
20147 // Store the argument in a stack slot and pass its address.
20148 Align StackAlign =
20149 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20150 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20151 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20152 // If the original argument was split (e.g. i128), we need
20153 // to store the required parts of it here (and pass just one address).
20154 // Vectors may be partly split to registers and partly to the stack, in
20155 // which case the base address is partly offset and subsequent stores are
20156 // relative to that.
20157 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20158 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20159 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20160      // Calculate the total size to store. We don't know everything we will
20161      // store up front, so walk the remaining parts and collect their sizes
20162      // and alignments.
20164 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20165 SDValue PartValue = OutVals[OutIdx + 1];
20166 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20167 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20168 EVT PartVT = PartValue.getValueType();
20169 if (PartVT.isScalableVector())
20170 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20171 StoredSize += PartVT.getStoreSize();
20172 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20173 Parts.push_back(std::make_pair(PartValue, Offset));
20174 ++i;
20175 ++OutIdx;
20176 }
20177 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20178 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20179 MemOpChains.push_back(
20180 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20182 for (const auto &Part : Parts) {
20183 SDValue PartValue = Part.first;
20184 SDValue PartOffset = Part.second;
20186 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20187 MemOpChains.push_back(
20188 DAG.getStore(Chain, DL, PartValue, Address,
20190 }
20191 ArgValue = SpillSlot;
20192 } else {
20193 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20194 }
20195
20196 // Use local copy if it is a byval arg.
20197 if (Flags.isByVal())
20198 ArgValue = ByValArgs[j++];
20199
20200 if (VA.isRegLoc()) {
20201 // Queue up the argument copies and emit them at the end.
20202 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20203 } else {
20204 assert(VA.isMemLoc() && "Argument not register or memory");
20205 assert(!IsTailCall && "Tail call not allowed if stack is used "
20206 "for passing parameters");
20207
20208 // Work out the address of the stack slot.
20209 if (!StackPtr.getNode())
20210 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20212 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20214
20215 // Emit the store.
20216 MemOpChains.push_back(
20217 DAG.getStore(Chain, DL, ArgValue, Address,
20219 }
20220 }
20221
20222 // Join the stores, which are independent of one another.
20223 if (!MemOpChains.empty())
20224 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20225
20226 SDValue Glue;
20227
20228 // Build a sequence of copy-to-reg nodes, chained and glued together.
20229 for (auto &Reg : RegsToPass) {
20230 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20231 Glue = Chain.getValue(1);
20232 }
20233
20234 // Validate that none of the argument registers have been marked as
20235  // reserved; if so, report an error. Do the same for the return address if
20236  // this is not a tail call.
20237 validateCCReservedRegs(RegsToPass, MF);
20238 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20240 MF.getFunction(),
20241 "Return address register required, but has been reserved."});
20242
20243 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20244 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20245 // split it and then direct call can be matched by PseudoCALL.
20246 bool CalleeIsLargeExternalSymbol = false;
20247 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20248 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20249 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20250 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20251 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20252 CalleeIsLargeExternalSymbol = true;
20253 }
20254 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20255 const GlobalValue *GV = S->getGlobal();
20256 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20257 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20258 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20259 }
20260
20261 // The first call operand is the chain and the second is the target address.
20262 SmallVector<SDValue, 8> Ops;
20263 Ops.push_back(Chain);
20264 Ops.push_back(Callee);
20265
20266 // Add argument registers to the end of the list so that they are
20267 // known live into the call.
20268 for (auto &Reg : RegsToPass)
20269 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20270
20271 if (!IsTailCall) {
20272 // Add a register mask operand representing the call-preserved registers.
20273 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20274 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20275 assert(Mask && "Missing call preserved mask for calling convention");
20276 Ops.push_back(DAG.getRegisterMask(Mask));
20277 }
20278
20279 // Glue the call to the argument copies, if any.
20280 if (Glue.getNode())
20281 Ops.push_back(Glue);
20282
20283 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20284 "Unexpected CFI type for a direct call");
20285
20286 // Emit the call.
20287 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20288
20289 // Use a software guarded branch for large code model non-indirect calls.
20290 // A tail call to an external symbol will have a null CLI.CB, so we need
20291 // another way to determine the callsite type.
20292 bool NeedSWGuarded = false;
20293 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20294 Subtarget.hasStdExtZicfilp() &&
20295 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20296 NeedSWGuarded = true;
20297
20298 if (IsTailCall) {
20299 MF.getFrameInfo().setHasTailCall();
20300 unsigned CallOpc =
20301 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20302 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20303 if (CLI.CFIType)
20304 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20305 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20306 return Ret;
20307 }
20308
20309 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20310 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20311 if (CLI.CFIType)
20312 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20313 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20314 Glue = Chain.getValue(1);
20315
20316 // Mark the end of the call, which is glued to the call itself.
20317 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20318 Glue = Chain.getValue(1);
20319
20320 // Assign locations to each value returned by this call.
20321 SmallVector<CCValAssign, 16> RVLocs;
20322 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20323 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20324
20325 // Copy all of the result registers out of their specified physreg.
20326 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20327 auto &VA = RVLocs[i];
20328 // Copy the value out
20329 SDValue RetValue =
20330 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20331 // Glue the RetValue to the end of the call sequence
20332 Chain = RetValue.getValue(1);
20333 Glue = RetValue.getValue(2);
20334
20335 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20336 assert(VA.needsCustom());
20337 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20338 MVT::i32, Glue);
20339 Chain = RetValue2.getValue(1);
20340 Glue = RetValue2.getValue(2);
20341 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20342 RetValue2);
20343 } else
20344 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20345
20346 InVals.push_back(RetValue);
20347 }
20348
20349 return Chain;
20350}
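
// Illustrative sketch, not from this file: the RV32 soft-float path above
// receives an f64 call result as two 32-bit register halves and recombines
// them with RISCVISD::BuildPairF64. At the bit level that pairing is
// equivalent to the standalone helpers below; the helper names are made up
// for illustration only.
#include <cstdint>
#include <cstring>
#include <utility>

static std::pair<uint32_t, uint32_t> splitF64Bits(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  // Low half travels in the first register, high half in the second.
  return {static_cast<uint32_t>(Bits), static_cast<uint32_t>(Bits >> 32)};
}

static double buildPairF64Bits(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}
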
20351
20352bool RISCVTargetLowering::CanLowerReturn(
20353 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20354 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20355 SmallVector<CCValAssign, 16> RVLocs;
20356 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20357
20358 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20359 MVT VT = Outs[i].VT;
20360 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20361 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20362 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20363 return false;
20364 }
20365 return true;
20366}
20367
20368SDValue
20369RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20370 bool IsVarArg,
20371 const SmallVectorImpl<ISD::OutputArg> &Outs,
20372 const SmallVectorImpl<SDValue> &OutVals,
20373 const SDLoc &DL, SelectionDAG &DAG) const {
20374 MachineFunction &MF = DAG.getMachineFunction();
20375 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20376
20377 // Stores the assignment of the return value to a location.
20378 SmallVector<CCValAssign, 16> RVLocs;
20379
20380 // Info about the registers and stack slot.
20381 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20382 *DAG.getContext());
20383
20384 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20385 nullptr, CC_RISCV);
20386
20387 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20388 report_fatal_error("GHC functions return void only");
20389
20390 SDValue Glue;
20391 SmallVector<SDValue, 4> RetOps(1, Chain);
20392
20393 // Copy the result values into the output registers.
20394 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20395 SDValue Val = OutVals[OutIdx];
20396 CCValAssign &VA = RVLocs[i];
20397 assert(VA.isRegLoc() && "Can only return in registers!");
20398
20399 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20400 // Handle returning f64 on RV32D with a soft float ABI.
20401 assert(VA.isRegLoc() && "Expected return via registers");
20402 assert(VA.needsCustom());
20403 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20404 DAG.getVTList(MVT::i32, MVT::i32), Val);
20405 SDValue Lo = SplitF64.getValue(0);
20406 SDValue Hi = SplitF64.getValue(1);
20407 Register RegLo = VA.getLocReg();
20408 Register RegHi = RVLocs[++i].getLocReg();
20409
20410 if (STI.isRegisterReservedByUser(RegLo) ||
20411 STI.isRegisterReservedByUser(RegHi))
20412 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20413 MF.getFunction(),
20414 "Return value register required, but has been reserved."});
20415
20416 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20417 Glue = Chain.getValue(1);
20418 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20419 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20420 Glue = Chain.getValue(1);
20421 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20422 } else {
20423 // Handle a 'normal' return.
20424 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20425 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20426
20427 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20428 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20429 MF.getFunction(),
20430 "Return value register required, but has been reserved."});
20431
20432 // Guarantee that all emitted copies are stuck together.
20433 Glue = Chain.getValue(1);
20434 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20435 }
20436 }
20437
20438 RetOps[0] = Chain; // Update chain.
20439
20440 // Add the glue node if we have it.
20441 if (Glue.getNode()) {
20442 RetOps.push_back(Glue);
20443 }
20444
20445 if (any_of(RVLocs,
20446 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20447 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20448
20449 unsigned RetOpc = RISCVISD::RET_GLUE;
20450 // Interrupt service routines use different return instructions.
20451 const Function &Func = DAG.getMachineFunction().getFunction();
20452 if (Func.hasFnAttribute("interrupt")) {
20453 if (!Func.getReturnType()->isVoidTy())
20454 report_fatal_error(
20455 "Functions with the interrupt attribute must have void return type!");
20456
20458 StringRef Kind =
20459 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20460
20461 if (Kind == "supervisor")
20462 RetOpc = RISCVISD::SRET_GLUE;
20463 else
20464 RetOpc = RISCVISD::MRET_GLUE;
20465 }
20466
20467 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20468}
20469
20470void RISCVTargetLowering::validateCCReservedRegs(
20471 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20472 MachineFunction &MF) const {
20473 const Function &F = MF.getFunction();
20474 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20475
20476 if (llvm::any_of(Regs, [&STI](auto Reg) {
20477 return STI.isRegisterReservedByUser(Reg.first);
20478 }))
20479 F.getContext().diagnose(DiagnosticInfoUnsupported{
20480 F, "Argument register required, but has been reserved."});
20481}
20482
20483// Check if the result of the node is only used as a return value, as
20484 // otherwise we can't perform a tail-call.
20485bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20486 if (N->getNumValues() != 1)
20487 return false;
20488 if (!N->hasNUsesOfValue(1, 0))
20489 return false;
20490
20491 SDNode *Copy = *N->user_begin();
20492
20493 if (Copy->getOpcode() == ISD::BITCAST) {
20494 return isUsedByReturnOnly(Copy, Chain);
20495 }
20496
20497 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20498 // with soft float ABIs.
20499 if (Copy->getOpcode() != ISD::CopyToReg) {
20500 return false;
20501 }
20502
20503 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20504 // isn't safe to perform a tail call.
20505 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20506 return false;
20507
20508 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20509 bool HasRet = false;
20510 for (SDNode *Node : Copy->users()) {
20511 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20512 return false;
20513 HasRet = true;
20514 }
20515 if (!HasRet)
20516 return false;
20517
20518 Chain = Copy->getOperand(0);
20519 return true;
20520}
20521
20522bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20523 return CI->isTailCall();
20524}
20525
20526const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20527#define NODE_NAME_CASE(NODE) \
20528 case RISCVISD::NODE: \
20529 return "RISCVISD::" #NODE;
20530 // clang-format off
20531 switch ((RISCVISD::NodeType)Opcode) {
20532 case RISCVISD::FIRST_NUMBER:
20533 break;
20534 NODE_NAME_CASE(RET_GLUE)
20535 NODE_NAME_CASE(SRET_GLUE)
20536 NODE_NAME_CASE(MRET_GLUE)
20537 NODE_NAME_CASE(CALL)
20538 NODE_NAME_CASE(TAIL)
20539 NODE_NAME_CASE(SELECT_CC)
20540 NODE_NAME_CASE(BR_CC)
20541 NODE_NAME_CASE(BuildGPRPair)
20542 NODE_NAME_CASE(SplitGPRPair)
20543 NODE_NAME_CASE(BuildPairF64)
20544 NODE_NAME_CASE(SplitF64)
20545 NODE_NAME_CASE(ADD_LO)
20546 NODE_NAME_CASE(HI)
20547 NODE_NAME_CASE(LLA)
20548 NODE_NAME_CASE(ADD_TPREL)
20549 NODE_NAME_CASE(MULHSU)
20550 NODE_NAME_CASE(SHL_ADD)
20551 NODE_NAME_CASE(SLLW)
20552 NODE_NAME_CASE(SRAW)
20553 NODE_NAME_CASE(SRLW)
20554 NODE_NAME_CASE(DIVW)
20555 NODE_NAME_CASE(DIVUW)
20556 NODE_NAME_CASE(REMUW)
20557 NODE_NAME_CASE(ROLW)
20558 NODE_NAME_CASE(RORW)
20559 NODE_NAME_CASE(CLZW)
20560 NODE_NAME_CASE(CTZW)
20561 NODE_NAME_CASE(ABSW)
20562 NODE_NAME_CASE(FMV_H_X)
20563 NODE_NAME_CASE(FMV_X_ANYEXTH)
20564 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20565 NODE_NAME_CASE(FMV_W_X_RV64)
20566 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20567 NODE_NAME_CASE(FCVT_X)
20568 NODE_NAME_CASE(FCVT_XU)
20569 NODE_NAME_CASE(FCVT_W_RV64)
20570 NODE_NAME_CASE(FCVT_WU_RV64)
20571 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20572 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20573 NODE_NAME_CASE(FROUND)
20574 NODE_NAME_CASE(FCLASS)
20575 NODE_NAME_CASE(FSGNJX)
20576 NODE_NAME_CASE(FMAX)
20577 NODE_NAME_CASE(FMIN)
20578 NODE_NAME_CASE(FLI)
20579 NODE_NAME_CASE(READ_COUNTER_WIDE)
20580 NODE_NAME_CASE(BREV8)
20581 NODE_NAME_CASE(ORC_B)
20582 NODE_NAME_CASE(ZIP)
20583 NODE_NAME_CASE(UNZIP)
20584 NODE_NAME_CASE(CLMUL)
20585 NODE_NAME_CASE(CLMULH)
20586 NODE_NAME_CASE(CLMULR)
20587 NODE_NAME_CASE(MOPR)
20588 NODE_NAME_CASE(MOPRR)
20589 NODE_NAME_CASE(SHA256SIG0)
20590 NODE_NAME_CASE(SHA256SIG1)
20591 NODE_NAME_CASE(SHA256SUM0)
20592 NODE_NAME_CASE(SHA256SUM1)
20593 NODE_NAME_CASE(SM4KS)
20594 NODE_NAME_CASE(SM4ED)
20595 NODE_NAME_CASE(SM3P0)
20596 NODE_NAME_CASE(SM3P1)
20597 NODE_NAME_CASE(TH_LWD)
20598 NODE_NAME_CASE(TH_LWUD)
20599 NODE_NAME_CASE(TH_LDD)
20600 NODE_NAME_CASE(TH_SWD)
20601 NODE_NAME_CASE(TH_SDD)
20602 NODE_NAME_CASE(VMV_V_V_VL)
20603 NODE_NAME_CASE(VMV_V_X_VL)
20604 NODE_NAME_CASE(VFMV_V_F_VL)
20605 NODE_NAME_CASE(VMV_X_S)
20606 NODE_NAME_CASE(VMV_S_X_VL)
20607 NODE_NAME_CASE(VFMV_S_F_VL)
20608 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20609 NODE_NAME_CASE(READ_VLENB)
20610 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20611 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20612 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20613 NODE_NAME_CASE(VSLIDEUP_VL)
20614 NODE_NAME_CASE(VSLIDE1UP_VL)
20615 NODE_NAME_CASE(VSLIDEDOWN_VL)
20616 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20617 NODE_NAME_CASE(VFSLIDE1UP_VL)
20618 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20619 NODE_NAME_CASE(VID_VL)
20620 NODE_NAME_CASE(VFNCVT_ROD_VL)
20621 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20622 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20623 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20624 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20625 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20626 NODE_NAME_CASE(VECREDUCE_AND_VL)
20627 NODE_NAME_CASE(VECREDUCE_OR_VL)
20628 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20629 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20630 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20631 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20632 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20633 NODE_NAME_CASE(ADD_VL)
20634 NODE_NAME_CASE(AND_VL)
20635 NODE_NAME_CASE(MUL_VL)
20636 NODE_NAME_CASE(OR_VL)
20637 NODE_NAME_CASE(SDIV_VL)
20638 NODE_NAME_CASE(SHL_VL)
20639 NODE_NAME_CASE(SREM_VL)
20640 NODE_NAME_CASE(SRA_VL)
20641 NODE_NAME_CASE(SRL_VL)
20642 NODE_NAME_CASE(ROTL_VL)
20643 NODE_NAME_CASE(ROTR_VL)
20644 NODE_NAME_CASE(SUB_VL)
20645 NODE_NAME_CASE(UDIV_VL)
20646 NODE_NAME_CASE(UREM_VL)
20647 NODE_NAME_CASE(XOR_VL)
20648 NODE_NAME_CASE(AVGFLOORS_VL)
20649 NODE_NAME_CASE(AVGFLOORU_VL)
20650 NODE_NAME_CASE(AVGCEILS_VL)
20651 NODE_NAME_CASE(AVGCEILU_VL)
20652 NODE_NAME_CASE(SADDSAT_VL)
20653 NODE_NAME_CASE(UADDSAT_VL)
20654 NODE_NAME_CASE(SSUBSAT_VL)
20655 NODE_NAME_CASE(USUBSAT_VL)
20656 NODE_NAME_CASE(FADD_VL)
20657 NODE_NAME_CASE(FSUB_VL)
20658 NODE_NAME_CASE(FMUL_VL)
20659 NODE_NAME_CASE(FDIV_VL)
20660 NODE_NAME_CASE(FNEG_VL)
20661 NODE_NAME_CASE(FABS_VL)
20662 NODE_NAME_CASE(FSQRT_VL)
20663 NODE_NAME_CASE(FCLASS_VL)
20664 NODE_NAME_CASE(VFMADD_VL)
20665 NODE_NAME_CASE(VFNMADD_VL)
20666 NODE_NAME_CASE(VFMSUB_VL)
20667 NODE_NAME_CASE(VFNMSUB_VL)
20668 NODE_NAME_CASE(VFWMADD_VL)
20669 NODE_NAME_CASE(VFWNMADD_VL)
20670 NODE_NAME_CASE(VFWMSUB_VL)
20671 NODE_NAME_CASE(VFWNMSUB_VL)
20672 NODE_NAME_CASE(FCOPYSIGN_VL)
20673 NODE_NAME_CASE(SMIN_VL)
20674 NODE_NAME_CASE(SMAX_VL)
20675 NODE_NAME_CASE(UMIN_VL)
20676 NODE_NAME_CASE(UMAX_VL)
20677 NODE_NAME_CASE(BITREVERSE_VL)
20678 NODE_NAME_CASE(BSWAP_VL)
20679 NODE_NAME_CASE(CTLZ_VL)
20680 NODE_NAME_CASE(CTTZ_VL)
20681 NODE_NAME_CASE(CTPOP_VL)
20682 NODE_NAME_CASE(VFMIN_VL)
20683 NODE_NAME_CASE(VFMAX_VL)
20684 NODE_NAME_CASE(MULHS_VL)
20685 NODE_NAME_CASE(MULHU_VL)
20686 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20687 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20688 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20689 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20690 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20691 NODE_NAME_CASE(SINT_TO_FP_VL)
20692 NODE_NAME_CASE(UINT_TO_FP_VL)
20693 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20694 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20695 NODE_NAME_CASE(FP_EXTEND_VL)
20696 NODE_NAME_CASE(FP_ROUND_VL)
20697 NODE_NAME_CASE(STRICT_FADD_VL)
20698 NODE_NAME_CASE(STRICT_FSUB_VL)
20699 NODE_NAME_CASE(STRICT_FMUL_VL)
20700 NODE_NAME_CASE(STRICT_FDIV_VL)
20701 NODE_NAME_CASE(STRICT_FSQRT_VL)
20702 NODE_NAME_CASE(STRICT_VFMADD_VL)
20703 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20704 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20705 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20706 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20707 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20708 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20709 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20710 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20711 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20712 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20713 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20714 NODE_NAME_CASE(STRICT_FSETCC_VL)
20715 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20716 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20717 NODE_NAME_CASE(VWMUL_VL)
20718 NODE_NAME_CASE(VWMULU_VL)
20719 NODE_NAME_CASE(VWMULSU_VL)
20720 NODE_NAME_CASE(VWADD_VL)
20721 NODE_NAME_CASE(VWADDU_VL)
20722 NODE_NAME_CASE(VWSUB_VL)
20723 NODE_NAME_CASE(VWSUBU_VL)
20724 NODE_NAME_CASE(VWADD_W_VL)
20725 NODE_NAME_CASE(VWADDU_W_VL)
20726 NODE_NAME_CASE(VWSUB_W_VL)
20727 NODE_NAME_CASE(VWSUBU_W_VL)
20728 NODE_NAME_CASE(VWSLL_VL)
20729 NODE_NAME_CASE(VFWMUL_VL)
20730 NODE_NAME_CASE(VFWADD_VL)
20731 NODE_NAME_CASE(VFWSUB_VL)
20732 NODE_NAME_CASE(VFWADD_W_VL)
20733 NODE_NAME_CASE(VFWSUB_W_VL)
20734 NODE_NAME_CASE(VWMACC_VL)
20735 NODE_NAME_CASE(VWMACCU_VL)
20736 NODE_NAME_CASE(VWMACCSU_VL)
20737 NODE_NAME_CASE(SETCC_VL)
20738 NODE_NAME_CASE(VMERGE_VL)
20739 NODE_NAME_CASE(VMAND_VL)
20740 NODE_NAME_CASE(VMOR_VL)
20741 NODE_NAME_CASE(VMXOR_VL)
20742 NODE_NAME_CASE(VMCLR_VL)
20743 NODE_NAME_CASE(VMSET_VL)
20744 NODE_NAME_CASE(VRGATHER_VX_VL)
20745 NODE_NAME_CASE(VRGATHER_VV_VL)
20746 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20747 NODE_NAME_CASE(VSEXT_VL)
20748 NODE_NAME_CASE(VZEXT_VL)
20749 NODE_NAME_CASE(VCPOP_VL)
20750 NODE_NAME_CASE(VFIRST_VL)
20751 NODE_NAME_CASE(READ_CSR)
20752 NODE_NAME_CASE(WRITE_CSR)
20753 NODE_NAME_CASE(SWAP_CSR)
20754 NODE_NAME_CASE(CZERO_EQZ)
20755 NODE_NAME_CASE(CZERO_NEZ)
20756 NODE_NAME_CASE(SW_GUARDED_BRIND)
20757 NODE_NAME_CASE(SW_GUARDED_CALL)
20758 NODE_NAME_CASE(SW_GUARDED_TAIL)
20759 NODE_NAME_CASE(TUPLE_INSERT)
20760 NODE_NAME_CASE(TUPLE_EXTRACT)
20761 NODE_NAME_CASE(SF_VC_XV_SE)
20762 NODE_NAME_CASE(SF_VC_IV_SE)
20763 NODE_NAME_CASE(SF_VC_VV_SE)
20764 NODE_NAME_CASE(SF_VC_FV_SE)
20765 NODE_NAME_CASE(SF_VC_XVV_SE)
20766 NODE_NAME_CASE(SF_VC_IVV_SE)
20767 NODE_NAME_CASE(SF_VC_VVV_SE)
20768 NODE_NAME_CASE(SF_VC_FVV_SE)
20769 NODE_NAME_CASE(SF_VC_XVW_SE)
20770 NODE_NAME_CASE(SF_VC_IVW_SE)
20771 NODE_NAME_CASE(SF_VC_VVW_SE)
20772 NODE_NAME_CASE(SF_VC_FVW_SE)
20773 NODE_NAME_CASE(SF_VC_V_X_SE)
20774 NODE_NAME_CASE(SF_VC_V_I_SE)
20775 NODE_NAME_CASE(SF_VC_V_XV_SE)
20776 NODE_NAME_CASE(SF_VC_V_IV_SE)
20777 NODE_NAME_CASE(SF_VC_V_VV_SE)
20778 NODE_NAME_CASE(SF_VC_V_FV_SE)
20779 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20780 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20781 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20782 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20783 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20784 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20785 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20786 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20787 }
20788 // clang-format on
20789 return nullptr;
20790#undef NODE_NAME_CASE
20791}
20792
20793/// getConstraintType - Given a constraint letter, return the type of
20794/// constraint it is for this target.
20795RISCVTargetLowering::ConstraintType
20796RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20797 if (Constraint.size() == 1) {
20798 switch (Constraint[0]) {
20799 default:
20800 break;
20801 case 'f':
20802 case 'R':
20803 return C_RegisterClass;
20804 case 'I':
20805 case 'J':
20806 case 'K':
20807 return C_Immediate;
20808 case 'A':
20809 return C_Memory;
20810 case 's':
20811 case 'S': // A symbolic address
20812 return C_Other;
20813 }
20814 } else {
20815 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
20816 return C_RegisterClass;
20817 if (Constraint == "cr" || Constraint == "cf")
20818 return C_RegisterClass;
20819 }
20820 return TargetLowering::getConstraintType(Constraint);
20821}
20822
20823std::pair<unsigned, const TargetRegisterClass *>
20825 StringRef Constraint,
20826 MVT VT) const {
20827 // First, see if this is a constraint that directly corresponds to a RISC-V
20828 // register class.
20829 if (Constraint.size() == 1) {
20830 switch (Constraint[0]) {
20831 case 'r':
20832 // TODO: Support fixed vectors up to XLen for P extension?
20833 if (VT.isVector())
20834 break;
20835 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20836 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20837 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20838 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20839 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20840 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20841 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20842 case 'f':
20843 if (VT == MVT::f16) {
20844 if (Subtarget.hasStdExtZfhmin())
20845 return std::make_pair(0U, &RISCV::FPR16RegClass);
20846 if (Subtarget.hasStdExtZhinxmin())
20847 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20848 } else if (VT == MVT::f32) {
20849 if (Subtarget.hasStdExtF())
20850 return std::make_pair(0U, &RISCV::FPR32RegClass);
20851 if (Subtarget.hasStdExtZfinx())
20852 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20853 } else if (VT == MVT::f64) {
20854 if (Subtarget.hasStdExtD())
20855 return std::make_pair(0U, &RISCV::FPR64RegClass);
20856 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20857 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20858 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20859 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20860 }
20861 break;
20862 case 'R':
20863 if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
20864 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20865 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20866 default:
20867 break;
20868 }
20869 } else if (Constraint == "vr") {
20870 for (const auto *RC :
20871 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
20872 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
20873 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
20874 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
20875 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
20876 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
20877 &RISCV::VRN2M4RegClass}) {
20878 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20879 return std::make_pair(0U, RC);
20880 }
20881 } else if (Constraint == "vd") {
20882 for (const auto *RC :
20883 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
20884 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
20885 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
20886 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
20887 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
20888 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
20889 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
20890 &RISCV::VRN2M4NoV0RegClass}) {
20891 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20892 return std::make_pair(0U, RC);
20893 }
20894 } else if (Constraint == "vm") {
20895 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20896 return std::make_pair(0U, &RISCV::VMV0RegClass);
20897 } else if (Constraint == "cr") {
20898 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20899 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20900 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20901 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20902 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20903 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20904 if (!VT.isVector())
20905 return std::make_pair(0U, &RISCV::GPRCRegClass);
20906 } else if (Constraint == "cf") {
20907 if (VT == MVT::f16) {
20908 if (Subtarget.hasStdExtZfhmin())
20909 return std::make_pair(0U, &RISCV::FPR16CRegClass);
20910 if (Subtarget.hasStdExtZhinxmin())
20911 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20912 } else if (VT == MVT::f32) {
20913 if (Subtarget.hasStdExtF())
20914 return std::make_pair(0U, &RISCV::FPR32CRegClass);
20915 if (Subtarget.hasStdExtZfinx())
20916 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20917 } else if (VT == MVT::f64) {
20918 if (Subtarget.hasStdExtD())
20919 return std::make_pair(0U, &RISCV::FPR64CRegClass);
20920 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20921 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20922 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20923 return std::make_pair(0U, &RISCV::GPRCRegClass);
20924 }
20925 }
20926
20927 // Clang will correctly decode the usage of register name aliases into their
20928 // official names. However, other frontends like `rustc` do not. This allows
20929 // users of these frontends to use the ABI names for registers in LLVM-style
20930 // register constraints.
20931 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20932 .Case("{zero}", RISCV::X0)
20933 .Case("{ra}", RISCV::X1)
20934 .Case("{sp}", RISCV::X2)
20935 .Case("{gp}", RISCV::X3)
20936 .Case("{tp}", RISCV::X4)
20937 .Case("{t0}", RISCV::X5)
20938 .Case("{t1}", RISCV::X6)
20939 .Case("{t2}", RISCV::X7)
20940 .Cases("{s0}", "{fp}", RISCV::X8)
20941 .Case("{s1}", RISCV::X9)
20942 .Case("{a0}", RISCV::X10)
20943 .Case("{a1}", RISCV::X11)
20944 .Case("{a2}", RISCV::X12)
20945 .Case("{a3}", RISCV::X13)
20946 .Case("{a4}", RISCV::X14)
20947 .Case("{a5}", RISCV::X15)
20948 .Case("{a6}", RISCV::X16)
20949 .Case("{a7}", RISCV::X17)
20950 .Case("{s2}", RISCV::X18)
20951 .Case("{s3}", RISCV::X19)
20952 .Case("{s4}", RISCV::X20)
20953 .Case("{s5}", RISCV::X21)
20954 .Case("{s6}", RISCV::X22)
20955 .Case("{s7}", RISCV::X23)
20956 .Case("{s8}", RISCV::X24)
20957 .Case("{s9}", RISCV::X25)
20958 .Case("{s10}", RISCV::X26)
20959 .Case("{s11}", RISCV::X27)
20960 .Case("{t3}", RISCV::X28)
20961 .Case("{t4}", RISCV::X29)
20962 .Case("{t5}", RISCV::X30)
20963 .Case("{t6}", RISCV::X31)
20964 .Default(RISCV::NoRegister);
20965 if (XRegFromAlias != RISCV::NoRegister)
20966 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20967
20968 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20969 // TableGen record rather than the AsmName to choose registers for InlineAsm
20970 // constraints, plus we want to match those names to the widest floating point
20971 // register type available, manually select floating point registers here.
20972 //
20973 // The second case is the ABI name of the register, so that frontends can also
20974 // use the ABI names in register constraint lists.
20975 if (Subtarget.hasStdExtF()) {
20976 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20977 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20978 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20979 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20980 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20981 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20982 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20983 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20984 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20985 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20986 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20987 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20988 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20989 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20990 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20991 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20992 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20993 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20994 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20995 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20996 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20997 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20998 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20999 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21000 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21001 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21002 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21003 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21004 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21005 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21006 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21007 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21008 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21009 .Default(RISCV::NoRegister);
21010 if (FReg != RISCV::NoRegister) {
21011 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21012 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21013 unsigned RegNo = FReg - RISCV::F0_F;
21014 unsigned DReg = RISCV::F0_D + RegNo;
21015 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21016 }
21017 if (VT == MVT::f32 || VT == MVT::Other)
21018 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21019 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21020 unsigned RegNo = FReg - RISCV::F0_F;
21021 unsigned HReg = RISCV::F0_H + RegNo;
21022 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21023 }
21024 }
21025 }
21026
21027 if (Subtarget.hasVInstructions()) {
21028 Register VReg = StringSwitch<Register>(Constraint.lower())
21029 .Case("{v0}", RISCV::V0)
21030 .Case("{v1}", RISCV::V1)
21031 .Case("{v2}", RISCV::V2)
21032 .Case("{v3}", RISCV::V3)
21033 .Case("{v4}", RISCV::V4)
21034 .Case("{v5}", RISCV::V5)
21035 .Case("{v6}", RISCV::V6)
21036 .Case("{v7}", RISCV::V7)
21037 .Case("{v8}", RISCV::V8)
21038 .Case("{v9}", RISCV::V9)
21039 .Case("{v10}", RISCV::V10)
21040 .Case("{v11}", RISCV::V11)
21041 .Case("{v12}", RISCV::V12)
21042 .Case("{v13}", RISCV::V13)
21043 .Case("{v14}", RISCV::V14)
21044 .Case("{v15}", RISCV::V15)
21045 .Case("{v16}", RISCV::V16)
21046 .Case("{v17}", RISCV::V17)
21047 .Case("{v18}", RISCV::V18)
21048 .Case("{v19}", RISCV::V19)
21049 .Case("{v20}", RISCV::V20)
21050 .Case("{v21}", RISCV::V21)
21051 .Case("{v22}", RISCV::V22)
21052 .Case("{v23}", RISCV::V23)
21053 .Case("{v24}", RISCV::V24)
21054 .Case("{v25}", RISCV::V25)
21055 .Case("{v26}", RISCV::V26)
21056 .Case("{v27}", RISCV::V27)
21057 .Case("{v28}", RISCV::V28)
21058 .Case("{v29}", RISCV::V29)
21059 .Case("{v30}", RISCV::V30)
21060 .Case("{v31}", RISCV::V31)
21061 .Default(RISCV::NoRegister);
21062 if (VReg != RISCV::NoRegister) {
21063 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21064 return std::make_pair(VReg, &RISCV::VMRegClass);
21065 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21066 return std::make_pair(VReg, &RISCV::VRRegClass);
21067 for (const auto *RC :
21068 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21069 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21070 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21071 return std::make_pair(VReg, RC);
21072 }
21073 }
21074 }
21075 }
21076
21077 std::pair<Register, const TargetRegisterClass *> Res =
21078 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
21079
21080 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21081 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21082 // Subtarget into account.
21083 if (Res.second == &RISCV::GPRF16RegClass ||
21084 Res.second == &RISCV::GPRF32RegClass ||
21085 Res.second == &RISCV::GPRPairRegClass)
21086 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21087
21088 return Res;
21089}
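
// Illustrative sketch, not from this file: the StringSwitch tables above let
// an inline-asm constraint such as "{a0}" or "{fp}" resolve directly to the
// architectural register even when the frontend (e.g. rustc) does not
// canonicalize ABI names. A minimal standalone equivalent for a few GPRs;
// the function name and the abbreviated table are hypothetical.
#include <string>
#include <unordered_map>

static int gprNumberForConstraint(const std::string &Name) {
  static const std::unordered_map<std::string, int> Aliases = {
      {"{zero}", 0}, {"{ra}", 1},  {"{sp}", 2},  {"{s0}", 8},
      {"{fp}", 8},   {"{a0}", 10}, {"{a7}", 17}, {"{t6}", 31}};
  auto It = Aliases.find(Name);
  return It == Aliases.end() ? -1 : It->second; // -1: not a recognized alias
}
// gprNumberForConstraint("{fp}") and gprNumberForConstraint("{s0}") both map
// to x8, mirroring the .Cases("{s0}", "{fp}", RISCV::X8) entry above.
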
21090
21091InlineAsm::ConstraintCode RISCVTargetLowering::getInlineAsmMemConstraint(
21092 StringRef ConstraintCode) const {
21093 // Currently only support length 1 constraints.
21094 if (ConstraintCode.size() == 1) {
21095 switch (ConstraintCode[0]) {
21096 case 'A':
21097 return InlineAsm::ConstraintCode::A;
21098 default:
21099 break;
21100 }
21101 }
21102
21103 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21104}
21105
21106void RISCVTargetLowering::LowerAsmOperandForConstraint(
21107 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21108 SelectionDAG &DAG) const {
21109 // Currently only support length 1 constraints.
21110 if (Constraint.size() == 1) {
21111 switch (Constraint[0]) {
21112 case 'I':
21113 // Validate & create a 12-bit signed immediate operand.
21114 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21115 uint64_t CVal = C->getSExtValue();
21116 if (isInt<12>(CVal))
21117 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21118 Subtarget.getXLenVT()));
21119 }
21120 return;
21121 case 'J':
21122 // Validate & create an integer zero operand.
21123 if (isNullConstant(Op))
21124 Ops.push_back(
21125 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21126 return;
21127 case 'K':
21128 // Validate & create a 5-bit unsigned immediate operand.
21129 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21130 uint64_t CVal = C->getZExtValue();
21131 if (isUInt<5>(CVal))
21132 Ops.push_back(
21133 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21134 }
21135 return;
21136 case 'S':
21138 return;
21139 default:
21140 break;
21141 }
21142 }
21143 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21144}
21145
21146Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21147 Instruction *Inst,
21148 AtomicOrdering Ord) const {
21149 if (Subtarget.hasStdExtZtso()) {
21150 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21151 return Builder.CreateFence(Ord);
21152 return nullptr;
21153 }
21154
21155 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21156 return Builder.CreateFence(Ord);
21157 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21158 return Builder.CreateFence(AtomicOrdering::Release);
21159 return nullptr;
21160}
21161
21162Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21163 Instruction *Inst,
21164 AtomicOrdering Ord) const {
21165 if (Subtarget.hasStdExtZtso()) {
21166 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21167 return Builder.CreateFence(Ord);
21168 return nullptr;
21169 }
21170
21171 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21172 return Builder.CreateFence(AtomicOrdering::Acquire);
21173 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21174 Ord == AtomicOrdering::SequentiallyConsistent)
21175 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
21176 return nullptr;
21177}
21178
21179TargetLowering::AtomicExpansionKind
21180RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21181 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21182 // point operations can't be used in an lr/sc sequence without breaking the
21183 // forward-progress guarantee.
21184 if (AI->isFloatingPointOperation() ||
21190
21191 // Don't expand forced atomics, we want to have __sync libcalls instead.
21192 if (Subtarget.hasForcedAtomics())
21193 return AtomicExpansionKind::None;
21194
21195 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21196 if (AI->getOperation() == AtomicRMWInst::Nand) {
21197 if (Subtarget.hasStdExtZacas() &&
21198 (Size >= 32 || Subtarget.hasStdExtZabha()))
21199 return AtomicExpansionKind::CmpXChg;
21200 if (Size < 32)
21201 return AtomicExpansionKind::MaskedIntrinsic;
21202 }
21203
21204 if (Size < 32 && !Subtarget.hasStdExtZabha())
21205 return AtomicExpansionKind::MaskedIntrinsic;
21206
21207 return AtomicExpansionKind::None;
21208}
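
// Illustrative sketch, not from this file: under the rules above, an i8 or
// i16 atomicrmw is normally routed through the masked intrinsics (an aligned
// 32-bit LR/SC loop on the containing word) unless Zabha provides sub-word
// AMOs. A deliberately simplified standalone decision helper that ignores
// the Nand, floating-point, and forced-atomics special cases; the names are
// made up.
enum class ExpansionSketch { None, MaskedIntrinsic };

static ExpansionSketch pickRMWExpansionSketch(unsigned SizeInBits,
                                              bool HasZabha) {
  if (SizeInBits < 32 && !HasZabha)
    return ExpansionSketch::MaskedIntrinsic; // widen to a 32-bit LR/SC loop
  return ExpansionSketch::None;              // a native AMO can be selected
}
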
21209
21210static Intrinsic::ID
21211getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
21212 if (XLen == 32) {
21213 switch (BinOp) {
21214 default:
21215 llvm_unreachable("Unexpected AtomicRMW BinOp");
21216 case AtomicRMWInst::Xchg:
21217 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21218 case AtomicRMWInst::Add:
21219 return Intrinsic::riscv_masked_atomicrmw_add_i32;
21220 case AtomicRMWInst::Sub:
21221 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21222 case AtomicRMWInst::Nand:
21223 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21224 case AtomicRMWInst::Max:
21225 return Intrinsic::riscv_masked_atomicrmw_max_i32;
21226 case AtomicRMWInst::Min:
21227 return Intrinsic::riscv_masked_atomicrmw_min_i32;
21228 case AtomicRMWInst::UMax:
21229 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21230 case AtomicRMWInst::UMin:
21231 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21232 }
21233 }
21234
21235 if (XLen == 64) {
21236 switch (BinOp) {
21237 default:
21238 llvm_unreachable("Unexpected AtomicRMW BinOp");
21239 case AtomicRMWInst::Xchg:
21240 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21241 case AtomicRMWInst::Add:
21242 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21243 case AtomicRMWInst::Sub:
21244 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21245 case AtomicRMWInst::Nand:
21246 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21247 case AtomicRMWInst::Max:
21248 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21249 case AtomicRMWInst::Min:
21250 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21251 case AtomicRMWInst::UMax:
21252 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21253 case AtomicRMWInst::UMin:
21254 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21255 }
21256 }
21257
21258 llvm_unreachable("Unexpected XLen\n");
21259}
21260
21261Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
21262 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21263 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21264 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21265 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21266 // mask, as this produces better code than the LR/SC loop emitted by
21267 // int_riscv_masked_atomicrmw_xchg.
21268 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21269 isa<ConstantInt>(AI->getValOperand())) {
21270 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21271 if (CVal->isZero())
21272 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21273 Builder.CreateNot(Mask, "Inv_Mask"),
21274 AI->getAlign(), Ord);
21275 if (CVal->isMinusOne())
21276 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21277 AI->getAlign(), Ord);
21278 }
21279
21280 unsigned XLen = Subtarget.getXLen();
21281 Value *Ordering =
21282 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21283 Type *Tys[] = {AlignedAddr->getType()};
21284 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
21285 AI->getModule(),
21286 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
21287
21288 if (XLen == 64) {
21289 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21290 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21291 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21292 }
21293
21294 Value *Result;
21295
21296 // Must pass the shift amount needed to sign extend the loaded value prior
21297 // to performing a signed comparison for min/max. ShiftAmt is the number of
21298 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21299 // is the number of bits to left+right shift the value in order to
21300 // sign-extend.
21301 if (AI->getOperation() == AtomicRMWInst::Min ||
21302 AI->getOperation() == AtomicRMWInst::Max) {
21303 const DataLayout &DL = AI->getDataLayout();
21304 unsigned ValWidth =
21305 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21306 Value *SextShamt =
21307 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21308 Result = Builder.CreateCall(LrwOpScwLoop,
21309 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21310 } else {
21311 Result =
21312 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21313 }
21314
21315 if (XLen == 64)
21316 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21317 return Result;
21318}
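
// Illustrative sketch, not from this file: the shift amount passed above is
// XLen - ValWidth - ShiftAmt. For an i8 min/max whose byte sits at bit
// offset 8 of the aligned 32-bit word on RV32, that is 32 - 8 - 8 = 16, and
// a left shift by 16 followed by an arithmetic right shift by 16 keeps the
// field at its position while filling the bits above it with its sign bit.
// The helper name is made up; it assumes two's-complement arithmetic right
// shift, which holds on the targets in question.
#include <cstdint>

static int32_t signExtendFieldSketch(uint32_t Word, unsigned ShiftAmt,
                                     unsigned ValWidth, unsigned XLen = 32) {
  // Assumes ShiftAmt + ValWidth <= XLen.
  unsigned SextShamt = XLen - ValWidth - ShiftAmt;
  int32_t V = static_cast<int32_t>(Word << SextShamt);
  return V >> SextShamt;
}
// e.g. signExtendFieldSketch(0x0000FF00, /*ShiftAmt=*/8, /*ValWidth=*/8)
// yields the bit pattern 0xFFFFFF00: the 0xFF field stays at bits [15:8]
// and the bits above are now sign bits, so a full-register signed compare
// behaves like an 8-bit signed compare.
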
21319
21320TargetLowering::AtomicExpansionKind
21321RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
21322 AtomicCmpXchgInst *CI) const {
21323 // Don't expand forced atomics, we want to have __sync libcalls instead.
21324 if (Subtarget.hasForcedAtomics())
21325 return AtomicExpansionKind::None;
21326
21327 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
21328 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21329 (Size == 8 || Size == 16))
21330 return AtomicExpansionKind::MaskedIntrinsic;
21331 return AtomicExpansionKind::None;
21332}
21333
21334Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
21335 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21336 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21337 unsigned XLen = Subtarget.getXLen();
21338 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21339 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21340 if (XLen == 64) {
21341 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21342 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21343 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21344 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21345 }
21346 Type *Tys[] = {AlignedAddr->getType()};
21347 Value *Result = Builder.CreateIntrinsic(
21348 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21349 if (XLen == 64)
21350 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21351 return Result;
21352}
21353
21354bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
21355 EVT DataVT) const {
21356 // We have indexed loads for all supported EEW types. Indices are always
21357 // zero extended.
21358 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21359 isTypeLegal(Extend.getValueType()) &&
21360 isTypeLegal(Extend.getOperand(0).getValueType()) &&
21361 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21362}
21363
21364bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
21365 EVT VT) const {
21366 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21367 return false;
21368
21369 switch (FPVT.getSimpleVT().SimpleTy) {
21370 case MVT::f16:
21371 return Subtarget.hasStdExtZfhmin();
21372 case MVT::f32:
21373 return Subtarget.hasStdExtF();
21374 case MVT::f64:
21375 return Subtarget.hasStdExtD();
21376 default:
21377 return false;
21378 }
21379}
21380
21381unsigned RISCVTargetLowering::getJumpTableEncoding() const {
21382 // If we are using the small code model, we can reduce the size of each jump
21383 // table entry to 4 bytes.
21384 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21385 getTargetMachine().getCodeModel() == CodeModel::Small) {
21386 return MachineJumpTableInfo::EK_Custom32;
21387 }
21388 return TargetLowering::getJumpTableEncoding();
21389}
21390
21391const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21392 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21393 unsigned uid, MCContext &Ctx) const {
21394 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21395 getTargetMachine().getCodeModel() == CodeModel::Small);
21396 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21397}
21398
21399bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
21400 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21401 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21402 // a power of two as well.
21403 // FIXME: This doesn't work for zve32, but that's already broken
21404 // elsewhere for the same reason.
21405 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21406 static_assert(RISCV::RVVBitsPerBlock == 64,
21407 "RVVBitsPerBlock changed, audit needed");
21408 return true;
21409}
21410
21411bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
21412 SDValue &Offset,
21413 ISD::MemIndexedMode &AM,
21414 SelectionDAG &DAG) const {
21415 // Target does not support indexed loads.
21416 if (!Subtarget.hasVendorXTHeadMemIdx())
21417 return false;
21418
21419 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21420 return false;
21421
21422 Base = Op->getOperand(0);
21423 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21424 int64_t RHSC = RHS->getSExtValue();
21425 if (Op->getOpcode() == ISD::SUB)
21426 RHSC = -(uint64_t)RHSC;
21427
21428 // The constants that can be encoded in the THeadMemIdx instructions
21429 // are of the form (sign_extend(imm5) << imm2).
21430 bool isLegalIndexedOffset = false;
21431 for (unsigned i = 0; i < 4; i++)
21432 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21433 isLegalIndexedOffset = true;
21434 break;
21435 }
21436
21437 if (!isLegalIndexedOffset)
21438 return false;
21439
21440 Offset = Op->getOperand(1);
21441 return true;
21442 }
21443
21444 return false;
21445}
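
// Illustrative sketch, not from this file: the loop above accepts exactly
// the offsets of the form sign_extend(imm5) << imm2 with imm2 in [0, 3].
// A standalone checker with a few examples; the function name is made up.
#include <cstdint>

static bool isLegalTHeadMemIdxOffsetSketch(int64_t Off) {
  for (unsigned Shift = 0; Shift < 4; ++Shift)
    if ((Off % (1LL << Shift)) == 0 && (Off >> Shift) >= -16 &&
        (Off >> Shift) <= 15)
      return true;
  return false;
}
// isLegalTHeadMemIdxOffsetSketch(15)  -> true  (imm5 = 15,  imm2 = 0)
// isLegalTHeadMemIdxOffsetSketch(120) -> true  (imm5 = 15,  imm2 = 3)
// isLegalTHeadMemIdxOffsetSketch(-64) -> true  (imm5 = -16, imm2 = 2)
// isLegalTHeadMemIdxOffsetSketch(17)  -> false (not representable)
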
21446
21447bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21448 SDValue &Offset,
21449 ISD::MemIndexedMode &AM,
21450 SelectionDAG &DAG) const {
21451 EVT VT;
21452 SDValue Ptr;
21453 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21454 VT = LD->getMemoryVT();
21455 Ptr = LD->getBasePtr();
21456 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21457 VT = ST->getMemoryVT();
21458 Ptr = ST->getBasePtr();
21459 } else
21460 return false;
21461
21462 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21463 return false;
21464
21465 AM = ISD::PRE_INC;
21466 return true;
21467}
21468
21469bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
21470 SDValue &Base,
21471 SDValue &Offset,
21472 ISD::MemIndexedMode &AM,
21473 SelectionDAG &DAG) const {
21474 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21475 if (Op->getOpcode() != ISD::ADD)
21476 return false;
21477
21478 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21479 Base = LS->getBasePtr();
21480 else
21481 return false;
21482
21483 if (Base == Op->getOperand(0))
21484 Offset = Op->getOperand(1);
21485 else if (Base == Op->getOperand(1))
21486 Offset = Op->getOperand(0);
21487 else
21488 return false;
21489
21490 AM = ISD::POST_INC;
21491 return true;
21492 }
21493
21494 EVT VT;
21495 SDValue Ptr;
21496 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21497 VT = LD->getMemoryVT();
21498 Ptr = LD->getBasePtr();
21499 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21500 VT = ST->getMemoryVT();
21501 Ptr = ST->getBasePtr();
21502 } else
21503 return false;
21504
21505 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21506 return false;
21507 // Post-indexing updates the base, so it's not a valid transform
21508 // if that's not the same as the load's pointer.
21509 if (Ptr != Base)
21510 return false;
21511
21512 AM = ISD::POST_INC;
21513 return true;
21514}
21515
21517 EVT VT) const {
21518 EVT SVT = VT.getScalarType();
21519
21520 if (!SVT.isSimple())
21521 return false;
21522
21523 switch (SVT.getSimpleVT().SimpleTy) {
21524 case MVT::f16:
21525 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21526 : Subtarget.hasStdExtZfhOrZhinx();
21527 case MVT::f32:
21528 return Subtarget.hasStdExtFOrZfinx();
21529 case MVT::f64:
21530 return Subtarget.hasStdExtDOrZdinx();
21531 default:
21532 break;
21533 }
21534
21535 return false;
21536}
21537
21539 // Zacas will use amocas.w which does not require extension.
21540 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21541}
21542
21544 const Constant *PersonalityFn) const {
21545 return RISCV::X10;
21546}
21547
21549 const Constant *PersonalityFn) const {
21550 return RISCV::X11;
21551}
21552
21554 // Return false to suppress the unnecessary extensions if the LibCall
21555 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21556 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21557 Type.getSizeInBits() < Subtarget.getXLen()))
21558 return false;
21559
21560 return true;
21561}
21562
21564 bool IsSigned) const {
21565 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21566 return true;
21567
21568 return IsSigned;
21569}
21570
21572 SDValue C) const {
21573 // Check integral scalar types.
21574 if (!VT.isScalarInteger())
21575 return false;
21576
21577 // Omit the optimization if the subtarget has the Zmmul extension (implied by
21578 // M) and the data size exceeds XLen.
21579 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21580 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21581 return false;
21582
21583 auto *ConstNode = cast<ConstantSDNode>(C);
21584 const APInt &Imm = ConstNode->getAPIntValue();
21585
21586 // Break the MUL to a SLLI and an ADD/SUB.
21587 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21588 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21589 return true;
21590
21591 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
21592 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21593 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21594 (Imm - 8).isPowerOf2()))
21595 return true;
21596
21597 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21598 // a pair of LUI/ADDI.
21599 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21600 ConstNode->hasOneUse()) {
21601 APInt ImmS = Imm.ashr(Imm.countr_zero());
21602 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21603 (1 - ImmS).isPowerOf2())
21604 return true;
21605 }
21606
21607 return false;
21608}
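
// Illustrative sketch, not from this file: the hook above accepts multipliers
// that can be rebuilt from one shift plus one add/sub, or (with Zba) from an
// shNadd of a shifted copy. The standalone predicate below mirrors only the
// first two rules and omits the simm12 restriction on the Zba forms and the
// LUI/ADDI rule; the helper names are made up.
#include <cstdint>

static bool isPow2Sketch(uint64_t V) { return V && !(V & (V - 1)); }

static bool isCheapMulImmSketch(int64_t Imm, bool HasZba) {
  // x * Imm as SLLI plus one ADD/SUB: Imm is 2^k + 1, 2^k - 1, 1 - 2^k, ...
  if (isPow2Sketch(Imm + 1) || isPow2Sketch(Imm - 1) ||
      isPow2Sketch(1 - Imm) || isPow2Sketch(-1 - Imm))
    return true;
  // With Zba: x * Imm as shNadd(x, slli(x, k)) when Imm is 2^k + 2, + 4, + 8.
  if (HasZba && (isPow2Sketch(Imm - 2) || isPow2Sketch(Imm - 4) ||
                 isPow2Sketch(Imm - 8)))
    return true;
  return false;
}
// 17 = 16 + 1    -> slli 4 then add.
// 2050 = 2048 + 2 -> sh1add(x, slli(x, 11)) with Zba (2050 does not fit in
//                    simm12, which the real hook also requires for this rule).
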
21609
21611 SDValue ConstNode) const {
21612 // Let the DAGCombiner decide for vectors.
21613 EVT VT = AddNode.getValueType();
21614 if (VT.isVector())
21615 return true;
21616
21617 // Let the DAGCombiner decide for larger types.
21618 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21619 return true;
21620
21621 // It is worse if c1 is simm12 while c1*c2 is not.
21622 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21623 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21624 const APInt &C1 = C1Node->getAPIntValue();
21625 const APInt &C2 = C2Node->getAPIntValue();
21626 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21627 return false;
21628
21629 // Default to true and let the DAGCombiner decide.
21630 return true;
21631}
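
// Illustrative sketch, not from this file: the hook above keeps
// (x + c1) * c2 un-distributed when c1 fits in simm12 but c1 * c2 does not,
// since distributing would force materializing a large constant. The helper
// name below is made up.
#include <cstdint>

static bool fitsSimm12(int64_t V) { return V >= -2048 && V <= 2047; }
// fitsSimm12(3) == true, but fitsSimm12(3 * 4096) == false, so for c1 = 3 and
// c2 = 4096 the hook returns false and the add/mul shape is preserved.
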
21632
21634 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21635 unsigned *Fast) const {
21636 if (!VT.isVector()) {
21637 if (Fast)
21638 *Fast = Subtarget.enableUnalignedScalarMem();
21639 return Subtarget.enableUnalignedScalarMem();
21640 }
21641
21642 // All vector implementations must support element alignment
21643 EVT ElemVT = VT.getVectorElementType();
21644 if (Alignment >= ElemVT.getStoreSize()) {
21645 if (Fast)
21646 *Fast = 1;
21647 return true;
21648 }
21649
21650 // Note: We lower an unmasked unaligned vector access to an equally sized
21651 // e8 element type access. Given this, we effectively support all unmasked
21652 // misaligned accesses. TODO: Work through the codegen implications of
21653 // allowing such accesses to be formed, and considered fast.
21654 if (Fast)
21655 *Fast = Subtarget.enableUnalignedVectorMem();
21656 return Subtarget.enableUnalignedVectorMem();
21657}
21658
21659
21661 const AttributeList &FuncAttributes) const {
21662 if (!Subtarget.hasVInstructions())
21663 return MVT::Other;
21664
21665 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21666 return MVT::Other;
21667
21668 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21669 // has an expansion threshold, and we want the number of hardware memory
21670 // operations to correspond roughly to that threshold. LMUL>1 operations
21671 // are typically expanded linearly internally, and thus correspond to more
21672 // than one actual memory operation. Note that store merging and load
21673 // combining will typically form larger LMUL operations from the LMUL1
21674 // operations emitted here, and that's okay because combining isn't
21675 // introducing new memory operations; it's just merging existing ones.
21676 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21677 if (Op.size() < MinVLenInBytes)
21678 // TODO: Figure out short memops. For the moment, do the default thing
21679 // which ends up using scalar sequences.
21680 return MVT::Other;
21681
21682 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21683 // fixed vectors.
21684 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21685 return MVT::Other;
21686
21687 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21688 // a large scalar constant and instead use vmv.v.x/i to do the
21689 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21690 // maximize the chance we can encode the size in the vsetvli.
21691 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21692 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21693
21694 // Do we have sufficient alignment for our preferred VT? If not, revert
21695 // to largest size allowed by our alignment criteria.
21696 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21697 Align RequiredAlign(PreferredVT.getStoreSize());
21698 if (Op.isFixedDstAlign())
21699 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21700 if (Op.isMemcpy())
21701 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21702 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21703 }
21704 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21705}
21706
21708 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21709 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21710 bool IsABIRegCopy = CC.has_value();
21711 EVT ValueVT = Val.getValueType();
21712
21713 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21714 if ((ValueVT == PairVT ||
21715 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21716 ValueVT == MVT::f64)) &&
21717 NumParts == 1 && PartVT == MVT::Untyped) {
21718 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
21719 MVT XLenVT = Subtarget.getXLenVT();
21720 if (ValueVT == MVT::f64)
21721 Val = DAG.getBitcast(MVT::i64, Val);
21722 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
21723 // Always creating an MVT::Untyped part, so always use
21724 // RISCVISD::BuildGPRPair.
21725 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
21726 return true;
21727 }
21728
21729 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21730 PartVT == MVT::f32) {
21731 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21732 // nan, and cast to f32.
21733 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21734 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21735 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21736 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21737 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21738 Parts[0] = Val;
21739 return true;
21740 }
21741
21742 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
21743#ifndef NDEBUG
21744 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
21745 [[maybe_unused]] unsigned ValLMUL =
21747 ValNF * RISCV::RVVBitsPerBlock);
21748 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
21749 [[maybe_unused]] unsigned PartLMUL =
21751 PartNF * RISCV::RVVBitsPerBlock);
21752 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
21753 "RISC-V vector tuple type only accepts same register class type "
21754 "TUPLE_INSERT");
21755#endif
21756
21757 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
21758 Val, DAG.getVectorIdxConstant(0, DL));
21759 Parts[0] = Val;
21760 return true;
21761 }
21762
21763 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21764 LLVMContext &Context = *DAG.getContext();
21765 EVT ValueEltVT = ValueVT.getVectorElementType();
21766 EVT PartEltVT = PartVT.getVectorElementType();
21767 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21768 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21769 if (PartVTBitSize % ValueVTBitSize == 0) {
21770 assert(PartVTBitSize >= ValueVTBitSize);
21771 // If the element types are different, bitcast to the same element type of
21772 // PartVT first.
21773 // For example, to copy a <vscale x 1 x i8> value into a
21774 // <vscale x 4 x i16> part,
21775 // we need to convert <vscale x 1 x i8> to <vscale x 8 x i8> with an
21776 // insert_subvector, then we can bitcast to <vscale x 4 x i16>.
21777 if (ValueEltVT != PartEltVT) {
21778 if (PartVTBitSize > ValueVTBitSize) {
21779 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21780 assert(Count != 0 && "The number of element should not be zero.");
21781 EVT SameEltTypeVT =
21782 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21783 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21784 DAG.getUNDEF(SameEltTypeVT), Val,
21785 DAG.getVectorIdxConstant(0, DL));
21786 }
21787 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21788 } else {
21789 Val =
21790 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21791 Val, DAG.getVectorIdxConstant(0, DL));
21792 }
21793 Parts[0] = Val;
21794 return true;
21795 }
21796 }
21797
21798 return false;
21799}
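
// Illustrative sketch, not from this file: the [b]f16 -> f32 ABI path above
// NaN-boxes the half-precision bits into the low half of an f32 pattern and
// fills the upper 16 bits with ones; the reverse path simply truncates. For
// example, f16 1.0 (0x3C00) becomes the f32 bit pattern 0xFFFF3C00, which is
// a NaN. The helper names are made up.
#include <cstdint>

static uint32_t nanBoxHalfSketch(uint16_t HalfBits) {
  return 0xFFFF0000u | HalfBits; // e.g. 0x3C00 -> 0xFFFF3C00
}

static uint16_t unboxHalfSketch(uint32_t FloatBits) {
  return static_cast<uint16_t>(FloatBits); // truncate back to the f16 bits
}
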
21800
21802 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21803 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21804 bool IsABIRegCopy = CC.has_value();
21805
21806 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21807 if ((ValueVT == PairVT ||
21808 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21809 ValueVT == MVT::f64)) &&
21810 NumParts == 1 && PartVT == MVT::Untyped) {
21811 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
21812 MVT XLenVT = Subtarget.getXLenVT();
21813
21814 SDValue Val = Parts[0];
21815 // Always starting with an MVT::Untyped part, so always use
21816 // RISCVISD::SplitGPRPair
21817 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
21818 Val);
21819 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
21820 Val.getValue(1));
21821 if (ValueVT == MVT::f64)
21822 Val = DAG.getBitcast(ValueVT, Val);
21823 return Val;
21824 }
21825
21826 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21827 PartVT == MVT::f32) {
21828 SDValue Val = Parts[0];
21829
21830 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21831 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21832 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21833 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21834 return Val;
21835 }
21836
21837 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21838 LLVMContext &Context = *DAG.getContext();
21839 SDValue Val = Parts[0];
21840 EVT ValueEltVT = ValueVT.getVectorElementType();
21841 EVT PartEltVT = PartVT.getVectorElementType();
21842 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21843 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21844 if (PartVTBitSize % ValueVTBitSize == 0) {
21845 assert(PartVTBitSize >= ValueVTBitSize);
21846 EVT SameEltTypeVT = ValueVT;
21847 // If the element types are different, convert it to the same element type
21848 // of PartVT.
21849 // For example, to copy a <vscale x 1 x i8> value out of a
21850 // <vscale x 4 x i16> part,
21851 // we need to bitcast <vscale x 4 x i16> to <vscale x 8 x i8> first,
21852 // then we can extract the <vscale x 1 x i8> subvector.
21853 if (ValueEltVT != PartEltVT) {
21854 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21855 assert(Count != 0 && "The number of element should not be zero.");
21856 SameEltTypeVT =
21857 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21858 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21859 }
21860 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21861 DAG.getVectorIdxConstant(0, DL));
21862 return Val;
21863 }
21864 }
21865 return SDValue();
21866}
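// A standalone sketch (not part of this file) of the [b]f16-in-f32 ABI
// convention handled above: the 16-bit value travels in the low half of a
// 32-bit FP register, so joining a part is a bitcast to i32, a truncate to
// i16, and a bitcast back to [b]f16. The code below only illustrates the raw
// bit-level truncation; all names are illustrative.
#include <cassert>
#include <cstdint>

// Join: recover the 16-bit payload from the low bits of the 32-bit container.
static uint16_t joinHalfFromF32Bits(uint32_t F32Bits) {
  return static_cast<uint16_t>(F32Bits);
}

int main() {
  // 1.0 as an IEEE half (0x3C00) carried in the low half of a 32-bit register;
  // whatever sits in the upper 16 bits is ignored by the truncate.
  assert(joinHalfFromF32Bits(0xFFFF3C00u) == 0x3C00);
  assert(joinHalfFromF32Bits(0x00003C00u) == 0x3C00);
  return 0;
}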
21867
21868bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21869 // When aggressively optimizing for code size, we prefer to use a div
21870 // instruction, as it is usually smaller than the alternative sequence.
21871 // TODO: Add vector division?
21872 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21873 return OptSize && !VT.isVector();
21874}
21875
21876bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21877 // Scalarizing a splat of zero_extend or sign_extend can prevent it from
21878 // matching a widening instruction in some situations.
21879 unsigned Opc = N->getOpcode();
21880 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21881 return false;
21882 return true;
21883}
21884
21885static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21886 Module *M = IRB.GetInsertBlock()->getModule();
21887 Function *ThreadPointerFunc =
21888 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
21889 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21890 IRB.CreateCall(ThreadPointerFunc), Offset);
21891}
21892
21893Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21894 // Fuchsia provides a fixed TLS slot for the stack cookie.
21895 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21896 if (Subtarget.isTargetFuchsia())
21897 return useTpOffset(IRB, -0x10);
21898
21899 // Android provides a fixed TLS slot for the stack cookie. See the definition
21900 // of TLS_SLOT_STACK_GUARD in
21901 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21902 if (Subtarget.isTargetAndroid())
21903 return useTpOffset(IRB, -0x18);
21904
21905 Module *M = IRB.GetInsertBlock()->getModule();
21906
21907 if (M->getStackProtectorGuard() == "tls") {
21908 // Users must specify the offset explicitly
21909 int Offset = M->getStackProtectorGuardOffset();
21910 return useTpOffset(IRB, Offset);
21911 }
21912
21913 return TargetLowering::getIRStackGuard(IRB);
21914}
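// A minimal sketch, outside of this file, of how a front end opts into the
// "tls" stack-protector-guard path handled by getIRStackGuard above. The
// module name and the offset value are arbitrary examples; Fuchsia and
// Android instead use their fixed tp-relative slots (-0x10 and -0x18).
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <memory>

static std::unique_ptr<llvm::Module> makeTlsGuardModule(llvm::LLVMContext &Ctx) {
  auto M = std::make_unique<llvm::Module>("guard-demo", Ctx);
  // Equivalent to -mstack-protector-guard=tls
  // -mstack-protector-guard-offset=-16: the cookie is read from tp - 16.
  M->setStackProtectorGuard("tls");
  M->setStackProtectorGuardOffset(-16);
  return M;
}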
21915
21917 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21918 const DataLayout &DL) const {
21919 EVT VT = getValueType(DL, VTy);
21920 // Don't lower vlseg/vsseg for vector types that can't be split.
21921 if (!isTypeLegal(VT))
21922 return false;
21923
21924 if (!isLegalElementTypeForRVV(VT.getVectorElementType()) ||
21925 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21926 Alignment))
21927 return false;
21928
21929 MVT ContainerVT = VT.getSimpleVT();
21930
21931 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21932 if (!Subtarget.useRVVForFixedLengthVectors())
21933 return false;
21934 // Sometimes the interleaved access pass picks up splats as interleaves of
21935 // one element. Don't lower these.
21936 if (FVTy->getNumElements() < 2)
21937 return false;
21938
21939 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
21940 } else {
21941 // The intrinsics for scalable vectors are not overloaded on pointer type
21942 // and can only handle the default address space.
21943 if (AddrSpace)
21944 return false;
21945 }
21946
21947 // Need to make sure that EMUL * NFIELDS ≤ 8
21948 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21949 if (Fractional)
21950 return true;
21951 return Factor * LMUL <= 8;
21952}
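// A standalone restatement of the EMUL * NFIELDS <= 8 rule enforced above,
// assuming LMUL is expressed as an integer multiple (fractional LMULs always
// pass the check). The helper name is illustrative only.
#include <cassert>

static bool segmentAccessFitsInRegisterGroup(unsigned LMul, bool Fractional,
                                             unsigned Factor) {
  if (Fractional)
    return true;          // EMUL < 1, so EMUL * NFIELDS <= 8 trivially holds.
  return Factor * LMul <= 8;
}

int main() {
  assert(segmentAccessFitsInRegisterGroup(2, false, 4));   // EMUL*NFIELDS = 8
  assert(!segmentAccessFitsInRegisterGroup(2, false, 5));  // 10 > 8, rejected
  assert(segmentAccessFitsInRegisterGroup(8, true, 8));    // fractional LMUL
  return 0;
}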
21953
21954bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
21955 Align Alignment) const {
21956 if (!Subtarget.hasVInstructions())
21957 return false;
21958
21959 // Only support fixed vectors if we know the minimum vector size.
21960 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21961 return false;
21962
21963 EVT ScalarType = DataType.getScalarType();
21964 if (!isLegalElementTypeForRVV(ScalarType))
21965 return false;
21966
21967 if (!Subtarget.enableUnalignedVectorMem() &&
21968 Alignment < ScalarType.getStoreSize())
21969 return false;
21970
21971 return true;
21972}
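// A standalone restatement of the alignment rule above: unless the subtarget
// allows unaligned vector memory accesses, a strided access must be at least
// element-aligned. The helper below is illustrative only.
#include <cassert>

static bool stridedAccessAlignmentOK(bool AllowUnalignedVectorMem,
                                     unsigned AlignInBytes,
                                     unsigned ElementStoreSizeInBytes) {
  return AllowUnalignedVectorMem || AlignInBytes >= ElementStoreSizeInBytes;
}

int main() {
  assert(stridedAccessAlignmentOK(false, 4, 4));   // i32 elements, align 4: OK
  assert(!stridedAccessAlignmentOK(false, 2, 4));  // under-aligned: rejected
  assert(stridedAccessAlignmentOK(true, 1, 8));    // unaligned access allowed
  return 0;
}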
21973
21974static const Intrinsic::ID FixedVlsegIntrIds[] = {
21975 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21976 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21977 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21978 Intrinsic::riscv_seg8_load};
21979
21980/// Lower an interleaved load into a vlsegN intrinsic.
21981///
21982/// E.g. Lower an interleaved load (Factor = 2):
21983/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21984/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21985/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21986///
21987/// Into:
21988/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21989/// %ptr, i64 4)
21990/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21991/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21992bool RISCVTargetLowering::lowerInterleavedLoad(
21993 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21994 ArrayRef<unsigned> Indices, unsigned Factor) const {
21995 assert(Indices.size() == Shuffles.size());
21996
21997 IRBuilder<> Builder(LI);
21998
21999 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22000 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22001 LI->getPointerAddressSpace(),
22002 LI->getDataLayout())
22003 return false;
22004
22005 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22006
22007 // If the segment load is going to be performed one segment at a time anyway
22008 // and only one element is used, use a strided load instead. This will
22009 // be equally fast, and create less vector register pressure.
22010 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22011 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22012 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22013 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22014 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22015 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22016 Value *VL = Builder.getInt32(VTy->getNumElements());
22017
22018 CallInst *CI =
22019 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22020 {VTy, BasePtr->getType(), Stride->getType()},
22021 {BasePtr, Stride, Mask, VL});
22022 CI->addParamAttr(
22023 1, Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
22024 Shuffles[0]->replaceAllUsesWith(CI);
22025 return true;
22026 }
22027
22028 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22029
22030 CallInst *VlsegN = Builder.CreateIntrinsic(
22031 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22032 {LI->getPointerOperand(), VL});
22033
22034 for (unsigned i = 0; i < Shuffles.size(); i++) {
22035 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22036 Shuffles[i]->replaceAllUsesWith(SubVec);
22037 }
22038
22039 return true;
22040}
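// Standalone arithmetic behind the single-index strided-load fallback above:
// when only one member of a factor-N interleave is used, the same elements can
// be read with stride = Factor * sizeof(element) starting at
// base + Index * sizeof(element). Names are illustrative.
#include <cassert>

struct StridedParams {
  unsigned StrideInBytes;
  unsigned OffsetInBytes;
};

static StridedParams stridedLoadForLane(unsigned Factor, unsigned Index,
                                        unsigned ScalarSizeInBytes) {
  return {Factor * ScalarSizeInBytes, Index * ScalarSizeInBytes};
}

int main() {
  // Factor-4 interleave of i32 elements, reading lane 2.
  StridedParams P = stridedLoadForLane(4, 2, 4);
  assert(P.StrideInBytes == 16 && P.OffsetInBytes == 8);
  return 0;
}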
22041
22042static const Intrinsic::ID FixedVssegIntrIds[] = {
22043 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22044 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22045 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22046 Intrinsic::riscv_seg8_store};
22047
22048/// Lower an interleaved store into a vssegN intrinsic.
22049///
22050/// E.g. Lower an interleaved store (Factor = 3):
22051/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22052/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22053/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22054///
22055/// Into:
22056/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22057/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22058/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22059/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22060/// %ptr, i32 4)
22061///
22062/// Note that the new shufflevectors will be removed and we'll only generate one
22063/// vsseg3 instruction in CodeGen.
22064bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
22065 ShuffleVectorInst *SVI,
22066 unsigned Factor) const {
22067 IRBuilder<> Builder(SI);
22068 auto Mask = SVI->getShuffleMask();
22069 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22070 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22071 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22072 ShuffleVTy->getNumElements() / Factor);
22073 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22074 SI->getPointerAddressSpace(),
22075 SI->getDataLayout()))
22076 return false;
22077
22078 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22079
22080 unsigned Index;
22081 // If the segment store only has one active lane (i.e. the interleave is
22082 // just a spread shuffle), we can use a strided store instead. This will
22083 // be equally fast, and create less vector register pressure.
22084 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22085 isSpreadMask(Mask, Factor, Index)) {
22086 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22087 Value *Data = SVI->getOperand(0);
22088 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22089 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22090 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22091 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22092 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22093 Value *VL = Builder.getInt32(VTy->getNumElements());
22094
22095 CallInst *CI = Builder.CreateIntrinsic(
22096 Intrinsic::experimental_vp_strided_store,
22097 {Data->getType(), BasePtr->getType(), Stride->getType()},
22098 {Data, BasePtr, Stride, Mask, VL});
22099 CI->addParamAttr(
22100 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22101
22102 return true;
22103 }
22104
22105 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22106 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22107 {VTy, SI->getPointerOperandType(), XLenTy});
22108
22110
22111 for (unsigned i = 0; i < Factor; i++) {
22112 Value *Shuffle = Builder.CreateShuffleVector(
22113 SVI->getOperand(0), SVI->getOperand(1),
22114 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22115 Ops.push_back(Shuffle);
22116 }
22117 // This VL should be OK (should be executable in one vsseg instruction,
22118 // potentially under larger LMULs) because we checked that the fixed vector
22119 // type fits in isLegalInterleavedAccessType
22120 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22121 Ops.append({SI->getPointerOperand(), VL});
22122
22123 Builder.CreateCall(VssegNFunc, Ops);
22124
22125 return true;
22126}
22127
22128bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
22129 IntrinsicInst *DI, LoadInst *LI,
22130 SmallVectorImpl<Instruction *> &DeadInsts) const {
22131 assert(LI->isSimple());
22132 IRBuilder<> Builder(LI);
22133
22134 // Only deinterleave2 supported at present.
22135 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
22136 return false;
22137
22138 const unsigned Factor = 2;
22139
22140 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
22141 const DataLayout &DL = LI->getDataLayout();
22142
22143 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22144 LI->getPointerAddressSpace(), DL))
22145 return false;
22146
22147 Value *Return;
22148 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22149
22150 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22151 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22152 Return =
22153 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22154 {ResVTy, LI->getPointerOperandType(), XLenTy},
22155 {LI->getPointerOperand(), VL});
22156 } else {
22157 static const Intrinsic::ID IntrIds[] = {
22158 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22159 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22160 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22161 Intrinsic::riscv_vlseg8};
22162
22163 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22164 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22165 Type *VecTupTy = TargetExtType::get(
22166 LI->getContext(), "riscv.vector.tuple",
22167 ScalableVectorType::get(Type::getInt8Ty(LI->getContext()),
22168 NumElts * SEW / 8),
22169 Factor);
22170
22171 Value *VL = Constant::getAllOnesValue(XLenTy);
22172
22173 Value *Vlseg = Builder.CreateIntrinsic(
22174 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22175 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22176 ConstantInt::get(XLenTy, Log2_64(SEW))});
22177
22178 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22179 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22180 for (unsigned i = 0; i < Factor; ++i) {
22181 Value *VecExtract = Builder.CreateIntrinsic(
22182 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22183 {Vlseg, Builder.getInt32(i)});
22184 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22185 }
22186 }
22187
22188 DI->replaceAllUsesWith(Return);
22189
22190 return true;
22191}
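// Standalone sketch of how the "riscv.vector.tuple" container above is sized:
// each field is described as <vscale x (NumElts * SEW / 8) x i8>, i.e. the
// per-vscale byte count of one segment, and the tuple carries Factor fields.
// The helper name is illustrative only.
#include <cassert>

static unsigned tupleBytesPerField(unsigned NumElts, unsigned SEWInBits) {
  return NumElts * SEWInBits / 8;
}

int main() {
  // Deinterleaving <vscale x 4 x i32> with Factor = 2: SEW = 32, NumElts = 4,
  // so each tuple field wraps <vscale x 16 x i8> and the tuple has 2 fields.
  assert(tupleBytesPerField(4, 32) == 16);
  return 0;
}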
22192
22193bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
22194 IntrinsicInst *II, StoreInst *SI,
22195 SmallVectorImpl<Instruction *> &DeadInsts) const {
22196 assert(SI->isSimple());
22197 IRBuilder<> Builder(SI);
22198
22199 // Only interleave2 supported at present.
22200 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
22201 return false;
22202
22203 const unsigned Factor = 2;
22204
22205 VectorType *InVTy = cast<VectorType>(II->getArgOperand(0)->getType());
22206 const DataLayout &DL = SI->getDataLayout();
22207
22208 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22209 SI->getPointerAddressSpace(), DL))
22210 return false;
22211
22212 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22213
22214 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22215 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22216 Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
22217 {InVTy, SI->getPointerOperandType(), XLenTy},
22218 {II->getArgOperand(0), II->getArgOperand(1),
22219 SI->getPointerOperand(), VL});
22220 } else {
22221 static const Intrinsic::ID IntrIds[] = {
22222 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22223 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22224 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22225 Intrinsic::riscv_vsseg8};
22226
22227 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22228 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22229 Type *VecTupTy = TargetExtType::get(
22230 SI->getContext(), "riscv.vector.tuple",
22231 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22232 NumElts * SEW / 8),
22233 Factor);
22234
22235 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22236 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22237
22238 Value *VL = Constant::getAllOnesValue(XLenTy);
22239
22240 Value *StoredVal = PoisonValue::get(VecTupTy);
22241 for (unsigned i = 0; i < Factor; ++i)
22242 StoredVal = Builder.CreateIntrinsic(
22243 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22244 {StoredVal, II->getArgOperand(i), Builder.getInt32(i)});
22245
22246 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22247 ConstantInt::get(XLenTy, Log2_64(SEW))});
22248 }
22249
22250 return true;
22251}
22252
22256 const TargetInstrInfo *TII) const {
22257 assert(MBBI->isCall() && MBBI->getCFIType() &&
22258 "Invalid call instruction for a KCFI check");
22259 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22260 MBBI->getOpcode()));
22261
22262 MachineOperand &Target = MBBI->getOperand(0);
22263 Target.setIsRenamable(false);
22264
22265 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22266 .addReg(Target.getReg())
22267 .addImm(MBBI->getCFIType())
22268 .getInstr();
22269}
22270
22271#define GET_REGISTER_MATCHER
22272#include "RISCVGenAsmMatcher.inc"
22273
22274Register
22275RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
22276 const MachineFunction &MF) const {
22277 Register Reg = MatchRegisterAltName(RegName);
22278 if (Reg == RISCV::NoRegister)
22279 Reg = MatchRegisterName(RegName);
22280 if (Reg == RISCV::NoRegister)
22281 report_fatal_error(
22282 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22283 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22284 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22285 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22286 StringRef(RegName) + "\"."));
22287 return Reg;
22288}
22289
22290MachineMemOperand::Flags
22291RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
22292 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22293
22294 if (NontemporalInfo == nullptr)
22295 return MachineMemOperand::MONone;
22296
22297 // 1 is the default value and behaves as __RISCV_NTLH_ALL
22298 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22299 // 3 -> __RISCV_NTLH_ALL_PRIVATE
22300 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22301 // 5 -> __RISCV_NTLH_ALL
22302 int NontemporalLevel = 5;
22303 const MDNode *RISCVNontemporalInfo =
22304 I.getMetadata("riscv-nontemporal-domain");
22305 if (RISCVNontemporalInfo != nullptr)
22306 NontemporalLevel =
22307 cast<ConstantInt>(
22308 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22309 ->getValue())
22310 ->getZExtValue();
22311
22312 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22313 "RISC-V target doesn't support this non-temporal domain.");
22314
22315 NontemporalLevel -= 2;
22316 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
22317 if (NontemporalLevel & 0b1)
22318 Flags |= MONontemporalBit0;
22319 if (NontemporalLevel & 0b10)
22320 Flags |= MONontemporalBit1;
22321
22322 return Flags;
22323}
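// Standalone restatement of the domain-to-flag mapping above: the metadata
// level (2..5, with 5 also used when only !nontemporal is present) is biased
// by -2 and its two low bits become MONontemporalBit0/MONontemporalBit1. The
// helper and the stand-in bit values below are illustrative only.
#include <cassert>

static unsigned nontemporalBits(int Level) {
  Level -= 2;
  unsigned Bits = 0;
  if (Level & 0b1)
    Bits |= 1; // stands in for MONontemporalBit0
  if (Level & 0b10)
    Bits |= 2; // stands in for MONontemporalBit1
  return Bits;
}

int main() {
  assert(nontemporalBits(2) == 0); // __RISCV_NTLH_INNERMOST_PRIVATE
  assert(nontemporalBits(3) == 1); // __RISCV_NTLH_ALL_PRIVATE
  assert(nontemporalBits(4) == 2); // __RISCV_NTLH_INNERMOST_SHARED
  assert(nontemporalBits(5) == 3); // __RISCV_NTLH_ALL (and the default)
  return 0;
}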
22324
22325MachineMemOperand::Flags
22326RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
22327
22328 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22329 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
22330 TargetFlags |= (NodeFlags & MONontemporalBit0);
22331 TargetFlags |= (NodeFlags & MONontemporalBit1);
22332 return TargetFlags;
22333}
22334
22335bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
22336 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22337 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22338}
22339
22340bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
22341 if (VT.isScalableVector())
22342 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22343 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22344 return true;
22345 return Subtarget.hasStdExtZbb() &&
22346 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22347}
22348
22349unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
22350 ISD::CondCode Cond) const {
22351 return isCtpopFast(VT) ? 0 : 1;
22352}
22353
22354bool RISCVTargetLowering::shouldInsertFencesForAtomic(
22355 const Instruction *I) const {
22356 if (Subtarget.hasStdExtZalasr()) {
22357 if (Subtarget.hasStdExtZtso()) {
22358 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22359 // should be lowered to plain load/store. The easiest way to do this is
22360 // to say we should insert fences for them, and the fence insertion code
22361 // will just not insert any fences
22362 auto *LI = dyn_cast<LoadInst>(I);
22363 auto *SI = dyn_cast<StoreInst>(I);
22364 if ((LI &&
22365 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22366 (SI &&
22367 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22368 // Here, this is a load or store which is seq_cst, and needs a .aq or
22369 // .rl therefore we shouldn't try to insert fences
22370 return false;
22371 }
22372 // Here, we are a TSO inst that isn't a seq_cst load/store
22373 return isa<LoadInst>(I) || isa<StoreInst>(I);
22374 }
22375 return false;
22376 }
22377 // Note that one specific case requires fence insertion for an
22378 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22379 // than this hook due to limitations in the interface here.
22380 return isa<LoadInst>(I) || isa<StoreInst>(I);
22381}
22382
22383bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
22384
22385 // GISel support is in progress or complete for these opcodes.
22386 unsigned Op = Inst.getOpcode();
22387 if (Op == Instruction::Add || Op == Instruction::Sub ||
22388 Op == Instruction::And || Op == Instruction::Or ||
22389 Op == Instruction::Xor || Op == Instruction::InsertElement ||
22390 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22391 Op == Instruction::Freeze || Op == Instruction::Store)
22392 return false;
22393
22394 if (Inst.getType()->isScalableTy())
22395 return true;
22396
22397 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22398 if (Inst.getOperand(i)->getType()->isScalableTy() &&
22399 !isa<ReturnInst>(&Inst))
22400 return true;
22401
22402 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22403 if (AI->getAllocatedType()->isScalableTy())
22404 return true;
22405 }
22406
22407 return false;
22408}
22409
22410SDValue
22411RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22412 SelectionDAG &DAG,
22413 SmallVectorImpl<SDNode *> &Created) const {
22414 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
22415 if (isIntDivCheap(N->getValueType(0), Attr))
22416 return SDValue(N, 0); // Lower SDIV as SDIV
22417
22418 // Only perform this transform if short forward branch opt is supported.
22419 if (!Subtarget.hasShortForwardBranchOpt())
22420 return SDValue();
22421 EVT VT = N->getValueType(0);
22422 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22423 return SDValue();
22424
22425 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22426 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22427 return SDValue();
22428 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22429}
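// Standalone sketch of the arithmetic identity behind the CMov-based lowering
// used above: x / 2^k (rounding toward zero) equals
// (x < 0 ? x + (2^k - 1) : x) >> k, and the 2^k - 1 < 2048 guard keeps that
// bias within a single addi/addiw. Assumes the usual arithmetic right shift
// for signed values; names are illustrative.
#include <cassert>
#include <cstdint>

static int32_t sdivPow2(int32_t X, unsigned K) {
  int32_t Bias = (1 << K) - 1;
  return (X < 0 ? X + Bias : X) >> K;
}

int main() {
  assert(sdivPow2(7, 2) == 1);    // 7 / 4
  assert(sdivPow2(-7, 2) == -1);  // -7 / 4 rounds toward zero, not to -2
  assert(sdivPow2(-8, 3) == -1);  // exact division still works
  return 0;
}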
22430
22431bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22432 EVT VT, const APInt &AndMask) const {
22433 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22434 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22435 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
22436}
22437
22438unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22439 return Subtarget.getMinimumJumpTableEntries();
22440}
22441
22444 int JTI,
22445 SelectionDAG &DAG) const {
22446 if (Subtarget.hasStdExtZicfilp()) {
22447 // When Zicfilp enabled, we need to use software guarded branch for jump
22448 // table branch.
22449 SDValue Chain = Value;
22450 // Jump table debug info is only needed if CodeView is enabled.
22452 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22453 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22454 }
22455 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22456}
22457
22458 // If an output pattern produces multiple instructions, tablegen may pick an
22459 // arbitrary type from an instruction's destination register class to use for the
22460// VT of that MachineSDNode. This VT may be used to look up the representative
22461// register class. If the type isn't legal, the default implementation will
22462// not find a register class.
22463//
22464// Some integer types smaller than XLen are listed in the GPR register class to
22465// support isel patterns for GISel, but are not legal in SelectionDAG. The
22466// arbitrary type tablegen picks may be one of these smaller types.
22467//
22468// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22469// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22470std::pair<const TargetRegisterClass *, uint8_t>
22471RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22472 MVT VT) const {
22473 switch (VT.SimpleTy) {
22474 default:
22475 break;
22476 case MVT::i8:
22477 case MVT::i16:
22478 case MVT::i32:
22480 case MVT::bf16:
22481 case MVT::f16:
22483 }
22484
22486}
22487
22488namespace llvm::RISCVVIntrinsicsTable {
22489
22490#define GET_RISCVVIntrinsicsTable_IMPL
22491#include "RISCVGenSearchableTables.inc"
22492
22493} // namespace llvm::RISCVVIntrinsicsTable
22494
22495bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
22496
22497 // If the function specifically requests inline stack probes, emit them.
22498 if (MF.getFunction().hasFnAttribute("probe-stack"))
22499 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22500 "inline-asm";
22501
22502 return false;
22503}
22504
22505unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
22506 Align StackAlign) const {
22507 // The default stack probe size is 4096 if the function has no
22508 // stack-probe-size attribute.
22509 const Function &Fn = MF.getFunction();
22510 unsigned StackProbeSize =
22511 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22512 // Round down to the stack alignment.
22513 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
22514 return StackProbeSize ? StackProbeSize : StackAlign.value();
22515}
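// Standalone restatement of the probe-size computation above: take the
// "stack-probe-size" attribute (default 4096), round it down to the stack
// alignment, and fall back to the alignment itself if that rounds to zero.
// The helper name is illustrative only.
#include <cassert>

static unsigned stackProbeSize(unsigned RequestedSize, unsigned StackAlign) {
  unsigned Rounded = RequestedSize - (RequestedSize % StackAlign);
  return Rounded ? Rounded : StackAlign;
}

int main() {
  assert(stackProbeSize(4096, 16) == 4096); // default, already aligned
  assert(stackProbeSize(5000, 16) == 4992); // rounded down to the alignment
  assert(stackProbeSize(8, 16) == 16);      // too small: use the alignment
  return 0;
}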
Definition: IRBuilder.h:1877
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
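A brief, illustrative IRBuilder fragment using several of the creation methods above. BB is assumed to be an existing llvm::BasicBlock*; the values built here are arbitrary.

IRBuilder<> Builder(BB);                       // insert at the end of BB
Value *Idx   = Builder.getInt32(1);            // i32 1
Value *Wide  = Builder.CreateSExt(Idx, Builder.getInt64Ty());
Value *Delta = Builder.CreateSub(Wide, Builder.getIntN(64, 8));
Value *NotV  = Builder.CreateNot(Delta);
Builder.CreateFence(AtomicOrdering::Release);  // standalone fence instruction
(void)NotV;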
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:42
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
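A small sketch of the MVT queries used throughout this lowering code; the specific types are chosen only for illustration.

MVT VecVT = MVT::getVectorVT(MVT::i32, 4);           // v4i32
assert(VecVT.isVector() && VecVT.isInteger());
MVT EltVT = VecVT.getVectorElementType();            // i32
MVT FpVT  = VecVT.changeVectorElementType(MVT::f32); // v4f32
MVT SclVT = MVT::getScalableVectorVT(MVT::i64, 2);   // nxv2i64
assert(SclVT.isScalableVector());
(void)EltVT; (void)FpVT;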
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
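A sketch of the block-splitting idiom commonly used by custom inserters, built from the MachineBasicBlock operations above. It assumes MI is the MachineInstr being expanded and BB is the MachineBasicBlock that contains it; this is a generic pattern, not a copy of this target's code.

const BasicBlock *LLVMBB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVMBB);
F->insert(std::next(BB->getIterator()), TailMBB);
// Move everything after MI into the tail block, then fix up CFG edges.
TailMBB->splice(TailMBB->begin(), BB,
                std::next(MachineBasicBlock::iterator(MI)), BB->end());
TailMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(TailMBB);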
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
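A minimal sketch of creating stack objects through MachineFrameInfo, assuming MF is the current MachineFunction; sizes and offsets are illustrative.

MachineFrameInfo &MFI = MF.getFrameInfo();
// An 8-byte, 8-byte-aligned spill slot.
int SpillFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/true);
// A fixed object at SP+0, e.g. for an incoming stack argument.
int FixedFI = MFI.CreateFixedObject(8, /*SPOffset=*/0, /*IsImmutable=*/true);
MFI.setFrameAddressIsTaken(true);
(void)SpillFI; (void)FixedFI;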
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
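A short sketch of a few MachineFunction services listed above, assuming MF is a MachineFunction reference; the register and register class are chosen only as an example.

MachineRegisterInfo &MRI = MF.getRegInfo();
Register VReg = MF.addLiveIn(RISCV::X10, &RISCV::GPRRegClass); // a0 as a live-in
MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), NewMBB);
(void)MRI; (void)VReg;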
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
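A sketch of the BuildMI/MachineInstrBuilder pattern these helpers support, assuming MBB, MBBI, DL, TII, DestReg, SrcReg, and a frame index FI are available from the surrounding code; the opcodes are illustrative.

BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg)
    .addReg(SrcReg)
    .addImm(0);                 // DestReg = SrcReg + 0
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SD))
    .addReg(SrcReg)
    .addFrameIndex(FI)
    .addImm(0);                 // store SrcReg to the stack slot FI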
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:404
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.

AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:395
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:888
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:871
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:904
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
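A minimal sketch of typical SelectionDAG node construction inside a lowering hook, assuming DAG, an SDLoc DL, and two existing i64 operands A and B; the opcodes and values are illustrative only.

EVT VT = MVT::i64;
SDValue C   = DAG.getConstant(42, DL, VT);
SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, A, C);
SDValue Neg = DAG.getNegative(B, DL, VT);                  // SUB 0, B
SDValue Sel = DAG.getSelectCC(DL, Sum, Neg, A, B, ISD::SETLT);
return Sel;                                                // hand back to the caller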
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
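A short sketch of querying the static shuffle-mask helpers above, assuming Mask is an ArrayRef<int> obtained from a ShuffleVectorInst or ShuffleVectorSDNode.

unsigned Factor = 2, Index = 0;
if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index)) {
  // Mask selects every Factor-th element starting at Index.
}
if (ShuffleVectorInst::isReverseMask(Mask, /*NumSrcElts=*/(int)Mask.size())) {
  // Mask reverses a single source vector.
}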
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
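A tiny sketch of the SmallSet/SmallVector idioms these containers support; the element types and values are arbitrary.

SmallVector<SDValue, 8> Ops;
Ops.reserve(4);
Ops.push_back(SDValue());            // placeholder element
SmallSet<unsigned, 4> Seen;
if (Seen.insert(7).second)           // true only the first time 7 is inserted
  Ops.emplace_back();
bool Known = Seen.count(7) == 1;
(void)Known;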
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
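An illustrative StringSwitch fragment, e.g. for mapping constraint or register names; the strings and values are placeholders, not this file's actual tables. Name is assumed to be a StringRef.

unsigned Kind = StringSwitch<unsigned>(Name)
                    .Case("vr", 1)
                    .Cases("vm", "vd", 2)
                    .Default(0);
(void)Kind;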
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
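A hedged sketch of how a TargetLowering constructor typically registers legalization actions through this hook; MyTargetLowering and the chosen opcode/type/action triples are illustrative assumptions, not a description of any particular target:

  // Inside a hypothetical TargetLowering subclass.
  void MyTargetLowering::registerActions() {
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); // generic expansion
    setOperationAction(ISD::VASTART, MVT::Other, Custom); // handled in LowerOperation
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);    // natively selectable
  }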
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
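A hedged sketch of the common makeLibCall pattern inside a lowering hook; MyTargetLowering, the function name, and the choice of FP_ROUND are illustrative assumptions:

  // Lower an FP_ROUND node through a runtime library routine.
  SDValue MyTargetLowering::lowerFPRoundViaLibcall(SDValue Op, SelectionDAG &DAG) const {
    SDLoc DL(Op);
    EVT SrcVT = Op.getOperand(0).getValueType();
    EVT DstVT = Op.getValueType();
    RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
    MakeLibCallOptions CallOptions;
    // makeLibCall returns {result, chain}; a non-strict node only needs the result.
    return makeLibCall(DAG, LC, DstVT, Op.getOperand(0), CallOptions, DL).first;
  }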
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:735
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:384
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
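A brief, self-contained example of the Type queries above (getIntNTy, isIntegerTy, getIntegerBitWidth); the predicate itself is hypothetical:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"

  // Build an arbitrary-width integer type and check whether it is wider than 64 bits.
  static bool isWideInt(llvm::LLVMContext &Ctx, unsigned Bits) {
    llvm::Type *Ty = llvm::IntegerType::getIntNTy(Ctx, Bits);
    return Ty->isIntegerTy() && Ty->getIntegerBitWidth() > 64;
  }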
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1259
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that are the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1575
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1575
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1562
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1496
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1613
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1593
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1658
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns an f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:555
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1520
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
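A few concrete values for the math helpers listed here (isPowerOf2_64, Log2_64, PowerOf2Ceil, alignDown); the numbers are examples only:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  static void mathExtrasExamples() {
    assert(llvm::isPowerOf2_64(64));       // 64 == 2^6
    assert(llvm::Log2_64(64) == 6);        // floor of log2
    assert(llvm::PowerOf2Ceil(33) == 64);  // smallest power of two >= 33
    assert(llvm::alignDown(37, 8) == 32);  // largest multiple of 8 <= 37
  }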
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
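A sketch of what createSequentialMask produces; the particular Start/NumInts/NumUndefs values are illustrative:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"

  // Expected contents (illustrative): {2, 3, 4, 5, -1, -1}; the trailing -1 lanes are undef.
  static llvm::SmallVector<int, 16> exampleSequentialMask() {
    return llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
  }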
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:317
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
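A small sketch of building fixed and scalable vector EVTs from the entries above; the element type and count are arbitrary:

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  static void evtExamples(llvm::LLVMContext &Ctx) {
    llvm::EVT Fixed = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);            // <4 x i32>
    llvm::EVT Scalable =
        llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4, /*IsScalable=*/true);     // <vscale x 4 x i32>
    assert(Fixed.isFixedLengthVector() && Scalable.isScalableVector());
  }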
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
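A hedged illustration of the KnownBits helpers above; the bit width and values are arbitrary:

  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"

  // An i8 value known to fit in its low 4 bits, shifted left by a constant 2:
  // afterwards the low 2 bits (and the top 2 bits) are known to be zero.
  static llvm::KnownBits shlExample() {
    llvm::KnownBits LHS(8);
    LHS.Zero.setHighBits(4);  // top 4 bits known zero
    llvm::KnownBits Amt = llvm::KnownBits::makeConstant(llvm::APInt(8, 2));
    return llvm::KnownBits::shl(LHS, Amt);
  }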
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)