1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
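 // These tuple types back the segment load/store (vlseg/vsseg) intrinsics:
 // each one is a group of NF vector values sharing a common element count.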
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
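 // Tuple types live in VRN<N>M<LMUL> register classes: N consecutive vector
 // register groups, each of LMUL 1, 2 or 4.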
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32bit case is efficient on 64bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
859 Custom);
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
 923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in
 924 // the range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
 1070 // Sets common actions for f16 and bf16 when only zvfhmin/zvfbfmin are
 1071 // available and most operations must be promoted to f32.
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
 1109 // Custom split nxv32f16/nxv32bf16 since nxv32f32 is not legal.
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
 1231 // Operations below differ between mask vectors and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
 1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in
 1309 // the range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
 1400 // Don't promote bf16 vector operations to f32 if the f32 vector type is
 1401 // not legal.
 1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
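 // With the C or Zca extension, instructions may start on a 2-byte boundary;
 // otherwise functions must be 4-byte aligned.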
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1531 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1534 if (Subtarget.hasVendorXTHeadMemPair())
1536 if (Subtarget.useRVVForFixedLengthVectors())
1538
1539 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1540 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1541
1542 // Disable strict node mutation.
1543 IsStrictFPEnabled = true;
1544 EnableExtLdPromotion = true;
1545
1546 // Let the subtarget decide if a predictable select is more expensive than the
1547 // corresponding branch. This information is used in CGP/SelectOpt to decide
1548 // when to convert selects into branches.
1549 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1550
1551 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1552 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1553
1555 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1556 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1557
1559 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1560 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1561
1562 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1563 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1564}
1565
1567 LLVMContext &Context,
1568 EVT VT) const {
1569 if (!VT.isVector())
1570 return getPointerTy(DL);
1571 if (Subtarget.hasVInstructions() &&
1572 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1573 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1575}
1576
1577MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1578 return Subtarget.getXLenVT();
1579}
1580
1581// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1582bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1583 unsigned VF,
1584 bool IsScalable) const {
1585 if (!Subtarget.hasVInstructions())
1586 return true;
1587
1588 if (!IsScalable)
1589 return true;
1590
1591 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1592 return true;
1593
 1594 // Don't allow VF=1 if those types aren't legal.
1595 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1596 return true;
1597
1598 // VLEN=32 support is incomplete.
1599 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1600 return true;
1601
1602 // The maximum VF is for the smallest element width with LMUL=8.
1603 // VF must be a power of 2.
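 // With RVVBitsPerBlock = 64, that gives MaxVF = (64 / 8) * 8 = 64.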
1604 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1605 return VF > MaxVF || !isPowerOf2_32(VF);
1606}
1607
1609 return !Subtarget.hasVInstructions() ||
1610 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1611}
1612
1614 const CallInst &I,
1615 MachineFunction &MF,
1616 unsigned Intrinsic) const {
1617 auto &DL = I.getDataLayout();
1618
1619 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1620 bool IsUnitStrided, bool UsePtrVal = false) {
1622 // We can't use ptrVal if the intrinsic can access memory before the
1623 // pointer. This means we can't use it for strided or indexed intrinsics.
1624 if (UsePtrVal)
1625 Info.ptrVal = I.getArgOperand(PtrOp);
1626 else
1627 Info.fallbackAddressSpace =
1628 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1629 Type *MemTy;
1630 if (IsStore) {
1631 // Store value is the first operand.
1632 MemTy = I.getArgOperand(0)->getType();
1633 } else {
 1634 // Use the return type. If it's a segment load, the return type is a struct.
1635 MemTy = I.getType();
1636 if (MemTy->isStructTy())
1637 MemTy = MemTy->getStructElementType(0);
1638 }
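 // For strided or indexed accesses only individual elements are contiguous,
 // so record just the scalar element type.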
1639 if (!IsUnitStrided)
1640 MemTy = MemTy->getScalarType();
1641
1642 Info.memVT = getValueType(DL, MemTy);
1643 if (MemTy->isTargetExtTy()) {
1644 // RISC-V vector tuple type's alignment type should be its element type.
1645 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1646 MemTy = Type::getIntNTy(
1647 MemTy->getContext(),
1648 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1649 ->getZExtValue());
1650 Info.align = DL.getABITypeAlign(MemTy);
1651 } else {
1652 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1653 }
1655 Info.flags |=
1657 return true;
1658 };
1659
1660 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1662
1664 switch (Intrinsic) {
1665 default:
1666 return false;
1667 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1668 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1669 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1670 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1671 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1672 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1675 case Intrinsic::riscv_masked_cmpxchg_i32:
1677 Info.memVT = MVT::i32;
1678 Info.ptrVal = I.getArgOperand(0);
1679 Info.offset = 0;
1680 Info.align = Align(4);
1683 return true;
1684 case Intrinsic::riscv_seg2_load:
1685 case Intrinsic::riscv_seg3_load:
1686 case Intrinsic::riscv_seg4_load:
1687 case Intrinsic::riscv_seg5_load:
1688 case Intrinsic::riscv_seg6_load:
1689 case Intrinsic::riscv_seg7_load:
1690 case Intrinsic::riscv_seg8_load:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1692 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1693 case Intrinsic::riscv_seg2_store:
1694 case Intrinsic::riscv_seg3_store:
1695 case Intrinsic::riscv_seg4_store:
1696 case Intrinsic::riscv_seg5_store:
1697 case Intrinsic::riscv_seg6_store:
1698 case Intrinsic::riscv_seg7_store:
1699 case Intrinsic::riscv_seg8_store:
1700 // Operands are (vec, ..., vec, ptr, vl)
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1704 case Intrinsic::riscv_vle:
1705 case Intrinsic::riscv_vle_mask:
1706 case Intrinsic::riscv_vleff:
1707 case Intrinsic::riscv_vleff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ true,
1711 /*UsePtrVal*/ true);
1712 case Intrinsic::riscv_vse:
1713 case Intrinsic::riscv_vse_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ true,
1717 /*UsePtrVal*/ true);
1718 case Intrinsic::riscv_vlse:
1719 case Intrinsic::riscv_vlse_mask:
1720 case Intrinsic::riscv_vloxei:
1721 case Intrinsic::riscv_vloxei_mask:
1722 case Intrinsic::riscv_vluxei:
1723 case Intrinsic::riscv_vluxei_mask:
1724 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1725 /*IsStore*/ false,
1726 /*IsUnitStrided*/ false);
1727 case Intrinsic::riscv_vsse:
1728 case Intrinsic::riscv_vsse_mask:
1729 case Intrinsic::riscv_vsoxei:
1730 case Intrinsic::riscv_vsoxei_mask:
1731 case Intrinsic::riscv_vsuxei:
1732 case Intrinsic::riscv_vsuxei_mask:
1733 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1734 /*IsStore*/ true,
1735 /*IsUnitStrided*/ false);
1736 case Intrinsic::riscv_vlseg2:
1737 case Intrinsic::riscv_vlseg3:
1738 case Intrinsic::riscv_vlseg4:
1739 case Intrinsic::riscv_vlseg5:
1740 case Intrinsic::riscv_vlseg6:
1741 case Intrinsic::riscv_vlseg7:
1742 case Intrinsic::riscv_vlseg8:
1743 case Intrinsic::riscv_vlseg2ff:
1744 case Intrinsic::riscv_vlseg3ff:
1745 case Intrinsic::riscv_vlseg4ff:
1746 case Intrinsic::riscv_vlseg5ff:
1747 case Intrinsic::riscv_vlseg6ff:
1748 case Intrinsic::riscv_vlseg7ff:
1749 case Intrinsic::riscv_vlseg8ff:
1750 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1751 /*IsStore*/ false,
1752 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1753 case Intrinsic::riscv_vlseg2_mask:
1754 case Intrinsic::riscv_vlseg3_mask:
1755 case Intrinsic::riscv_vlseg4_mask:
1756 case Intrinsic::riscv_vlseg5_mask:
1757 case Intrinsic::riscv_vlseg6_mask:
1758 case Intrinsic::riscv_vlseg7_mask:
1759 case Intrinsic::riscv_vlseg8_mask:
1760 case Intrinsic::riscv_vlseg2ff_mask:
1761 case Intrinsic::riscv_vlseg3ff_mask:
1762 case Intrinsic::riscv_vlseg4ff_mask:
1763 case Intrinsic::riscv_vlseg5ff_mask:
1764 case Intrinsic::riscv_vlseg6ff_mask:
1765 case Intrinsic::riscv_vlseg7ff_mask:
1766 case Intrinsic::riscv_vlseg8ff_mask:
1767 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1768 /*IsStore*/ false,
1769 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1770 case Intrinsic::riscv_vlsseg2:
1771 case Intrinsic::riscv_vlsseg3:
1772 case Intrinsic::riscv_vlsseg4:
1773 case Intrinsic::riscv_vlsseg5:
1774 case Intrinsic::riscv_vlsseg6:
1775 case Intrinsic::riscv_vlsseg7:
1776 case Intrinsic::riscv_vlsseg8:
1777 case Intrinsic::riscv_vloxseg2:
1778 case Intrinsic::riscv_vloxseg3:
1779 case Intrinsic::riscv_vloxseg4:
1780 case Intrinsic::riscv_vloxseg5:
1781 case Intrinsic::riscv_vloxseg6:
1782 case Intrinsic::riscv_vloxseg7:
1783 case Intrinsic::riscv_vloxseg8:
1784 case Intrinsic::riscv_vluxseg2:
1785 case Intrinsic::riscv_vluxseg3:
1786 case Intrinsic::riscv_vluxseg4:
1787 case Intrinsic::riscv_vluxseg5:
1788 case Intrinsic::riscv_vluxseg6:
1789 case Intrinsic::riscv_vluxseg7:
1790 case Intrinsic::riscv_vluxseg8:
1791 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1792 /*IsStore*/ false,
1793 /*IsUnitStrided*/ false);
1794 case Intrinsic::riscv_vlsseg2_mask:
1795 case Intrinsic::riscv_vlsseg3_mask:
1796 case Intrinsic::riscv_vlsseg4_mask:
1797 case Intrinsic::riscv_vlsseg5_mask:
1798 case Intrinsic::riscv_vlsseg6_mask:
1799 case Intrinsic::riscv_vlsseg7_mask:
1800 case Intrinsic::riscv_vlsseg8_mask:
1801 case Intrinsic::riscv_vloxseg2_mask:
1802 case Intrinsic::riscv_vloxseg3_mask:
1803 case Intrinsic::riscv_vloxseg4_mask:
1804 case Intrinsic::riscv_vloxseg5_mask:
1805 case Intrinsic::riscv_vloxseg6_mask:
1806 case Intrinsic::riscv_vloxseg7_mask:
1807 case Intrinsic::riscv_vloxseg8_mask:
1808 case Intrinsic::riscv_vluxseg2_mask:
1809 case Intrinsic::riscv_vluxseg3_mask:
1810 case Intrinsic::riscv_vluxseg4_mask:
1811 case Intrinsic::riscv_vluxseg5_mask:
1812 case Intrinsic::riscv_vluxseg6_mask:
1813 case Intrinsic::riscv_vluxseg7_mask:
1814 case Intrinsic::riscv_vluxseg8_mask:
1815 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1816 /*IsStore*/ false,
1817 /*IsUnitStrided*/ false);
1818 case Intrinsic::riscv_vsseg2:
1819 case Intrinsic::riscv_vsseg3:
1820 case Intrinsic::riscv_vsseg4:
1821 case Intrinsic::riscv_vsseg5:
1822 case Intrinsic::riscv_vsseg6:
1823 case Intrinsic::riscv_vsseg7:
1824 case Intrinsic::riscv_vsseg8:
1825 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1826 /*IsStore*/ true,
1827 /*IsUnitStrided*/ false);
1828 case Intrinsic::riscv_vsseg2_mask:
1829 case Intrinsic::riscv_vsseg3_mask:
1830 case Intrinsic::riscv_vsseg4_mask:
1831 case Intrinsic::riscv_vsseg5_mask:
1832 case Intrinsic::riscv_vsseg6_mask:
1833 case Intrinsic::riscv_vsseg7_mask:
1834 case Intrinsic::riscv_vsseg8_mask:
1835 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1836 /*IsStore*/ true,
1837 /*IsUnitStrided*/ false);
1838 case Intrinsic::riscv_vssseg2:
1839 case Intrinsic::riscv_vssseg3:
1840 case Intrinsic::riscv_vssseg4:
1841 case Intrinsic::riscv_vssseg5:
1842 case Intrinsic::riscv_vssseg6:
1843 case Intrinsic::riscv_vssseg7:
1844 case Intrinsic::riscv_vssseg8:
1845 case Intrinsic::riscv_vsoxseg2:
1846 case Intrinsic::riscv_vsoxseg3:
1847 case Intrinsic::riscv_vsoxseg4:
1848 case Intrinsic::riscv_vsoxseg5:
1849 case Intrinsic::riscv_vsoxseg6:
1850 case Intrinsic::riscv_vsoxseg7:
1851 case Intrinsic::riscv_vsoxseg8:
1852 case Intrinsic::riscv_vsuxseg2:
1853 case Intrinsic::riscv_vsuxseg3:
1854 case Intrinsic::riscv_vsuxseg4:
1855 case Intrinsic::riscv_vsuxseg5:
1856 case Intrinsic::riscv_vsuxseg6:
1857 case Intrinsic::riscv_vsuxseg7:
1858 case Intrinsic::riscv_vsuxseg8:
1859 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1860 /*IsStore*/ true,
1861 /*IsUnitStrided*/ false);
1862 case Intrinsic::riscv_vssseg2_mask:
1863 case Intrinsic::riscv_vssseg3_mask:
1864 case Intrinsic::riscv_vssseg4_mask:
1865 case Intrinsic::riscv_vssseg5_mask:
1866 case Intrinsic::riscv_vssseg6_mask:
1867 case Intrinsic::riscv_vssseg7_mask:
1868 case Intrinsic::riscv_vssseg8_mask:
1869 case Intrinsic::riscv_vsoxseg2_mask:
1870 case Intrinsic::riscv_vsoxseg3_mask:
1871 case Intrinsic::riscv_vsoxseg4_mask:
1872 case Intrinsic::riscv_vsoxseg5_mask:
1873 case Intrinsic::riscv_vsoxseg6_mask:
1874 case Intrinsic::riscv_vsoxseg7_mask:
1875 case Intrinsic::riscv_vsoxseg8_mask:
1876 case Intrinsic::riscv_vsuxseg2_mask:
1877 case Intrinsic::riscv_vsuxseg3_mask:
1878 case Intrinsic::riscv_vsuxseg4_mask:
1879 case Intrinsic::riscv_vsuxseg5_mask:
1880 case Intrinsic::riscv_vsuxseg6_mask:
1881 case Intrinsic::riscv_vsuxseg7_mask:
1882 case Intrinsic::riscv_vsuxseg8_mask:
1883 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1884 /*IsStore*/ true,
1885 /*IsUnitStrided*/ false);
1886 }
1887}
1888
1890 const AddrMode &AM, Type *Ty,
1891 unsigned AS,
1892 Instruction *I) const {
1893 // No global is ever allowed as a base.
1894 if (AM.BaseGV)
1895 return false;
1896
1897 // None of our addressing modes allows a scalable offset
1898 if (AM.ScalableOffset)
1899 return false;
1900
1901 // RVV instructions only support register addressing.
1902 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1903 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1904
1905 // Require a 12-bit signed offset.
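 // That is, BaseOffs must fit the I-type immediate range [-2048, 2047].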
1906 if (!isInt<12>(AM.BaseOffs))
1907 return false;
1908
1909 switch (AM.Scale) {
1910 case 0: // "r+i" or just "i", depending on HasBaseReg.
1911 break;
1912 case 1:
1913 if (!AM.HasBaseReg) // allow "r+i".
1914 break;
1915 return false; // disallow "r+r" or "r+r+i".
1916 default:
1917 return false;
1918 }
1919
1920 return true;
1921}
1922
1924 return isInt<12>(Imm);
1925}
1926
1928 return isInt<12>(Imm);
1929}
1930
1931// On RV32, 64-bit integers are split into their high and low parts and held
1932// in two different registers, so the trunc is free since the low register can
1933// just be used.
1934// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1935// isTruncateFree?
1937 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1938 return false;
1939 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1940 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1941 return (SrcBits == 64 && DestBits == 32);
1942}
1943
1945 // We consider i64->i32 free on RV64 since we have good selection of W
1946 // instructions that make promoting operations back to i64 free in many cases.
1947 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1948 !DstVT.isInteger())
1949 return false;
1950 unsigned SrcBits = SrcVT.getSizeInBits();
1951 unsigned DestBits = DstVT.getSizeInBits();
1952 return (SrcBits == 64 && DestBits == 32);
1953}
1954
1956 EVT SrcVT = Val.getValueType();
 1957 // Truncates of a vector srl/sra are free since they fold into vnsrl/vnsra.
1958 if (Subtarget.hasVInstructions() &&
1959 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1960 SrcVT.isVector() && VT2.isVector()) {
1961 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1962 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1963 if (SrcBits == DestBits * 2) {
1964 return true;
1965 }
1966 }
1967 return TargetLowering::isTruncateFree(Val, VT2);
1968}
1969
1971 // Zexts are free if they can be combined with a load.
1972 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1973 // poorly with type legalization of compares preferring sext.
1974 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1975 EVT MemVT = LD->getMemoryVT();
1976 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1977 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1978 LD->getExtensionType() == ISD::ZEXTLOAD))
1979 return true;
1980 }
1981
1982 return TargetLowering::isZExtFree(Val, VT2);
1983}
1984
1986 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1987}
1988
1990 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1991}
1992
1994 return Subtarget.hasStdExtZbb() ||
1995 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1996}
1997
1999 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2004 const Instruction &AndI) const {
2005 // We expect to be able to match a bit extraction instruction if the Zbs
2006 // extension is supported and the mask is a power of two. However, we
2007 // conservatively return false if the mask would fit in an ANDI instruction,
2008 // on the basis that it's possible the sinking+duplication of the AND in
2009 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2010 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2011 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2012 return false;
2013 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2014 if (!Mask)
2015 return false;
2016 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2017}
2018
2020 EVT VT = Y.getValueType();
2021
2022 // FIXME: Support vectors once we have tests.
2023 if (VT.isVector())
2024 return false;
2025
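 // Zbb and Zbkb both provide ANDN. Non-opaque constant masks are excluded,
 // since their complement can simply be folded into a plain AND/ANDI.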
2026 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2027 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2028}
2029
2031 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2032 if (Subtarget.hasStdExtZbs())
2033 return X.getValueType().isScalarInteger();
2034 auto *C = dyn_cast<ConstantSDNode>(Y);
2035 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2036 if (Subtarget.hasVendorXTHeadBs())
2037 return C != nullptr;
2038 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
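 // Bit positions 0-10 produce masks that fit ANDI's 12-bit signed immediate;
 // 1 << 11 (2048) would not.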
2039 return C && C->getAPIntValue().ule(10);
2040}
2041
2043 EVT VT) const {
2044 // Only enable for rvv.
2045 if (!VT.isVector() || !Subtarget.hasVInstructions())
2046 return false;
2047
2048 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2049 return false;
2050
2051 return true;
2052}
2053
2055 Type *Ty) const {
2056 assert(Ty->isIntegerTy());
2057
2058 unsigned BitSize = Ty->getIntegerBitWidth();
2059 if (BitSize > Subtarget.getXLen())
2060 return false;
2061
2062 // Fast path, assume 32-bit immediates are cheap.
2063 int64_t Val = Imm.getSExtValue();
2064 if (isInt<32>(Val))
2065 return true;
2066
 2067 // A constant pool entry may be more aligned than the load we're trying to
2068 // replace. If we don't support unaligned scalar mem, prefer the constant
2069 // pool.
2070 // TODO: Can the caller pass down the alignment?
2071 if (!Subtarget.enableUnalignedScalarMem())
2072 return true;
2073
2074 // Prefer to keep the load if it would require many instructions.
2075 // This uses the same threshold we use for constant pools but doesn't
2076 // check useConstantPoolForLargeInts.
2077 // TODO: Should we keep the load only when we're definitely going to emit a
2078 // constant pool?
2079
2081 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2082}
2083
2087 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2088 SelectionDAG &DAG) const {
2089 // One interesting pattern that we'd want to form is 'bit extract':
2090 // ((1 >> Y) & 1) ==/!= 0
2091 // But we also need to be careful not to try to reverse that fold.
2092
2093 // Is this '((1 >> Y) & 1)'?
2094 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2095 return false; // Keep the 'bit extract' pattern.
2096
2097 // Will this be '((1 >> Y) & 1)' after the transform?
2098 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2099 return true; // Do form the 'bit extract' pattern.
2100
2101 // If 'X' is a constant, and we transform, then we will immediately
2102 // try to undo the fold, thus causing endless combine loop.
2103 // So only do the transform if X is not a constant. This matches the default
2104 // implementation of this function.
2105 return !XC;
2106}
2107
2109 unsigned Opc = VecOp.getOpcode();
2110
2111 // Assume target opcodes can't be scalarized.
2112 // TODO - do we have any exceptions?
2113 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2114 return false;
2115
2116 // If the vector op is not supported, try to convert to scalar.
2117 EVT VecVT = VecOp.getValueType();
2118 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2119 return true;
2120
2121 // If the vector op is supported, but the scalar op is not, the transform may
2122 // not be worthwhile.
 2123 // Permit a vector binary operation to be converted to a scalar binary
 2124 // operation which is custom lowered with an illegal type.
2125 EVT ScalarVT = VecVT.getScalarType();
2126 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2127 isOperationCustom(Opc, ScalarVT);
2128}
2129
2131 const GlobalAddressSDNode *GA) const {
2132 // In order to maximise the opportunity for common subexpression elimination,
2133 // keep a separate ADD node for the global address offset instead of folding
2134 // it in the global address node. Later peephole optimisations may choose to
2135 // fold it back in when profitable.
2136 return false;
2137}
2138
 2139// Returns 0-31 if the fli instruction is available for the type and this is a
 2140// legal FP immediate for the type. Returns -1 otherwise.
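// (fli materializes one of 32 canonical constants; getLoadFPImm returns the
// 5-bit encoding for Imm, or -1 if Imm is not one of them.)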
2142 if (!Subtarget.hasStdExtZfa())
2143 return -1;
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return -1;
2157
2158 return RISCVLoadFPImm::getLoadFPImm(Imm);
2159}
2160
2162 bool ForCodeSize) const {
2163 bool IsLegalVT = false;
2164 if (VT == MVT::f16)
2165 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2166 else if (VT == MVT::f32)
2167 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2168 else if (VT == MVT::f64)
2169 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2170 else if (VT == MVT::bf16)
2171 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2172
2173 if (!IsLegalVT)
2174 return false;
2175
2176 if (getLegalZfaFPImm(Imm, VT) >= 0)
2177 return true;
2178
2179 // Cannot create a 64 bit floating-point immediate value for rv32.
2180 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2181 // td can handle +0.0 or -0.0 already.
2182 // -0.0 can be created by fmv + fneg.
2183 return Imm.isZero();
2184 }
2185
2186 // Special case: fmv + fneg
2187 if (Imm.isNegZero())
2188 return true;
2189
2190 // Building an integer and then converting requires a fmv at the end of
2191 // the integer sequence. The fmv is not required for Zfinx.
2192 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2193 const int Cost =
2194 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2195 Subtarget.getXLen(), Subtarget);
2196 return Cost <= FPImmCost;
2197}
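// Illustrative examples (assumptions, not from the original source): with
// Zfa enabled, fli-table constants such as 1.0, 0.5 or 2.0 are accepted via
// getLegalZfaFPImm(); -0.0 is accepted via fmv + fneg; anything else is
// accepted only if materializing its bit pattern as an integer (plus the
// trailing fmv, when not using Zfinx) stays within FPImmCost instructions.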
2198
2199// TODO: This is very conservative.
2201 unsigned Index) const {
2203 return false;
2204
2205 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2206 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2207 return false;
2208
2209 EVT EltVT = ResVT.getVectorElementType();
2210 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2211
2212 // The smallest type we can slide is i8.
2213 // TODO: We can extract index 0 from a mask vector without a slide.
2214 if (EltVT == MVT::i1)
2215 return false;
2216
2217 unsigned ResElts = ResVT.getVectorNumElements();
2218 unsigned SrcElts = SrcVT.getVectorNumElements();
2219
2220 unsigned MinVLen = Subtarget.getRealMinVLen();
2221 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2222
2223 // If we're extracting only data from the first VLEN bits of the source
2224 // then we can always do this with an m1 vslidedown.vx. Restricting the
2225 // Index ensures we can use a vslidedown.vi.
2226 // TODO: We can generalize this when the exact VLEN is known.
2227 if (Index + ResElts <= MinVLMAX && Index < 31)
2228 return true;
2229
2230 // Conservatively only handle extracting half of a vector.
2231 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2232 // the upper half of a vector until we have more test coverage.
2233 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2234 // a cheap extract. However, this case is important in practice for
2235 // shuffled extracts of longer vectors. How should this be resolved?
2236 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2237}
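// Illustrative examples (assuming VLEN >= 128, so MinVLMAX == 4 for i32
// elements; not part of the original source):
//   extract v2i32 from v8i32 at index 2: 2 + 2 <= 4 and 2 < 31, so a single
//     m1 vslidedown.vi suffices and the extract is considered cheap.
//   extract v4i32 from v8i32 at index 4: exactly the upper half, so cheap.
//   extract v2i32 from v8i32 at index 5: neither case applies, so it is not
//     considered cheap here.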
2238
2241 EVT VT) const {
2242 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2243 // We might still end up using a GPR but that will be decided based on ABI.
2244 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2245 !Subtarget.hasStdExtZfhminOrZhinxmin())
2246 return MVT::f32;
2247
2248 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2249
2250 return PartVT;
2251}
2252
2253unsigned
2255 std::optional<MVT> RegisterVT) const {
2256 // Pair inline assembly operand
2257 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2258 *RegisterVT == MVT::Untyped)
2259 return 1;
2260
2261 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2262}
2263
2266 EVT VT) const {
2267 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2268 // We might still end up using a GPR but that will be decided based on ABI.
2269 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2270 !Subtarget.hasStdExtZfhminOrZhinxmin())
2271 return 1;
2272
2274}
2275
2277 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2278 unsigned &NumIntermediates, MVT &RegisterVT) const {
2280 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2281
2282 return NumRegs;
2283}
2284
2285// Changes the condition code and swaps operands if necessary, so the SetCC
2286// operation matches one of the comparisons supported directly by branches
2287// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2288// with 1/-1.
2289static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2290 ISD::CondCode &CC, SelectionDAG &DAG) {
2291 // If this is a single bit test that can't be handled by ANDI, shift the
2292 // bit to be tested to the MSB and perform a signed compare with 0.
2293 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2294 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2295 isa<ConstantSDNode>(LHS.getOperand(1))) {
2296 uint64_t Mask = LHS.getConstantOperandVal(1);
2297 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2298 unsigned ShAmt = 0;
2299 if (isPowerOf2_64(Mask)) {
2300 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2301 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2302 } else {
2303 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2304 }
2305
2306 LHS = LHS.getOperand(0);
2307 if (ShAmt != 0)
2308 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2309 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2310 return;
2311 }
2312 }
2313
2314 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2315 int64_t C = RHSC->getSExtValue();
2316 switch (CC) {
2317 default: break;
2318 case ISD::SETGT:
2319 // Convert X > -1 to X >= 0.
2320 if (C == -1) {
2321 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2322 CC = ISD::SETGE;
2323 return;
2324 }
2325 break;
2326 case ISD::SETLT:
2327 // Convert X < 1 to 0 >= X.
2328 if (C == 1) {
2329 RHS = LHS;
2330 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2331 CC = ISD::SETGE;
2332 return;
2333 }
2334 break;
2335 }
2336 }
2337
2338 switch (CC) {
2339 default:
2340 break;
2341 case ISD::SETGT:
2342 case ISD::SETLE:
2343 case ISD::SETUGT:
2344 case ISD::SETULE:
2346 std::swap(LHS, RHS);
2347 break;
2348 }
2349}
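// Illustrative examples of the rewrites above (assumptions about the final
// branches, not from the original source):
//   (setgt X, -1) becomes (setge X, 0), which maps onto bge X, zero.
//   (setlt X, 1)  becomes (setge 0, X), i.e. bge zero, X.
//   (seteq (and X, 0x800), 0) on RV64: 0x800 cannot be an ANDI immediate, so
//     X is shifted left by 52 to move bit 11 into the sign bit and the test
//     becomes a signed compare with zero (bgez).
//   (setugt X, Y) has no direct branch, so the operands are swapped to form
//     (setult Y, X), matching bltu.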
2350
2352 if (VT.isRISCVVectorTuple()) {
2353 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2354 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2355 return RISCVII::LMUL_F8;
2356 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2357 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2358 return RISCVII::LMUL_F4;
2359 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2360 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2361 return RISCVII::LMUL_F2;
2362 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2363 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2364 return RISCVII::LMUL_1;
2365 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2366 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2367 return RISCVII::LMUL_2;
2368 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2369 return RISCVII::LMUL_4;
2370 llvm_unreachable("Invalid vector tuple type LMUL.");
2371 }
2372
2373 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2374 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2375 if (VT.getVectorElementType() == MVT::i1)
2376 KnownSize *= 8;
2377
2378 switch (KnownSize) {
2379 default:
2380 llvm_unreachable("Invalid LMUL.");
2381 case 8:
2383 case 16:
2385 case 32:
2387 case 64:
2389 case 128:
2391 case 256:
2393 case 512:
2395 }
2396}
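// Illustrative mapping (assuming RISCV::RVVBitsPerBlock == 64; not part of
// the original source):
//   nxv8i8 / nxv4i16 / nxv2i32 / nxv1i64    -> 64 known-min bits -> LMUL_1
//   nxv16i8 / nxv4i32                       -> 128               -> LMUL_2
//   nxv1i8                                  -> 8                 -> LMUL_F8
//   nxv8i1 (i1, so KnownSize is scaled by 8) -> 64               -> LMUL_1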
2397
2399 switch (LMul) {
2400 default:
2401 llvm_unreachable("Invalid LMUL.");
2406 return RISCV::VRRegClassID;
2408 return RISCV::VRM2RegClassID;
2410 return RISCV::VRM4RegClassID;
2412 return RISCV::VRM8RegClassID;
2413 }
2414}
2415
2416unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2417 RISCVII::VLMUL LMUL = getLMUL(VT);
2418 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2419 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2420 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2421 LMUL == RISCVII::VLMUL::LMUL_1) {
2422 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm1_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2427 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm2_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2432 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm4_0 + Index;
2435 }
2436 llvm_unreachable("Invalid vector type.");
2437}
2438
2440 if (VT.isRISCVVectorTuple()) {
2441 unsigned NF = VT.getRISCVVectorTupleNumFields();
2442 unsigned RegsPerField =
2443 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2444 (NF * RISCV::RVVBitsPerBlock));
2445 switch (RegsPerField) {
2446 case 1:
2447 if (NF == 2)
2448 return RISCV::VRN2M1RegClassID;
2449 if (NF == 3)
2450 return RISCV::VRN3M1RegClassID;
2451 if (NF == 4)
2452 return RISCV::VRN4M1RegClassID;
2453 if (NF == 5)
2454 return RISCV::VRN5M1RegClassID;
2455 if (NF == 6)
2456 return RISCV::VRN6M1RegClassID;
2457 if (NF == 7)
2458 return RISCV::VRN7M1RegClassID;
2459 if (NF == 8)
2460 return RISCV::VRN8M1RegClassID;
2461 break;
2462 case 2:
2463 if (NF == 2)
2464 return RISCV::VRN2M2RegClassID;
2465 if (NF == 3)
2466 return RISCV::VRN3M2RegClassID;
2467 if (NF == 4)
2468 return RISCV::VRN4M2RegClassID;
2469 break;
2470 case 4:
2471 assert(NF == 2);
2472 return RISCV::VRN2M4RegClassID;
2473 default:
2474 break;
2475 }
2476 llvm_unreachable("Invalid vector tuple type RegClass.");
2477 }
2478
2479 if (VT.getVectorElementType() == MVT::i1)
2480 return RISCV::VRRegClassID;
2481 return getRegClassIDForLMUL(getLMUL(VT));
2482}
2483
2484// Attempt to decompose a subvector insert/extract between VecVT and
2485// SubVecVT via subregister indices. Returns the subregister index that
2486// can perform the subvector insert/extract with the given element index, as
2487// well as the index corresponding to any leftover subvectors that must be
2488// further inserted/extracted within the register class for SubVecVT.
2489std::pair<unsigned, unsigned>
2491 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2492 const RISCVRegisterInfo *TRI) {
2493 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2494 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2495 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2496 "Register classes not ordered");
2497 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2498 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2499
2500 // If VecVT is a vector tuple type, either it's a tuple type with the same
2501 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2502 if (VecVT.isRISCVVectorTuple()) {
2503 if (VecRegClassID == SubRegClassID)
2504 return {RISCV::NoSubRegister, 0};
2505
2506 assert(SubVecVT.isScalableVector() &&
2507 "Only allow scalable vector subvector.");
2508 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2509 "Invalid vector tuple insert/extract for vector and subvector with "
2510 "different LMUL.");
2511 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2512 }
2513
2514 // Try to compose a subregister index that takes us from the incoming
2515 // LMUL>1 register class down to the outgoing one. At each step we halve
2516 // the LMUL:
2517 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2518 // Note that this is not guaranteed to find a subregister index, such as
2519 // when we are extracting from one VR type to another.
2520 unsigned SubRegIdx = RISCV::NoSubRegister;
2521 for (const unsigned RCID :
2522 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2523 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2524 VecVT = VecVT.getHalfNumVectorElementsVT();
2525 bool IsHi =
2526 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2527 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2528 getSubregIndexByMVT(VecVT, IsHi));
2529 if (IsHi)
2530 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2531 }
2532 return {SubRegIdx, InsertExtractIdx};
2533}
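// Worked example of the loop above (illustrative, expanding the
// nxv16i32@12 -> nxv2i32 case from the earlier comment):
//   VecVT = nxv16i32 (VRM8), SubVecVT = nxv2i32 (VR), Idx = 12.
//   VRM4 step: halve to nxv8i32; 12 >= 8, take the high half (sub_vrm4_1),
//              Idx becomes 4.
//   VRM2 step: halve to nxv4i32; 4 >= 4, take the high half (sub_vrm2_1),
//              Idx becomes 0.
//   VR step:   halve to nxv2i32; 0 < 2, take the low half (sub_vrm1_0).
//   Result: {sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0, 0}.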
2534
2535// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2536// stores for those types.
2537bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2538 return !Subtarget.useRVVForFixedLengthVectors() ||
2539 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2540}
2541
2543 if (!ScalarTy.isSimple())
2544 return false;
2545 switch (ScalarTy.getSimpleVT().SimpleTy) {
2546 case MVT::iPTR:
2547 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2548 case MVT::i8:
2549 case MVT::i16:
2550 case MVT::i32:
2551 return true;
2552 case MVT::i64:
2553 return Subtarget.hasVInstructionsI64();
2554 case MVT::f16:
2555 return Subtarget.hasVInstructionsF16Minimal();
2556 case MVT::bf16:
2557 return Subtarget.hasVInstructionsBF16Minimal();
2558 case MVT::f32:
2559 return Subtarget.hasVInstructionsF32();
2560 case MVT::f64:
2561 return Subtarget.hasVInstructionsF64();
2562 default:
2563 return false;
2564 }
2565}
2566
2567
2568unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2569 return NumRepeatedDivisors;
2570}
2571
2573 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2574 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2575 "Unexpected opcode");
2576 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2577 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2579 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2580 if (!II)
2581 return SDValue();
2582 return Op.getOperand(II->VLOperand + 1 + HasChain);
2583}
2584
2586 const RISCVSubtarget &Subtarget) {
2587 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2588 if (!Subtarget.useRVVForFixedLengthVectors())
2589 return false;
2590
2591 // We only support a set of vector types with a consistent maximum fixed size
2592 // across all supported vector element types to avoid legalization issues.
2593 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2594 // fixed-length vector type we support is 1024 bytes.
2595 if (VT.getFixedSizeInBits() > 1024 * 8)
2596 return false;
2597
2598 unsigned MinVLen = Subtarget.getRealMinVLen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601
2602 // Don't use RVV for vectors we cannot scalarize if required.
2603 switch (EltVT.SimpleTy) {
2604 // i1 is supported but has different rules.
2605 default:
2606 return false;
2607 case MVT::i1:
2608 // Masks can only use a single register.
2609 if (VT.getVectorNumElements() > MinVLen)
2610 return false;
2611 MinVLen /= 8;
2612 break;
2613 case MVT::i8:
2614 case MVT::i16:
2615 case MVT::i32:
2616 break;
2617 case MVT::i64:
2618 if (!Subtarget.hasVInstructionsI64())
2619 return false;
2620 break;
2621 case MVT::f16:
2622 if (!Subtarget.hasVInstructionsF16Minimal())
2623 return false;
2624 break;
2625 case MVT::bf16:
2626 if (!Subtarget.hasVInstructionsBF16Minimal())
2627 return false;
2628 break;
2629 case MVT::f32:
2630 if (!Subtarget.hasVInstructionsF32())
2631 return false;
2632 break;
2633 case MVT::f64:
2634 if (!Subtarget.hasVInstructionsF64())
2635 return false;
2636 break;
2637 }
2638
2639 // Reject elements larger than ELEN.
2640 if (EltVT.getSizeInBits() > Subtarget.getELen())
2641 return false;
2642
2643 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2644 // Don't use RVV for types that don't fit.
2645 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2646 return false;
2647
2648 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2649 // the base fixed length RVV support in place.
2650 if (!VT.isPow2VectorType())
2651 return false;
2652
2653 return true;
2654}
2655
2656bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2657 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2658}
2659
2660// Return the largest legal scalable vector type that matches VT's element type.
2662 const RISCVSubtarget &Subtarget) {
2663 // This may be called before legal types are set up.
2664 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2665 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2666 "Expected legal fixed length vector!");
2667
2668 unsigned MinVLen = Subtarget.getRealMinVLen();
2669 unsigned MaxELen = Subtarget.getELen();
2670
2671 MVT EltVT = VT.getVectorElementType();
2672 switch (EltVT.SimpleTy) {
2673 default:
2674 llvm_unreachable("unexpected element type for RVV container");
2675 case MVT::i1:
2676 case MVT::i8:
2677 case MVT::i16:
2678 case MVT::i32:
2679 case MVT::i64:
2680 case MVT::bf16:
2681 case MVT::f16:
2682 case MVT::f32:
2683 case MVT::f64: {
2684 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2685 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2686 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2687 unsigned NumElts =
2689 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2690 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2691 return MVT::getScalableVectorVT(EltVT, NumElts);
2692 }
2693 }
2694}
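// Illustrative container choices (assuming VLEN == 128 and ELEN == 64; these
// are assumptions, not part of the original source):
//   v4i32  -> nxv2i32 (LMUL=1), v16i8 -> nxv8i8 (LMUL=1),
//   v2i8   -> nxv1i8 (fractional LMUL, clamped by the 8/ELEN minimum),
//   v32i32 -> nxv16i32 (LMUL=8, if the fixed-length LMUL limit allows it).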
2695
2697 const RISCVSubtarget &Subtarget) {
2699 Subtarget);
2700}
2701
2703 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2704}
2705
2706// Grow V to consume an entire RVV register.
2708 const RISCVSubtarget &Subtarget) {
2709 assert(VT.isScalableVector() &&
2710 "Expected to convert into a scalable vector!");
2711 assert(V.getValueType().isFixedLengthVector() &&
2712 "Expected a fixed length vector operand!");
2713 SDLoc DL(V);
2714 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2715 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2716}
2717
2718// Shrink V so it's just big enough to maintain a VT's worth of data.
2720 const RISCVSubtarget &Subtarget) {
2722 "Expected to convert into a fixed length vector!");
2723 assert(V.getValueType().isScalableVector() &&
2724 "Expected a scalable vector operand!");
2725 SDLoc DL(V);
2726 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2727 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2728}
2729
2730/// Return the mask type suitable for masking the provided
2731/// vector type. This is simply an i1 element type vector of the same
2732/// (possibly scalable) length.
2733static MVT getMaskTypeFor(MVT VecVT) {
2734 assert(VecVT.isVector());
2736 return MVT::getVectorVT(MVT::i1, EC);
2737}
2738
2739/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2740/// vector length VL.
2741static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2742 SelectionDAG &DAG) {
2743 MVT MaskVT = getMaskTypeFor(VecVT);
2744 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2745}
2746
2747static std::pair<SDValue, SDValue>
2749 const RISCVSubtarget &Subtarget) {
2750 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2751 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2752 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2753 return {Mask, VL};
2754}
2755
2756static std::pair<SDValue, SDValue>
2757getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2758 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2761 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2762 return {Mask, VL};
2763}
2764
2765// Gets the two common "VL" operands: an all-ones mask and the vector length.
2766// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2767// fixed-length, then ContainerVT is the scalable container type it is lowered
2768// in; if VecVT is scalable, then ContainerVT should be the same as VecVT.
2769static std::pair<SDValue, SDValue>
2770getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2771 const RISCVSubtarget &Subtarget) {
2772 if (VecVT.isFixedLengthVector())
2773 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2774 Subtarget);
2775 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2776 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2777}
2778
2780 SelectionDAG &DAG) const {
2781 assert(VecVT.isScalableVector() && "Expected scalable vector");
2782 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2783 VecVT.getVectorElementCount());
2784}
2785
2786std::pair<unsigned, unsigned>
2788 const RISCVSubtarget &Subtarget) {
2789 assert(VecVT.isScalableVector() && "Expected scalable vector");
2790
2791 unsigned EltSize = VecVT.getScalarSizeInBits();
2792 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2793
2794 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2795 unsigned MaxVLMAX =
2796 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2797
2798 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2799 unsigned MinVLMAX =
2800 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2801
2802 return std::make_pair(MinVLMAX, MaxVLMAX);
2803}
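// Illustrative example (assumptions, not from the original source): for
// VecVT = nxv4i32 (SEW=32, LMUL=2) on a subtarget with Zvl128b whose real
// VLEN is known to be at most 512, VLMAX = LMUL * VLEN / SEW gives
// MinVLMAX = 2 * 128 / 32 = 8 and MaxVLMAX = 2 * 512 / 32 = 32.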
2804
2805// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2806// cases of either are (currently) supported. This can get us into an infinite loop
2807// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2808// as a ..., etc.
2809// Until either (or both) of these can reliably lower any node, reporting that
2810// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2811// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2812// which is not desirable.
2814 EVT VT, unsigned DefinedValues) const {
2815 return false;
2816}
2817
2819 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2820 // implementation-defined.
2821 if (!VT.isVector())
2823 unsigned DLenFactor = Subtarget.getDLenFactor();
2824 unsigned Cost;
2825 if (VT.isScalableVector()) {
2826 unsigned LMul;
2827 bool Fractional;
2828 std::tie(LMul, Fractional) =
2830 if (Fractional)
2831 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2832 else
2833 Cost = (LMul * DLenFactor);
2834 } else {
2835 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2836 }
2837 return Cost;
2838}
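// Illustrative costs from the heuristic above (assuming DLEN == VLEN/2, i.e.
// DLenFactor == 2; these are assumptions, not measured numbers):
//   nxv1i32 (fractional LMUL=1/2): cost = DLenFactor / LMul = 1
//   nxv2i32 (LMUL=1):              cost = 1 * 2 = 2
//   nxv8i32 (LMUL=4):              cost = 4 * 2 = 8
// A vrgather.vv on the LMUL=4 type would then be modeled as 8 * 8 = 64 by
// getVRGatherVVCost below.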
2839
2840
2841/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2842/// is generally quadratic in the number of vregs implied by LMUL. Note that
2843/// the operands (index and possibly mask) are handled separately.
2845 return getLMULCost(VT) * getLMULCost(VT);
2846}
2847
2848/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2849/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2855/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2856/// for the type VT. (This does not cover the vslide1up or vslide1down
2857/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2860 return getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2868 return getLMULCost(VT);
2869}
2870
2872 const RISCVSubtarget &Subtarget) {
2873 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2874 // bf16 conversions are always promoted to f32.
2875 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2876 Op.getValueType() == MVT::bf16) {
2877 bool IsStrict = Op->isStrictFPOpcode();
2878
2879 SDLoc DL(Op);
2880 if (IsStrict) {
2881 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2882 {Op.getOperand(0), Op.getOperand(1)});
2883 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2884 {Op.getValueType(), MVT::Other},
2885 {Val.getValue(1), Val.getValue(0),
2886 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2887 }
2888 return DAG.getNode(
2889 ISD::FP_ROUND, DL, Op.getValueType(),
2890 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2892 }
2893
2894 // Other operations are legal.
2895 return Op;
2896}
2897
2899 const RISCVSubtarget &Subtarget) {
2900 // RISC-V FP-to-int conversions saturate to the destination register size, but
2901 // don't produce 0 for nan. We can use a conversion instruction and fix the
2902 // nan case with a compare and a select.
2903 SDValue Src = Op.getOperand(0);
2904
2905 MVT DstVT = Op.getSimpleValueType();
2906 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2907
2908 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2909
2910 if (!DstVT.isVector()) {
2911 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2912 // the result.
2913 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2914 Src.getValueType() == MVT::bf16) {
2915 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2916 }
2917
2918 unsigned Opc;
2919 if (SatVT == DstVT)
2920 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2921 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2923 else
2924 return SDValue();
2925 // FIXME: Support other SatVTs by clamping before or after the conversion.
2926
2927 SDLoc DL(Op);
2928 SDValue FpToInt = DAG.getNode(
2929 Opc, DL, DstVT, Src,
2931
2932 if (Opc == RISCVISD::FCVT_WU_RV64)
2933 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2934
2935 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2936 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2938 }
2939
2940 // Vectors.
2941
2942 MVT DstEltVT = DstVT.getVectorElementType();
2943 MVT SrcVT = Src.getSimpleValueType();
2944 MVT SrcEltVT = SrcVT.getVectorElementType();
2945 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2946 unsigned DstEltSize = DstEltVT.getSizeInBits();
2947
2948 // Only handle saturating to the destination type.
2949 if (SatVT != DstEltVT)
2950 return SDValue();
2951
2952 MVT DstContainerVT = DstVT;
2953 MVT SrcContainerVT = SrcVT;
2954 if (DstVT.isFixedLengthVector()) {
2955 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2956 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2957 assert(DstContainerVT.getVectorElementCount() ==
2958 SrcContainerVT.getVectorElementCount() &&
2959 "Expected same element count");
2960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2961 }
2962
2963 SDLoc DL(Op);
2964
2965 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2966
2967 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2968 {Src, Src, DAG.getCondCode(ISD::SETNE),
2969 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2970
2971 // Need to widen by more than 1 step, promote the FP type, then do a widening
2972 // convert.
2973 if (DstEltSize > (2 * SrcEltSize)) {
2974 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2975 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2976 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2977 }
2978
2979 MVT CvtContainerVT = DstContainerVT;
2980 MVT CvtEltVT = DstEltVT;
2981 if (SrcEltSize > (2 * DstEltSize)) {
2982 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2983 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2984 }
2985
2986 unsigned RVVOpc =
2988 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2989
2990 while (CvtContainerVT != DstContainerVT) {
2991 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2992 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2993 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2994 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2996 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2997 }
2998
2999 SDValue SplatZero = DAG.getNode(
3000 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3001 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3002 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3003 Res, DAG.getUNDEF(DstContainerVT), VL);
3004
3005 if (DstVT.isFixedLengthVector())
3006 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3007
3008 return Res;
3009}
3010
3012 const RISCVSubtarget &Subtarget) {
3013 bool IsStrict = Op->isStrictFPOpcode();
3014 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3015
3016 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3017 // bf16 conversions are always promoted to f32.
3018 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3019 SrcVal.getValueType() == MVT::bf16) {
3020 SDLoc DL(Op);
3021 if (IsStrict) {
3022 SDValue Ext =
3023 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3024 {Op.getOperand(0), SrcVal});
3025 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3026 {Ext.getValue(1), Ext.getValue(0)});
3027 }
3028 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3029 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3030 }
3031
3032 // Other operations are legal.
3033 return Op;
3034}
3035
3037 switch (Opc) {
3038 case ISD::FROUNDEVEN:
3040 case ISD::VP_FROUNDEVEN:
3041 return RISCVFPRndMode::RNE;
3042 case ISD::FTRUNC:
3043 case ISD::STRICT_FTRUNC:
3044 case ISD::VP_FROUNDTOZERO:
3045 return RISCVFPRndMode::RTZ;
3046 case ISD::FFLOOR:
3047 case ISD::STRICT_FFLOOR:
3048 case ISD::VP_FFLOOR:
3049 return RISCVFPRndMode::RDN;
3050 case ISD::FCEIL:
3051 case ISD::STRICT_FCEIL:
3052 case ISD::VP_FCEIL:
3053 return RISCVFPRndMode::RUP;
3054 case ISD::FROUND:
3055 case ISD::STRICT_FROUND:
3056 case ISD::VP_FROUND:
3057 return RISCVFPRndMode::RMM;
3058 case ISD::FRINT:
3059 case ISD::VP_FRINT:
3060 return RISCVFPRndMode::DYN;
3061 }
3062
3064}
3065
3066// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3067// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3068// the integer domain and back. Taking care to avoid converting values that are
3069// nan or already correct.
3070static SDValue
3072 const RISCVSubtarget &Subtarget) {
3073 MVT VT = Op.getSimpleValueType();
3074 assert(VT.isVector() && "Unexpected type");
3075
3076 SDLoc DL(Op);
3077
3078 SDValue Src = Op.getOperand(0);
3079
3080 MVT ContainerVT = VT;
3081 if (VT.isFixedLengthVector()) {
3082 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3083 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3084 }
3085
3086 SDValue Mask, VL;
3087 if (Op->isVPOpcode()) {
3088 Mask = Op.getOperand(1);
3089 if (VT.isFixedLengthVector())
3090 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3091 Subtarget);
3092 VL = Op.getOperand(2);
3093 } else {
3094 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3095 }
3096
3097 // Freeze the source since we are increasing the number of uses.
3098 Src = DAG.getFreeze(Src);
3099
3100 // We do the conversion on the absolute value and fix the sign at the end.
3101 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3102
3103 // Determine the largest integer that can be represented exactly. This and
3104 // values larger than it don't have any fractional bits so don't need to
3105 // be converted.
3106 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3107 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3108 APFloat MaxVal = APFloat(FltSem);
3109 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3110 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3111 SDValue MaxValNode =
3112 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3113 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3114 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3115
3116 // If abs(Src) was larger than MaxVal or nan, keep it.
3117 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3118 Mask =
3119 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3120 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3121 Mask, Mask, VL});
3122
3123 // Truncate to integer and convert back to FP.
3124 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3125 MVT XLenVT = Subtarget.getXLenVT();
3126 SDValue Truncated;
3127
3128 switch (Op.getOpcode()) {
3129 default:
3130 llvm_unreachable("Unexpected opcode");
3131 case ISD::FRINT:
3132 case ISD::VP_FRINT:
3133 case ISD::FCEIL:
3134 case ISD::VP_FCEIL:
3135 case ISD::FFLOOR:
3136 case ISD::VP_FFLOOR:
3137 case ISD::FROUND:
3138 case ISD::FROUNDEVEN:
3139 case ISD::VP_FROUND:
3140 case ISD::VP_FROUNDEVEN:
3141 case ISD::VP_FROUNDTOZERO: {
3144 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3145 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3146 break;
3147 }
3148 case ISD::FTRUNC:
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3150 Mask, VL);
3151 break;
3152 case ISD::FNEARBYINT:
3153 case ISD::VP_FNEARBYINT:
3154 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3155 Mask, VL);
3156 break;
3157 }
3158
3159 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3160 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3161 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3162 Mask, VL);
3163
3164 // Restore the original sign so that -0.0 is preserved.
3165 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3166 Src, Src, Mask, VL);
3167
3168 if (!VT.isFixedLengthVector())
3169 return Truncated;
3170
3171 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3172}
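// Illustrative sketch of the sequence built above for, e.g., ceil on an f64
// vector (informal, not the exact emitted code): the threshold splat is 2^52
// (precision 53, so 2^(P-1)); lanes with |x| >= 2^52 or NaN are masked off;
// the remaining lanes are converted to integer with rounding mode RUP and
// back with SINT_TO_FP; finally copysign restores the original sign so that
// -0.0 is preserved.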
3173
3174// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3175// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN elements of the
3176// source to qNaN and converting the new source to integer and back to FP.
3177static SDValue
3179 const RISCVSubtarget &Subtarget) {
3180 SDLoc DL(Op);
3181 MVT VT = Op.getSimpleValueType();
3182 SDValue Chain = Op.getOperand(0);
3183 SDValue Src = Op.getOperand(1);
3184
3185 MVT ContainerVT = VT;
3186 if (VT.isFixedLengthVector()) {
3187 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3188 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3189 }
3190
3191 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3192
3193 // Freeze the source since we are increasing the number of uses.
3194 Src = DAG.getFreeze(Src);
3195
3196 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3197 MVT MaskVT = Mask.getSimpleValueType();
3199 DAG.getVTList(MaskVT, MVT::Other),
3200 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3201 DAG.getUNDEF(MaskVT), Mask, VL});
3202 Chain = Unorder.getValue(1);
3204 DAG.getVTList(ContainerVT, MVT::Other),
3205 {Chain, Src, Src, Src, Unorder, VL});
3206 Chain = Src.getValue(1);
3207
3208 // We do the conversion on the absolute value and fix the sign at the end.
3209 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3210
3211 // Determine the largest integer that can be represented exactly. This and
3212 // values larger than it don't have any fractional bits so don't need to
3213 // be converted.
3214 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3215 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3216 APFloat MaxVal = APFloat(FltSem);
3217 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3218 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3219 SDValue MaxValNode =
3220 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3221 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3222 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3223
3224 // If abs(Src) was larger than MaxVal or nan, keep it.
3225 Mask = DAG.getNode(
3226 RISCVISD::SETCC_VL, DL, MaskVT,
3227 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3228
3229 // Truncate to integer and convert back to FP.
3230 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3231 MVT XLenVT = Subtarget.getXLenVT();
3232 SDValue Truncated;
3233
3234 switch (Op.getOpcode()) {
3235 default:
3236 llvm_unreachable("Unexpected opcode");
3237 case ISD::STRICT_FCEIL:
3238 case ISD::STRICT_FFLOOR:
3239 case ISD::STRICT_FROUND:
3243 Truncated = DAG.getNode(
3244 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3245 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3246 break;
3247 }
3248 case ISD::STRICT_FTRUNC:
3249 Truncated =
3251 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3252 break;
3255 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3256 Mask, VL);
3257 break;
3258 }
3259 Chain = Truncated.getValue(1);
3260
3261 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3262 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3263 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3264 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3265 Truncated, Mask, VL);
3266 Chain = Truncated.getValue(1);
3267 }
3268
3269 // Restore the original sign so that -0.0 is preserved.
3270 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3271 Src, Src, Mask, VL);
3272
3273 if (VT.isFixedLengthVector())
3274 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3275 return DAG.getMergeValues({Truncated, Chain}, DL);
3276}
3277
3278static SDValue
3280 const RISCVSubtarget &Subtarget) {
3281 MVT VT = Op.getSimpleValueType();
3282 if (VT.isVector())
3283 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3284
3285 if (DAG.shouldOptForSize())
3286 return SDValue();
3287
3288 SDLoc DL(Op);
3289 SDValue Src = Op.getOperand(0);
3290
3291 // Create an integer the size of the mantissa with the MSB set. This and all
3292 // values larger than it don't have any fractional bits so don't need to be
3293 // converted.
3294 const fltSemantics &FltSem = VT.getFltSemantics();
3295 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3296 APFloat MaxVal = APFloat(FltSem);
3297 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3298 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3299 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3300
3302 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3303 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3304}
3305
3306// Expand vector LRINT and LLRINT by converting to the integer domain.
3308 const RISCVSubtarget &Subtarget) {
3309 MVT VT = Op.getSimpleValueType();
3310 assert(VT.isVector() && "Unexpected type");
3311
3312 SDLoc DL(Op);
3313 SDValue Src = Op.getOperand(0);
3314 MVT ContainerVT = VT;
3315
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3322 SDValue Truncated = DAG.getNode(
3323 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3325 VL);
3326
3327 if (!VT.isFixedLengthVector())
3328 return Truncated;
3329
3330 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3331}
3332
3333static SDValue
3335 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3336 SDValue Offset, SDValue Mask, SDValue VL,
3338 if (Passthru.isUndef())
3340 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3341 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3342 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3343}
3344
3345static SDValue
3346getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3347 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3348 SDValue VL,
3350 if (Passthru.isUndef())
3352 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3353 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3354 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3355}
3356
3357static MVT getLMUL1VT(MVT VT) {
3359 "Unexpected vector MVT");
3363}
3364
3368 int64_t Addend;
3369};
3370
3371static std::optional<APInt> getExactInteger(const APFloat &APF,
3373 // We will use a SINT_TO_FP to materialize this constant so we should use a
3374 // signed APSInt here.
3375 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3376 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3377 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3378 // the rounding mode changes the output value, then it is not an exact
3379 // integer.
3381 bool IsExact;
3382 // If it is out of signed integer range, it will return an invalid operation.
3383 // If it is not an exact integer, IsExact is false.
3384 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3386 !IsExact)
3387 return std::nullopt;
3388 return ValInt.extractBits(BitWidth, 0);
3389}
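// Illustrative behaviour (not from the original source): with BitWidth == 32,
// getExactInteger(4.0)  yields APInt(32, 4),
// getExactInteger(-3.0) yields the two's-complement encoding of -3, and
// getExactInteger(2.5)  yields std::nullopt since the conversion is inexact
// under every rounding mode.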
3390
3391// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3392// to the (non-zero) step S and start value X. This can then be lowered as the
3393// RVV sequence (VID * S) + X, for example.
3394// The step S is represented as an integer numerator divided by a positive
3395// denominator. Note that the implementation currently only identifies
3396// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3397// cannot detect 2/3, for example.
3398// Note that this method will also match potentially unappealing index
3399// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3400// determine whether this is worth generating code for.
3401//
3402// EltSizeInBits is the size of the type that the sequence will be calculated
3403// in, i.e. SEW for build_vectors or XLEN for address calculations.
3404static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3405 unsigned EltSizeInBits) {
3406 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3407 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3408 return std::nullopt;
3409 bool IsInteger = Op.getValueType().isInteger();
3410
3411 std::optional<unsigned> SeqStepDenom;
3412 std::optional<APInt> SeqStepNum;
3413 std::optional<APInt> SeqAddend;
3414 std::optional<std::pair<APInt, unsigned>> PrevElt;
3415 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3416
3417 // First extract the ops into a list of constant integer values. This may not
3418 // be possible for floats if they're not all representable as integers.
3420 const unsigned OpSize = Op.getScalarValueSizeInBits();
3421 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3422 if (Elt.isUndef()) {
3423 Elts[Idx] = std::nullopt;
3424 continue;
3425 }
3426 if (IsInteger) {
3427 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3428 } else {
3429 auto ExactInteger =
3430 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3431 if (!ExactInteger)
3432 return std::nullopt;
3433 Elts[Idx] = *ExactInteger;
3434 }
3435 }
3436
3437 for (auto [Idx, Elt] : enumerate(Elts)) {
3438 // Assume undef elements match the sequence; we just have to be careful
3439 // when interpolating across them.
3440 if (!Elt)
3441 continue;
3442
3443 if (PrevElt) {
3444 // Calculate the step since the last non-undef element, and ensure
3445 // it's consistent across the entire sequence.
3446 unsigned IdxDiff = Idx - PrevElt->second;
3447 APInt ValDiff = *Elt - PrevElt->first;
3448
3449 // A zero value difference means that we're somewhere in the middle
3450 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3451 // step change before evaluating the sequence.
3452 if (ValDiff == 0)
3453 continue;
3454
3455 int64_t Remainder = ValDiff.srem(IdxDiff);
3456 // Normalize the step if it's greater than 1.
3457 if (Remainder != ValDiff.getSExtValue()) {
3458 // The difference must cleanly divide the element span.
3459 if (Remainder != 0)
3460 return std::nullopt;
3461 ValDiff = ValDiff.sdiv(IdxDiff);
3462 IdxDiff = 1;
3463 }
3464
3465 if (!SeqStepNum)
3466 SeqStepNum = ValDiff;
3467 else if (ValDiff != SeqStepNum)
3468 return std::nullopt;
3469
3470 if (!SeqStepDenom)
3471 SeqStepDenom = IdxDiff;
3472 else if (IdxDiff != *SeqStepDenom)
3473 return std::nullopt;
3474 }
3475
3476 // Record this non-undef element for later.
3477 if (!PrevElt || PrevElt->first != *Elt)
3478 PrevElt = std::make_pair(*Elt, Idx);
3479 }
3480
3481 // We need to have logged a step for this to count as a legal index sequence.
3482 if (!SeqStepNum || !SeqStepDenom)
3483 return std::nullopt;
3484
3485 // Loop back through the sequence and validate elements we might have skipped
3486 // while waiting for a valid step. While doing this, log any sequence addend.
3487 for (auto [Idx, Elt] : enumerate(Elts)) {
3488 if (!Elt)
3489 continue;
3490 APInt ExpectedVal =
3491 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3492 *SeqStepNum)
3493 .sdiv(*SeqStepDenom);
3494
3495 APInt Addend = *Elt - ExpectedVal;
3496 if (!SeqAddend)
3497 SeqAddend = Addend;
3498 else if (Addend != SeqAddend)
3499 return std::nullopt;
3500 }
3501
3502 assert(SeqAddend && "Must have an addend if we have a step");
3503
3504 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3505 SeqAddend->getSExtValue()};
3506}
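// Worked examples (illustrative, not from the original source):
//   <0, 2, 4, 6>       -> StepNumerator=2, StepDenominator=1, Addend=0,
//                         i.e. the sequence VID * 2.
//   <1, 1, 2, 2, 3, 3> -> StepNumerator=1, StepDenominator=2, Addend=1,
//                         i.e. (VID / 2) + 1.
//   <0, 1, 3>          -> no consistent step, so std::nullopt.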
3507
3508// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3509// and lower it as a VRGATHER_VX_VL from the source vector.
3510static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3511 SelectionDAG &DAG,
3512 const RISCVSubtarget &Subtarget) {
3513 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3514 return SDValue();
3515 SDValue Vec = SplatVal.getOperand(0);
3516 // Don't perform this optimization for i1 vectors, or if the element types are
3517 // different
3518 // FIXME: Support i1 vectors, maybe by promoting to i8?
3519 MVT EltTy = VT.getVectorElementType();
3520 if (EltTy == MVT::i1 ||
3522 return SDValue();
3523 SDValue Idx = SplatVal.getOperand(1);
3524 // The index must be a legal type.
3525 if (Idx.getValueType() != Subtarget.getXLenVT())
3526 return SDValue();
3527
3528 // Check that Index lies within VT
3529 // TODO: Can we check if the Index is constant and known in-bounds?
3531 return SDValue();
3532
3533 MVT ContainerVT = VT;
3534 if (VT.isFixedLengthVector())
3535 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3536
3537 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3538 DAG.getUNDEF(ContainerVT), Vec,
3539 DAG.getVectorIdxConstant(0, DL));
3540
3541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3542
3543 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3544 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3545
3546 if (!VT.isFixedLengthVector())
3547 return Gather;
3548
3549 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3550}
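// Illustrative example (an assumption about the resulting code, not from the
// original source): splatting (extractelement %vec, %i), with %i already in
// XLenVT, becomes a single vrgather.vx of the source vector using %i as the
// scalar index, instead of a scalar extract followed by a separate splat.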
3551
3552/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3553/// which constitute a large proportion of the elements. In such cases we can
3554/// splat a vector with the dominant element and make up the shortfall with
3555/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3556/// Note that this includes vectors of 2 elements by association. The
3557/// upper-most element is the "dominant" one, allowing us to use a splat to
3558/// "insert" the upper element, and an insert of the lower element at position
3559/// 0, which improves codegen.
3561 const RISCVSubtarget &Subtarget) {
3562 MVT VT = Op.getSimpleValueType();
3563 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3564
3565 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3566
3567 SDLoc DL(Op);
3568 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3569
3570 MVT XLenVT = Subtarget.getXLenVT();
3571 unsigned NumElts = Op.getNumOperands();
3572
3573 SDValue DominantValue;
3574 unsigned MostCommonCount = 0;
3575 DenseMap<SDValue, unsigned> ValueCounts;
3576 unsigned NumUndefElts =
3577 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3578
3579 // Track the number of scalar loads we know we'd be inserting, estimated as
3580 // any non-zero floating-point constant. Other kinds of element are either
3581 // already in registers or are materialized on demand. The threshold at which
3582 // a vector load is more desirable than several scalar materializion and
3583 // vector-insertion instructions is not known.
3584 unsigned NumScalarLoads = 0;
3585
3586 for (SDValue V : Op->op_values()) {
3587 if (V.isUndef())
3588 continue;
3589
3590 unsigned &Count = ValueCounts[V];
3591 if (0 == Count)
3592 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3593 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3594
3595 // Is this value dominant? In case of a tie, prefer the highest element as
3596 // it's cheaper to insert near the beginning of a vector than it is at the
3597 // end.
3598 if (++Count >= MostCommonCount) {
3599 DominantValue = V;
3600 MostCommonCount = Count;
3601 }
3602 }
3603
3604 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3605 unsigned NumDefElts = NumElts - NumUndefElts;
3606 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3607
3608 // Don't perform this optimization when optimizing for size, since
3609 // materializing elements and inserting them tends to cause code bloat.
3610 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3611 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3612 ((MostCommonCount > DominantValueCountThreshold) ||
3613 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3614 // Start by splatting the most common element.
3615 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3616
3617 DenseSet<SDValue> Processed{DominantValue};
3618
3619 // We can handle an insert into the last element (of a splat) via
3620 // v(f)slide1down. This is slightly better than the vslideup insert
3621 // lowering as it avoids the need for a vector group temporary. It
3622 // is also better than using vmerge.vx as it avoids the need to
3623 // materialize the mask in a vector register.
3624 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3625 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3626 LastOp != DominantValue) {
3627 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3628 auto OpCode =
3630 if (!VT.isFloatingPoint())
3631 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3632 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3633 LastOp, Mask, VL);
3634 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3635 Processed.insert(LastOp);
3636 }
3637
3638 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3639 for (const auto &OpIdx : enumerate(Op->ops())) {
3640 const SDValue &V = OpIdx.value();
3641 if (V.isUndef() || !Processed.insert(V).second)
3642 continue;
3643 if (ValueCounts[V] == 1) {
3644 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3645 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3646 } else {
3647 // Blend in all instances of this value using a VSELECT, using a
3648 // mask where each bit signals whether that element is the one
3649 // we're after.
3651 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3652 return DAG.getConstant(V == V1, DL, XLenVT);
3653 });
3654 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3655 DAG.getBuildVector(SelMaskTy, DL, Ops),
3656 DAG.getSplatBuildVector(VT, DL, V), Vec);
3657 }
3658 }
3659
3660 return Vec;
3661 }
3662
3663 return SDValue();
3664}
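// Illustrative example (not from the original source): for v4i32
// <7, 7, 7, 3> the dominant value 7 is splatted first; 3 is a unique last
// element, so it is folded in with a vslide1down rather than an insert or a
// vmerge. For <7, 3, 7, 7> the 3 would instead be placed with an
// INSERT_VECTOR_ELT (or a VSELECT blend if it occurred more than once).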
3665
3667 const RISCVSubtarget &Subtarget) {
3668 MVT VT = Op.getSimpleValueType();
3669 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3670
3671 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3672
3673 SDLoc DL(Op);
3674 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3675
3676 MVT XLenVT = Subtarget.getXLenVT();
3677 unsigned NumElts = Op.getNumOperands();
3678
3679 if (VT.getVectorElementType() == MVT::i1) {
3680 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3681 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3682 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3683 }
3684
3685 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3686 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3687 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3688 }
3689
3690 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3691 // scalar integer chunks whose bit-width depends on the number of mask
3692 // bits and XLEN.
3693 // First, determine the most appropriate scalar integer type to use. This
3694 // is at most XLenVT, but may be shrunk to a smaller vector element type
3695 // according to the size of the final vector - use i8 chunks rather than
3696 // XLenVT if we're producing a v8i1. This results in more consistent
3697 // codegen across RV32 and RV64.
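// Illustrative example (not from the original source): a constant v8i1
// <1,0,1,1,0,0,0,1> is packed LSB-first into the 8-bit value 0x8D
// (0b10001101), built as a v1i8, and bitcast back to v8i1, so the whole
// mask costs one scalar constant materialization plus a vector move.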
3698 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3699 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3700 // If we have to use more than one INSERT_VECTOR_ELT then this
3701 // optimization is likely to increase code size; avoid performing it in
3702 // such a case. We can use a load from a constant pool in this case.
3703 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3704 return SDValue();
3705 // Now we can create our integer vector type. Note that it may be larger
3706 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3707 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3708 MVT IntegerViaVecVT =
3709 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3710 IntegerViaVecElts);
3711
3712 uint64_t Bits = 0;
3713 unsigned BitPos = 0, IntegerEltIdx = 0;
3714 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3715
3716 for (unsigned I = 0; I < NumElts;) {
3717 SDValue V = Op.getOperand(I);
3718 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3719 Bits |= ((uint64_t)BitValue << BitPos);
3720 ++BitPos;
3721 ++I;
3722
3723 // Once we accumulate enough bits to fill our scalar type or process the
3724 // last element, insert into our vector and clear our accumulated data.
3725 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3726 if (NumViaIntegerBits <= 32)
3727 Bits = SignExtend64<32>(Bits);
3728 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3729 Elts[IntegerEltIdx] = Elt;
3730 Bits = 0;
3731 BitPos = 0;
3732 IntegerEltIdx++;
3733 }
3734 }
3735
3736 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3737
3738 if (NumElts < NumViaIntegerBits) {
3739 // If we're producing a smaller vector than our minimum legal integer
3740 // type, bitcast to the equivalent (known-legal) mask type, and extract
3741 // our final mask.
3742 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3743 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3744 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3745 DAG.getConstant(0, DL, XLenVT));
3746 } else {
3747 // Else we must have produced an integer type with the same size as the
3748 // mask type; bitcast for the final result.
3749 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3750 Vec = DAG.getBitcast(VT, Vec);
3751 }
3752
3753 return Vec;
3754 }
3755
3756 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3757 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3759 if (!VT.isFloatingPoint())
3760 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3761 Splat =
3762 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3763 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3764 }
3765
3766 // Try and match index sequences, which we can lower to the vid instruction
3767 // with optional modifications. An all-undef vector is matched by
3768 // getSplatValue, above.
3769 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3770 int64_t StepNumerator = SimpleVID->StepNumerator;
3771 unsigned StepDenominator = SimpleVID->StepDenominator;
3772 int64_t Addend = SimpleVID->Addend;
3773
3774 assert(StepNumerator != 0 && "Invalid step");
3775 bool Negate = false;
3776 int64_t SplatStepVal = StepNumerator;
3777 unsigned StepOpcode = ISD::MUL;
3778 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3779 // anyway as the shift of 63 won't fit in uimm5.
3780 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3781 isPowerOf2_64(std::abs(StepNumerator))) {
3782 Negate = StepNumerator < 0;
3783 StepOpcode = ISD::SHL;
3784 SplatStepVal = Log2_64(std::abs(StepNumerator));
3785 }
3786
3787      // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3788      // threshold since it's the immediate value many RVV instructions accept.
3789      // There is no vmul.vi instruction, so ensure the multiply constant fits
3790      // in a single addi instruction.
3791 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3792 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3793 isPowerOf2_32(StepDenominator) &&
3794 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3795 MVT VIDVT =
3796          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3797      MVT VIDContainerVT =
3798 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3799 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3800 // Convert right out of the scalable type so we can use standard ISD
3801 // nodes for the rest of the computation. If we used scalable types with
3802 // these, we'd lose the fixed-length vector info and generate worse
3803 // vsetvli code.
3804 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3805 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3806 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3807 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3808 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3809 }
3810 if (StepDenominator != 1) {
3811 SDValue SplatStep =
3812 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3813 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3814 }
3815 if (Addend != 0 || Negate) {
3816 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3817 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3818 VID);
3819 }
3820 if (VT.isFloatingPoint()) {
3821 // TODO: Use vfwcvt to reduce register pressure.
3822 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3823 }
3824 return VID;
3825 }
3826 }
3827
3828 // For very small build_vectors, use a single scalar insert of a constant.
3829 // TODO: Base this on constant rematerialization cost, not size.
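  // For example (illustrative), a constant v4i8 <1, 2, 3, 4> can be built by
  // inserting the single i32 constant 0x04030201 into element 0 of an integer
  // vector and bitcasting the result back to v4i8.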
3830 const unsigned EltBitSize = VT.getScalarSizeInBits();
3831 if (VT.getSizeInBits() <= 32 &&
3832      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3833    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3834 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3835 "Unexpected sequence type");
3836 // If we can use the original VL with the modified element type, this
3837 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3838 // be moved into InsertVSETVLI?
3839 unsigned ViaVecLen =
3840 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3841 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3842
3843 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3844 uint64_t SplatValue = 0;
3845 // Construct the amalgamated value at this larger vector type.
3846 for (const auto &OpIdx : enumerate(Op->op_values())) {
3847 const auto &SeqV = OpIdx.value();
3848 if (!SeqV.isUndef())
3849 SplatValue |=
3850 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3851 }
3852
3853 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3854    // achieve better constant materialization.
3855 // On RV32, we need to sign-extend to use getSignedConstant.
3856 if (ViaIntVT == MVT::i32)
3857 SplatValue = SignExtend64<32>(SplatValue);
3858
3859 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3860 DAG.getUNDEF(ViaVecVT),
3861 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3862 DAG.getVectorIdxConstant(0, DL));
3863 if (ViaVecLen != 1)
3864      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3865                        MVT::getVectorVT(ViaIntVT, 1), Vec,
3866 DAG.getConstant(0, DL, XLenVT));
3867 return DAG.getBitcast(VT, Vec);
3868 }
3869
3870
3871 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3872 // when re-interpreted as a vector with a larger element type. For example,
3873 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3874  // could instead be splat as
3875 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3876 // TODO: This optimization could also work on non-constant splats, but it
3877 // would require bit-manipulation instructions to construct the splat value.
3878 SmallVector<SDValue> Sequence;
3879 const auto *BV = cast<BuildVectorSDNode>(Op);
3880 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3881      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3882      BV->getRepeatedSequence(Sequence) &&
3883 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3884 unsigned SeqLen = Sequence.size();
3885 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3886 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3887 ViaIntVT == MVT::i64) &&
3888 "Unexpected sequence type");
3889
3890 // If we can use the original VL with the modified element type, this
3891 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3892 // be moved into InsertVSETVLI?
3893 const unsigned RequiredVL = NumElts / SeqLen;
3894 const unsigned ViaVecLen =
3895 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3896 NumElts : RequiredVL;
3897 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3898
3899 unsigned EltIdx = 0;
3900 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3901 uint64_t SplatValue = 0;
3902 // Construct the amalgamated value which can be splatted as this larger
3903 // vector type.
3904 for (const auto &SeqV : Sequence) {
3905 if (!SeqV.isUndef())
3906 SplatValue |=
3907 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3908 EltIdx++;
3909 }
3910
3911 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3912    // achieve better constant materialization.
3913 // On RV32, we need to sign-extend to use getSignedConstant.
3914 if (ViaIntVT == MVT::i32)
3915 SplatValue = SignExtend64<32>(SplatValue);
3916
3917 // Since we can't introduce illegal i64 types at this stage, we can only
3918 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3919 // way we can use RVV instructions to splat.
3920 assert((ViaIntVT.bitsLE(XLenVT) ||
3921 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3922 "Unexpected bitcast sequence");
3923 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3924 SDValue ViaVL =
3925 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3926 MVT ViaContainerVT =
3927 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3928 SDValue Splat =
3929 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3930 DAG.getUNDEF(ViaContainerVT),
3931 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3932 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3933 if (ViaVecLen != RequiredVL)
3934        Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3935                            MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3936 DAG.getConstant(0, DL, XLenVT));
3937 return DAG.getBitcast(VT, Splat);
3938 }
3939 }
3940
3941  // If the number of sign bits allows, see if we can lower as a <N x i8>.
3942 // Our main goal here is to reduce LMUL (and thus work) required to
3943 // build the constant, but we will also narrow if the resulting
3944 // narrow vector is known to materialize cheaply.
3945 // TODO: We really should be costing the smaller vector. There are
3946 // profitable cases this misses.
3947 if (EltBitSize > 8 && VT.isInteger() &&
3948 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3949 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3950 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3951 DL, Op->ops());
3952 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3953 Source, DAG, Subtarget);
3954 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3955 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3956 }
3957
3958 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3959 return Res;
3960
3961 // For constant vectors, use generic constant pool lowering. Otherwise,
3962 // we'd have to materialize constants in GPRs just to move them into the
3963 // vector.
3964 return SDValue();
3965}
3966
3967static unsigned getPACKOpcode(unsigned DestBW,
3968 const RISCVSubtarget &Subtarget) {
3969 switch (DestBW) {
3970 default:
3971 llvm_unreachable("Unsupported pack size");
3972 case 16:
3973 return RISCV::PACKH;
3974 case 32:
3975 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3976 case 64:
3977 assert(Subtarget.is64Bit());
3978 return RISCV::PACK;
3979 }
3980}
3981
3982/// Double the element size of the build vector to reduce the number
3983/// of vslide1down operations in the build vector chain. In the worst case, this
3984/// trades three scalar operations for 1 vector operation. Scalar
3985/// operations are generally lower latency, and for out-of-order cores
3986/// we also benefit from additional parallelism.
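/// For example (illustrative), a v8i16 build_vector <a, b, c, d, e, f, g, h>
/// becomes a v4i32 build_vector of the packed pairs (a,b), (c,d), (e,f) and
/// (g,h) (first element of each pair in the low half), followed by a bitcast
/// back to v8i16.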
3987static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3988                                          const RISCVSubtarget &Subtarget) {
3989 SDLoc DL(Op);
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992 MVT ElemVT = VT.getVectorElementType();
3993 if (!ElemVT.isInteger())
3994 return SDValue();
3995
3996 // TODO: Relax these architectural restrictions, possibly with costing
3997 // of the actual instructions required.
3998 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3999 return SDValue();
4000
4001 unsigned NumElts = VT.getVectorNumElements();
4002 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4003 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4004 NumElts % 2 != 0)
4005 return SDValue();
4006
4007 // Produce [B,A] packed into a type twice as wide. Note that all
4008 // scalars are XLenVT, possibly masked (see below).
4009 MVT XLenVT = Subtarget.getXLenVT();
4010 SDValue Mask = DAG.getConstant(
4011 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4012 auto pack = [&](SDValue A, SDValue B) {
4013 // Bias the scheduling of the inserted operations to near the
4014 // definition of the element - this tends to reduce register
4015 // pressure overall.
4016 SDLoc ElemDL(B);
4017 if (Subtarget.hasStdExtZbkb())
4018 // Note that we're relying on the high bits of the result being
4019 // don't care. For PACKW, the result is *sign* extended.
4020 return SDValue(
4021 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4022 ElemDL, XLenVT, A, B),
4023 0);
4024
4025 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4026 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4027 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4028 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4029 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4030                       SDNodeFlags::Disjoint);
4031  };
4032
4033 SmallVector<SDValue> NewOperands;
4034 NewOperands.reserve(NumElts / 2);
4035 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4036 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4037 assert(NumElts == NewOperands.size() * 2);
4038 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4039 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4040 return DAG.getNode(ISD::BITCAST, DL, VT,
4041 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4042}
4043
4044static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4045                                 const RISCVSubtarget &Subtarget) {
4046 MVT VT = Op.getSimpleValueType();
4047 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4048
4049 MVT EltVT = VT.getVectorElementType();
4050 MVT XLenVT = Subtarget.getXLenVT();
4051
4052 SDLoc DL(Op);
4053
4054 // Proper support for f16 requires Zvfh. bf16 always requires special
4055 // handling. We need to cast the scalar to integer and create an integer
4056 // build_vector.
4057 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4058 MVT IVT = VT.changeVectorElementType(MVT::i16);
4058    SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4060    for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4061 SDValue Elem = Op.getOperand(I);
4062 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4063 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4064 // Called by LegalizeDAG, we need to use XLenVT operations since we
4065 // can't create illegal types.
4066 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4067 // Manually constant fold so the integer build_vector can be lowered
4068 // better. Waiting for DAGCombine will be too late.
4069 APInt V =
4070 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4071 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4072 } else {
4073 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4074 }
4075 } else {
4076 // Called by scalar type legalizer, we can use i16.
4077 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4078 }
4079 }
4080 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4081 return DAG.getBitcast(VT, Res);
4082 }
4083
4084 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4085      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4086    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4087
4088 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4089
4090 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4091
4092 if (VT.getVectorElementType() == MVT::i1) {
4093 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4094 // vector type, we have a legal equivalently-sized i8 type, so we can use
4095 // that.
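    // For example (illustrative), v4i1 <a, b, c, d> is emitted as
    // setcc (v4i8 <a&1, b&1, c&1, d&1>), (v4i8 zero), setne.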
4096 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4097 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4098
4099 SDValue WideVec;
4100 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4101 // For a splat, perform a scalar truncate before creating the wider
4102 // vector.
4103 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4104 DAG.getConstant(1, DL, Splat.getValueType()));
4105 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4106 } else {
4107 SmallVector<SDValue, 8> Ops(Op->op_values());
4108 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4109 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4110 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4111 }
4112
4113 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4114 }
4115
4116 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4117 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4118 return Gather;
4119 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4120                                          : RISCVISD::VMV_V_X_VL;
4121    if (!VT.isFloatingPoint())
4122 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4123 Splat =
4124 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4125 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4126 }
4127
4128 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4129 return Res;
4130
4131 // If we're compiling for an exact VLEN value, we can split our work per
4132 // register in the register group.
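  // For example (illustrative), with VLEN=128 a v8i64 build_vector (an m4
  // value) is built as four v2i64 build_vectors, each inserted into its own
  // register of the register group.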
4133 if (const auto VLen = Subtarget.getRealVLen();
4134 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4135 MVT ElemVT = VT.getVectorElementType();
4136 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4137 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4138 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4139 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4140 assert(M1VT == getLMUL1VT(M1VT));
4141
4142 // The following semantically builds up a fixed length concat_vector
4143 // of the component build_vectors. We eagerly lower to scalable and
4144 // insert_subvector here to avoid DAG combining it back to a large
4145 // build_vector.
4146 SmallVector<SDValue> BuildVectorOps(Op->ops());
4147 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4148 SDValue Vec = DAG.getUNDEF(ContainerVT);
4149 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4150 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4151 SDValue SubBV =
4152 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4153 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4154 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4155 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4156 DAG.getVectorIdxConstant(InsertIdx, DL));
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159 }
4160
4161 // If we're about to resort to vslide1down (or stack usage), pack our
4162 // elements into the widest scalar type we can. This will force a VL/VTYPE
4163 // toggle, but reduces the critical path, the number of vslide1down ops
4164 // required, and possibly enables scalar folds of the values.
4165 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4166 return Res;
4167
4168 // For m1 vectors, if we have non-undef values in both halves of our vector,
4169 // split the vector into low and high halves, build them separately, then
4170 // use a vselect to combine them. For long vectors, this cuts the critical
4171 // path of the vslide1down sequence in half, and gives us an opportunity
4172 // to special case each half independently. Note that we don't change the
4173  // length of the sub-vectors here, so if both halves fall back to the generic
4174 // vslide1down path, we should be able to fold the vselect into the final
4175 // vslidedown (for the undef tail) for the first half w/ masking.
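  // For example (illustrative), a fully-defined v16i8 build_vector is emitted
  // as two v16i8 build_vectors (one defining only the low 8 elements, one only
  // the high 8) combined with a vselect whose mask is true for the low half.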
4176 unsigned NumElts = VT.getVectorNumElements();
4177 unsigned NumUndefElts =
4178 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4179 unsigned NumDefElts = NumElts - NumUndefElts;
4180 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4181 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4182 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4183 SmallVector<SDValue> MaskVals;
4184 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4185 SubVecAOps.reserve(NumElts);
4186 SubVecBOps.reserve(NumElts);
4187 for (unsigned i = 0; i < NumElts; i++) {
4188 SDValue Elem = Op->getOperand(i);
4189 if (i < NumElts / 2) {
4190 SubVecAOps.push_back(Elem);
4191 SubVecBOps.push_back(UndefElem);
4192 } else {
4193 SubVecAOps.push_back(UndefElem);
4194 SubVecBOps.push_back(Elem);
4195 }
4196 bool SelectMaskVal = (i < NumElts / 2);
4197 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4198 }
4199 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4200 MaskVals.size() == NumElts);
4201
4202 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4203 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4204 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4205 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4206 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4207 }
4208
4209  // Cap the cost at a value linear in the number of elements in the vector.
4210  // The default lowering is to use the stack; the vector store + scalar loads
4211  // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
4212  // being (at least) linear in LMUL. As a result, using the vslidedown
4213  // lowering for every element ends up being VL*LMUL.
4214 // TODO: Should we be directly costing the stack alternative? Doing so might
4215 // give us a more accurate upper bound.
4216 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4217
4218 // TODO: unify with TTI getSlideCost.
4219 InstructionCost PerSlideCost = 1;
4220 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4221 default: break;
4222  case RISCVII::VLMUL::LMUL_2:
4223    PerSlideCost = 2;
4224    break;
4225  case RISCVII::VLMUL::LMUL_4:
4226    PerSlideCost = 4;
4227    break;
4228  case RISCVII::VLMUL::LMUL_8:
4229    PerSlideCost = 8;
4230 break;
4231 }
4232
4233 // TODO: Should we be using the build instseq then cost + evaluate scheme
4234 // we use for integer constants here?
4235 unsigned UndefCount = 0;
4236 for (const SDValue &V : Op->ops()) {
4237 if (V.isUndef()) {
4238 UndefCount++;
4239 continue;
4240 }
4241 if (UndefCount) {
4242 LinearBudget -= PerSlideCost;
4243 UndefCount = 0;
4244 }
4245 LinearBudget -= PerSlideCost;
4246 }
4247 if (UndefCount) {
4248 LinearBudget -= PerSlideCost;
4249 }
4250
4251 if (LinearBudget < 0)
4252 return SDValue();
4253
4254 assert((!VT.isFloatingPoint() ||
4255 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4256 "Illegal type which will result in reserved encoding");
4257
4258 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4259
4260 SDValue Vec;
4261 UndefCount = 0;
4262 for (SDValue V : Op->ops()) {
4263 if (V.isUndef()) {
4264 UndefCount++;
4265 continue;
4266 }
4267
4268 // Start our sequence with a TA splat in the hopes that hardware is able to
4269 // recognize there's no dependency on the prior value of our temporary
4270 // register.
4271 if (!Vec) {
4272 Vec = DAG.getSplatVector(VT, DL, V);
4273 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4274 UndefCount = 0;
4275 continue;
4276 }
4277
4278 if (UndefCount) {
4279 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4280 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4281 Vec, Offset, Mask, VL, Policy);
4282 UndefCount = 0;
4283 }
4284 auto OpCode =
4285        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4286    if (!VT.isFloatingPoint())
4287 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4288 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4289 V, Mask, VL);
4290 }
4291 if (UndefCount) {
4292 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4293 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4294 Vec, Offset, Mask, VL, Policy);
4295 }
4296 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4297}
4298
4299static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4300                                   SDValue Lo, SDValue Hi, SDValue VL,
4301                                   SelectionDAG &DAG) {
4302 if (!Passthru)
4303 Passthru = DAG.getUNDEF(VT);
4304 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4305 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4306 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4307 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4308 // node in order to try and match RVV vector/scalar instructions.
4309 if ((LoC >> 31) == HiC)
4310 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4311
4312 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4313 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4314 // vlmax vsetvli or vsetivli to change the VL.
4315 // FIXME: Support larger constants?
4316 // FIXME: Support non-constant VLs by saturating?
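    // For example (illustrative), splatting the i64 value 0x5555555555555555
    // on RV32 can instead splat the i32 value 0x55555555 across a vector with
    // twice as many i32 elements (when the VL permits) and bitcast the result.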
4317 if (LoC == HiC) {
4318 SDValue NewVL;
4319 if (isAllOnesConstant(VL) ||
4320 (isa<RegisterSDNode>(VL) &&
4321 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4322 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4323 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4324 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4325
4326 if (NewVL) {
4327 MVT InterVT =
4328 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4329 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4330 DAG.getUNDEF(InterVT), Lo, NewVL);
4331 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4332 }
4333 }
4334 }
4335
4336 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4337 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4338 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4339 Hi.getConstantOperandVal(1) == 31)
4340 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4341
4342 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4343 // even if it might be sign extended.
4344 if (Hi.isUndef())
4345 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4346
4347 // Fall back to a stack store and stride x0 vector load.
4348 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4349 Hi, VL);
4350}
4351
4352// Called by type legalization to handle splat of i64 on RV32.
4353// FIXME: We can optimize this when the type has sign or zero bits in one
4354// of the halves.
4355static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4356 SDValue Scalar, SDValue VL,
4357 SelectionDAG &DAG) {
4358 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4359 SDValue Lo, Hi;
4360 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4361 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4362}
4363
4364// This function lowers a splat of a scalar operand Splat with the vector
4365// length VL. It ensures the final sequence is type legal, which is useful when
4366// lowering a splat after type legalization.
4367static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4368 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4369 const RISCVSubtarget &Subtarget) {
4370 bool HasPassthru = Passthru && !Passthru.isUndef();
4371 if (!HasPassthru && !Passthru)
4372 Passthru = DAG.getUNDEF(VT);
4373
4374 MVT EltVT = VT.getVectorElementType();
4375 MVT XLenVT = Subtarget.getXLenVT();
4376
4377 if (VT.isFloatingPoint()) {
4378 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4379 EltVT == MVT::bf16) {
4380 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4381 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4382 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4383 else
4384 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4385 MVT IVT = VT.changeVectorElementType(MVT::i16);
4386 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4387 SDValue Splat =
4388 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4389 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4390 }
4391 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4392 }
4393
4394 // Simplest case is that the operand needs to be promoted to XLenVT.
4395 if (Scalar.getValueType().bitsLE(XLenVT)) {
4396 // If the operand is a constant, sign extend to increase our chances
4397    // of being able to use a .vi instruction. ANY_EXTEND would become
4398    // a zero extend and the simm5 check in isel would fail.
4399 // FIXME: Should we ignore the upper bits in isel instead?
4400 unsigned ExtOpc =
4401 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4402 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4403 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4404 }
4405
4406 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4407 "Unexpected scalar for splat lowering!");
4408
4409 if (isOneConstant(VL) && isNullConstant(Scalar))
4410 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4411 DAG.getConstant(0, DL, XLenVT), VL);
4412
4413 // Otherwise use the more complicated splatting algorithm.
4414 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4415}
4416
4417// This function lowers an insert of a scalar operand Scalar into lane
4418// 0 of the vector regardless of the value of VL. The contents of the
4419// remaining lanes of the result vector are unspecified. VL is assumed
4420// to be non-zero.
4421static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4422                                 const SDLoc &DL, SelectionDAG &DAG,
4423 const RISCVSubtarget &Subtarget) {
4424 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4425
4426 const MVT XLenVT = Subtarget.getXLenVT();
4427 SDValue Passthru = DAG.getUNDEF(VT);
4428
4429 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4430 isNullConstant(Scalar.getOperand(1))) {
4431 SDValue ExtractedVal = Scalar.getOperand(0);
4432 // The element types must be the same.
4433 if (ExtractedVal.getValueType().getVectorElementType() ==
4434 VT.getVectorElementType()) {
4435 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4436 MVT ExtractedContainerVT = ExtractedVT;
4437 if (ExtractedContainerVT.isFixedLengthVector()) {
4438 ExtractedContainerVT = getContainerForFixedLengthVector(
4439 DAG, ExtractedContainerVT, Subtarget);
4440 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4441 ExtractedVal, DAG, Subtarget);
4442 }
4443 if (ExtractedContainerVT.bitsLE(VT))
4444 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4445 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4446 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4447 DAG.getVectorIdxConstant(0, DL));
4448 }
4449 }
4450
4451
4452 if (VT.isFloatingPoint())
4453 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4454 DAG.getUNDEF(VT), Scalar, VL);
4455
4456 // Avoid the tricky legalization cases by falling back to using the
4457 // splat code which already handles it gracefully.
4458 if (!Scalar.getValueType().bitsLE(XLenVT))
4459 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4460 DAG.getConstant(1, DL, XLenVT),
4461 VT, DL, DAG, Subtarget);
4462
4463 // If the operand is a constant, sign extend to increase our chances
4464  // of being able to use a .vi instruction. ANY_EXTEND would become
4465  // a zero extend and the simm5 check in isel would fail.
4466 // FIXME: Should we ignore the upper bits in isel instead?
4467 unsigned ExtOpc =
4468 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4469 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4470 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4471 VL);
4472}
4473
4474// Can this shuffle be performed on exactly one (possibly larger) input?
4475static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4476 SDValue V2) {
4477
4478 if (V2.isUndef() &&
4479      RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4480    return V1;
4481
4482  // Both inputs must be extracts.
4483 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4484 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4485 return SDValue();
4486
4487 // Extracting from the same source.
4488 SDValue Src = V1.getOperand(0);
4489 if (Src != V2.getOperand(0))
4490 return SDValue();
4491
4492 // Src needs to have twice the number of elements.
4493 unsigned NumElts = VT.getVectorNumElements();
4494 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4495 return SDValue();
4496
4497 // The extracts must extract the two halves of the source.
4498 if (V1.getConstantOperandVal(1) != 0 ||
4499 V2.getConstantOperandVal(1) != NumElts)
4500 return SDValue();
4501
4502 return Src;
4503}
4504
4505/// Is this shuffle interleaving contiguous elements from one vector into the
4506/// even elements and contiguous elements from another vector into the odd
4507/// elements. \p EvenSrc will contain the element that should be in the first
4508/// even element. \p OddSrc will contain the element that should be in the first
4509/// odd element. These can be the first element in a source or the element half
4510/// way through the source.
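/// For example (illustrative), for a v8i32 shuffle of two v8i32 sources, the
/// mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low halves of both inputs
/// and is matched with EvenSrc = 0 and OddSrc = 8.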
4511static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4512 int &OddSrc, const RISCVSubtarget &Subtarget) {
4513 // We need to be able to widen elements to the next larger integer type.
4514 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4515 return false;
4516
4517 int Size = Mask.size();
4518 int NumElts = VT.getVectorNumElements();
4519 assert(Size == (int)NumElts && "Unexpected mask size");
4520
4521 SmallVector<unsigned, 2> StartIndexes;
4522 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4523 return false;
4524
4525 EvenSrc = StartIndexes[0];
4526 OddSrc = StartIndexes[1];
4527
4528  // One source should be the low half of the first vector.
4529 if (EvenSrc != 0 && OddSrc != 0)
4530 return false;
4531
4532  // Subvectors will be extracted from either the start of the two input
4533  // vectors, or from the start and middle of the first vector if it's a unary
4534  // interleave.
4535 // In both cases, HalfNumElts will be extracted.
4536 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4537 // we'll create an illegal extract_subvector.
4538 // FIXME: We could support other values using a slidedown first.
4539 int HalfNumElts = NumElts / 2;
4540 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4541}
4542
4543/// Match shuffles that concatenate two vectors, rotate the concatenation,
4544/// and then extract the original number of elements from the rotated result.
4545/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4546/// returned rotation amount is for a rotate right, where elements move from
4547/// higher elements to lower elements. \p LoSrc indicates the first source
4548/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4549/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4550/// 0 or 1 if a rotation is found.
4551///
4552/// NOTE: We talk about rotate to the right which matches how bit shift and
4553/// rotate instructions are described where LSBs are on the right, but LLVM IR
4554/// and the table below write vectors with the lowest elements on the left.
4555static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4556 int Size = Mask.size();
4557
4558 // We need to detect various ways of spelling a rotation:
4559 // [11, 12, 13, 14, 15, 0, 1, 2]
4560 // [-1, 12, 13, 14, -1, -1, 1, -1]
4561 // [-1, -1, -1, -1, -1, -1, 1, 2]
4562 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4563 // [-1, 4, 5, 6, -1, -1, 9, -1]
4564 // [-1, 4, 5, 6, -1, -1, -1, -1]
4565 int Rotation = 0;
4566 LoSrc = -1;
4567 HiSrc = -1;
4568 for (int i = 0; i != Size; ++i) {
4569 int M = Mask[i];
4570 if (M < 0)
4571 continue;
4572
4573 // Determine where a rotate vector would have started.
4574 int StartIdx = i - (M % Size);
4575 // The identity rotation isn't interesting, stop.
4576 if (StartIdx == 0)
4577 return -1;
4578
4579 // If we found the tail of a vector the rotation must be the missing
4580 // front. If we found the head of a vector, it must be how much of the
4581 // head.
4582 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4583
4584 if (Rotation == 0)
4585 Rotation = CandidateRotation;
4586 else if (Rotation != CandidateRotation)
4587 // The rotations don't match, so we can't match this mask.
4588 return -1;
4589
4590 // Compute which value this mask is pointing at.
4591 int MaskSrc = M < Size ? 0 : 1;
4592
4593 // Compute which of the two target values this index should be assigned to.
4594    // This reflects whether the high elements are remaining or the low elements
4595 // are remaining.
4596 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4597
4598 // Either set up this value if we've not encountered it before, or check
4599 // that it remains consistent.
4600 if (TargetSrc < 0)
4601 TargetSrc = MaskSrc;
4602 else if (TargetSrc != MaskSrc)
4603 // This may be a rotation, but it pulls from the inputs in some
4604 // unsupported interleaving.
4605 return -1;
4606 }
4607
4608 // Check that we successfully analyzed the mask, and normalize the results.
4609 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4610 assert((LoSrc >= 0 || HiSrc >= 0) &&
4611 "Failed to find a rotated input vector!");
4612
4613 return Rotation;
4614}
4615
4616// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4617// 2, 4, 8 and the integer type Factor-times larger than VT's
4618// element type must be a legal element type.
4619// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4620// -> [p, q, r, s] (Factor=2, Index=1)
4621static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4622                                            SDValue Src, unsigned Factor,
4623 unsigned Index, SelectionDAG &DAG) {
4624 unsigned EltBits = VT.getScalarSizeInBits();
4625 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4626 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4627 SrcEC.divideCoefficientBy(Factor));
4628 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4629 SrcEC.divideCoefficientBy(Factor));
4630 Src = DAG.getBitcast(WideSrcVT, Src);
4631
4632 unsigned Shift = Index * EltBits;
4633 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4634 DAG.getConstant(Shift, DL, WideSrcVT));
4635 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4636  MVT IntVT = VT.changeVectorElementTypeToInteger();
4637  Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4638 DAG.getVectorIdxConstant(0, DL));
4639 return DAG.getBitcast(VT, Res);
4640}
4641
4642// Lower the following shuffle to vslidedown.
4643// a)
4644// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4645// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4646// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4647// b)
4648// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4649// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4650// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4651// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4652// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4653// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4654static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4655                                               SDValue V1, SDValue V2,
4656 ArrayRef<int> Mask,
4657 const RISCVSubtarget &Subtarget,
4658 SelectionDAG &DAG) {
4659 auto findNonEXTRACT_SUBVECTORParent =
4660 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4661 uint64_t Offset = 0;
4662 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4663 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4664           // a scalable vector, but we don't want to match that case.
4665 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4666 Offset += Parent.getConstantOperandVal(1);
4667 Parent = Parent.getOperand(0);
4668 }
4669 return std::make_pair(Parent, Offset);
4670 };
4671
4672 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4673 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4674
4675 // Extracting from the same source.
4676 SDValue Src = V1Src;
4677 if (Src != V2Src)
4678 return SDValue();
4679
4680 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4681 SmallVector<int, 16> NewMask(Mask);
4682 for (size_t i = 0; i != NewMask.size(); ++i) {
4683 if (NewMask[i] == -1)
4684 continue;
4685
4686 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4687 NewMask[i] = NewMask[i] + V1IndexOffset;
4688 } else {
4689      // Subtracting NewMask.size() is needed; otherwise, case b) above would
4690      // produce <5,6,7,12> instead of <5,6,7,8>.
4691 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4692 }
4693 }
4694
4695 // First index must be known and non-zero. It will be used as the slidedown
4696 // amount.
4697 if (NewMask[0] <= 0)
4698 return SDValue();
4699
4700  // NewMask must also be contiguous.
4701 for (unsigned i = 1; i != NewMask.size(); ++i)
4702 if (NewMask[i - 1] + 1 != NewMask[i])
4703 return SDValue();
4704
4705 MVT XLenVT = Subtarget.getXLenVT();
4706 MVT SrcVT = Src.getSimpleValueType();
4707 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4708 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4709 SDValue Slidedown =
4710 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4711 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4712 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4713 return DAG.getNode(
4714      ISD::EXTRACT_SUBVECTOR, DL, VT,
4715      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4716 DAG.getConstant(0, DL, XLenVT));
4717}
4718
4719// Because vslideup leaves the destination elements at the start intact, we can
4720// use it to perform shuffles that insert subvectors:
4721//
4722// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4723// ->
4724// vsetvli zero, 8, e8, mf2, ta, ma
4725// vslideup.vi v8, v9, 4
4726//
4727// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4728// ->
4729// vsetvli zero, 5, e8, mf2, tu, ma
4730// vslideup.vi v8, v9, 2
4731static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4732                                             SDValue V1, SDValue V2,
4733 ArrayRef<int> Mask,
4734 const RISCVSubtarget &Subtarget,
4735 SelectionDAG &DAG) {
4736 unsigned NumElts = VT.getVectorNumElements();
4737 int NumSubElts, Index;
4738 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4739 Index))
4740 return SDValue();
4741
4742 bool OpsSwapped = Mask[Index] < (int)NumElts;
4743 SDValue InPlace = OpsSwapped ? V2 : V1;
4744 SDValue ToInsert = OpsSwapped ? V1 : V2;
4745
4746 MVT XLenVT = Subtarget.getXLenVT();
4747 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4748 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4749 // We slide up by the index that the subvector is being inserted at, and set
4750 // VL to the index + the number of elements being inserted.
4751  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4752  // If we're adding a suffix to the in place vector, i.e. inserting right
4753 // up to the very end of it, then we don't actually care about the tail.
4754 if (NumSubElts + Index >= (int)NumElts)
4755 Policy |= RISCVII::TAIL_AGNOSTIC;
4756
4757 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4758 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4759 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4760
4761 SDValue Res;
4762 // If we're inserting into the lowest elements, use a tail undisturbed
4763 // vmv.v.v.
4764 if (Index == 0)
4765 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4766 VL);
4767 else
4768 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4769 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4770 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4771}
4772
4773/// Match v(f)slide1up/down idioms. These operations involve sliding
4774/// N-1 elements to make room for an inserted scalar at one end.
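/// For example (illustrative), shuffling a v4i32 value V (operand 0) with a
/// splat of scalar s (operand 1) using the mask <4, 0, 1, 2> matches a
/// vslide1up of V by s, while the mask <1, 2, 3, 4> matches a vslide1down.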
4775static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4776                                            SDValue V1, SDValue V2,
4777 ArrayRef<int> Mask,
4778 const RISCVSubtarget &Subtarget,
4779 SelectionDAG &DAG) {
4780 bool OpsSwapped = false;
4781 if (!isa<BuildVectorSDNode>(V1)) {
4782 if (!isa<BuildVectorSDNode>(V2))
4783 return SDValue();
4784 std::swap(V1, V2);
4785 OpsSwapped = true;
4786 }
4787 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4788 if (!Splat)
4789 return SDValue();
4790
4791 // Return true if the mask could describe a slide of Mask.size() - 1
4792 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4793 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4794 const unsigned S = (Offset > 0) ? 0 : -Offset;
4795 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4796 for (unsigned i = S; i != E; ++i)
4797 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4798 return false;
4799 return true;
4800 };
4801
4802 const unsigned NumElts = VT.getVectorNumElements();
4803 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4804 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4805 return SDValue();
4806
4807 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4808  // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4809 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4810 return SDValue();
4811
4812 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4813 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4814
4815 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4816 // vslide1{down,up}.vx instead.
4817 if (VT.getVectorElementType() == MVT::bf16 ||
4818 (VT.getVectorElementType() == MVT::f16 &&
4819 !Subtarget.hasVInstructionsF16())) {
4820 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4821 Splat =
4822 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4823 V2 = DAG.getBitcast(
4824 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4825 SDValue Vec = DAG.getNode(
4826        IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4827        IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4828 Vec = DAG.getBitcast(ContainerVT, Vec);
4829 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4830 }
4831
4832 auto OpCode = IsVSlidedown ?
4833    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4834    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4835  if (!VT.isFloatingPoint())
4836 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4837 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4838 DAG.getUNDEF(ContainerVT),
4839 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4840 Splat, TrueMask, VL);
4841 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4842}
4843
4844// Match a mask which "spreads" the leading elements of a vector evenly
4845// across the result. Factor is the spread amount, and Index is the
4846// offset applied. (on success, Index < Factor) This is the inverse
4847// of a deinterleave with the same Factor and Index. This is analogous
4848// to an interleave, except that all but one lane is undef.
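// For example (illustrative), the mask <-1, 0, -1, 1, -1, 2, -1, 3> is a
// spread mask with Factor = 2 and Index = 1.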
4849static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4850 SmallVector<bool> LaneIsUndef(Factor, true);
4851 for (unsigned i = 0; i < Mask.size(); i++)
4852 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4853
4854 bool Found = false;
4855 for (unsigned i = 0; i < Factor; i++) {
4856 if (LaneIsUndef[i])
4857 continue;
4858 if (Found)
4859 return false;
4860 Index = i;
4861 Found = true;
4862 }
4863 if (!Found)
4864 return false;
4865
4866 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4867 unsigned j = i * Factor + Index;
4868 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4869 return false;
4870 }
4871 return true;
4872}
4873
4874// Given a vector a, b, c, d return a vector Factor times longer
4875// with Factor-1 undef's between elements. Ex:
4876// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4877// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4878static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4879 const SDLoc &DL, SelectionDAG &DAG) {
4880
4881 MVT VT = V.getSimpleValueType();
4882 unsigned EltBits = VT.getScalarSizeInBits();
4884 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4885
4886 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4887
4888 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4889 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4890 // allow the SHL to fold away if Index is 0.
4891 if (Index != 0)
4892 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4893 DAG.getConstant(EltBits * Index, DL, WideVT));
4894 // Make sure to use original element type
4895  MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4896                                  EC.multiplyCoefficientBy(Factor));
4897 return DAG.getBitcast(ResultVT, Result);
4898}
4899
4900// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4901// to create an interleaved vector of <[vscale x] n*2 x ty>.
4902// This requires that the size of ty is less than the subtarget's maximum ELEN.
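// For example (illustrative), EvenV = <a, b> and OddV = <x, y> produce the
// interleaved result <a, x, b, y>.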
4903static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4904                                     const SDLoc &DL, SelectionDAG &DAG,
4905 const RISCVSubtarget &Subtarget) {
4906
4907 // FIXME: Not only does this optimize the code, it fixes some correctness
4908 // issues because MIR does not have freeze.
4909 if (EvenV.isUndef())
4910 return getWideningSpread(OddV, 2, 1, DL, DAG);
4911 if (OddV.isUndef())
4912 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4913
4914 MVT VecVT = EvenV.getSimpleValueType();
4915 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4916 // Convert fixed vectors to scalable if needed
4917 if (VecContainerVT.isFixedLengthVector()) {
4918 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4919 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4920 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4921 }
4922
4923 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4924
4925 // We're working with a vector of the same size as the resulting
4926 // interleaved vector, but with half the number of elements and
4927 // twice the SEW (Hence the restriction on not using the maximum
4928 // ELEN)
4929 MVT WideVT =
4930      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4931                       VecVT.getVectorElementCount());
4932 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4933 if (WideContainerVT.isFixedLengthVector())
4934 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4935
4936 // Bitcast the input vectors to integers in case they are FP
4937 VecContainerVT = VecContainerVT.changeTypeToInteger();
4938 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4939 OddV = DAG.getBitcast(VecContainerVT, OddV);
4940
4941 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4942 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4943
4944 SDValue Interleaved;
4945 if (Subtarget.hasStdExtZvbb()) {
4946 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4947 SDValue OffsetVec =
4948 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4949 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4950 OffsetVec, Passthru, Mask, VL);
4951 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4952 Interleaved, EvenV, Passthru, Mask, VL);
4953 } else {
4954 // FIXME: We should freeze the odd vector here. We already handled the case
4955 // of provably undef/poison above.
4956
4957 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4958 // vwaddu.vv
4959 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4960 OddV, Passthru, Mask, VL);
4961
4962    // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
4963 SDValue AllOnesVec = DAG.getSplatVector(
4964 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4965 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4966 OddV, AllOnesVec, Passthru, Mask, VL);
4967
4968 // Add the two together so we get
4969 // (OddV * 0xff...ff) + (OddV + EvenV)
4970 // = (OddV * 0x100...00) + EvenV
4971 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4972    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4973 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4974 Interleaved, OddsMul, Passthru, Mask, VL);
4975 }
4976
4977 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4978 MVT ResultContainerVT = MVT::getVectorVT(
4979 VecVT.getVectorElementType(), // Make sure to use original type
4980 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4981 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4982
4983 // Convert back to a fixed vector if needed
4984 MVT ResultVT =
4985      MVT::getVectorVT(VecVT.getVectorElementType(),
4986                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4987  if (ResultVT.isFixedLengthVector())
4988 Interleaved =
4989 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4990
4991 return Interleaved;
4992}
4993
4994// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4995// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4996static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4997                                      SelectionDAG &DAG,
4998 const RISCVSubtarget &Subtarget) {
4999 SDLoc DL(SVN);
5000 MVT VT = SVN->getSimpleValueType(0);
5001 SDValue V = SVN->getOperand(0);
5002 unsigned NumElts = VT.getVectorNumElements();
5003
5004 assert(VT.getVectorElementType() == MVT::i1);
5005
5005
5006  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5007                                        SVN->getMask().size()) ||
5008 !SVN->getOperand(1).isUndef())
5009 return SDValue();
5010
5011 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5012 EVT ViaVT = EVT::getVectorVT(
5013 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5014 EVT ViaBitVT =
5015 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5016
5017 // If we don't have zvbb or the larger element type > ELEN, the operation will
5018 // be illegal.
5019  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5020                                                               ViaVT) ||
5021 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5022 return SDValue();
5023
5024 // If the bit vector doesn't fit exactly into the larger element type, we need
5025 // to insert it into the larger vector and then shift up the reversed bits
5026 // afterwards to get rid of the gap introduced.
5027 if (ViaEltSize > NumElts)
5028 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5029 V, DAG.getVectorIdxConstant(0, DL));
5030
5031 SDValue Res =
5032 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5033
5034 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5035 // element type.
5036 if (ViaEltSize > NumElts)
5037 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5038 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5039
5040 Res = DAG.getBitcast(ViaBitVT, Res);
5041
5042 if (ViaEltSize > NumElts)
5043 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5044 DAG.getVectorIdxConstant(0, DL));
5045 return Res;
5046}
5047
5048static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5049                             SelectionDAG &DAG,
5050 const RISCVSubtarget &Subtarget,
5051 MVT &RotateVT, unsigned &RotateAmt) {
5052 SDLoc DL(SVN);
5053
5054 EVT VT = SVN->getValueType(0);
5055 unsigned NumElts = VT.getVectorNumElements();
5056 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5057 unsigned NumSubElts;
5058 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5059 NumElts, NumSubElts, RotateAmt))
5060 return false;
5061 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5062 NumElts / NumSubElts);
5063
5064 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5065 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5066}
5067
5068// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5069// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5070// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5071static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5072                                           SelectionDAG &DAG,
5073 const RISCVSubtarget &Subtarget) {
5074 SDLoc DL(SVN);
5075
5076 EVT VT = SVN->getValueType(0);
5077 unsigned RotateAmt;
5078 MVT RotateVT;
5079 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5080 return SDValue();
5081
5082 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5083
5084 SDValue Rotate;
5085 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5086 // so canonicalize to vrev8.
5087 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5088 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5089 else
5090 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5091 DAG.getConstant(RotateAmt, DL, RotateVT));
5092
5093 return DAG.getBitcast(VT, Rotate);
5094}
5095
5096// If compiling with an exactly known VLEN, see if we can split a
5097// shuffle on m2 or larger into a small number of m1 sized shuffles
5098// which write each destination register exactly once.
5099static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5100                                            SelectionDAG &DAG,
5101 const RISCVSubtarget &Subtarget) {
5102 SDLoc DL(SVN);
5103 MVT VT = SVN->getSimpleValueType(0);
5104 SDValue V1 = SVN->getOperand(0);
5105 SDValue V2 = SVN->getOperand(1);
5106 ArrayRef<int> Mask = SVN->getMask();
5107
5108 // If we don't know exact data layout, not much we can do. If this
5109 // is already m1 or smaller, no point in splitting further.
5110 const auto VLen = Subtarget.getRealVLen();
5111 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5112 return SDValue();
5113
5114 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5115 // expansion for.
5116 unsigned RotateAmt;
5117 MVT RotateVT;
5118 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5119 return SDValue();
5120
5121 MVT ElemVT = VT.getVectorElementType();
5122 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5123
5124 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5125 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5126 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5127 assert(M1VT == getLMUL1VT(M1VT));
5128 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5129 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5130 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5131 unsigned NumOfDestRegs = NumElts / NumOpElts;
5132 // The following semantically builds up a fixed length concat_vector
5133 // of the component shuffle_vectors. We eagerly lower to scalable here
5134 // to avoid DAG combining it back to a large shuffle_vector again.
5135 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5136 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5137  SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5138      Operands;
5139  processShuffleMasks(
5140      Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5141 [&]() { Operands.emplace_back(); },
5142 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5143 Operands.emplace_back().emplace_back(
5144 SrcVecIdx, UINT_MAX,
5145 SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5146 },
5147 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5148 if (NewReg)
5149 Operands.emplace_back();
5150 Operands.back().emplace_back(
5151 Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5152 });
5153 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5154 // Note: check that we do not emit too many shuffles here to prevent code
5155 // size explosion.
5156  // TODO: investigate whether this can be improved by extra analysis of the
5157  // masks to check if the code is more profitable.
5158 unsigned NumShuffles = std::accumulate(
5159 Operands.begin(), Operands.end(), 0u,
5160 [&](unsigned N,
5161 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5162 if (Data.empty())
5163 return N;
5164 N += Data.size();
5165 for (const auto &P : Data) {
5166 unsigned Idx2 = std::get<1>(P);
5167 ArrayRef<int> Mask = std::get<2>(P);
5168 if (Idx2 != UINT_MAX)
5169 ++N;
5170 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5171 --N;
5172 }
5173 return N;
5174 });
5175 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5176 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5177 return SDValue();
5178 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5179 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5180 DAG.getVectorIdxConstant(ExtractIdx, DL));
5181 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5182 return SubVec;
5183 };
5184 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5185 ArrayRef<int> Mask) {
5186 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5187 return SubVec;
5188 };
5189 SDValue Vec = DAG.getUNDEF(ContainerVT);
5190 for (auto [I, Data] : enumerate(Operands)) {
5191 if (Data.empty())
5192 continue;
5193 SmallDenseMap<unsigned, SDValue, 4> Values;
5194 for (unsigned I : seq<unsigned>(Data.size())) {
5195 const auto &[Idx1, Idx2, _] = Data[I];
5196 if (Values.contains(Idx1)) {
5197 assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5198 "Expected both indices to be extracted already.");
5199 break;
5200 }
5201 SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5202 (Idx1 % NumOfSrcRegs) * NumOpElts);
5203 Values[Idx1] = V;
5204 if (Idx2 != UINT_MAX)
5205 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5206 (Idx2 % NumOfSrcRegs) * NumOpElts);
5207 }
5208 SDValue V;
5209 for (const auto &[Idx1, Idx2, Mask] : Data) {
5210 SDValue V1 = Values.at(Idx1);
5211 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5212 V = PerformShuffle(V1, V2, Mask);
5213 Values[Idx1] = V;
5214 }
5215
5216 unsigned InsertIdx = I * NumOpElts;
5217 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5218 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5219 DAG.getVectorIdxConstant(InsertIdx, DL));
5220 }
5221 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5222}
5223
5224// Matches a subset of compress masks with a contiguous prefix of output
5225// elements. This could be extended to allow gaps by deciding which
5226// source elements to spuriously demand.
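// For example, <0, 2, 5, -1> is accepted: the defined indices are strictly
// increasing, each is >= its output position, and undefs only appear at the end.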
5227 static bool isCompressMask(ArrayRef<int> Mask) {
5228 int Last = -1;
5229 bool SawUndef = false;
5230 for (unsigned i = 0; i < Mask.size(); i++) {
5231 if (Mask[i] == -1) {
5232 SawUndef = true;
5233 continue;
5234 }
5235 if (SawUndef)
5236 return false;
5237 if (i > (unsigned)Mask[i])
5238 return false;
5239 if (Mask[i] <= Last)
5240 return false;
5241 Last = Mask[i];
5242 }
5243 return true;
5244}
5245
5246/// Given a shuffle where the indices are disjoint between the two sources,
5247/// e.g.:
5248///
5249/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5250///
5251/// Merge the two sources into one and do a single source shuffle:
5252///
5253/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5254/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5255///
5256/// A vselect will either be merged into a masked instruction or be lowered as a
5257/// vmerge.vvm, which is cheaper than a vrgather.vv.
5258 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5259 SelectionDAG &DAG,
5260 const RISCVSubtarget &Subtarget) {
5261 MVT VT = SVN->getSimpleValueType(0);
5262 MVT XLenVT = Subtarget.getXLenVT();
5263 SDLoc DL(SVN);
5264
5265 const ArrayRef<int> Mask = SVN->getMask();
5266
5267 // Work out which source each lane will come from.
5268 SmallVector<int, 16> Srcs(Mask.size(), -1);
5269
5270 for (int Idx : Mask) {
5271 if (Idx == -1)
5272 continue;
5273 unsigned SrcIdx = Idx % Mask.size();
5274 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5275 if (Srcs[SrcIdx] == -1)
5276 // Mark this source as using this lane.
5277 Srcs[SrcIdx] = Src;
5278 else if (Srcs[SrcIdx] != Src)
5279 // The other source is using this lane: not disjoint.
5280 return SDValue();
5281 }
5282
5283 SmallVector<SDValue> SelectMaskVals;
5284 for (int Lane : Srcs) {
5285 if (Lane == -1)
5286 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5287 else
5288 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5289 }
5290 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5291 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5292 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5293 SVN->getOperand(0), SVN->getOperand(1));
5294
5295 // Move all indices relative to the first source.
5296 SmallVector<int> NewMask(Mask.size());
5297 for (unsigned I = 0; I < Mask.size(); I++) {
5298 if (Mask[I] == -1)
5299 NewMask[I] = -1;
5300 else
5301 NewMask[I] = Mask[I] % Mask.size();
5302 }
5303
5304 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5305}
5306
5307/// Try to widen element type to get a new mask value for a better permutation
5308/// sequence. This doesn't try to inspect the widened mask for profitability;
5309/// we speculate the widened form is equal or better. This has the effect of
5310 /// reducing mask constant sizes (allowing cheaper materialization sequences)
5311 /// and index sequence sizes (reducing register pressure and materialization
5312 /// cost), at the cost of (possibly) an extra VTYPE toggle.
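/// For example, a v8i8 mask <0, 1, 4, 5, 2, 3, 6, 7> widens to the v4i16 mask
/// <0, 2, 1, 3>, halving the number of index elements to materialize.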
5313 static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5314 SDLoc DL(Op);
5315 MVT VT = Op.getSimpleValueType();
5316 MVT ScalarVT = VT.getVectorElementType();
5317 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5318 SDValue V0 = Op.getOperand(0);
5319 SDValue V1 = Op.getOperand(1);
5320 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5321
5322 // Avoid wasted work leading to isTypeLegal check failing below
5323 if (ElementSize > 32)
5324 return SDValue();
5325
5326 SmallVector<int, 8> NewMask;
5327 if (!widenShuffleMaskElts(Mask, NewMask))
5328 return SDValue();
5329
5330 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5331 : MVT::getIntegerVT(ElementSize * 2);
5332 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5333 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5334 return SDValue();
5335 V0 = DAG.getBitcast(NewVT, V0);
5336 V1 = DAG.getBitcast(NewVT, V1);
5337 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5338}
5339
5340 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5341 const RISCVSubtarget &Subtarget) {
5342 SDValue V1 = Op.getOperand(0);
5343 SDValue V2 = Op.getOperand(1);
5344 SDLoc DL(Op);
5345 MVT XLenVT = Subtarget.getXLenVT();
5346 MVT VT = Op.getSimpleValueType();
5347 unsigned NumElts = VT.getVectorNumElements();
5348 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5349
5350 if (VT.getVectorElementType() == MVT::i1) {
5351 // Lower to a vror.vi of a larger element type if possible before we promote
5352 // i1s to i8s.
5353 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5354 return V;
5355 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5356 return V;
5357
5358 // Promote i1 shuffle to i8 shuffle.
5359 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5360 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5361 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5362 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5363 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5364 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5365 ISD::SETNE);
5366 }
5367
5368 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5369
5370 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5371
5372 if (SVN->isSplat()) {
5373 const int Lane = SVN->getSplatIndex();
5374 if (Lane >= 0) {
5375 MVT SVT = VT.getVectorElementType();
5376
5377 // Turn a splatted vector load into a strided load with an X0 stride.
5378 SDValue V = V1;
5379 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5380 // with undef.
5381 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5382 int Offset = Lane;
5383 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5384 int OpElements =
5385 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5386 V = V.getOperand(Offset / OpElements);
5387 Offset %= OpElements;
5388 }
5389
5390 // We need to ensure the load isn't atomic or volatile.
5391 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5392 auto *Ld = cast<LoadSDNode>(V);
5393 Offset *= SVT.getStoreSize();
5394 SDValue NewAddr = DAG.getMemBasePlusOffset(
5395 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5396
5397 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5398 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5399 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5400 SDValue IntID =
5401 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5402 SDValue Ops[] = {Ld->getChain(),
5403 IntID,
5404 DAG.getUNDEF(ContainerVT),
5405 NewAddr,
5406 DAG.getRegister(RISCV::X0, XLenVT),
5407 VL};
5408 SDValue NewLoad = DAG.getMemIntrinsicNode(
5409 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5410 DAG.getMachineFunction().getMachineMemOperand(
5411 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5412 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5413 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5414 }
5415
5416 MVT SplatVT = ContainerVT;
5417
5418 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5419 if (SVT == MVT::bf16 ||
5420 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5421 SVT = MVT::i16;
5422 SplatVT = ContainerVT.changeVectorElementType(SVT);
5423 }
5424
5425 // Otherwise use a scalar load and splat. This will give the best
5426 // opportunity to fold a splat into the operation. ISel can turn it into
5427 // the x0 strided load if we aren't able to fold away the select.
5428 if (SVT.isFloatingPoint())
5429 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5430 Ld->getPointerInfo().getWithOffset(Offset),
5431 Ld->getOriginalAlign(),
5432 Ld->getMemOperand()->getFlags());
5433 else
5434 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5435 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5436 Ld->getOriginalAlign(),
5437 Ld->getMemOperand()->getFlags());
5438 DAG.makeEquivalentMemoryOrdering(Ld, V);
5439
5440 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5441 : RISCVISD::VMV_V_X_VL;
5442 SDValue Splat =
5443 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5444 Splat = DAG.getBitcast(ContainerVT, Splat);
5445 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5446 }
5447
5448 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5449 assert(Lane < (int)NumElts && "Unexpected lane!");
5450 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5451 V1, DAG.getConstant(Lane, DL, XLenVT),
5452 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5453 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5454 }
5455 }
5456
5457 // For exact VLEN m2 or greater, try to split to m1 operations if we
5458 // can split cleanly.
5459 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5460 return V;
5461
5462 ArrayRef<int> Mask = SVN->getMask();
5463
5464 if (SDValue V =
5465 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5466 return V;
5467
5468 if (SDValue V =
5469 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5470 return V;
5471
5472 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5473 // available.
5474 if (Subtarget.hasStdExtZvkb())
5475 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5476 return V;
5477
5478 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5479 // be undef which can be handled with a single SLIDEDOWN/UP.
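// For example, on v8i8 the single-source mask <2, 3, 4, 5, 6, 7, 0, 1> is a
// rotation by 2: slide the source down by 2 to fill lanes 0-5 and slide it up
// by 6 to fill lanes 6-7.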
5480 int LoSrc, HiSrc;
5481 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5482 if (Rotation > 0) {
5483 SDValue LoV, HiV;
5484 if (LoSrc >= 0) {
5485 LoV = LoSrc == 0 ? V1 : V2;
5486 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5487 }
5488 if (HiSrc >= 0) {
5489 HiV = HiSrc == 0 ? V1 : V2;
5490 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5491 }
5492
5493 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5494 // to slide LoV up by (NumElts - Rotation).
5495 unsigned InvRotate = NumElts - Rotation;
5496
5497 SDValue Res = DAG.getUNDEF(ContainerVT);
5498 if (HiV) {
5499 // Even though we could use a smaller VL, don't do so; it would require an
5500 // extra vsetivli toggle.
5501 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5502 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5503 }
5504 if (LoV)
5505 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5506 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5507 RISCVII::TAIL_AGNOSTIC);
5508
5509 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5510 }
5511
5512 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5513 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5514
5515 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5516 // use shift and truncate to perform the shuffle.
5517 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5518 // shift-and-trunc reducing total cost for everything except an mf8 result.
5519 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5520 // to do the entire operation.
5521 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5522 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5523 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5524 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5525 unsigned Index = 0;
5526 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5527 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5528 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5529 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5530 }
5531 }
5532 }
5533
5534 if (SDValue V =
5535 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5536 return V;
5537
5538 // Detect an interleave shuffle and lower to
5539 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
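// The algebra: zext(x) + zext(y) + zext(y) * (2^eltbits - 1)
// == zext(x) + zext(y) * 2^eltbits, so y lands in the high half and x in the
// low half of each widened element, which is an interleave after bitcasting back.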
5540 int EvenSrc, OddSrc;
5541 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5542 // Extract the halves of the vectors.
5543 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5544
5545 // Recognize if one half is actually undef; the matching above will
5546 // otherwise reuse the even stream for the undef one. This improves
5547 // spread(2) shuffles.
5548 bool LaneIsUndef[2] = { true, true};
5549 for (unsigned i = 0; i < Mask.size(); i++)
5550 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5551
5552 int Size = Mask.size();
5553 SDValue EvenV, OddV;
5554 if (LaneIsUndef[0]) {
5555 EvenV = DAG.getUNDEF(HalfVT);
5556 } else {
5557 assert(EvenSrc >= 0 && "Undef source?");
5558 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5559 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5560 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5561 }
5562
5563 if (LaneIsUndef[1]) {
5564 OddV = DAG.getUNDEF(HalfVT);
5565 } else {
5566 assert(OddSrc >= 0 && "Undef source?");
5567 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5568 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5569 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5570 }
5571
5572 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5573 }
5574
5575
5576 // Handle any remaining single source shuffles
5577 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5578 if (V2.isUndef()) {
5579 // We might be able to express the shuffle as a bitrotate. But even if we
5580 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5581 // shifts and a vor will have a higher throughput than a vrgather.
5582 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5583 return V;
5584
5585 // Before hitting generic lowering fallbacks, try to widen the mask
5586 // to a wider SEW.
5587 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5588 return V;
5589
5590 // Can we generate a vcompress instead of a vrgather? These scale better
5591 // at high LMUL, at the cost of not being able to fold a following select
5592 // into them. The mask constants are also smaller than the index vector
5593 // constants, and thus easier to materialize.
5594 if (isCompressMask(Mask)) {
5595 SmallVector<SDValue> MaskVals(NumElts,
5596 DAG.getConstant(false, DL, XLenVT));
5597 for (auto Idx : Mask) {
5598 if (Idx == -1)
5599 break;
5600 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5601 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5602 }
5603 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5604 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5605 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5606 DAG.getUNDEF(VT));
5607 }
5608
5609 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5610 // is fully covered in interleave(2) above, so it is ignored here.
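// For example, a factor-4, index-0 spread mask looks like
// <0, u, u, u, 1, u, u, u, ...>, i.e. source element i lands in lane i*4.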
5611 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5612 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5613 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5614 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5615 unsigned Index;
5616 if (isSpreadMask(Mask, Factor, Index)) {
5617 MVT NarrowVT =
5618 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5619 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5620 DAG.getVectorIdxConstant(0, DL));
5621 return getWideningSpread(Src, Factor, Index, DL, DAG);
5622 }
5623 }
5624 }
5625
5626 if (VT.getScalarSizeInBits() == 8 &&
5627 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5628 // On such a vector we're unable to use i8 as the index type.
5629 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5630 // may involve vector splitting if we're already at LMUL=8, or our
5631 // user-supplied maximum fixed-length LMUL.
5632 return SDValue();
5633 }
5634
5635 // Base case for the two-operand recursion below: handle the worst-case
5636 // single-source shuffle.
5637 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5638 MVT IndexVT = VT.changeTypeToInteger();
5639 // Since we can't introduce illegal index types at this stage, use i16 and
5640 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5641 // than XLenVT.
5642 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5643 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5644 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5645 }
5646
5647 // If the mask allows, we can do all the index computation in 16 bits. This
5648 // requires less work and less register pressure at high LMUL, and creates
5649 // smaller constants which may be cheaper to materialize.
5650 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5651 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5652 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5653 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5654 }
5655
5656 MVT IndexContainerVT =
5657 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5658
5659 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5660 SmallVector<SDValue> GatherIndicesLHS;
5661 for (int MaskIndex : Mask) {
5662 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5663 GatherIndicesLHS.push_back(IsLHSIndex
5664 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5665 : DAG.getUNDEF(XLenVT));
5666 }
5667 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5668 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5669 Subtarget);
5670 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5671 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5672 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5673 }
5674
5675 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5676 // merged with a second vrgather.
5677 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5678
5679 // Now construct the mask that will be used by the blended vrgather operation.
5680 // Construct the appropriate indices into each vector.
5681 for (int MaskIndex : Mask) {
5682 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5683 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5684 ? MaskIndex : -1);
5685 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5686 }
5687
5688 // If the mask indices are disjoint between the two sources, we can lower it
5689 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5690 // operands may end up being lowered to something cheaper than a vrgather.vv.
5691 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5692 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5693 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5694 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5695 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5696 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5697 return V;
5698
5699 // Before hitting generic lowering fallbacks, try to widen the mask
5700 // to a wider SEW.
5701 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5702 return V;
5703
5704 // Try to pick a profitable operand order.
5705 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5706 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5707
5708 // Recursively invoke lowering for each operand if we had two
5709 // independent single source shuffles, and then combine the result via a
5710 // vselect. Note that the vselect will likely be folded back into the
5711 // second permute (vrgather, or other) by the post-isel combine.
5712 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5713 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5714
5715 SmallVector<SDValue> MaskVals;
5716 for (int MaskIndex : Mask) {
5717 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5718 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5719 }
5720
5721 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5722 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5723 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5724
5725 if (SwapOps)
5726 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5727 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5728}
5729
5730 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5731 // Support splats for any type. These should type legalize well.
5732 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5733 return true;
5734
5735 // Only support legal VTs for other shuffles for now.
5736 if (!isTypeLegal(VT))
5737 return false;
5738
5739 MVT SVT = VT.getSimpleVT();
5740
5741 // Not for i1 vectors.
5742 if (SVT.getScalarType() == MVT::i1)
5743 return false;
5744
5745 int Dummy1, Dummy2;
5746 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5747 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5748}
5749
5750// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5751// the exponent.
5752SDValue
5753RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5754 SelectionDAG &DAG) const {
5755 MVT VT = Op.getSimpleValueType();
5756 unsigned EltSize = VT.getScalarSizeInBits();
5757 SDValue Src = Op.getOperand(0);
5758 SDLoc DL(Op);
5759 MVT ContainerVT = VT;
5760
5761 SDValue Mask, VL;
5762 if (Op->isVPOpcode()) {
5763 Mask = Op.getOperand(1);
5764 if (VT.isFixedLengthVector())
5765 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5766 Subtarget);
5767 VL = Op.getOperand(2);
5768 }
5769
5770 // We choose an FP type that can represent the value if possible. Otherwise,
5771 // we use a round-towards-zero conversion to get the correct exponent in the result.
5772 // TODO: Use f16 for i8 when possible?
5773 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5774 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5775 FloatEltVT = MVT::f32;
5776 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5777
5778 // Legal types should have been checked in the RISCVTargetLowering
5779 // constructor.
5780 // TODO: Splitting may make sense in some cases.
5781 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5782 "Expected legal float type!");
5783
5784 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5785 // The trailing zero count is equal to log2 of this single bit value.
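// For example, with X == 0b01101000, X & -X == 0b00001000, and
// log2(0b00001000) == 3 == cttz(X).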
5786 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5787 SDValue Neg = DAG.getNegative(Src, DL, VT);
5788 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5789 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5790 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5791 Src, Mask, VL);
5792 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5793 }
5794
5795 // We have a legal FP type, convert to it.
5796 SDValue FloatVal;
5797 if (FloatVT.bitsGT(VT)) {
5798 if (Op->isVPOpcode())
5799 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5800 else
5801 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5802 } else {
5803 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5804 if (VT.isFixedLengthVector()) {
5805 ContainerVT = getContainerForFixedLengthVector(VT);
5806 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5807 }
5808 if (!Op->isVPOpcode())
5809 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5810 SDValue RTZRM =
5811 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5812 MVT ContainerFloatVT =
5813 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5814 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5815 Src, Mask, RTZRM, VL);
5816 if (VT.isFixedLengthVector())
5817 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5818 }
5819 // Bitcast to integer and shift the exponent to the LSB.
5820 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5821 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5822 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5823
5824 SDValue Exp;
5825 // Restore to the original type. The truncation after the SRL is what generates vnsrl.
5826 if (Op->isVPOpcode()) {
5827 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5828 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5829 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5830 } else {
5831 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5832 DAG.getConstant(ShiftAmt, DL, IntVT));
5833 if (IntVT.bitsLT(VT))
5834 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5835 else if (IntVT.bitsGT(VT))
5836 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5837 }
5838
5839 // The exponent contains log2 of the value in biased form.
5840 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5841 // For trailing zeros, we just need to subtract the bias.
5842 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5843 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5844 DAG.getConstant(ExponentBias, DL, VT));
5845 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5846 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5847 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5848
5849 // For leading zeros, we need to remove the bias and convert from log2 to
5850 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
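// For example, with i32 elements and f32 (bias 127): an isolated bit value of
// 8 converts to a biased exponent of 130, so 130 - 127 == 3 == cttz, and
// (127 + 31) - 130 == 28 == ctlz(8) for a 32-bit element.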
5851 unsigned Adjust = ExponentBias + (EltSize - 1);
5852 SDValue Res;
5853 if (Op->isVPOpcode())
5854 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5855 Mask, VL);
5856 else
5857 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5858
5859 // With a zero input, the result above equals Adjust, which is greater than
5860 // EltSize. Hence, we can take min(Res, EltSize) for CTLZ.
5861 if (Op.getOpcode() == ISD::CTLZ)
5862 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5863 else if (Op.getOpcode() == ISD::VP_CTLZ)
5864 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5865 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5866 return Res;
5867}
5868
5869SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5870 SelectionDAG &DAG) const {
5871 SDLoc DL(Op);
5872 MVT XLenVT = Subtarget.getXLenVT();
5873 SDValue Source = Op->getOperand(0);
5874 MVT SrcVT = Source.getSimpleValueType();
5875 SDValue Mask = Op->getOperand(1);
5876 SDValue EVL = Op->getOperand(2);
5877
5878 if (SrcVT.isFixedLengthVector()) {
5879 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5880 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5881 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5882 Subtarget);
5883 SrcVT = ContainerVT;
5884 }
5885
5886 // Convert to boolean vector.
5887 if (SrcVT.getScalarType() != MVT::i1) {
5888 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5889 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5890 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5891 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5892 DAG.getUNDEF(SrcVT), Mask, EVL});
5893 }
5894
5895 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5896 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5898 // In this case, we can interpret poison as -1, so there is nothing further to do.
5898 return Res;
5899
5900 // Convert -1 to VL.
5901 SDValue SetCC =
5902 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5903 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5904 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5905}
5906
5907// While RVV has alignment restrictions, we should always be able to load as a
5908// legal equivalently-sized byte-typed vector instead. This method is
5909 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5910// the load is already correctly-aligned, it returns SDValue().
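// For example, an underaligned load of v4i32 is re-expressed as a load of
// v16i8 followed by a bitcast back to v4i32.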
5911SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5912 SelectionDAG &DAG) const {
5913 auto *Load = cast<LoadSDNode>(Op);
5914 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5915
5916 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5917 Load->getMemoryVT(),
5918 *Load->getMemOperand()))
5919 return SDValue();
5920
5921 SDLoc DL(Op);
5922 MVT VT = Op.getSimpleValueType();
5923 unsigned EltSizeBits = VT.getScalarSizeInBits();
5924 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5925 "Unexpected unaligned RVV load type");
5926 MVT NewVT =
5927 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5928 assert(NewVT.isValid() &&
5929 "Expecting equally-sized RVV vector types to be legal");
5930 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5931 Load->getPointerInfo(), Load->getOriginalAlign(),
5932 Load->getMemOperand()->getFlags());
5933 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5934}
5935
5936// While RVV has alignment restrictions, we should always be able to store as a
5937// legal equivalently-sized byte-typed vector instead. This method is
5938 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5939// returns SDValue() if the store is already correctly aligned.
5940SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5941 SelectionDAG &DAG) const {
5942 auto *Store = cast<StoreSDNode>(Op);
5943 assert(Store && Store->getValue().getValueType().isVector() &&
5944 "Expected vector store");
5945
5946 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5947 Store->getMemoryVT(),
5948 *Store->getMemOperand()))
5949 return SDValue();
5950
5951 SDLoc DL(Op);
5952 SDValue StoredVal = Store->getValue();
5953 MVT VT = StoredVal.getSimpleValueType();
5954 unsigned EltSizeBits = VT.getScalarSizeInBits();
5955 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5956 "Unexpected unaligned RVV store type");
5957 MVT NewVT =
5958 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5959 assert(NewVT.isValid() &&
5960 "Expecting equally-sized RVV vector types to be legal");
5961 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5962 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5963 Store->getPointerInfo(), Store->getOriginalAlign(),
5964 Store->getMemOperand()->getFlags());
5965}
5966
5967 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5968 const RISCVSubtarget &Subtarget) {
5969 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5970
5971 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5972
5973 // All simm32 constants should be handled by isel.
5974 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5975 // this check redundant, but small immediates are common, so this check
5976 // improves compile time.
5977 if (isInt<32>(Imm))
5978 return Op;
5979
5980 // We only need to cost the immediate, if constant pool lowering is enabled.
5981 if (!Subtarget.useConstantPoolForLargeInts())
5982 return Op;
5983
5984 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5985 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5986 return Op;
5987
5988 // Optimizations below are disabled for opt size. If we're optimizing for
5989 // size, use a constant pool.
5990 if (DAG.shouldOptForSize())
5991 return SDValue();
5992
5993 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5994 // do that if it will avoid a constant pool. It will require an extra
5995 // temporary register though.
5996 // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5997 // the low and high 32 bits are the same and bits 31 and 63 are set.
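// For example, 0x1234567812345678 can be built as X + (X << 32) with
// X = 0x12345678, avoiding a constant pool load at the cost of a scratch register.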
5998 unsigned ShiftAmt, AddOpc;
5999 RISCVMatInt::InstSeq SeqLo =
6000 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6001 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6002 return Op;
6003
6004 return SDValue();
6005}
6006
6007SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6008 SelectionDAG &DAG) const {
6009 MVT VT = Op.getSimpleValueType();
6010 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6011
6012 // Can this constant be selected by a Zfa FLI instruction?
6013 bool Negate = false;
6014 int Index = getLegalZfaFPImm(Imm, VT);
6015
6016 // If the constant is negative, try negating.
6017 if (Index < 0 && Imm.isNegative()) {
6018 Index = getLegalZfaFPImm(-Imm, VT);
6019 Negate = true;
6020 }
6021
6022 // If we couldn't find a FLI lowering, fall back to generic code.
6023 if (Index < 0)
6024 return SDValue();
6025
6026 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6027 SDLoc DL(Op);
6028 SDValue Const =
6029 DAG.getNode(RISCVISD::FLI, DL, VT,
6030 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6031 if (!Negate)
6032 return Const;
6033
6034 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6035}
6036
6037 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6038 const RISCVSubtarget &Subtarget) {
6039 SDLoc dl(Op);
6040 AtomicOrdering FenceOrdering =
6041 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6042 SyncScope::ID FenceSSID =
6043 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6044
6045 if (Subtarget.hasStdExtZtso()) {
6046 // The only fence that needs an instruction is a sequentially-consistent
6047 // cross-thread fence.
6048 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6049 FenceSSID == SyncScope::System)
6050 return Op;
6051
6052 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6053 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6054 }
6055
6056 // singlethread fences only synchronize with signal handlers on the same
6057 // thread and thus only need to preserve instruction order, not actually
6058 // enforce memory ordering.
6059 if (FenceSSID == SyncScope::SingleThread)
6060 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6061 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6062
6063 return Op;
6064}
6065
6066SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6067 SelectionDAG &DAG) const {
6068 SDLoc DL(Op);
6069 MVT VT = Op.getSimpleValueType();
6070 MVT XLenVT = Subtarget.getXLenVT();
6071 unsigned Check = Op.getConstantOperandVal(1);
6072 unsigned TDCMask = 0;
6073 if (Check & fcSNan)
6074 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6075 if (Check & fcQNan)
6076 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6077 if (Check & fcPosInf)
6078 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6079 if (Check & fcNegInf)
6080 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6081 if (Check & fcPosNormal)
6082 TDCMask |= RISCV::FPMASK_Positive_Normal;
6083 if (Check & fcNegNormal)
6084 TDCMask |= RISCV::FPMASK_Negative_Normal;
6085 if (Check & fcPosSubnormal)
6086 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6087 if (Check & fcNegSubnormal)
6088 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6089 if (Check & fcPosZero)
6090 TDCMask |= RISCV::FPMASK_Positive_Zero;
6091 if (Check & fcNegZero)
6092 TDCMask |= RISCV::FPMASK_Negative_Zero;
6093
6094 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6095
6096 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6097
6098 if (VT.isVector()) {
6099 SDValue Op0 = Op.getOperand(0);
6100 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6101
6102 if (VT.isScalableVector()) {
6103 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6104 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6105 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6106 Mask = Op.getOperand(2);
6107 VL = Op.getOperand(3);
6108 }
6109 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6110 VL, Op->getFlags());
6111 if (IsOneBitMask)
6112 return DAG.getSetCC(DL, VT, FPCLASS,
6113 DAG.getConstant(TDCMask, DL, DstVT),
6114 ISD::SETEQ);
6115 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6116 DAG.getConstant(TDCMask, DL, DstVT));
6117 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6118 ISD::SETNE);
6119 }
6120
6121 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6122 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6123 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6124 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6125 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6126 Mask = Op.getOperand(2);
6127 MVT MaskContainerVT =
6128 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6129 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6130 VL = Op.getOperand(3);
6131 }
6132 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6133
6134 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6135 Mask, VL, Op->getFlags());
6136
6137 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6138 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6139 if (IsOneBitMask) {
6140 SDValue VMSEQ =
6141 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6142 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6143 DAG.getUNDEF(ContainerVT), Mask, VL});
6144 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6145 }
6146 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6147 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6148
6149 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6150 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6151 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6152
6153 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6154 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6155 DAG.getUNDEF(ContainerVT), Mask, VL});
6156 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6157 }
6158
6159 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6160 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6161 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6162 ISD::SETNE);
6163 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6164}
6165
6166// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6167// operations propagate nans.
6169 const RISCVSubtarget &Subtarget) {
6170 SDLoc DL(Op);
6171 MVT VT = Op.getSimpleValueType();
6172
6173 SDValue X = Op.getOperand(0);
6174 SDValue Y = Op.getOperand(1);
6175
6176 if (!VT.isVector()) {
6177 MVT XLenVT = Subtarget.getXLenVT();
6178
6179 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6180 // ensures that when one input is a nan, the other will also be a nan,
6181 // allowing the nan to propagate. If both inputs are nan, this will swap the
6182 // inputs, which is harmless.
6183
6184 SDValue NewY = Y;
6185 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6186 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6187 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6188 }
6189
6190 SDValue NewX = X;
6191 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6192 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6193 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6194 }
6195
6196 unsigned Opc =
6197 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6198 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6199 }
6200
6201 // Check for NaN inputs before converting the fixed-length vectors to scalable.
6202 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6203 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6204
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector()) {
6207 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6208 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6209 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6210 }
6211
6212 SDValue Mask, VL;
6213 if (Op->isVPOpcode()) {
6214 Mask = Op.getOperand(2);
6215 if (VT.isFixedLengthVector())
6216 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6217 Subtarget);
6218 VL = Op.getOperand(3);
6219 } else {
6220 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6221 }
6222
6223 SDValue NewY = Y;
6224 if (!XIsNeverNan) {
6225 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6226 {X, X, DAG.getCondCode(ISD::SETOEQ),
6227 DAG.getUNDEF(ContainerVT), Mask, VL});
6228 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6229 DAG.getUNDEF(ContainerVT), VL);
6230 }
6231
6232 SDValue NewX = X;
6233 if (!YIsNeverNan) {
6234 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6235 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6236 DAG.getUNDEF(ContainerVT), Mask, VL});
6237 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6238 DAG.getUNDEF(ContainerVT), VL);
6239 }
6240
6241 unsigned Opc =
6242 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6243 ? RISCVISD::VFMAX_VL
6244 : RISCVISD::VFMIN_VL;
6245 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6246 DAG.getUNDEF(ContainerVT), Mask, VL);
6247 if (VT.isFixedLengthVector())
6248 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6249 return Res;
6250}
6251
6253 const RISCVSubtarget &Subtarget) {
6254 bool IsFABS = Op.getOpcode() == ISD::FABS;
6255 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6256 "Wrong opcode for lowering FABS or FNEG.");
6257
6258 MVT XLenVT = Subtarget.getXLenVT();
6259 MVT VT = Op.getSimpleValueType();
6260 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6261
6262 SDLoc DL(Op);
6263 SDValue Fmv =
6264 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6265
6266 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6267 Mask = Mask.sext(Subtarget.getXLen());
6268
6269 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6270 SDValue Logic =
6271 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6272 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6273}
6274
6276 const RISCVSubtarget &Subtarget) {
6277 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6278
6279 MVT XLenVT = Subtarget.getXLenVT();
6280 MVT VT = Op.getSimpleValueType();
6281 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6282
6283 SDValue Mag = Op.getOperand(0);
6284 SDValue Sign = Op.getOperand(1);
6285
6286 SDLoc DL(Op);
6287
6288 // Get sign bit into an integer value.
6289 SDValue SignAsInt;
6290 unsigned SignSize = Sign.getValueSizeInBits();
6291 if (SignSize == Subtarget.getXLen()) {
6292 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6293 } else if (SignSize == 16) {
6294 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6295 } else if (SignSize == 32) {
6296 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6297 } else if (SignSize == 64) {
6298 assert(XLenVT == MVT::i32 && "Unexpected type");
6299 // Copy the upper word to integer.
6300 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6301 .getValue(1);
6302 SignSize = 32;
6303 } else
6304 llvm_unreachable("Unexpected sign size");
6305
6306 // Get the signbit at the right position for MagAsInt.
6307 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
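// For example, copying the sign from an f32 into an f16 magnitude gives
// ShiftAmount == 32 - 16 == 16, moving the sign from bit 31 down to bit 15.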
6308 if (ShiftAmount > 0) {
6309 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6310 DAG.getConstant(ShiftAmount, DL, XLenVT));
6311 } else if (ShiftAmount < 0) {
6312 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6313 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6314 }
6315
6316 // Mask the sign bit and any bits above it. The extra bits will be dropped
6317 // when we convert back to FP.
6318 SDValue SignMask = DAG.getConstant(
6319 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6320 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6321
6322 // Transform Mag value to integer, and clear the sign bit.
6323 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6324 SDValue ClearSignMask = DAG.getConstant(
6325 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6326 SDValue ClearedSign =
6327 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6328
6329 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6330 SDNodeFlags::Disjoint);
6331
6332 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6333}
6334
6336 /// Get the RISC-V target-specific VL op for a given SDNode.
6336static unsigned getRISCVVLOp(SDValue Op) {
6337#define OP_CASE(NODE) \
6338 case ISD::NODE: \
6339 return RISCVISD::NODE##_VL;
6340#define VP_CASE(NODE) \
6341 case ISD::VP_##NODE: \
6342 return RISCVISD::NODE##_VL;
6343 // clang-format off
6344 switch (Op.getOpcode()) {
6345 default:
6346 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6347 OP_CASE(ADD)
6348 OP_CASE(SUB)
6349 OP_CASE(MUL)
6350 OP_CASE(MULHS)
6351 OP_CASE(MULHU)
6352 OP_CASE(SDIV)
6353 OP_CASE(SREM)
6354 OP_CASE(UDIV)
6355 OP_CASE(UREM)
6356 OP_CASE(SHL)
6357 OP_CASE(SRA)
6358 OP_CASE(SRL)
6359 OP_CASE(ROTL)
6360 OP_CASE(ROTR)
6361 OP_CASE(BSWAP)
6362 OP_CASE(CTTZ)
6363 OP_CASE(CTLZ)
6364 OP_CASE(CTPOP)
6365 OP_CASE(BITREVERSE)
6366 OP_CASE(SADDSAT)
6367 OP_CASE(UADDSAT)
6368 OP_CASE(SSUBSAT)
6369 OP_CASE(USUBSAT)
6370 OP_CASE(AVGFLOORS)
6371 OP_CASE(AVGFLOORU)
6372 OP_CASE(AVGCEILS)
6373 OP_CASE(AVGCEILU)
6374 OP_CASE(FADD)
6375 OP_CASE(FSUB)
6376 OP_CASE(FMUL)
6377 OP_CASE(FDIV)
6378 OP_CASE(FNEG)
6379 OP_CASE(FABS)
6380 OP_CASE(FSQRT)
6381 OP_CASE(SMIN)
6382 OP_CASE(SMAX)
6383 OP_CASE(UMIN)
6384 OP_CASE(UMAX)
6385 OP_CASE(STRICT_FADD)
6386 OP_CASE(STRICT_FSUB)
6387 OP_CASE(STRICT_FMUL)
6388 OP_CASE(STRICT_FDIV)
6389 OP_CASE(STRICT_FSQRT)
6390 VP_CASE(ADD) // VP_ADD
6391 VP_CASE(SUB) // VP_SUB
6392 VP_CASE(MUL) // VP_MUL
6393 VP_CASE(SDIV) // VP_SDIV
6394 VP_CASE(SREM) // VP_SREM
6395 VP_CASE(UDIV) // VP_UDIV
6396 VP_CASE(UREM) // VP_UREM
6397 VP_CASE(SHL) // VP_SHL
6398 VP_CASE(FADD) // VP_FADD
6399 VP_CASE(FSUB) // VP_FSUB
6400 VP_CASE(FMUL) // VP_FMUL
6401 VP_CASE(FDIV) // VP_FDIV
6402 VP_CASE(FNEG) // VP_FNEG
6403 VP_CASE(FABS) // VP_FABS
6404 VP_CASE(SMIN) // VP_SMIN
6405 VP_CASE(SMAX) // VP_SMAX
6406 VP_CASE(UMIN) // VP_UMIN
6407 VP_CASE(UMAX) // VP_UMAX
6408 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6409 VP_CASE(SETCC) // VP_SETCC
6410 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6411 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6412 VP_CASE(BITREVERSE) // VP_BITREVERSE
6413 VP_CASE(SADDSAT) // VP_SADDSAT
6414 VP_CASE(UADDSAT) // VP_UADDSAT
6415 VP_CASE(SSUBSAT) // VP_SSUBSAT
6416 VP_CASE(USUBSAT) // VP_USUBSAT
6417 VP_CASE(BSWAP) // VP_BSWAP
6418 VP_CASE(CTLZ) // VP_CTLZ
6419 VP_CASE(CTTZ) // VP_CTTZ
6420 VP_CASE(CTPOP) // VP_CTPOP
6421 case ISD::CTLZ_ZERO_UNDEF:
6422 case ISD::VP_CTLZ_ZERO_UNDEF:
6423 return RISCVISD::CTLZ_VL;
6424 case ISD::CTTZ_ZERO_UNDEF:
6425 case ISD::VP_CTTZ_ZERO_UNDEF:
6426 return RISCVISD::CTTZ_VL;
6427 case ISD::FMA:
6428 case ISD::VP_FMA:
6429 return RISCVISD::VFMADD_VL;
6430 case ISD::STRICT_FMA:
6431 return RISCVISD::STRICT_VFMADD_VL;
6432 case ISD::AND:
6433 case ISD::VP_AND:
6434 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6435 return RISCVISD::VMAND_VL;
6436 return RISCVISD::AND_VL;
6437 case ISD::OR:
6438 case ISD::VP_OR:
6439 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6440 return RISCVISD::VMOR_VL;
6441 return RISCVISD::OR_VL;
6442 case ISD::XOR:
6443 case ISD::VP_XOR:
6444 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6445 return RISCVISD::VMXOR_VL;
6446 return RISCVISD::XOR_VL;
6447 case ISD::VP_SELECT:
6448 case ISD::VP_MERGE:
6449 return RISCVISD::VMERGE_VL;
6450 case ISD::VP_SRA:
6451 return RISCVISD::SRA_VL;
6452 case ISD::VP_SRL:
6453 return RISCVISD::SRL_VL;
6454 case ISD::VP_SQRT:
6455 return RISCVISD::FSQRT_VL;
6456 case ISD::VP_SIGN_EXTEND:
6457 return RISCVISD::VSEXT_VL;
6458 case ISD::VP_ZERO_EXTEND:
6459 return RISCVISD::VZEXT_VL;
6460 case ISD::VP_FP_TO_SINT:
6461 return RISCVISD::VFCVT_RTZ_X_F_VL;
6462 case ISD::VP_FP_TO_UINT:
6463 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6464 case ISD::FMINNUM:
6465 case ISD::VP_FMINNUM:
6466 return RISCVISD::VFMIN_VL;
6467 case ISD::FMAXNUM:
6468 case ISD::VP_FMAXNUM:
6469 return RISCVISD::VFMAX_VL;
6470 case ISD::LRINT:
6471 case ISD::VP_LRINT:
6472 case ISD::LLRINT:
6473 case ISD::VP_LLRINT:
6474 return RISCVISD::VFCVT_RM_X_F_VL;
6475 }
6476 // clang-format on
6477#undef OP_CASE
6478#undef VP_CASE
6479}
6480
6481 /// Return true if a RISC-V target-specific op has a passthru operand.
6482static bool hasPassthruOp(unsigned Opcode) {
6483 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6485 "not a RISC-V target specific op");
6486 static_assert(
6489 "adding target specific op should update this function");
6490 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6491 return true;
6492 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6493 return true;
6494 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6495 return true;
6496 if (Opcode == RISCVISD::SETCC_VL)
6497 return true;
6498 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6499 return true;
6500 if (Opcode == RISCVISD::VMERGE_VL)
6501 return true;
6502 return false;
6503}
6504
6505 /// Return true if a RISC-V target-specific op has a mask operand.
6506static bool hasMaskOp(unsigned Opcode) {
6507 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6509 "not a RISC-V target specific op");
6510 static_assert(
6513 "adding target specific op should update this function");
6514 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6515 return true;
6516 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6517 return true;
6518 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6520 return true;
6521 return false;
6522}
6523
6524 static bool isPromotedOpNeedingSplit(SDValue Op,
6525 const RISCVSubtarget &Subtarget) {
6526 if (Op.getValueType() == MVT::nxv32f16 &&
6527 (Subtarget.hasVInstructionsF16Minimal() &&
6528 !Subtarget.hasVInstructionsF16()))
6529 return true;
6530 if (Op.getValueType() == MVT::nxv32bf16)
6531 return true;
6532 return false;
6533}
6534
6535 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6536 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6537 SDLoc DL(Op);
6538
6539 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6540 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6541
6542 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6543 if (!Op.getOperand(j).getValueType().isVector()) {
6544 LoOperands[j] = Op.getOperand(j);
6545 HiOperands[j] = Op.getOperand(j);
6546 continue;
6547 }
6548 std::tie(LoOperands[j], HiOperands[j]) =
6549 DAG.SplitVector(Op.getOperand(j), DL);
6550 }
6551
6552 SDValue LoRes =
6553 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6554 SDValue HiRes =
6555 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6556
6557 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6558}
6559
6560 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6561 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6562 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6563 SDLoc DL(Op);
6564
6565 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6566 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6567
6568 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6569 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6570 std::tie(LoOperands[j], HiOperands[j]) =
6571 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6572 continue;
6573 }
6574 if (!Op.getOperand(j).getValueType().isVector()) {
6575 LoOperands[j] = Op.getOperand(j);
6576 HiOperands[j] = Op.getOperand(j);
6577 continue;
6578 }
6579 std::tie(LoOperands[j], HiOperands[j]) =
6580 DAG.SplitVector(Op.getOperand(j), DL);
6581 }
6582
6583 SDValue LoRes =
6584 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6585 SDValue HiRes =
6586 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6587
6588 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6589}
6590
6591 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6592 SDLoc DL(Op);
6593
6594 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6595 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6596 auto [EVLLo, EVLHi] =
6597 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6598
6599 SDValue ResLo =
6600 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6601 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6602 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6603 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6604}
6605
6606 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6607
6608 assert(Op->isStrictFPOpcode());
6609
6610 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6611
6612 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6613 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6614
6615 SDLoc DL(Op);
6616
6617 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6618 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6619
6620 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6621 if (!Op.getOperand(j).getValueType().isVector()) {
6622 LoOperands[j] = Op.getOperand(j);
6623 HiOperands[j] = Op.getOperand(j);
6624 continue;
6625 }
6626 std::tie(LoOperands[j], HiOperands[j]) =
6627 DAG.SplitVector(Op.getOperand(j), DL);
6628 }
6629
6630 SDValue LoRes =
6631 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6632 HiOperands[0] = LoRes.getValue(1);
6633 SDValue HiRes =
6634 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6635
6636 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6637 LoRes.getValue(0), HiRes.getValue(0));
6638 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6639}
6640
6641 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6642 SelectionDAG &DAG) const {
6643 switch (Op.getOpcode()) {
6644 default:
6645 report_fatal_error("unimplemented operand");
6646 case ISD::ATOMIC_FENCE:
6647 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6648 case ISD::GlobalAddress:
6649 return lowerGlobalAddress(Op, DAG);
6650 case ISD::BlockAddress:
6651 return lowerBlockAddress(Op, DAG);
6652 case ISD::ConstantPool:
6653 return lowerConstantPool(Op, DAG);
6654 case ISD::JumpTable:
6655 return lowerJumpTable(Op, DAG);
6656 case ISD::GlobalTLSAddress:
6657 return lowerGlobalTLSAddress(Op, DAG);
6658 case ISD::Constant:
6659 return lowerConstant(Op, DAG, Subtarget);
6660 case ISD::ConstantFP:
6661 return lowerConstantFP(Op, DAG);
6662 case ISD::SELECT:
6663 return lowerSELECT(Op, DAG);
6664 case ISD::BRCOND:
6665 return lowerBRCOND(Op, DAG);
6666 case ISD::VASTART:
6667 return lowerVASTART(Op, DAG);
6668 case ISD::FRAMEADDR:
6669 return lowerFRAMEADDR(Op, DAG);
6670 case ISD::RETURNADDR:
6671 return lowerRETURNADDR(Op, DAG);
6672 case ISD::SHL_PARTS:
6673 return lowerShiftLeftParts(Op, DAG);
6674 case ISD::SRA_PARTS:
6675 return lowerShiftRightParts(Op, DAG, true);
6676 case ISD::SRL_PARTS:
6677 return lowerShiftRightParts(Op, DAG, false);
6678 case ISD::ROTL:
6679 case ISD::ROTR:
6680 if (Op.getValueType().isFixedLengthVector()) {
6681 assert(Subtarget.hasStdExtZvkb());
6682 return lowerToScalableOp(Op, DAG);
6683 }
6684 assert(Subtarget.hasVendorXTHeadBb() &&
6685 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6686 "Unexpected custom legalization");
6687 // XTHeadBb only supports rotate by constant.
6688 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6689 return SDValue();
6690 return Op;
6691 case ISD::BITCAST: {
6692 SDLoc DL(Op);
6693 EVT VT = Op.getValueType();
6694 SDValue Op0 = Op.getOperand(0);
6695 EVT Op0VT = Op0.getValueType();
6696 MVT XLenVT = Subtarget.getXLenVT();
6697 if (Op0VT == MVT::i16 &&
6698 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6699 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6700 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6701 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6702 }
6703 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6704 Subtarget.hasStdExtFOrZfinx()) {
6705 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6706 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6707 }
6708 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6709 Subtarget.hasStdExtDOrZdinx()) {
6710 SDValue Lo, Hi;
6711 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6712 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6713 }
6714
6715 // Consider other scalar<->scalar casts as legal if the types are legal.
6716 // Otherwise expand them.
6717 if (!VT.isVector() && !Op0VT.isVector()) {
6718 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6719 return Op;
6720 return SDValue();
6721 }
6722
6723 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6724 "Unexpected types");
6725
6726 if (VT.isFixedLengthVector()) {
6727 // We can handle fixed length vector bitcasts with a simple replacement
6728 // in isel.
6729 if (Op0VT.isFixedLengthVector())
6730 return Op;
6731 // When bitcasting from scalar to fixed-length vector, insert the scalar
6732 // into a one-element vector whose element type matches the scalar, then
6733 // bitcast that vector to the result type.
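// For example, (v4i8 (bitcast (i32 X))) becomes
// (v4i8 (bitcast (insert_vector_elt (v1i32 undef), X, 0))).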
6734 if (!Op0VT.isVector()) {
6735 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6736 if (!isTypeLegal(BVT))
6737 return SDValue();
6738 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6739 DAG.getUNDEF(BVT), Op0,
6740 DAG.getVectorIdxConstant(0, DL)));
6741 }
6742 return SDValue();
6743 }
6744 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6745 // thus: bitcast the vector to a one-element vector type whose element type
6746 // is the same as the result type, and extract the first element.
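// For example, (i32 (bitcast (v4i8 X))) becomes
// (extract_vector_elt (v1i32 (bitcast X)), 0).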
6747 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6748 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6749 if (!isTypeLegal(BVT))
6750 return SDValue();
6751 SDValue BVec = DAG.getBitcast(BVT, Op0);
6752 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6753 DAG.getVectorIdxConstant(0, DL));
6754 }
6755 return SDValue();
6756 }
6757 case ISD::INTRINSIC_WO_CHAIN:
6758 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6759 case ISD::INTRINSIC_W_CHAIN:
6760 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6761 case ISD::INTRINSIC_VOID:
6762 return LowerINTRINSIC_VOID(Op, DAG);
6763 case ISD::IS_FPCLASS:
6764 return LowerIS_FPCLASS(Op, DAG);
6765 case ISD::BITREVERSE: {
6766 MVT VT = Op.getSimpleValueType();
6767 if (VT.isFixedLengthVector()) {
6768 assert(Subtarget.hasStdExtZvbb());
6769 return lowerToScalableOp(Op, DAG);
6770 }
6771 SDLoc DL(Op);
6772 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6773 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6774 // Expand bitreverse to a bswap(rev8) followed by brev8.
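// BSWAP (rev8) reverses the byte order and BREV8 reverses the bits within
// each byte, so their composition reverses every bit of the value.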
6775 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6776 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6777 }
6778 case ISD::TRUNCATE:
6779 case ISD::TRUNCATE_SSAT_S:
6780 case ISD::TRUNCATE_USAT_U:
6781 // Only custom-lower vector truncates
6782 if (!Op.getSimpleValueType().isVector())
6783 return Op;
6784 return lowerVectorTruncLike(Op, DAG);
6785 case ISD::ANY_EXTEND:
6786 case ISD::ZERO_EXTEND:
6787 if (Op.getOperand(0).getValueType().isVector() &&
6788 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6789 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6790 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6791 case ISD::SIGN_EXTEND:
6792 if (Op.getOperand(0).getValueType().isVector() &&
6793 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6794 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6795 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6796 case ISD::SPLAT_VECTOR_PARTS:
6797 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6798 case ISD::INSERT_VECTOR_ELT:
6799 return lowerINSERT_VECTOR_ELT(Op, DAG);
6800 case ISD::EXTRACT_VECTOR_ELT:
6801 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6802 case ISD::SCALAR_TO_VECTOR: {
6803 MVT VT = Op.getSimpleValueType();
6804 SDLoc DL(Op);
6805 SDValue Scalar = Op.getOperand(0);
6806 if (VT.getVectorElementType() == MVT::i1) {
6807 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6808 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6809 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6810 }
6811 MVT ContainerVT = VT;
6812 if (VT.isFixedLengthVector())
6813 ContainerVT = getContainerForFixedLengthVector(VT);
6814 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6815
6816 SDValue V;
6817 if (VT.isFloatingPoint()) {
6818 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6819 DAG.getUNDEF(ContainerVT), Scalar, VL);
6820 } else {
6821 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6822 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6823 DAG.getUNDEF(ContainerVT), Scalar, VL);
6824 }
6825 if (VT.isFixedLengthVector())
6826 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6827 return V;
6828 }
6829 case ISD::VSCALE: {
6830 MVT XLenVT = Subtarget.getXLenVT();
6831 MVT VT = Op.getSimpleValueType();
6832 SDLoc DL(Op);
6833 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6834 // We define our scalable vector types for lmul=1 to use a 64 bit known
6835 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6836 // vscale as VLENB / 8.
6837 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6838 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6839 report_fatal_error("Support for VLEN==32 is incomplete.");
6840 // We assume VLENB is a multiple of 8. We manually choose the best shift
6841 // here because SimplifyDemandedBits isn't always able to simplify it.
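// For example, a multiplier of 2 becomes (srl VLENB, 2) and a multiplier of
// 16 becomes (shl VLENB, 1); both equal (VLENB / 8) * multiplier.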
6842 uint64_t Val = Op.getConstantOperandVal(0);
6843 if (isPowerOf2_64(Val)) {
6844 uint64_t Log2 = Log2_64(Val);
6845 if (Log2 < 3)
6846 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6847 DAG.getConstant(3 - Log2, DL, VT));
6848 else if (Log2 > 3)
6849 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6850 DAG.getConstant(Log2 - 3, DL, XLenVT));
6851 } else if ((Val % 8) == 0) {
6852 // If the multiplier is a multiple of 8, scale it down to avoid needing
6853 // to shift the VLENB value.
6854 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6855 DAG.getConstant(Val / 8, DL, XLenVT));
6856 } else {
6857 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6858 DAG.getConstant(3, DL, XLenVT));
6859 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6860 DAG.getConstant(Val, DL, XLenVT));
6861 }
6862 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6863 }
6864 case ISD::FPOWI: {
6865 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6866 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6867 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6868 Op.getOperand(1).getValueType() == MVT::i32) {
6869 SDLoc DL(Op);
6870 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6871 SDValue Powi =
6872 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6873 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6874 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6875 }
6876 return SDValue();
6877 }
6878 case ISD::FMAXIMUM:
6879 case ISD::FMINIMUM:
6880 if (isPromotedOpNeedingSplit(Op, Subtarget))
6881 return SplitVectorOp(Op, DAG);
6882 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6883 case ISD::FP_EXTEND:
6884 case ISD::FP_ROUND:
6885 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6886 case ISD::STRICT_FP_ROUND:
6887 case ISD::STRICT_FP_EXTEND:
6888 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6889 case ISD::SINT_TO_FP:
6890 case ISD::UINT_TO_FP:
6891 if (Op.getValueType().isVector() &&
6892 ((Op.getValueType().getScalarType() == MVT::f16 &&
6893 (Subtarget.hasVInstructionsF16Minimal() &&
6894 !Subtarget.hasVInstructionsF16())) ||
6895 Op.getValueType().getScalarType() == MVT::bf16)) {
6896 if (isPromotedOpNeedingSplit(Op, Subtarget))
6897 return SplitVectorOp(Op, DAG);
6898 // int -> f32
6899 SDLoc DL(Op);
6900 MVT NVT =
6901 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6902 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6903 // f32 -> [b]f16
6904 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6905 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6906 }
6907 [[fallthrough]];
6908 case ISD::FP_TO_SINT:
6909 case ISD::FP_TO_UINT:
6910 if (SDValue Op1 = Op.getOperand(0);
6911 Op1.getValueType().isVector() &&
6912 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6913 (Subtarget.hasVInstructionsF16Minimal() &&
6914 !Subtarget.hasVInstructionsF16())) ||
6915 Op1.getValueType().getScalarType() == MVT::bf16)) {
6916 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6917 return SplitVectorOp(Op, DAG);
6918 // [b]f16 -> f32
6919 SDLoc DL(Op);
6920 MVT NVT = MVT::getVectorVT(MVT::f32,
6921 Op1.getValueType().getVectorElementCount());
6922 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6923 // f32 -> int
6924 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6925 }
6926 [[fallthrough]];
6927 case ISD::STRICT_FP_TO_SINT:
6928 case ISD::STRICT_FP_TO_UINT:
6929 case ISD::STRICT_SINT_TO_FP:
6930 case ISD::STRICT_UINT_TO_FP: {
6931 // RVV can only do fp<->int conversions to types half/double the size as
6932 // the source. We custom-lower any conversions that do two hops into
6933 // sequences.
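// For example, i8 -> f32 is lowered as an i8 -> i16 extend followed by a
// widening i16 -> f32 convert, and f16 -> i64 as an f16 -> f32 fp_extend
// followed by a widening f32 -> i64 convert.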
6934 MVT VT = Op.getSimpleValueType();
6935 if (VT.isScalarInteger())
6936 return lowerFP_TO_INT(Op, DAG, Subtarget);
6937 bool IsStrict = Op->isStrictFPOpcode();
6938 SDValue Src = Op.getOperand(0 + IsStrict);
6939 MVT SrcVT = Src.getSimpleValueType();
6940 if (SrcVT.isScalarInteger())
6941 return lowerINT_TO_FP(Op, DAG, Subtarget);
6942 if (!VT.isVector())
6943 return Op;
6944 SDLoc DL(Op);
6945 MVT EltVT = VT.getVectorElementType();
6946 MVT SrcEltVT = SrcVT.getVectorElementType();
6947 unsigned EltSize = EltVT.getSizeInBits();
6948 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6949 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6950 "Unexpected vector element types");
6951
6952 bool IsInt2FP = SrcEltVT.isInteger();
6953 // Widening conversions
6954 if (EltSize > (2 * SrcEltSize)) {
6955 if (IsInt2FP) {
6956 // Do a regular integer sign/zero extension then convert to float.
6957 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6958 VT.getVectorElementCount());
6959 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6960 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6961 ? ISD::ZERO_EXTEND
6962 : ISD::SIGN_EXTEND;
6963 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6964 if (IsStrict)
6965 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6966 Op.getOperand(0), Ext);
6967 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6968 }
6969 // FP2Int
6970 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6971 // Do one doubling fp_extend then complete the operation by converting
6972 // to int.
6973 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6974 if (IsStrict) {
6975 auto [FExt, Chain] =
6976 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6977 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6978 }
6979 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6980 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6981 }
6982
6983 // Narrowing conversions
6984 if (SrcEltSize > (2 * EltSize)) {
6985 if (IsInt2FP) {
6986 // One narrowing int_to_fp, then an fp_round.
6987 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6988 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6989 if (IsStrict) {
6990 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6991 DAG.getVTList(InterimFVT, MVT::Other),
6992 Op.getOperand(0), Src);
6993 SDValue Chain = Int2FP.getValue(1);
6994 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6995 }
6996 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6997 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6998 }
6999 // FP2Int
7000 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7001 // representable by the integer, the result is poison.
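// For example, f64 -> i8 is lowered as a narrowing f64 -> i32 convert
// followed by an i32 -> i8 truncate.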
7002 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7003 VT.getVectorElementCount());
7004 if (IsStrict) {
7005 SDValue FP2Int =
7006 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7007 Op.getOperand(0), Src);
7008 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7009 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7010 }
7011 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7012 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7013 }
7014
7015 // Scalable vectors can exit here; isel patterns handle equally-sized
7016 // conversions as well as the halving/doubling ones.
7017 if (!VT.isFixedLengthVector())
7018 return Op;
7019
7020 // For fixed-length vectors we lower to a custom "VL" node.
7021 unsigned RVVOpc = 0;
7022 switch (Op.getOpcode()) {
7023 default:
7024 llvm_unreachable("Impossible opcode");
7025 case ISD::FP_TO_SINT:
7026 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7027 break;
7028 case ISD::FP_TO_UINT:
7029 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7030 break;
7031 case ISD::SINT_TO_FP:
7032 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7033 break;
7034 case ISD::UINT_TO_FP:
7035 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7036 break;
7037 case ISD::STRICT_FP_TO_SINT:
7038 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7039 break;
7040 case ISD::STRICT_FP_TO_UINT:
7041 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7042 break;
7043 case ISD::STRICT_SINT_TO_FP:
7044 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7045 break;
7046 case ISD::STRICT_UINT_TO_FP:
7047 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7048 break;
7049 }
7050
7051 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7052 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7053 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7054 "Expected same element count");
7055
7056 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7057
7058 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7059 if (IsStrict) {
7060 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7061 Op.getOperand(0), Src, Mask, VL);
7062 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7063 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7064 }
7065 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7066 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7067 }
7068 case ISD::FP_TO_SINT_SAT:
7069 case ISD::FP_TO_UINT_SAT:
7070 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7071 case ISD::FP_TO_BF16: {
7072 // Custom lower to ensure the libcall return is passed in an FPR on hard
7073 // float ABIs.
7074 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7075 SDLoc DL(Op);
7076 MakeLibCallOptions CallOptions;
7077 RTLIB::Libcall LC =
7078 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7079 SDValue Res =
7080 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7081 if (Subtarget.is64Bit())
7082 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7083 return DAG.getBitcast(MVT::i32, Res);
7084 }
7085 case ISD::BF16_TO_FP: {
7086 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7087 MVT VT = Op.getSimpleValueType();
7088 SDLoc DL(Op);
7089 Op = DAG.getNode(
7090 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7091 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7092 SDValue Res = Subtarget.is64Bit()
7093 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7094 : DAG.getBitcast(MVT::f32, Op);
7095 // fp_extend if the target VT is bigger than f32.
7096 if (VT != MVT::f32)
7097 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7098 return Res;
7099 }
7100 case ISD::STRICT_FP_TO_FP16:
7101 case ISD::FP_TO_FP16: {
7102 // Custom lower to ensure the libcall return is passed in an FPR on hard
7103 // float ABIs.
7104 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7105 SDLoc DL(Op);
7106 MakeLibCallOptions CallOptions;
7107 bool IsStrict = Op->isStrictFPOpcode();
7108 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7109 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7110 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7111 SDValue Res;
7112 std::tie(Res, Chain) =
7113 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7114 if (Subtarget.is64Bit())
7115 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7116 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7117 if (IsStrict)
7118 return DAG.getMergeValues({Result, Chain}, DL);
7119 return Result;
7120 }
7121 case ISD::STRICT_FP16_TO_FP:
7122 case ISD::FP16_TO_FP: {
7123 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7124 // float ABIs.
7125 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7126 SDLoc DL(Op);
7127 MakeLibCallOptions CallOptions;
7128 bool IsStrict = Op->isStrictFPOpcode();
7129 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7130 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7131 SDValue Arg = Subtarget.is64Bit()
7132 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7133 : DAG.getBitcast(MVT::f32, Op0);
7134 SDValue Res;
7135 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7136 CallOptions, DL, Chain);
7137 if (IsStrict)
7138 return DAG.getMergeValues({Res, Chain}, DL);
7139 return Res;
7140 }
7141 case ISD::FTRUNC:
7142 case ISD::FCEIL:
7143 case ISD::FFLOOR:
7144 case ISD::FNEARBYINT:
7145 case ISD::FRINT:
7146 case ISD::FROUND:
7147 case ISD::FROUNDEVEN:
7148 if (isPromotedOpNeedingSplit(Op, Subtarget))
7149 return SplitVectorOp(Op, DAG);
7150 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7151 case ISD::LRINT:
7152 case ISD::LLRINT:
7153 if (Op.getValueType().isVector())
7154 return lowerVectorXRINT(Op, DAG, Subtarget);
7155 [[fallthrough]];
7156 case ISD::LROUND:
7157 case ISD::LLROUND: {
7158 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7159 "Unexpected custom legalisation");
7160 SDLoc DL(Op);
7161 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7162 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7163 }
7164 case ISD::STRICT_LRINT:
7165 case ISD::STRICT_LLRINT:
7166 case ISD::STRICT_LROUND:
7167 case ISD::STRICT_LLROUND: {
7168 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7169 "Unexpected custom legalisation");
7170 SDLoc DL(Op);
7171 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7172 {Op.getOperand(0), Op.getOperand(1)});
7173 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7174 {Ext.getValue(1), Ext.getValue(0)});
7175 }
7176 case ISD::VECREDUCE_ADD:
7177 case ISD::VECREDUCE_UMAX:
7178 case ISD::VECREDUCE_SMAX:
7179 case ISD::VECREDUCE_UMIN:
7180 case ISD::VECREDUCE_SMIN:
7181 return lowerVECREDUCE(Op, DAG);
7182 case ISD::VECREDUCE_AND:
7183 case ISD::VECREDUCE_OR:
7184 case ISD::VECREDUCE_XOR:
7185 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7186 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7187 return lowerVECREDUCE(Op, DAG);
7194 return lowerFPVECREDUCE(Op, DAG);
7195 case ISD::VP_REDUCE_ADD:
7196 case ISD::VP_REDUCE_UMAX:
7197 case ISD::VP_REDUCE_SMAX:
7198 case ISD::VP_REDUCE_UMIN:
7199 case ISD::VP_REDUCE_SMIN:
7200 case ISD::VP_REDUCE_FADD:
7201 case ISD::VP_REDUCE_SEQ_FADD:
7202 case ISD::VP_REDUCE_FMIN:
7203 case ISD::VP_REDUCE_FMAX:
7204 case ISD::VP_REDUCE_FMINIMUM:
7205 case ISD::VP_REDUCE_FMAXIMUM:
7206 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7207 return SplitVectorReductionOp(Op, DAG);
7208 return lowerVPREDUCE(Op, DAG);
7209 case ISD::VP_REDUCE_AND:
7210 case ISD::VP_REDUCE_OR:
7211 case ISD::VP_REDUCE_XOR:
7212 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7213 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7214 return lowerVPREDUCE(Op, DAG);
7215 case ISD::VP_CTTZ_ELTS:
7216 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7217 return lowerVPCttzElements(Op, DAG);
7218 case ISD::UNDEF: {
7219 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7220 return convertFromScalableVector(Op.getSimpleValueType(),
7221 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7222 }
7223 case ISD::INSERT_SUBVECTOR:
7224 return lowerINSERT_SUBVECTOR(Op, DAG);
7225 case ISD::EXTRACT_SUBVECTOR:
7226 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7227 case ISD::VECTOR_DEINTERLEAVE:
7228 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7229 case ISD::VECTOR_INTERLEAVE:
7230 return lowerVECTOR_INTERLEAVE(Op, DAG);
7231 case ISD::STEP_VECTOR:
7232 return lowerSTEP_VECTOR(Op, DAG);
7233 case ISD::VECTOR_REVERSE:
7234 return lowerVECTOR_REVERSE(Op, DAG);
7235 case ISD::VECTOR_SPLICE:
7236 return lowerVECTOR_SPLICE(Op, DAG);
7237 case ISD::BUILD_VECTOR:
7238 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7239 case ISD::SPLAT_VECTOR: {
7240 MVT VT = Op.getSimpleValueType();
7241 MVT EltVT = VT.getVectorElementType();
7242 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7243 EltVT == MVT::bf16) {
7244 SDLoc DL(Op);
7245 SDValue Elt;
7246 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7247 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7248 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7249 Op.getOperand(0));
7250 else
7251 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7252 MVT IVT = VT.changeVectorElementType(MVT::i16);
7253 return DAG.getNode(ISD::BITCAST, DL, VT,
7254 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7255 }
7256
7257 if (EltVT == MVT::i1)
7258 return lowerVectorMaskSplat(Op, DAG);
7259 return SDValue();
7260 }
7261 case ISD::VECTOR_SHUFFLE:
7262 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7263 case ISD::CONCAT_VECTORS: {
7264 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7265 // better than going through the stack, as the default expansion does.
7266 SDLoc DL(Op);
7267 MVT VT = Op.getSimpleValueType();
7268 MVT ContainerVT = VT;
7269 if (VT.isFixedLengthVector())
7270 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7271
7272 // Recursively split concat_vectors with more than 2 operands:
7273 //
7274 // concat_vector op1, op2, op3, op4
7275 // ->
7276 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7277 //
7278 // This reduces the length of the chain of vslideups and allows us to
7279 // perform the vslideups at a smaller LMUL, limited to MF2.
7280 if (Op.getNumOperands() > 2 &&
7281 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7282 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7283 assert(isPowerOf2_32(Op.getNumOperands()));
7284 size_t HalfNumOps = Op.getNumOperands() / 2;
7285 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7286 Op->ops().take_front(HalfNumOps));
7287 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7288 Op->ops().drop_front(HalfNumOps));
7289 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7290 }
7291
7292 unsigned NumOpElts =
7293 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7294 SDValue Vec = DAG.getUNDEF(VT);
7295 for (const auto &OpIdx : enumerate(Op->ops())) {
7296 SDValue SubVec = OpIdx.value();
7297 // Don't insert undef subvectors.
7298 if (SubVec.isUndef())
7299 continue;
7300 Vec =
7301 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7302 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7303 }
7304 return Vec;
7305 }
7306 case ISD::LOAD: {
7307 auto *Load = cast<LoadSDNode>(Op);
7308 EVT VecTy = Load->getMemoryVT();
7309 // Handle normal vector tuple load.
7310 if (VecTy.isRISCVVectorTuple()) {
7311 SDLoc DL(Op);
7312 MVT XLenVT = Subtarget.getXLenVT();
7313 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7314 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7315 unsigned NumElts = Sz / (NF * 8);
7316 int Log2LMUL = Log2_64(NumElts) - 3;
7317
7318 auto Flag = SDNodeFlags();
7319 Flag.setNoUnsignedWrap(true);
7320 SDValue Ret = DAG.getUNDEF(VecTy);
7321 SDValue BasePtr = Load->getBasePtr();
7322 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7323 VROffset =
7324 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7325 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7326 SmallVector<SDValue, 8> OutChains;
7327
7328 // Load NF vector registers and combine them to a vector tuple.
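// VROffset is VLENB scaled by LMUL (at least one whole register), i.e. the
// byte stride between the registers backing consecutive tuple fields.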
7329 for (unsigned i = 0; i < NF; ++i) {
7330 SDValue LoadVal = DAG.getLoad(
7331 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7332 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7333 OutChains.push_back(LoadVal.getValue(1));
7334 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7335 DAG.getVectorIdxConstant(i, DL));
7336 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7337 }
7338 return DAG.getMergeValues(
7339 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7340 }
7341
7342 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7343 return V;
7344 if (Op.getValueType().isFixedLengthVector())
7345 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7346 return Op;
7347 }
7348 case ISD::STORE: {
7349 auto *Store = cast<StoreSDNode>(Op);
7350 SDValue StoredVal = Store->getValue();
7351 EVT VecTy = StoredVal.getValueType();
7352 // Handle normal vector tuple store.
7353 if (VecTy.isRISCVVectorTuple()) {
7354 SDLoc DL(Op);
7355 MVT XLenVT = Subtarget.getXLenVT();
7356 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7357 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7358 unsigned NumElts = Sz / (NF * 8);
7359 int Log2LMUL = Log2_64(NumElts) - 3;
7360
7361 auto Flag = SDNodeFlags();
7362 Flag.setNoUnsignedWrap(true);
7363 SDValue Ret;
7364 SDValue Chain = Store->getChain();
7365 SDValue BasePtr = Store->getBasePtr();
7366 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7367 VROffset =
7368 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7369 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7370
7371 // Extract subregisters in a vector tuple and store them individually.
7372 for (unsigned i = 0; i < NF; ++i) {
7373 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7374 MVT::getScalableVectorVT(MVT::i8, NumElts),
7375 StoredVal, DAG.getVectorIdxConstant(i, DL));
7376 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7377 MachinePointerInfo(Store->getAddressSpace()),
7378 Store->getOriginalAlign(),
7379 Store->getMemOperand()->getFlags());
7380 Chain = Ret.getValue(0);
7381 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7382 }
7383 return Ret;
7384 }
7385
7386 if (auto V = expandUnalignedRVVStore(Op, DAG))
7387 return V;
7388 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7389 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7390 return Op;
7391 }
7392 case ISD::MLOAD:
7393 case ISD::VP_LOAD:
7394 return lowerMaskedLoad(Op, DAG);
7395 case ISD::MSTORE:
7396 case ISD::VP_STORE:
7397 return lowerMaskedStore(Op, DAG);
7398 case ISD::VECTOR_COMPRESS:
7399 return lowerVectorCompress(Op, DAG);
7400 case ISD::SELECT_CC: {
7401 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7402 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7403 // into separate SETCC+SELECT just like LegalizeDAG.
7404 SDValue Tmp1 = Op.getOperand(0);
7405 SDValue Tmp2 = Op.getOperand(1);
7406 SDValue True = Op.getOperand(2);
7407 SDValue False = Op.getOperand(3);
7408 EVT VT = Op.getValueType();
7409 SDValue CC = Op.getOperand(4);
7410 EVT CmpVT = Tmp1.getValueType();
7411 EVT CCVT =
7412 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7413 SDLoc DL(Op);
7414 SDValue Cond =
7415 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7416 return DAG.getSelect(DL, VT, Cond, True, False);
7417 }
7418 case ISD::SETCC: {
7419 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7420 if (OpVT.isScalarInteger()) {
7421 MVT VT = Op.getSimpleValueType();
7422 SDValue LHS = Op.getOperand(0);
7423 SDValue RHS = Op.getOperand(1);
7424 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7425 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7426 "Unexpected CondCode");
7427
7428 SDLoc DL(Op);
7429
7430 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7431 // convert this to the equivalent of (set(u)ge X, C+1) by using
7432 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7433 // in a register.
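// For example, (setgt X, 5) becomes (xori (slti X, 6), 1), i.e. the inverse
// of (setlt X, 6), with 6 still fitting in a 12-bit immediate.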
7434 if (isa<ConstantSDNode>(RHS)) {
7435 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7436 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7437 // If this is an unsigned compare and the constant is -1, incrementing
7438 // the constant would change behavior. The result should be false.
7439 if (CCVal == ISD::SETUGT && Imm == -1)
7440 return DAG.getConstant(0, DL, VT);
7441 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7442 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7443 SDValue SetCC = DAG.getSetCC(
7444 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7445 return DAG.getLogicalNOT(DL, SetCC, VT);
7446 }
7447 }
7448
7449 // Not a constant we could handle, swap the operands and condition code to
7450 // SETLT/SETULT.
7451 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7452 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7453 }
7454
7455 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7456 return SplitVectorOp(Op, DAG);
7457
7458 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7459 }
7460 case ISD::ADD:
7461 case ISD::SUB:
7462 case ISD::MUL:
7463 case ISD::MULHS:
7464 case ISD::MULHU:
7465 case ISD::AND:
7466 case ISD::OR:
7467 case ISD::XOR:
7468 case ISD::SDIV:
7469 case ISD::SREM:
7470 case ISD::UDIV:
7471 case ISD::UREM:
7472 case ISD::BSWAP:
7473 case ISD::CTPOP:
7474 return lowerToScalableOp(Op, DAG);
7475 case ISD::SHL:
7476 case ISD::SRA:
7477 case ISD::SRL:
7478 if (Op.getSimpleValueType().isFixedLengthVector())
7479 return lowerToScalableOp(Op, DAG);
7480 // This can be called for an i32 shift amount that needs to be promoted.
7481 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7482 "Unexpected custom legalisation");
7483 return SDValue();
7484 case ISD::FABS:
7485 case ISD::FNEG:
7486 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7487 return lowerFABSorFNEG(Op, DAG, Subtarget);
7488 [[fallthrough]];
7489 case ISD::FADD:
7490 case ISD::FSUB:
7491 case ISD::FMUL:
7492 case ISD::FDIV:
7493 case ISD::FSQRT:
7494 case ISD::FMA:
7495 case ISD::FMINNUM:
7496 case ISD::FMAXNUM:
7497 if (isPromotedOpNeedingSplit(Op, Subtarget))
7498 return SplitVectorOp(Op, DAG);
7499 [[fallthrough]];
7500 case ISD::AVGFLOORS:
7501 case ISD::AVGFLOORU:
7502 case ISD::AVGCEILS:
7503 case ISD::AVGCEILU:
7504 case ISD::SMIN:
7505 case ISD::SMAX:
7506 case ISD::UMIN:
7507 case ISD::UMAX:
7508 case ISD::UADDSAT:
7509 case ISD::USUBSAT:
7510 case ISD::SADDSAT:
7511 case ISD::SSUBSAT:
7512 return lowerToScalableOp(Op, DAG);
7513 case ISD::ABDS:
7514 case ISD::ABDU: {
7515 SDLoc dl(Op);
7516 EVT VT = Op->getValueType(0);
7517 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7518 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7519 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7520
7521 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7522 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7523 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7524 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7525 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7526 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7527 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7528 }
7529 case ISD::ABS:
7530 case ISD::VP_ABS:
7531 return lowerABS(Op, DAG);
7532 case ISD::CTLZ:
7533 case ISD::CTLZ_ZERO_UNDEF:
7534 case ISD::CTTZ:
7535 case ISD::CTTZ_ZERO_UNDEF:
7536 if (Subtarget.hasStdExtZvbb())
7537 return lowerToScalableOp(Op, DAG);
7538 assert(Op.getOpcode() != ISD::CTTZ);
7539 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7540 case ISD::VSELECT:
7541 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7542 case ISD::FCOPYSIGN:
7543 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7544 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7545 if (isPromotedOpNeedingSplit(Op, Subtarget))
7546 return SplitVectorOp(Op, DAG);
7547 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7548 case ISD::STRICT_FADD:
7549 case ISD::STRICT_FSUB:
7550 case ISD::STRICT_FMUL:
7551 case ISD::STRICT_FDIV:
7552 case ISD::STRICT_FSQRT:
7553 case ISD::STRICT_FMA:
7554 if (isPromotedOpNeedingSplit(Op, Subtarget))
7555 return SplitStrictFPVectorOp(Op, DAG);
7556 return lowerToScalableOp(Op, DAG);
7557 case ISD::STRICT_FSETCC:
7558 case ISD::STRICT_FSETCCS:
7559 return lowerVectorStrictFSetcc(Op, DAG);
7560 case ISD::STRICT_FCEIL:
7561 case ISD::STRICT_FRINT:
7562 case ISD::STRICT_FFLOOR:
7563 case ISD::STRICT_FTRUNC:
7564 case ISD::STRICT_FNEARBYINT:
7565 case ISD::STRICT_FROUND:
7566 case ISD::STRICT_FROUNDEVEN:
7567 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7568 case ISD::MGATHER:
7569 case ISD::VP_GATHER:
7570 return lowerMaskedGather(Op, DAG);
7571 case ISD::MSCATTER:
7572 case ISD::VP_SCATTER:
7573 return lowerMaskedScatter(Op, DAG);
7574 case ISD::GET_ROUNDING:
7575 return lowerGET_ROUNDING(Op, DAG);
7576 case ISD::SET_ROUNDING:
7577 return lowerSET_ROUNDING(Op, DAG);
7578 case ISD::EH_DWARF_CFA:
7579 return lowerEH_DWARF_CFA(Op, DAG);
7580 case ISD::VP_MERGE:
7581 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7582 return lowerVPMergeMask(Op, DAG);
7583 [[fallthrough]];
7584 case ISD::VP_SELECT:
7585 case ISD::VP_ADD:
7586 case ISD::VP_SUB:
7587 case ISD::VP_MUL:
7588 case ISD::VP_SDIV:
7589 case ISD::VP_UDIV:
7590 case ISD::VP_SREM:
7591 case ISD::VP_UREM:
7592 case ISD::VP_UADDSAT:
7593 case ISD::VP_USUBSAT:
7594 case ISD::VP_SADDSAT:
7595 case ISD::VP_SSUBSAT:
7596 case ISD::VP_LRINT:
7597 case ISD::VP_LLRINT:
7598 return lowerVPOp(Op, DAG);
7599 case ISD::VP_AND:
7600 case ISD::VP_OR:
7601 case ISD::VP_XOR:
7602 return lowerLogicVPOp(Op, DAG);
7603 case ISD::VP_FADD:
7604 case ISD::VP_FSUB:
7605 case ISD::VP_FMUL:
7606 case ISD::VP_FDIV:
7607 case ISD::VP_FNEG:
7608 case ISD::VP_FABS:
7609 case ISD::VP_SQRT:
7610 case ISD::VP_FMA:
7611 case ISD::VP_FMINNUM:
7612 case ISD::VP_FMAXNUM:
7613 case ISD::VP_FCOPYSIGN:
7614 if (isPromotedOpNeedingSplit(Op, Subtarget))
7615 return SplitVPOp(Op, DAG);
7616 [[fallthrough]];
7617 case ISD::VP_SRA:
7618 case ISD::VP_SRL:
7619 case ISD::VP_SHL:
7620 return lowerVPOp(Op, DAG);
7621 case ISD::VP_IS_FPCLASS:
7622 return LowerIS_FPCLASS(Op, DAG);
7623 case ISD::VP_SIGN_EXTEND:
7624 case ISD::VP_ZERO_EXTEND:
7625 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7626 return lowerVPExtMaskOp(Op, DAG);
7627 return lowerVPOp(Op, DAG);
7628 case ISD::VP_TRUNCATE:
7629 return lowerVectorTruncLike(Op, DAG);
7630 case ISD::VP_FP_EXTEND:
7631 case ISD::VP_FP_ROUND:
7632 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7633 case ISD::VP_SINT_TO_FP:
7634 case ISD::VP_UINT_TO_FP:
7635 if (Op.getValueType().isVector() &&
7636 ((Op.getValueType().getScalarType() == MVT::f16 &&
7637 (Subtarget.hasVInstructionsF16Minimal() &&
7638 !Subtarget.hasVInstructionsF16())) ||
7639 Op.getValueType().getScalarType() == MVT::bf16)) {
7640 if (isPromotedOpNeedingSplit(Op, Subtarget))
7641 return SplitVectorOp(Op, DAG);
7642 // int -> f32
7643 SDLoc DL(Op);
7644 MVT NVT =
7645 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7646 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7647 // f32 -> [b]f16
7648 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7649 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7650 }
7651 [[fallthrough]];
7652 case ISD::VP_FP_TO_SINT:
7653 case ISD::VP_FP_TO_UINT:
7654 if (SDValue Op1 = Op.getOperand(0);
7655 Op1.getValueType().isVector() &&
7656 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7657 (Subtarget.hasVInstructionsF16Minimal() &&
7658 !Subtarget.hasVInstructionsF16())) ||
7659 Op1.getValueType().getScalarType() == MVT::bf16)) {
7660 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7661 return SplitVectorOp(Op, DAG);
7662 // [b]f16 -> f32
7663 SDLoc DL(Op);
7664 MVT NVT = MVT::getVectorVT(MVT::f32,
7665 Op1.getValueType().getVectorElementCount());
7666 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7667 // f32 -> int
7668 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7669 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7670 }
7671 return lowerVPFPIntConvOp(Op, DAG);
7672 case ISD::VP_SETCC:
7673 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7674 return SplitVPOp(Op, DAG);
7675 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7676 return lowerVPSetCCMaskOp(Op, DAG);
7677 [[fallthrough]];
7678 case ISD::VP_SMIN:
7679 case ISD::VP_SMAX:
7680 case ISD::VP_UMIN:
7681 case ISD::VP_UMAX:
7682 case ISD::VP_BITREVERSE:
7683 case ISD::VP_BSWAP:
7684 return lowerVPOp(Op, DAG);
7685 case ISD::VP_CTLZ:
7686 case ISD::VP_CTLZ_ZERO_UNDEF:
7687 if (Subtarget.hasStdExtZvbb())
7688 return lowerVPOp(Op, DAG);
7689 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7690 case ISD::VP_CTTZ:
7691 case ISD::VP_CTTZ_ZERO_UNDEF:
7692 if (Subtarget.hasStdExtZvbb())
7693 return lowerVPOp(Op, DAG);
7694 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7695 case ISD::VP_CTPOP:
7696 return lowerVPOp(Op, DAG);
7697 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7698 return lowerVPStridedLoad(Op, DAG);
7699 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7700 return lowerVPStridedStore(Op, DAG);
7701 case ISD::VP_FCEIL:
7702 case ISD::VP_FFLOOR:
7703 case ISD::VP_FRINT:
7704 case ISD::VP_FNEARBYINT:
7705 case ISD::VP_FROUND:
7706 case ISD::VP_FROUNDEVEN:
7707 case ISD::VP_FROUNDTOZERO:
7708 if (isPromotedOpNeedingSplit(Op, Subtarget))
7709 return SplitVPOp(Op, DAG);
7710 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7711 case ISD::VP_FMAXIMUM:
7712 case ISD::VP_FMINIMUM:
7713 if (isPromotedOpNeedingSplit(Op, Subtarget))
7714 return SplitVPOp(Op, DAG);
7715 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7716 case ISD::EXPERIMENTAL_VP_SPLICE:
7717 return lowerVPSpliceExperimental(Op, DAG);
7718 case ISD::EXPERIMENTAL_VP_REVERSE:
7719 return lowerVPReverseExperimental(Op, DAG);
7720 case ISD::EXPERIMENTAL_VP_SPLAT:
7721 return lowerVPSplatExperimental(Op, DAG);
7722 case ISD::CLEAR_CACHE: {
7723 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7724 "llvm.clear_cache only needs custom lower on Linux targets");
7725 SDLoc DL(Op);
7726 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7727 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7728 Op.getOperand(2), Flags, DL);
7729 }
7730 case ISD::INIT_TRAMPOLINE:
7731 return lowerINIT_TRAMPOLINE(Op, DAG);
7732 case ISD::ADJUST_TRAMPOLINE:
7733 return lowerADJUST_TRAMPOLINE(Op, DAG);
7734 }
7735}
7736
7737SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7738 SDValue Start, SDValue End,
7739 SDValue Flags, SDLoc DL) const {
7740 MakeLibCallOptions CallOptions;
7741 std::pair<SDValue, SDValue> CallResult =
7742 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7743 {Start, End, Flags}, CallOptions, DL, InChain);
7744
7745 // This function returns void so only the out chain matters.
7746 return CallResult.second;
7747}
7748
7749SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7750 SelectionDAG &DAG) const {
7751 if (!Subtarget.is64Bit())
7752 llvm::report_fatal_error("Trampolines only implemented for RV64");
7753
7754 // Create an MCCodeEmitter to encode instructions.
7755 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7756 assert(TLO);
7757 MCContext &MCCtx = TLO->getContext();
7758
7759 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7760 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7761
7762 SDValue Root = Op.getOperand(0);
7763 SDValue Trmp = Op.getOperand(1); // trampoline
7764 SDLoc dl(Op);
7765
7766 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7767
7768 // We store in the trampoline buffer the following instructions and data.
7769 // Offset:
7770 // 0: auipc t2, 0
7771 // 4: ld t0, 24(t2)
7772 // 8: ld t2, 16(t2)
7773 // 12: jalr t0
7774 // 16: <StaticChainOffset>
7775 // 24: <FunctionAddressOffset>
7776 // 32:
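 // That is, the first 16 bytes hold the four 32-bit instructions and the
 // remaining 16 bytes hold the static chain and the function address as two
 // 64-bit values, for a total trampoline size of 32 bytes.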
7777
7778 constexpr unsigned StaticChainOffset = 16;
7779 constexpr unsigned FunctionAddressOffset = 24;
7780
7781 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7782 assert(STI);
7783 auto GetEncoding = [&](const MCInst &MC) {
7784 SmallVector<char, 32> CB;
7785 SmallVector<MCFixup> Fixups;
7786 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7787 uint32_t Encoding = support::endian::read32le(CB.data());
7788 return Encoding;
7789 };
7790
7791 SDValue OutChains[6];
7792
7793 uint32_t Encodings[] = {
7794 // auipc t2, 0
7795 // Loads the current PC into t2.
7796 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7797 // ld t0, 24(t2)
7798 // Loads the function address into t0. Note that we are using offsets
7799 // pc-relative to the first instruction of the trampoline.
7800 GetEncoding(
7801 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7802 FunctionAddressOffset)),
7803 // ld t2, 16(t2)
7804 // Load the value of the static chain.
7805 GetEncoding(
7806 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7807 StaticChainOffset)),
7808 // jalr t0
7809 // Jump to the function.
7810 GetEncoding(MCInstBuilder(RISCV::JALR)
7811 .addReg(RISCV::X0)
7812 .addReg(RISCV::X5)
7813 .addImm(0))};
7814
7815 // Store encoded instructions.
7816 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7817 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7818 DAG.getConstant(Idx * 4, dl, MVT::i64))
7819 : Trmp;
7820 OutChains[Idx] = DAG.getTruncStore(
7821 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7822 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7823 }
7824
7825 // Now store the variable part of the trampoline.
7826 SDValue FunctionAddress = Op.getOperand(2);
7827 SDValue StaticChain = Op.getOperand(3);
7828
7829 // Store the given static chain and function pointer in the trampoline buffer.
7830 struct OffsetValuePair {
7831 const unsigned Offset;
7832 const SDValue Value;
7833 SDValue Addr = SDValue(); // Used to cache the address.
7834 } OffsetValues[] = {
7835 {StaticChainOffset, StaticChain},
7836 {FunctionAddressOffset, FunctionAddress},
7837 };
7838 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7839 SDValue Addr =
7840 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7841 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7842 OffsetValue.Addr = Addr;
7843 OutChains[Idx + 4] =
7844 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7845 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7846 }
7847
7848 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7849
7850 // The end of the trampoline's instructions coincides with the static chain
7851 // address that we computed earlier.
7852 SDValue EndOfTrmp = OffsetValues[0].Addr;
7853
7854 // Call clear cache on the trampoline instructions.
7855 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7856 Trmp, EndOfTrmp);
7857
7858 return Chain;
7859}
7860
7861SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7862 SelectionDAG &DAG) const {
7863 if (!Subtarget.is64Bit())
7864 llvm::report_fatal_error("Trampolines only implemented for RV64");
7865
7866 return Op.getOperand(0);
7867}
7868
7869 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7870 SelectionDAG &DAG, unsigned Flags) {
7871 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7872}
7873
7874 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7875 SelectionDAG &DAG, unsigned Flags) {
7876 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7877 Flags);
7878}
7879
7880 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7881 SelectionDAG &DAG, unsigned Flags) {
7882 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7883 N->getOffset(), Flags);
7884}
7885
7886 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7887 SelectionDAG &DAG, unsigned Flags) {
7888 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7889}
7890
7891 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
7892 EVT Ty, SelectionDAG &DAG) {
7893 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7894 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7895 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7896 return DAG.getLoad(
7897 Ty, DL, DAG.getEntryNode(), LC,
7898 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7899 }
7900
7901 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
7902 EVT Ty, SelectionDAG &DAG) {
7903 RISCVConstantPoolValue *CPV =
7904 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7905 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7906 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7907 return DAG.getLoad(
7908 Ty, DL, DAG.getEntryNode(), LC,
7909 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7910 }
7911
7912template <class NodeTy>
7913SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7914 bool IsLocal, bool IsExternWeak) const {
7915 SDLoc DL(N);
7916 EVT Ty = getPointerTy(DAG.getDataLayout());
7917
7918 // When HWASAN is used and tagging of global variables is enabled
7919 // they should be accessed via the GOT, since the tagged address of a global
7920 // is incompatible with existing code models. This also applies to non-pic
7921 // mode.
7922 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7923 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7924 if (IsLocal && !Subtarget.allowTaggedGlobals())
7925 // Use PC-relative addressing to access the symbol. This generates the
7926 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7927 // %pcrel_lo(auipc)).
7928 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7929
7930 // Use PC-relative addressing to access the GOT for this symbol, then load
7931 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7932 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7933 SDValue Load =
7934 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7935 MachineFunction &MF = DAG.getMachineFunction();
7936 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7937 MachinePointerInfo::getGOT(MF),
7938 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7939 MachineMemOperand::MODereferenceable,
7940 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7941 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7942 return Load;
7943 }
7944
7945 switch (getTargetMachine().getCodeModel()) {
7946 default:
7947 report_fatal_error("Unsupported code model for lowering");
7948 case CodeModel::Small: {
7949 // Generate a sequence for accessing addresses within the first 2 GiB of
7950 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7951 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7952 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7953 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7954 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7955 }
7956 case CodeModel::Medium: {
7957 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7958 if (IsExternWeak) {
7959 // An extern weak symbol may be undefined, i.e. have value 0, which may
7960 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7961 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7962 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7963 SDValue Load =
7964 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7965 MachineFunction &MF = DAG.getMachineFunction();
7966 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7967 MachinePointerInfo::getGOT(MF),
7968 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7969 MachineMemOperand::MODereferenceable,
7970 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7971 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7972 return Load;
7973 }
7974
7975 // Generate a sequence for accessing addresses within any 2GiB range within
7976 // the address space. This generates the pattern (PseudoLLA sym), which
7977 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7978 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7979 }
7980 case CodeModel::Large: {
7981 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
7982 return getLargeGlobalAddress(G, DL, Ty, DAG);
7983
7984 // Use PC-relative addressing for other node types.
7985 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7986 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7987 }
7988 }
7989}
7990
7991SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7994 assert(N->getOffset() == 0 && "unexpected offset in global node");
7995 const GlobalValue *GV = N->getGlobal();
7996 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7997}
7998
7999SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8000 SelectionDAG &DAG) const {
8001 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8002
8003 return getAddr(N, DAG);
8004}
8005
8006SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8007 SelectionDAG &DAG) const {
8008 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8009
8010 return getAddr(N, DAG);
8011}
8012
8013SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8014 SelectionDAG &DAG) const {
8015 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8016
8017 return getAddr(N, DAG);
8018}
8019
8020SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8021 SelectionDAG &DAG,
8022 bool UseGOT) const {
8023 SDLoc DL(N);
8024 EVT Ty = getPointerTy(DAG.getDataLayout());
8025 const GlobalValue *GV = N->getGlobal();
8026 MVT XLenVT = Subtarget.getXLenVT();
8027
8028 if (UseGOT) {
8029 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8030 // load the address from the GOT and add the thread pointer. This generates
8031 // the pattern (PseudoLA_TLS_IE sym), which expands to
8032 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8033 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8034 SDValue Load =
8035 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8036 MachineFunction &MF = DAG.getMachineFunction();
8037 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8038 MachinePointerInfo::getGOT(MF),
8039 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
8040 MachineMemOperand::MODereferenceable,
8041 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8042 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8043
8044 // Add the thread pointer.
8045 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8046 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8047 }
8048
8049 // Generate a sequence for accessing the address relative to the thread
8050 // pointer, with the appropriate adjustment for the thread pointer offset.
8051 // This generates the pattern
8052 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8053 SDValue AddrHi =
8054 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8055 SDValue AddrAdd =
8056 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8057 SDValue AddrLo =
8058 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8059
8060 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8061 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8062 SDValue MNAdd =
8063 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8064 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8065}
8066
8067SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8068 SelectionDAG &DAG) const {
8069 SDLoc DL(N);
8070 EVT Ty = getPointerTy(DAG.getDataLayout());
8071 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8072 const GlobalValue *GV = N->getGlobal();
8073
8074 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8075 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8076 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8077 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8078 SDValue Load =
8079 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8080
8081 // Prepare argument list to generate call.
8082 ArgListTy Args;
8083 ArgListEntry Entry;
8084 Entry.Node = Load;
8085 Entry.Ty = CallTy;
8086 Args.push_back(Entry);
8087
8088 // Setup call to __tls_get_addr.
8089 TargetLowering::CallLoweringInfo CLI(DAG);
8090 CLI.setDebugLoc(DL)
8091 .setChain(DAG.getEntryNode())
8092 .setLibCallee(CallingConv::C, CallTy,
8093 DAG.getExternalSymbol("__tls_get_addr", Ty),
8094 std::move(Args));
8095
8096 return LowerCallTo(CLI).first;
8097}
8098
8099SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8100 SelectionDAG &DAG) const {
8101 SDLoc DL(N);
8102 EVT Ty = getPointerTy(DAG.getDataLayout());
8103 const GlobalValue *GV = N->getGlobal();
8104
8105 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8106 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8107 //
8108 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8109 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8110 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8111 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8112 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8113 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8114}
8115
8116SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8117 SelectionDAG &DAG) const {
8118 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8119 assert(N->getOffset() == 0 && "unexpected offset in global node");
8120
8121 if (DAG.getTarget().useEmulatedTLS())
8122 return LowerToTLSEmulatedModel(N, DAG);
8123
8124 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8125
8126 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8127 CallingConv::GHC)
8128 report_fatal_error("In GHC calling convention TLS is not supported");
8129
8130 SDValue Addr;
8131 switch (Model) {
8132 case TLSModel::LocalExec:
8133 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8134 break;
8135 case TLSModel::InitialExec:
8136 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8137 break;
8138 case TLSModel::LocalDynamic:
8139 case TLSModel::GeneralDynamic:
8140 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8141 : getDynamicTLSAddr(N, DAG);
8142 break;
8143 }
8144
8145 return Addr;
8146}
8147
8148// Return true if Val is equal to (setcc LHS, RHS, CC).
8149// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8150// Otherwise, return std::nullopt.
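// For example, if LHS, RHS and CC describe (setcc a, b, setlt), then a Val of
// (setcc a, b, setlt) yields true and a Val of (setcc a, b, setge) yields
// false.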
8151static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8152 ISD::CondCode CC, SDValue Val) {
8153 assert(Val->getOpcode() == ISD::SETCC);
8154 SDValue LHS2 = Val.getOperand(0);
8155 SDValue RHS2 = Val.getOperand(1);
8156 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8157
8158 if (LHS == LHS2 && RHS == RHS2) {
8159 if (CC == CC2)
8160 return true;
8161 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8162 return false;
8163 } else if (LHS == RHS2 && RHS == LHS2) {
8164 CC2 = ISD::getSetCCSwappedOperands(CC2);
8165 if (CC == CC2)
8166 return true;
8167 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8168 return false;
8169 }
8170
8171 return std::nullopt;
8172}
8173
8174 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8175 const RISCVSubtarget &Subtarget) {
8176 SDValue CondV = N->getOperand(0);
8177 SDValue TrueV = N->getOperand(1);
8178 SDValue FalseV = N->getOperand(2);
8179 MVT VT = N->getSimpleValueType(0);
8180 SDLoc DL(N);
8181
8182 if (!Subtarget.hasConditionalMoveFusion()) {
8183 // (select c, -1, y) -> -c | y
8184 if (isAllOnesConstant(TrueV)) {
8185 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8186 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8187 }
8188 // (select c, y, -1) -> (c-1) | y
8189 if (isAllOnesConstant(FalseV)) {
8190 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8191 DAG.getAllOnesConstant(DL, VT));
8192 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8193 }
8194
8195 // (select c, 0, y) -> (c-1) & y
8196 if (isNullConstant(TrueV)) {
8197 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8198 DAG.getAllOnesConstant(DL, VT));
8199 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8200 }
8201 // (select c, y, 0) -> -c & y
8202 if (isNullConstant(FalseV)) {
8203 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8204 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8205 }
8206 }
8207
8208 // select c, ~x, x --> xor -c, x
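// (When c is 1, -c is all-ones and the xor yields ~x; when c is 0 the xor is
// a no-op and yields x.)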
8209 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8210 const APInt &TrueVal = TrueV->getAsAPIntVal();
8211 const APInt &FalseVal = FalseV->getAsAPIntVal();
8212 if (~TrueVal == FalseVal) {
8213 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8214 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8215 }
8216 }
8217
8218 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8219 // when both truev and falsev are also setcc.
8220 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8221 FalseV.getOpcode() == ISD::SETCC) {
8222 SDValue LHS = CondV.getOperand(0);
8223 SDValue RHS = CondV.getOperand(1);
8224 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8225
8226 // (select x, x, y) -> x | y
8227 // (select !x, x, y) -> x & y
8228 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8229 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8230 DAG.getFreeze(FalseV));
8231 }
8232 // (select x, y, x) -> x & y
8233 // (select !x, y, x) -> x | y
8234 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8235 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8236 DAG.getFreeze(TrueV), FalseV);
8237 }
8238 }
8239
8240 return SDValue();
8241}
8242
8243// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8244// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8245// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8246// being `0` or `-1`. In such cases we can replace `select` with `and`.
8247// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8248// than `c0`?
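// For example, (and (select cond, x, 0), 1) becomes
// (select cond, (and x, 1), 0), and a select whose false arm is 0 can then be
// replaced with an and of the negated condition.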
8249static SDValue
8250 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8251 const RISCVSubtarget &Subtarget) {
8252 if (Subtarget.hasShortForwardBranchOpt())
8253 return SDValue();
8254
8255 unsigned SelOpNo = 0;
8256 SDValue Sel = BO->getOperand(0);
8257 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8258 SelOpNo = 1;
8259 Sel = BO->getOperand(1);
8260 }
8261
8262 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8263 return SDValue();
8264
8265 unsigned ConstSelOpNo = 1;
8266 unsigned OtherSelOpNo = 2;
8267 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8268 ConstSelOpNo = 2;
8269 OtherSelOpNo = 1;
8270 }
8271 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8272 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8273 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8274 return SDValue();
8275
8276 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8277 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8278 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8279 return SDValue();
8280
8281 SDLoc DL(Sel);
8282 EVT VT = BO->getValueType(0);
8283
8284 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8285 if (SelOpNo == 1)
8286 std::swap(NewConstOps[0], NewConstOps[1]);
8287
8288 SDValue NewConstOp =
8289 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8290 if (!NewConstOp)
8291 return SDValue();
8292
8293 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8294 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8295 return SDValue();
8296
8297 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8298 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8299 if (SelOpNo == 1)
8300 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8301 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8302
8303 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8304 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8305 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8306}
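// For illustration (a sketch; the constants are assumed, not from a test):
//   (add (select c, x, -1), 1)
// folds the two constants to binOp(c0, c1) = (-1) + 1 = 0, so it becomes
//   (select c, (add x, 1), 0)
// which lowerSELECT can then emit as a branchless AND/CZERO-style sequence.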
8307
8308SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8309 SDValue CondV = Op.getOperand(0);
8310 SDValue TrueV = Op.getOperand(1);
8311 SDValue FalseV = Op.getOperand(2);
8312 SDLoc DL(Op);
8313 MVT VT = Op.getSimpleValueType();
8314 MVT XLenVT = Subtarget.getXLenVT();
8315
8316 // Lower vector SELECTs to VSELECTs by splatting the condition.
8317 if (VT.isVector()) {
8318 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8319 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8320 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8321 }
8322
8323 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8324 // nodes to implement the SELECT. Performing the lowering here allows for
8325 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8326 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8327 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8328 VT.isScalarInteger()) {
8329 // (select c, t, 0) -> (czero_eqz t, c)
8330 if (isNullConstant(FalseV))
8331 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8332 // (select c, 0, f) -> (czero_nez f, c)
8333 if (isNullConstant(TrueV))
8334 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8335
8336 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8337 if (TrueV.getOpcode() == ISD::AND &&
8338 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8339 return DAG.getNode(
8340 ISD::OR, DL, VT, TrueV,
8341 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8342 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8343 if (FalseV.getOpcode() == ISD::AND &&
8344 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8345 return DAG.getNode(
8346 ISD::OR, DL, VT, FalseV,
8347 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8348
8349 // Try some other optimizations before falling back to generic lowering.
8350 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8351 return V;
8352
8353 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8354 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
8355 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8356 const APInt &TrueVal = TrueV->getAsAPIntVal();
8357 const APInt &FalseVal = FalseV->getAsAPIntVal();
8358 const int TrueValCost = RISCVMatInt::getIntMatCost(
8359 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8360 const int FalseValCost = RISCVMatInt::getIntMatCost(
8361 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8362 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8363 SDValue LHSVal = DAG.getConstant(
8364 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8365 SDValue RHSVal =
8366 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8367 SDValue CMOV =
8368 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8369 DL, VT, LHSVal, CondV);
8370 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8371 }
8372
8373 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8374 // Unless we have the short forward branch optimization.
8375 if (!Subtarget.hasConditionalMoveFusion())
8376 return DAG.getNode(
8377 ISD::OR, DL, VT,
8378 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8379 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8380 }
8381
8382 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8383 return V;
8384
8385 if (Op.hasOneUse()) {
8386 unsigned UseOpc = Op->user_begin()->getOpcode();
8387 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8388 SDNode *BinOp = *Op->user_begin();
8389 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8390 DAG, Subtarget)) {
8391 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8392 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8393 // may return a constant node and cause a crash in lowerSELECT.
8394 if (NewSel.getOpcode() == ISD::SELECT)
8395 return lowerSELECT(NewSel, DAG);
8396 return NewSel;
8397 }
8398 }
8399 }
8400
8401 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8402 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8403 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8404 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8405 if (FPTV && FPFV) {
8406 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8407 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8408 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8409 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8410 DAG.getConstant(1, DL, XLenVT));
8411 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8412 }
8413 }
8414
8415 // If the condition is not an integer SETCC which operates on XLenVT, we need
8416 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8417 // (select condv, truev, falsev)
8418 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8419 if (CondV.getOpcode() != ISD::SETCC ||
8420 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8421 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8422 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8423
8424 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8425
8426 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8427 }
8428
8429 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8430 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8431 // advantage of the integer compare+branch instructions. i.e.:
8432 // (select (setcc lhs, rhs, cc), truev, falsev)
8433 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8434 SDValue LHS = CondV.getOperand(0);
8435 SDValue RHS = CondV.getOperand(1);
8436 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8437
8438 // Special case for a select of 2 constants that have a difference of 1.
8439 // Normally this is done by DAGCombine, but if the select is introduced by
8440 // type legalization or op legalization, we miss it. Restricting to SETLT
8441 // case for now because that is what signed saturating add/sub need.
8442 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8443 // but we would probably want to swap the true/false values if the condition
8444 // is SETGE/SETLE to avoid an XORI.
8445 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8446 CCVal == ISD::SETLT) {
8447 const APInt &TrueVal = TrueV->getAsAPIntVal();
8448 const APInt &FalseVal = FalseV->getAsAPIntVal();
8449 if (TrueVal - 1 == FalseVal)
8450 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8451 if (TrueVal + 1 == FalseVal)
8452 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8453 }
8454
8455 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8456 // 1 < x ? x : 1 -> 0 < x ? x : 1
8457 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8458 RHS == TrueV && LHS == FalseV) {
8459 LHS = DAG.getConstant(0, DL, VT);
8460 // 0 <u x is the same as x != 0.
8461 if (CCVal == ISD::SETULT) {
8462 std::swap(LHS, RHS);
8463 CCVal = ISD::SETNE;
8464 }
8465 }
8466
8467 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8468 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8469 RHS == FalseV) {
8470 RHS = DAG.getConstant(0, DL, VT);
8471 }
8472
8473 SDValue TargetCC = DAG.getCondCode(CCVal);
8474
8475 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8476 // (select (setcc lhs, rhs, CC), constant, falsev)
8477 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8478 std::swap(TrueV, FalseV);
8479 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8480 }
8481
8482 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8483 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8484}
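// For illustration (a sketch with assumed constants): with Zicond,
//   (select c, 5, 3)
// has equal materialization costs for 5 and 3, so IsCZERO_NEZ is chosen and
// we emit (add (czero_nez (3 - 5), c), 5), i.e. 0 + 5 = 5 when c != 0 and
// -2 + 5 = 3 when c == 0.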
8485
8486SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8487 SDValue CondV = Op.getOperand(1);
8488 SDLoc DL(Op);
8489 MVT XLenVT = Subtarget.getXLenVT();
8490
8491 if (CondV.getOpcode() == ISD::SETCC &&
8492 CondV.getOperand(0).getValueType() == XLenVT) {
8493 SDValue LHS = CondV.getOperand(0);
8494 SDValue RHS = CondV.getOperand(1);
8495 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8496
8497 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8498
8499 SDValue TargetCC = DAG.getCondCode(CCVal);
8500 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8501 LHS, RHS, TargetCC, Op.getOperand(2));
8502 }
8503
8504 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8505 CondV, DAG.getConstant(0, DL, XLenVT),
8506 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8507}
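// For illustration (a sketch): (brcond (setcc a, b, setlt), dest) becomes
// (br_cc a, b, setlt, dest) after translateSetCCForBranch, while a
// non-SETCC condition c is compared against zero:
// (brcond c, dest) -> (br_cc c, 0, setne, dest).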
8508
8509 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8510 MachineFunction &MF = DAG.getMachineFunction();
8511 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8512
8513 SDLoc DL(Op);
8514 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8515 getPointerTy(MF.getDataLayout()));
8516
8517 // vastart just stores the address of the VarArgsFrameIndex slot into the
8518 // memory location argument.
8519 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8520 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8521 MachinePointerInfo(SV));
8522}
8523
8524SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8525 SelectionDAG &DAG) const {
8526 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8527 MachineFunction &MF = DAG.getMachineFunction();
8528 MachineFrameInfo &MFI = MF.getFrameInfo();
8529 MFI.setFrameAddressIsTaken(true);
8530 Register FrameReg = RI.getFrameRegister(MF);
8531 int XLenInBytes = Subtarget.getXLen() / 8;
8532
8533 EVT VT = Op.getValueType();
8534 SDLoc DL(Op);
8535 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8536 unsigned Depth = Op.getConstantOperandVal(0);
8537 while (Depth--) {
8538 int Offset = -(XLenInBytes * 2);
8539 SDValue Ptr = DAG.getNode(
8540 ISD::ADD, DL, VT, FrameAddr,
8541 DAG.getSignedConstant(Offset, DL, VT));
8542 FrameAddr =
8543 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8544 }
8545 return FrameAddr;
8546}
8547
8548SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8549 SelectionDAG &DAG) const {
8550 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8551 MachineFunction &MF = DAG.getMachineFunction();
8552 MachineFrameInfo &MFI = MF.getFrameInfo();
8553 MFI.setReturnAddressIsTaken(true);
8554 MVT XLenVT = Subtarget.getXLenVT();
8555 int XLenInBytes = Subtarget.getXLen() / 8;
8556
8557 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8558 return SDValue();
8559
8560 EVT VT = Op.getValueType();
8561 SDLoc DL(Op);
8562 unsigned Depth = Op.getConstantOperandVal(0);
8563 if (Depth) {
8564 int Off = -XLenInBytes;
8565 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8566 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8567 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8568 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8569 MachinePointerInfo());
8570 }
8571
8572 // Return the value of the return address register, marking it an implicit
8573 // live-in.
8574 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8575 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8576}
8577
8578SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8579 SelectionDAG &DAG) const {
8580 SDLoc DL(Op);
8581 SDValue Lo = Op.getOperand(0);
8582 SDValue Hi = Op.getOperand(1);
8583 SDValue Shamt = Op.getOperand(2);
8584 EVT VT = Lo.getValueType();
8585
8586 // if Shamt-XLEN < 0: // Shamt < XLEN
8587 // Lo = Lo << Shamt
8588 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8589 // else:
8590 // Lo = 0
8591 // Hi = Lo << (Shamt-XLEN)
8592
8593 SDValue Zero = DAG.getConstant(0, DL, VT);
8594 SDValue One = DAG.getConstant(1, DL, VT);
8595 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8596 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8597 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8598 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8599
8600 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8601 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8602 SDValue ShiftRightLo =
8603 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8604 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8605 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8606 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8607
8608 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8609
8610 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8611 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8612
8613 SDValue Parts[2] = {Lo, Hi};
8614 return DAG.getMergeValues(Parts, DL);
8615}
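// Worked example (a sketch assuming XLEN=32): for Shamt=40, Shamt-XLEN=8 >= 0,
// so Lo = 0 and Hi = Lo << 8; for Shamt=4, Lo = Lo << 4 and
// Hi = (Hi << 4) | ((Lo >>u 1) >>u 27).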
8616
8617SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8618 bool IsSRA) const {
8619 SDLoc DL(Op);
8620 SDValue Lo = Op.getOperand(0);
8621 SDValue Hi = Op.getOperand(1);
8622 SDValue Shamt = Op.getOperand(2);
8623 EVT VT = Lo.getValueType();
8624
8625 // SRA expansion:
8626 // if Shamt-XLEN < 0: // Shamt < XLEN
8627 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8628 // Hi = Hi >>s Shamt
8629 // else:
8630 // Lo = Hi >>s (Shamt-XLEN);
8631 // Hi = Hi >>s (XLEN-1)
8632 //
8633 // SRL expansion:
8634 // if Shamt-XLEN < 0: // Shamt < XLEN
8635 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8636 // Hi = Hi >>u Shamt
8637 // else:
8638 // Lo = Hi >>u (Shamt-XLEN);
8639 // Hi = 0;
8640
8641 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8642
8643 SDValue Zero = DAG.getConstant(0, DL, VT);
8644 SDValue One = DAG.getConstant(1, DL, VT);
8645 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8646 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8647 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8648 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8649
8650 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8651 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8652 SDValue ShiftLeftHi =
8653 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8654 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8655 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8656 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8657 SDValue HiFalse =
8658 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8659
8660 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8661
8662 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8663 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8664
8665 SDValue Parts[2] = {Lo, Hi};
8666 return DAG.getMergeValues(Parts, DL);
8667}
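// Worked example (a sketch assuming XLEN=32 and SRA): for Shamt=40,
// Lo = Hi >>s 8 and Hi = Hi >>s 31; for Shamt=4,
// Lo = (Lo >>u 4) | ((Hi << 1) << 27) and Hi = Hi >>s 4.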
8668
8669// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8670// legal equivalently-sized i8 type, so we can use that as a go-between.
8671SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8672 SelectionDAG &DAG) const {
8673 SDLoc DL(Op);
8674 MVT VT = Op.getSimpleValueType();
8675 SDValue SplatVal = Op.getOperand(0);
8676 // All-zeros or all-ones splats are handled specially.
8677 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8678 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8679 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8680 }
8681 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8682 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8683 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8684 }
8685 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8686 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8687 DAG.getConstant(1, DL, SplatVal.getValueType()));
8688 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8689 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8690 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8691}
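// For illustration (a sketch): a non-constant splat of an i1 value x into
// nxv4i1 becomes
//   (setcc (splat_vector (and x, 1)):nxv4i8, (splat_vector 0):nxv4i8, ne)
// using the equivalently-sized i8 vector as the go-between.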
8692
8693// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8694// illegal (currently only vXi64 RV32).
8695// FIXME: We could also catch non-constant sign-extended i32 values and lower
8696// them to VMV_V_X_VL.
8697SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8698 SelectionDAG &DAG) const {
8699 SDLoc DL(Op);
8700 MVT VecVT = Op.getSimpleValueType();
8701 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8702 "Unexpected SPLAT_VECTOR_PARTS lowering");
8703
8704 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8705 SDValue Lo = Op.getOperand(0);
8706 SDValue Hi = Op.getOperand(1);
8707
8708 MVT ContainerVT = VecVT;
8709 if (VecVT.isFixedLengthVector())
8710 ContainerVT = getContainerForFixedLengthVector(VecVT);
8711
8712 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8713
8714 SDValue Res =
8715 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8716
8717 if (VecVT.isFixedLengthVector())
8718 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8719
8720 return Res;
8721}
8722
8723// Custom-lower extensions from mask vectors by using a vselect either with 1
8724// for zero/any-extension or -1 for sign-extension:
8725// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8726// Note that any-extension is lowered identically to zero-extension.
8727SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8728 int64_t ExtTrueVal) const {
8729 SDLoc DL(Op);
8730 MVT VecVT = Op.getSimpleValueType();
8731 SDValue Src = Op.getOperand(0);
8732 // Only custom-lower extensions from mask types
8733 assert(Src.getValueType().isVector() &&
8734 Src.getValueType().getVectorElementType() == MVT::i1);
8735
8736 if (VecVT.isScalableVector()) {
8737 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8738 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8739 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8740 }
8741
8742 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8743 MVT I1ContainerVT =
8744 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8745
8746 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8747
8748 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8749
8750 MVT XLenVT = Subtarget.getXLenVT();
8751 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8752 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8753
8754 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8755 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8756 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8757 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8758 SDValue Select =
8759 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8760 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8761
8762 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8763}
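// For illustration (a sketch): (nxv4i32 = sext nxv4i1:vmask) becomes
// (vselect vmask, (splat -1), (splat 0)), i.e. a vmerge of the two splats
// under the mask; zero/any-extension uses +1 instead of -1.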
8764
8765SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8766 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8767 MVT ExtVT = Op.getSimpleValueType();
8768 // Only custom-lower extensions from fixed-length vector types.
8769 if (!ExtVT.isFixedLengthVector())
8770 return Op;
8771 MVT VT = Op.getOperand(0).getSimpleValueType();
8772 // Grab the canonical container type for the extended type. Infer the smaller
8773 // type from that to ensure the same number of vector elements, as we know
8774 // the LMUL will be sufficient to hold the smaller type.
8775 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8776 // Get the extended container type manually to ensure the same number of
8777 // vector elements between source and dest.
8778 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8779 ContainerExtVT.getVectorElementCount());
8780
8781 SDValue Op1 =
8782 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8783
8784 SDLoc DL(Op);
8785 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8786
8787 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8788
8789 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8790}
8791
8792// Custom-lower truncations from vectors to mask vectors by using a mask and a
8793// setcc operation:
8794// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8795SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8796 SelectionDAG &DAG) const {
8797 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8798 SDLoc DL(Op);
8799 EVT MaskVT = Op.getValueType();
8800 // Only expect to custom-lower truncations to mask types
8801 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8802 "Unexpected type for vector mask lowering");
8803 SDValue Src = Op.getOperand(0);
8804 MVT VecVT = Src.getSimpleValueType();
8805 SDValue Mask, VL;
8806 if (IsVPTrunc) {
8807 Mask = Op.getOperand(1);
8808 VL = Op.getOperand(2);
8809 }
8810 // If this is a fixed vector, we need to convert it to a scalable vector.
8811 MVT ContainerVT = VecVT;
8812
8813 if (VecVT.isFixedLengthVector()) {
8814 ContainerVT = getContainerForFixedLengthVector(VecVT);
8815 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8816 if (IsVPTrunc) {
8817 MVT MaskContainerVT =
8818 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8819 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8820 }
8821 }
8822
8823 if (!IsVPTrunc) {
8824 std::tie(Mask, VL) =
8825 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8826 }
8827
8828 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8829 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8830
8831 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8832 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8833 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8834 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8835
8836 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8837 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8838 DAG.getUNDEF(ContainerVT), Mask, VL);
8839 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8840 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8841 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8842 if (MaskVT.isFixedLengthVector())
8843 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8844 return Trunc;
8845}
8846
8847SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8848 SelectionDAG &DAG) const {
8849 unsigned Opc = Op.getOpcode();
8850 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8851 SDLoc DL(Op);
8852
8853 MVT VT = Op.getSimpleValueType();
8854 // Only custom-lower vector truncates
8855 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8856
8857 // Truncates to mask types are handled differently
8858 if (VT.getVectorElementType() == MVT::i1)
8859 return lowerVectorMaskTruncLike(Op, DAG);
8860
8861 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8862 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8863 // truncate by one power of two at a time.
8864 MVT DstEltVT = VT.getVectorElementType();
8865
8866 SDValue Src = Op.getOperand(0);
8867 MVT SrcVT = Src.getSimpleValueType();
8868 MVT SrcEltVT = SrcVT.getVectorElementType();
8869
8870 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8871 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8872 "Unexpected vector truncate lowering");
8873
8874 MVT ContainerVT = SrcVT;
8875 SDValue Mask, VL;
8876 if (IsVPTrunc) {
8877 Mask = Op.getOperand(1);
8878 VL = Op.getOperand(2);
8879 }
8880 if (SrcVT.isFixedLengthVector()) {
8881 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8882 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8883 if (IsVPTrunc) {
8884 MVT MaskVT = getMaskTypeFor(ContainerVT);
8885 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8886 }
8887 }
8888
8889 SDValue Result = Src;
8890 if (!IsVPTrunc) {
8891 std::tie(Mask, VL) =
8892 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8893 }
8894
8895 unsigned NewOpc;
8896 if (Opc == ISD::TRUNCATE_SSAT_S)
8897 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8898 else if (Opc == ISD::TRUNCATE_USAT_U)
8899 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8900 else
8901 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8902
8903 do {
8904 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8905 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8906 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8907 } while (SrcEltVT != DstEltVT);
8908
8909 if (SrcVT.isFixedLengthVector())
8910 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8911
8912 return Result;
8913}
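// For illustration (a sketch): truncating nxv2i64 to nxv2i8 is emitted as
// three TRUNCATE_VECTOR_VL steps, i64 -> i32 -> i16 -> i8, each halving SEW.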
8914
8915SDValue
8916RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8917 SelectionDAG &DAG) const {
8918 SDLoc DL(Op);
8919 SDValue Chain = Op.getOperand(0);
8920 SDValue Src = Op.getOperand(1);
8921 MVT VT = Op.getSimpleValueType();
8922 MVT SrcVT = Src.getSimpleValueType();
8923 MVT ContainerVT = VT;
8924 if (VT.isFixedLengthVector()) {
8925 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8926 ContainerVT =
8927 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8928 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8929 }
8930
8931 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8932
8933 // RVV can only widen/truncate fp to types double/half the size of the source.
8934 if ((VT.getVectorElementType() == MVT::f64 &&
8935 (SrcVT.getVectorElementType() == MVT::f16 ||
8936 SrcVT.getVectorElementType() == MVT::bf16)) ||
8937 ((VT.getVectorElementType() == MVT::f16 ||
8938 VT.getVectorElementType() == MVT::bf16) &&
8939 SrcVT.getVectorElementType() == MVT::f64)) {
8940 // For double rounding, the intermediate rounding should be round-to-odd.
8941 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8942 ? RISCVISD::STRICT_FP_EXTEND_VL
8943 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8944 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8945 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8946 Chain, Src, Mask, VL);
8947 Chain = Src.getValue(1);
8948 }
8949
8950 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8951 ? RISCVISD::STRICT_FP_EXTEND_VL
8952 : RISCVISD::STRICT_FP_ROUND_VL;
8953 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8954 Chain, Src, Mask, VL);
8955 if (VT.isFixedLengthVector()) {
8956 // StrictFP operations have two result values. Their lowered result should
8957 // have the same result count.
8958 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8959 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8960 }
8961 return Res;
8962}
8963
8964SDValue
8965RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8966 SelectionDAG &DAG) const {
8967 bool IsVP =
8968 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8969 bool IsExtend =
8970 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8971 // RVV can only truncate fp to types half the size of the source. We
8972 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8973 // conversion instruction.
8974 SDLoc DL(Op);
8975 MVT VT = Op.getSimpleValueType();
8976
8977 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8978
8979 SDValue Src = Op.getOperand(0);
8980 MVT SrcVT = Src.getSimpleValueType();
8981
8982 bool IsDirectExtend =
8983 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8984 (SrcVT.getVectorElementType() != MVT::f16 &&
8985 SrcVT.getVectorElementType() != MVT::bf16));
8986 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8987 VT.getVectorElementType() != MVT::bf16) ||
8988 SrcVT.getVectorElementType() != MVT::f64);
8989
8990 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8991
8992 // Prepare any fixed-length vector operands.
8993 MVT ContainerVT = VT;
8994 SDValue Mask, VL;
8995 if (IsVP) {
8996 Mask = Op.getOperand(1);
8997 VL = Op.getOperand(2);
8998 }
8999 if (VT.isFixedLengthVector()) {
9000 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9001 ContainerVT =
9002 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9003 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9004 if (IsVP) {
9005 MVT MaskVT = getMaskTypeFor(ContainerVT);
9006 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9007 }
9008 }
9009
9010 if (!IsVP)
9011 std::tie(Mask, VL) =
9012 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9013
9014 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9015
9016 if (IsDirectConv) {
9017 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9018 if (VT.isFixedLengthVector())
9019 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9020 return Src;
9021 }
9022
9023 unsigned InterConvOpc =
9024 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9025
9026 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9027 SDValue IntermediateConv =
9028 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9029 SDValue Result =
9030 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9031 if (VT.isFixedLengthVector())
9032 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9033 return Result;
9034}
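// For illustration (a sketch): an fp_round from nxv2f64 to nxv2f16 is not a
// direct conversion, so it is emitted as a round-to-odd narrowing to nxv2f32
// (VFNCVT_ROD_VL) followed by FP_ROUND_VL to nxv2f16; f64 -> f32 rounds are
// emitted directly.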
9035
9036// Given a scalable vector type and an index into it, returns the type for the
9037// smallest subvector that the index fits in. This can be used to reduce LMUL
9038// for operations like vslidedown.
9039//
9040// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9041static std::optional<MVT>
9042getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9043 const RISCVSubtarget &Subtarget) {
9044 assert(VecVT.isScalableVector());
9045 const unsigned EltSize = VecVT.getScalarSizeInBits();
9046 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9047 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9048 MVT SmallerVT;
9049 if (MaxIdx < MinVLMAX)
9050 SmallerVT = getLMUL1VT(VecVT);
9051 else if (MaxIdx < MinVLMAX * 2)
9052 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9053 else if (MaxIdx < MinVLMAX * 4)
9054 SmallerVT = getLMUL1VT(VecVT)
9055 .getDoubleNumVectorElementsVT()
9056 .getDoubleNumVectorElementsVT();
9057 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9058 return std::nullopt;
9059 return SmallerVT;
9060}
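// For illustration (a sketch): with Zvl128b and nxv8i32 (MinVLMAX = 4),
// MaxIdx = 5 does not fit in the LMUL1 type nxv2i32 but does fit in the
// doubled nxv4i32, so the slide can be performed at the smaller LMUL.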
9061
9062// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9063// first position of a vector, and that vector is slid up to the insert index.
9064// By limiting the active vector length to index+1 and merging with the
9065// original vector (with an undisturbed tail policy for elements >= VL), we
9066// achieve the desired result of leaving all elements untouched except the one
9067// at VL-1, which is replaced with the desired value.
9068SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9069 SelectionDAG &DAG) const {
9070 SDLoc DL(Op);
9071 MVT VecVT = Op.getSimpleValueType();
9072 MVT XLenVT = Subtarget.getXLenVT();
9073 SDValue Vec = Op.getOperand(0);
9074 SDValue Val = Op.getOperand(1);
9075 MVT ValVT = Val.getSimpleValueType();
9076 SDValue Idx = Op.getOperand(2);
9077
9078 if (VecVT.getVectorElementType() == MVT::i1) {
9079 // FIXME: For now we just promote to an i8 vector and insert into that,
9080 // but this is probably not optimal.
9081 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9082 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9083 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9084 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9085 }
9086
9087 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9088 ValVT == MVT::bf16) {
9089 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9090 MVT IntVT = VecVT.changeTypeToInteger();
9091 SDValue IntInsert = DAG.getNode(
9092 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9093 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9094 return DAG.getBitcast(VecVT, IntInsert);
9095 }
9096
9097 MVT ContainerVT = VecVT;
9098 // If the operand is a fixed-length vector, convert to a scalable one.
9099 if (VecVT.isFixedLengthVector()) {
9100 ContainerVT = getContainerForFixedLengthVector(VecVT);
9101 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9102 }
9103
9104 // If we know the index we're going to insert at, we can shrink Vec so that
9105 // we're performing the scalar inserts and slideup on a smaller LMUL.
9106 MVT OrigContainerVT = ContainerVT;
9107 SDValue OrigVec = Vec;
9108 SDValue AlignedIdx;
9109 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9110 const unsigned OrigIdx = IdxC->getZExtValue();
9111 // Do we know an upper bound on LMUL?
9112 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9113 DL, DAG, Subtarget)) {
9114 ContainerVT = *ShrunkVT;
9115 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9116 }
9117
9118 // If we're compiling for an exact VLEN value, we can always perform
9119 // the insert in m1 as we can determine the register corresponding to
9120 // the index in the register group.
9121 const MVT M1VT = getLMUL1VT(ContainerVT);
9122 if (auto VLEN = Subtarget.getRealVLen();
9123 VLEN && ContainerVT.bitsGT(M1VT)) {
9124 EVT ElemVT = VecVT.getVectorElementType();
9125 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9126 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9127 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9128 unsigned ExtractIdx =
9129 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9130 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9131 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9132 ContainerVT = M1VT;
9133 }
9134
9135 if (AlignedIdx)
9136 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9137 AlignedIdx);
9138 }
9139
9140 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9141 // Even i64-element vectors on RV32 can be lowered without scalar
9142 // legalization if the most-significant 32 bits of the value are not affected
9143 // by the sign-extension of the lower 32 bits.
9144 // TODO: We could also catch sign extensions of a 32-bit value.
9145 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9146 const auto *CVal = cast<ConstantSDNode>(Val);
9147 if (isInt<32>(CVal->getSExtValue())) {
9148 IsLegalInsert = true;
9149 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9150 }
9151 }
9152
9153 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9154
9155 SDValue ValInVec;
9156
9157 if (IsLegalInsert) {
9158 unsigned Opc =
9159 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9160 if (isNullConstant(Idx)) {
9161 if (!VecVT.isFloatingPoint())
9162 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9163 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9164
9165 if (AlignedIdx)
9166 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9167 Vec, AlignedIdx);
9168 if (!VecVT.isFixedLengthVector())
9169 return Vec;
9170 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9171 }
9172 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9173 } else {
9174 // On RV32, i64-element vectors must be specially handled to place the
9175 // value at element 0, by using two vslide1down instructions in sequence on
9176 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9177 // this.
9178 SDValue ValLo, ValHi;
9179 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9180 MVT I32ContainerVT =
9181 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9182 SDValue I32Mask =
9183 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9184 // Limit the active VL to two.
9185 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9186 // If the Idx is 0 we can insert directly into the vector.
9187 if (isNullConstant(Idx)) {
9188 // First slide in the lo value, then the hi value above it. We use slide1down
9189 // to avoid the register group overlap constraint of vslide1up.
9190 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9191 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9192 // If the source vector is undef don't pass along the tail elements from
9193 // the previous slide1down.
9194 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9195 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9196 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9197 // Bitcast back to the right container type.
9198 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9199
9200 if (AlignedIdx)
9201 ValInVec =
9202 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9203 ValInVec, AlignedIdx);
9204 if (!VecVT.isFixedLengthVector())
9205 return ValInVec;
9206 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9207 }
9208
9209 // First slide in the lo value, then the hi value above it. We use slide1down
9210 // to avoid the register group overlap constraint of vslide1up.
9211 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9212 DAG.getUNDEF(I32ContainerVT),
9213 DAG.getUNDEF(I32ContainerVT), ValLo,
9214 I32Mask, InsertI64VL);
9215 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9216 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9217 I32Mask, InsertI64VL);
9218 // Bitcast back to the right container type.
9219 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9220 }
9221
9222 // Now that the value is in a vector, slide it into position.
9223 SDValue InsertVL =
9224 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9225
9226 // Use tail agnostic policy if Idx is the last index of Vec.
9227 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9228 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9229 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9230 Policy = RISCVII::TAIL_AGNOSTIC;
9231 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9232 Idx, Mask, InsertVL, Policy);
9233
9234 if (AlignedIdx)
9235 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9236 Slideup, AlignedIdx);
9237 if (!VecVT.isFixedLengthVector())
9238 return Slideup;
9239 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9240}
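// For illustration (a sketch): inserting a scalar x at index 2 of a v4i32
// roughly places x at element 0 (vmv.s.x) and then slides it up by 2 with
// VL = 3 (Idx + 1); the tail-undisturbed policy leaves elements past VL-1
// untouched.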
9241
9242// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9243// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9244// types this is done using VMV_X_S to allow us to glean information about the
9245// sign bits of the result.
9246SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9247 SelectionDAG &DAG) const {
9248 SDLoc DL(Op);
9249 SDValue Idx = Op.getOperand(1);
9250 SDValue Vec = Op.getOperand(0);
9251 EVT EltVT = Op.getValueType();
9252 MVT VecVT = Vec.getSimpleValueType();
9253 MVT XLenVT = Subtarget.getXLenVT();
9254
9255 if (VecVT.getVectorElementType() == MVT::i1) {
9256 // Use vfirst.m to extract the first bit.
9257 if (isNullConstant(Idx)) {
9258 MVT ContainerVT = VecVT;
9259 if (VecVT.isFixedLengthVector()) {
9260 ContainerVT = getContainerForFixedLengthVector(VecVT);
9261 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9262 }
9263 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9264 SDValue Vfirst =
9265 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9266 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9267 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9268 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9269 }
9270 if (VecVT.isFixedLengthVector()) {
9271 unsigned NumElts = VecVT.getVectorNumElements();
9272 if (NumElts >= 8) {
9273 MVT WideEltVT;
9274 unsigned WidenVecLen;
9275 SDValue ExtractElementIdx;
9276 SDValue ExtractBitIdx;
9277 unsigned MaxEEW = Subtarget.getELen();
9278 MVT LargestEltVT = MVT::getIntegerVT(
9279 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9280 if (NumElts <= LargestEltVT.getSizeInBits()) {
9281 assert(isPowerOf2_32(NumElts) &&
9282 "the number of elements should be power of 2");
9283 WideEltVT = MVT::getIntegerVT(NumElts);
9284 WidenVecLen = 1;
9285 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9286 ExtractBitIdx = Idx;
9287 } else {
9288 WideEltVT = LargestEltVT;
9289 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9290 // extract element index = index / element width
9291 ExtractElementIdx = DAG.getNode(
9292 ISD::SRL, DL, XLenVT, Idx,
9293 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9294 // mask bit index = index % element width
9295 ExtractBitIdx = DAG.getNode(
9296 ISD::AND, DL, XLenVT, Idx,
9297 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9298 }
9299 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9300 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9301 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9302 Vec, ExtractElementIdx);
9303 // Extract the bit from GPR.
9304 SDValue ShiftRight =
9305 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9306 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9307 DAG.getConstant(1, DL, XLenVT));
9308 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9309 }
9310 }
9311 // Otherwise, promote to an i8 vector and extract from that.
9312 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9313 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9314 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9315 }
9316
9317 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9318 EltVT == MVT::bf16) {
9319 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9320 MVT IntVT = VecVT.changeTypeToInteger();
9321 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9322 SDValue IntExtract =
9323 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9324 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9325 }
9326
9327 // If this is a fixed vector, we need to convert it to a scalable vector.
9328 MVT ContainerVT = VecVT;
9329 if (VecVT.isFixedLengthVector()) {
9330 ContainerVT = getContainerForFixedLengthVector(VecVT);
9331 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9332 }
9333
9334 // If we're compiling for an exact VLEN value and we have a known
9335 // constant index, we can always perform the extract in m1 (or
9336 // smaller) as we can determine the register corresponding to
9337 // the index in the register group.
9338 const auto VLen = Subtarget.getRealVLen();
9339 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9340 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9341 MVT M1VT = getLMUL1VT(ContainerVT);
9342 unsigned OrigIdx = IdxC->getZExtValue();
9343 EVT ElemVT = VecVT.getVectorElementType();
9344 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9345 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9346 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9347 unsigned ExtractIdx =
9348 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9349 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9350 DAG.getVectorIdxConstant(ExtractIdx, DL));
9351 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9352 ContainerVT = M1VT;
9353 }
9354
9355 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9356 // contains our index.
9357 std::optional<uint64_t> MaxIdx;
9358 if (VecVT.isFixedLengthVector())
9359 MaxIdx = VecVT.getVectorNumElements() - 1;
9360 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9361 MaxIdx = IdxC->getZExtValue();
9362 if (MaxIdx) {
9363 if (auto SmallerVT =
9364 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9365 ContainerVT = *SmallerVT;
9366 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9367 DAG.getConstant(0, DL, XLenVT));
9368 }
9369 }
9370
9371 // If after narrowing, the required slide is still greater than LMUL2,
9372 // fallback to generic expansion and go through the stack. This is done
9373 // for a subtle reason: extracting *all* elements out of a vector is
9374 // widely expected to be linear in vector size, but because vslidedown
9375 // is linear in LMUL, performing N extracts using vslidedown becomes
9376 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9377 // seems to have the same problem (the store is linear in LMUL), but the
9378 // generic expansion *memoizes* the store, and thus for many extracts of
9379 // the same vector we end up with one store and a bunch of loads.
9380 // TODO: We don't have the same code for insert_vector_elt because we
9381 // have BUILD_VECTOR and handle the degenerate case there. Should we
9382 // consider adding an inverse BUILD_VECTOR node?
9383 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9384 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9385 return SDValue();
9386
9387 // If the index is 0, the vector is already in the right position.
9388 if (!isNullConstant(Idx)) {
9389 // Use a VL of 1 to avoid processing more elements than we need.
9390 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9391 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9392 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9393 }
9394
9395 if (!EltVT.isInteger()) {
9396 // Floating-point extracts are handled in TableGen.
9397 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9398 DAG.getVectorIdxConstant(0, DL));
9399 }
9400
9401 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9402 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9403}
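// For illustration (a sketch): extracting element 3 of an integer vector is
// emitted as a slidedown by 3 with VL = 1 followed by vmv.x.s, possibly after
// shrinking to the smallest container type whose minimum VLMAX covers index 3.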
9404
9405// Some RVV intrinsics may claim that they want an integer operand to be
9406// promoted or expanded.
9407 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9408 const RISCVSubtarget &Subtarget) {
9409 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9410 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9411 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9412 "Unexpected opcode");
9413
9414 if (!Subtarget.hasVInstructions())
9415 return SDValue();
9416
9417 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9418 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9419 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9420
9421 SDLoc DL(Op);
9422
9423 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9424 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9425 if (!II || !II->hasScalarOperand())
9426 return SDValue();
9427
9428 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9429 assert(SplatOp < Op.getNumOperands());
9430
9431 SmallVector<SDValue, 8> Operands(Op->ops());
9432 SDValue &ScalarOp = Operands[SplatOp];
9433 MVT OpVT = ScalarOp.getSimpleValueType();
9434 MVT XLenVT = Subtarget.getXLenVT();
9435
9436 // If this isn't a scalar, or its type is XLenVT we're done.
9437 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9438 return SDValue();
9439
9440 // Simplest case is that the operand needs to be promoted to XLenVT.
9441 if (OpVT.bitsLT(XLenVT)) {
9442 // If the operand is a constant, sign extend to increase our chances
9443 // of being able to use a .vi instruction. ANY_EXTEND would become a
9444 // zero extend and the simm5 check in isel would fail.
9445 // FIXME: Should we ignore the upper bits in isel instead?
9446 unsigned ExtOpc =
9447 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9448 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9449 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9450 }
9451
9452 // Use the previous operand to get the vXi64 VT. The result might be a mask
9453 // VT for compares. Using the previous operand assumes that the previous
9454 // operand will never have a smaller element size than a scalar operand and
9455 // that a widening operation never uses SEW=64.
9456 // NOTE: If this fails the below assert, we can probably just find the
9457 // element count from any operand or result and use it to construct the VT.
9458 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9459 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9460
9461 // The more complex case is when the scalar is larger than XLenVT.
9462 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9463 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9464
9465 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9466 // instruction to sign-extend since SEW>XLEN.
9467 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9468 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9469 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9470 }
9471
9472 switch (IntNo) {
9473 case Intrinsic::riscv_vslide1up:
9474 case Intrinsic::riscv_vslide1down:
9475 case Intrinsic::riscv_vslide1up_mask:
9476 case Intrinsic::riscv_vslide1down_mask: {
9477 // We need to special case these when the scalar is larger than XLen.
9478 unsigned NumOps = Op.getNumOperands();
9479 bool IsMasked = NumOps == 7;
9480
9481 // Convert the vector source to the equivalent nxvXi32 vector.
9482 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9483 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9484 SDValue ScalarLo, ScalarHi;
9485 std::tie(ScalarLo, ScalarHi) =
9486 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9487
9488 // Double the VL since we halved SEW.
9489 SDValue AVL = getVLOperand(Op);
9490 SDValue I32VL;
9491
9492 // Optimize for constant AVL
9493 if (isa<ConstantSDNode>(AVL)) {
9494 const auto [MinVLMAX, MaxVLMAX] =
9495 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9496
9497 uint64_t AVLInt = AVL->getAsZExtVal();
9498 if (AVLInt <= MinVLMAX) {
9499 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9500 } else if (AVLInt >= 2 * MaxVLMAX) {
9501 // Just set vl to VLMAX in this situation
9502 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9503 } else {
9504 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9505 // is related to the hardware implementation.
9506 // So let the following code handle it.
9507 }
9508 }
9509 if (!I32VL) {
9510 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9511 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9512 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9513 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9514 SDValue SETVL =
9515 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9516 // Use the vsetvli instruction to get the actually used length, which is
9517 // related to the hardware implementation.
9518 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9519 SEW, LMUL);
9520 I32VL =
9521 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9522 }
9523
9524 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9525
9526 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9527 // instructions.
9528 SDValue Passthru;
9529 if (IsMasked)
9530 Passthru = DAG.getUNDEF(I32VT);
9531 else
9532 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9533
9534 if (IntNo == Intrinsic::riscv_vslide1up ||
9535 IntNo == Intrinsic::riscv_vslide1up_mask) {
9536 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9537 ScalarHi, I32Mask, I32VL);
9538 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9539 ScalarLo, I32Mask, I32VL);
9540 } else {
9541 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9542 ScalarLo, I32Mask, I32VL);
9543 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9544 ScalarHi, I32Mask, I32VL);
9545 }
9546
9547 // Convert back to nxvXi64.
9548 Vec = DAG.getBitcast(VT, Vec);
9549
9550 if (!IsMasked)
9551 return Vec;
9552 // Apply mask after the operation.
9553 SDValue Mask = Operands[NumOps - 3];
9554 SDValue MaskedOff = Operands[1];
9555 // Assume Policy operand is the last operand.
9556 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9557 // We don't need to select maskedoff if it's undef.
9558 if (MaskedOff.isUndef())
9559 return Vec;
9560 // TAMU
9561 if (Policy == RISCVII::TAIL_AGNOSTIC)
9562 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9563 DAG.getUNDEF(VT), AVL);
9564 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9565 // It's fine because vmerge does not care about mask policy.
9566 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9567 MaskedOff, AVL);
9568 }
9569 }
9570
9571 // We need to convert the scalar to a splat vector.
9572 SDValue VL = getVLOperand(Op);
9573 assert(VL.getValueType() == XLenVT);
9574 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9575 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9576}
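// For illustration (a sketch): on RV32, riscv.vslide1up with a 64-bit scalar
// that is not sign-extended from 32 bits is rewritten over an nxvXi32 view of
// the source: the VL is doubled, the scalar is split into lo/hi halves, and
// two SEW=32 slide1up steps (hi first, then lo) produce the same result
// before bitcasting back to the i64 element type.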
9577
9578// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9579// scalable vector llvm.get.vector.length for now.
9580//
9581// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9582// (vscale * VF). The vscale and VF are independent of element width. We use
9583// SEW=8 for the vsetvli because it is the only element width that supports all
9584 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9585 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9586// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9587// SEW and LMUL are better for the surrounding vector instructions.
9588 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9589 const RISCVSubtarget &Subtarget) {
9590 MVT XLenVT = Subtarget.getXLenVT();
9591
9592 // The smallest LMUL is only valid for the smallest element width.
9593 const unsigned ElementWidth = 8;
9594
9595 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9596 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9597 // We don't support VF==1 with ELEN==32.
9598 [[maybe_unused]] unsigned MinVF =
9599 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9600
9601 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9602 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9603 "Unexpected VF");
9604
9605 bool Fractional = VF < LMul1VF;
9606 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9607 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9608 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9609
9610 SDLoc DL(N);
9611
9612 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9613 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9614
9615 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9616
9617 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9618 SDValue Res =
9619 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9620 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9621}
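// Worked example (a sketch): for llvm.get.vector.length with VF = 4 and
// RVVBitsPerBlock = 64, LMul1VF = 8, so a fractional LMUL of 1/2 is chosen
// and the node becomes a vsetvli with SEW=8, LMUL=mf2, giving
// VLMax = vscale * 4 as required.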
9622
9623 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9624 const RISCVSubtarget &Subtarget) {
9625 SDValue Op0 = N->getOperand(1);
9626 MVT OpVT = Op0.getSimpleValueType();
9627 MVT ContainerVT = OpVT;
9628 if (OpVT.isFixedLengthVector()) {
9629 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9630 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9631 }
9632 MVT XLenVT = Subtarget.getXLenVT();
9633 SDLoc DL(N);
9634 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9635 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9636 if (isOneConstant(N->getOperand(2)))
9637 return Res;
9638
9639 // Convert -1 to VL.
9640 SDValue Setcc =
9641 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9642 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9643 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9644}
9645
9646 static inline void promoteVCIXScalar(const SDValue &Op,
9647 SmallVectorImpl<SDValue> &Operands,
9648 SelectionDAG &DAG) {
9649 const RISCVSubtarget &Subtarget =
9650 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9651
9652 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9653 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9654 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9655 SDLoc DL(Op);
9656
9657 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9658 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9659 if (!II || !II->hasScalarOperand())
9660 return;
9661
9662 unsigned SplatOp = II->ScalarOperand + 1;
9663 assert(SplatOp < Op.getNumOperands());
9664
9665 SDValue &ScalarOp = Operands[SplatOp];
9666 MVT OpVT = ScalarOp.getSimpleValueType();
9667 MVT XLenVT = Subtarget.getXLenVT();
9668
9669 // The code below is partially copied from lowerVectorIntrinsicScalars.
9670 // If this isn't a scalar, or its type is XLenVT we're done.
9671 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9672 return;
9673
9674 // Manually emit promote operation for scalar operation.
9675 if (OpVT.bitsLT(XLenVT)) {
9676 unsigned ExtOpc =
9677 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9678 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9679 }
9680}
9681
9682static void processVCIXOperands(SDValue &OrigOp,
9683 SmallVectorImpl<SDValue> &Operands,
9684 SelectionDAG &DAG) {
9685 promoteVCIXScalar(OrigOp, Operands, DAG);
9686 const RISCVSubtarget &Subtarget =
9687 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9688 for (SDValue &V : Operands) {
9689 EVT ValType = V.getValueType();
9690 if (ValType.isVector() && ValType.isFloatingPoint()) {
9691 MVT InterimIVT =
9692 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9693 ValType.getVectorElementCount());
9694 V = DAG.getBitcast(InterimIVT, V);
9695 }
9696 if (ValType.isFixedLengthVector()) {
9697 MVT OpContainerVT = getContainerForFixedLengthVector(
9698 DAG, V.getSimpleValueType(), Subtarget);
9699 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9700 }
9701 }
9702}
9703
9704// LMUL * VLEN should be greater than or equal to EGS * SEW
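// For example, with a guaranteed VLEN of 128 and VT == nxv4i32 (128
// known-minimum bits): 128 * 128 / 64 = 256 >= 4 * 32, so an element group
// size of 4 is valid at SEW=32.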
9705static inline bool isValidEGW(int EGS, EVT VT,
9706 const RISCVSubtarget &Subtarget) {
9707 return (Subtarget.getRealMinVLen() *
9708 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9709 EGS * VT.getScalarSizeInBits();
9710}
9711
9712SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9713 SelectionDAG &DAG) const {
9714 unsigned IntNo = Op.getConstantOperandVal(0);
9715 SDLoc DL(Op);
9716 MVT XLenVT = Subtarget.getXLenVT();
9717
9718 switch (IntNo) {
9719 default:
9720 break; // Don't custom lower most intrinsics.
9721 case Intrinsic::riscv_tuple_insert: {
9722 SDValue Vec = Op.getOperand(1);
9723 SDValue SubVec = Op.getOperand(2);
9724 SDValue Index = Op.getOperand(3);
9725
9726 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9727 SubVec, Index);
9728 }
9729 case Intrinsic::riscv_tuple_extract: {
9730 SDValue Vec = Op.getOperand(1);
9731 SDValue Index = Op.getOperand(2);
9732
9733 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9734 Index);
9735 }
9736 case Intrinsic::thread_pointer: {
9737 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9738 return DAG.getRegister(RISCV::X4, PtrVT);
9739 }
9740 case Intrinsic::riscv_orc_b:
9741 case Intrinsic::riscv_brev8:
9742 case Intrinsic::riscv_sha256sig0:
9743 case Intrinsic::riscv_sha256sig1:
9744 case Intrinsic::riscv_sha256sum0:
9745 case Intrinsic::riscv_sha256sum1:
9746 case Intrinsic::riscv_sm3p0:
9747 case Intrinsic::riscv_sm3p1: {
9748 unsigned Opc;
9749 switch (IntNo) {
9750 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9751 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9752 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9753 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9754 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9755 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9756 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9757 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9758 }
9759
9760 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9761 }
9762 case Intrinsic::riscv_sm4ks:
9763 case Intrinsic::riscv_sm4ed: {
9764 unsigned Opc =
9765 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9766
9767 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9768 Op.getOperand(3));
9769 }
9770 case Intrinsic::riscv_zip:
9771 case Intrinsic::riscv_unzip: {
9772 unsigned Opc =
9773 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9774 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9775 }
9776 case Intrinsic::riscv_mopr:
9777 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9778 Op.getOperand(2));
9779
9780 case Intrinsic::riscv_moprr: {
9781 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9782 Op.getOperand(2), Op.getOperand(3));
9783 }
9784 case Intrinsic::riscv_clmul:
9785 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9786 Op.getOperand(2));
9787 case Intrinsic::riscv_clmulh:
9788 case Intrinsic::riscv_clmulr: {
9789 unsigned Opc =
9790 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9791 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9792 }
9793 case Intrinsic::experimental_get_vector_length:
9794 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9795 case Intrinsic::experimental_cttz_elts:
9796 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9797 case Intrinsic::riscv_vmv_x_s: {
9798 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9799 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9800 }
9801 case Intrinsic::riscv_vfmv_f_s:
9802 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9803 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9804 case Intrinsic::riscv_vmv_v_x:
9805 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9806 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9807 Subtarget);
9808 case Intrinsic::riscv_vfmv_v_f:
9809 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9810 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9811 case Intrinsic::riscv_vmv_s_x: {
9812 SDValue Scalar = Op.getOperand(2);
9813
9814 if (Scalar.getValueType().bitsLE(XLenVT)) {
9815 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9816 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9817 Op.getOperand(1), Scalar, Op.getOperand(3));
9818 }
9819
9820 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9821
9822 // This is an i64 value that lives in two scalar registers. We have to
9823 // insert this in a convoluted way. First we build vXi64 splat containing
9824 // the two values that we assemble using some bit math. Next we'll use
9825 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9826 // to merge element 0 from our splat into the source vector.
9827 // FIXME: This is probably not the best way to do this, but it is
9828 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9829 // point.
9830 // sw lo, (a0)
9831 // sw hi, 4(a0)
9832 // vlse vX, (a0)
9833 //
9834 // vid.v vVid
9835 // vmseq.vx mMask, vVid, 0
9836 // vmerge.vvm vDest, vSrc, vVal, mMask
9837 MVT VT = Op.getSimpleValueType();
9838 SDValue Vec = Op.getOperand(1);
9839 SDValue VL = getVLOperand(Op);
9840
9841 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9842 if (Op.getOperand(1).isUndef())
9843 return SplattedVal;
9844 SDValue SplattedIdx =
9845 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9846 DAG.getConstant(0, DL, MVT::i32), VL);
9847
9848 MVT MaskVT = getMaskTypeFor(VT);
9849 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9850 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9851 SDValue SelectCond =
9852 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9853 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9854 DAG.getUNDEF(MaskVT), Mask, VL});
9855 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9856 Vec, DAG.getUNDEF(VT), VL);
9857 }
9858 case Intrinsic::riscv_vfmv_s_f:
9859 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9860 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9861 // EGS * EEW >= 128 bits
9862 case Intrinsic::riscv_vaesdf_vv:
9863 case Intrinsic::riscv_vaesdf_vs:
9864 case Intrinsic::riscv_vaesdm_vv:
9865 case Intrinsic::riscv_vaesdm_vs:
9866 case Intrinsic::riscv_vaesef_vv:
9867 case Intrinsic::riscv_vaesef_vs:
9868 case Intrinsic::riscv_vaesem_vv:
9869 case Intrinsic::riscv_vaesem_vs:
9870 case Intrinsic::riscv_vaeskf1:
9871 case Intrinsic::riscv_vaeskf2:
9872 case Intrinsic::riscv_vaesz_vs:
9873 case Intrinsic::riscv_vsm4k:
9874 case Intrinsic::riscv_vsm4r_vv:
9875 case Intrinsic::riscv_vsm4r_vs: {
9876 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9877 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9878 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9879 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9880 return Op;
9881 }
9882 // EGS * EEW >= 256 bits
9883 case Intrinsic::riscv_vsm3c:
9884 case Intrinsic::riscv_vsm3me: {
9885 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9886 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9887 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9888 return Op;
9889 }
9890 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9891 case Intrinsic::riscv_vsha2ch:
9892 case Intrinsic::riscv_vsha2cl:
9893 case Intrinsic::riscv_vsha2ms: {
9894 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9895 !Subtarget.hasStdExtZvknhb())
9896 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9897 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9898 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9899 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9900 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9901 return Op;
9902 }
9903 case Intrinsic::riscv_sf_vc_v_x:
9904 case Intrinsic::riscv_sf_vc_v_i:
9905 case Intrinsic::riscv_sf_vc_v_xv:
9906 case Intrinsic::riscv_sf_vc_v_iv:
9907 case Intrinsic::riscv_sf_vc_v_vv:
9908 case Intrinsic::riscv_sf_vc_v_fv:
9909 case Intrinsic::riscv_sf_vc_v_xvv:
9910 case Intrinsic::riscv_sf_vc_v_ivv:
9911 case Intrinsic::riscv_sf_vc_v_vvv:
9912 case Intrinsic::riscv_sf_vc_v_fvv:
9913 case Intrinsic::riscv_sf_vc_v_xvw:
9914 case Intrinsic::riscv_sf_vc_v_ivw:
9915 case Intrinsic::riscv_sf_vc_v_vvw:
9916 case Intrinsic::riscv_sf_vc_v_fvw: {
9917 MVT VT = Op.getSimpleValueType();
9918
9919 SmallVector<SDValue> Operands{Op->op_values()};
9920 processVCIXOperands(Op, Operands, DAG);
9921
9922 MVT RetVT = VT;
9923 if (VT.isFixedLengthVector())
9924 RetVT = getContainerForFixedLengthVector(VT);
9925 else if (VT.isFloatingPoint())
9926 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9927 VT.getVectorElementCount());
9928
9929 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9930
9931 if (VT.isFixedLengthVector())
9932 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9933 else if (VT.isFloatingPoint())
9934 NewNode = DAG.getBitcast(VT, NewNode);
9935
9936 if (Op == NewNode)
9937 break;
9938
9939 return NewNode;
9940 }
9941 }
9942
9943 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9944}
9945
9946static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9947 unsigned Type) {
9948 SDLoc DL(Op);
9949 SmallVector<SDValue> Operands{Op->op_values()};
9950 Operands.erase(Operands.begin() + 1);
9951
9952 const RISCVSubtarget &Subtarget =
9953 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9954 MVT VT = Op.getSimpleValueType();
9955 MVT RetVT = VT;
9956 MVT FloatVT = VT;
9957
9958 if (VT.isFloatingPoint()) {
9959 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9960 VT.getVectorElementCount());
9961 FloatVT = RetVT;
9962 }
9963 if (VT.isFixedLengthVector())
9964 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9965 Subtarget);
9966
9967 processVCIXOperands(Op, Operands, DAG);
9968
9969 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9970 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9971 SDValue Chain = NewNode.getValue(1);
9972
9973 if (VT.isFixedLengthVector())
9974 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9975 if (VT.isFloatingPoint())
9976 NewNode = DAG.getBitcast(VT, NewNode);
9977
9978 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9979
9980 return NewNode;
9981}
9982
9983static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9984 unsigned Type) {
9985 SmallVector<SDValue> Operands{Op->op_values()};
9986 Operands.erase(Operands.begin() + 1);
9987 processVCIXOperands(Op, Operands, DAG);
9988
9989 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9990}
9991
9992SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9993 SelectionDAG &DAG) const {
9994 unsigned IntNo = Op.getConstantOperandVal(1);
9995 switch (IntNo) {
9996 default:
9997 break;
9998 case Intrinsic::riscv_seg2_load:
9999 case Intrinsic::riscv_seg3_load:
10000 case Intrinsic::riscv_seg4_load:
10001 case Intrinsic::riscv_seg5_load:
10002 case Intrinsic::riscv_seg6_load:
10003 case Intrinsic::riscv_seg7_load:
10004 case Intrinsic::riscv_seg8_load: {
10005 SDLoc DL(Op);
10006 static const Intrinsic::ID VlsegInts[7] = {
10007 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10008 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10009 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10010 Intrinsic::riscv_vlseg8};
10011 unsigned NF = Op->getNumValues() - 1;
10012 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10013 MVT XLenVT = Subtarget.getXLenVT();
10014 MVT VT = Op->getSimpleValueType(0);
10015 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10016 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10017 ContainerVT.getScalarSizeInBits();
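// Model the NF segment results as a single RISC-V vector tuple type with NF
// fields and Sz known-minimum bits in total; each fixed-length result is
// extracted from the tuple after the load.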
10018 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10019
10020 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10021 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10022 auto *Load = cast<MemIntrinsicSDNode>(Op);
10023
10024 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10025 SDValue Ops[] = {
10026 Load->getChain(),
10027 IntID,
10028 DAG.getUNDEF(VecTupTy),
10029 Op.getOperand(2),
10030 VL,
10031 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10032 SDValue Result =
10033 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10034 Load->getMemoryVT(), Load->getMemOperand());
10035 SmallVector<SDValue, 9> Results;
10036 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10037 SDValue SubVec =
10038 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10039 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10040 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10041 }
10042 Results.push_back(Result.getValue(1));
10043 return DAG.getMergeValues(Results, DL);
10044 }
10045 case Intrinsic::riscv_sf_vc_v_x_se:
10046 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
10047 case Intrinsic::riscv_sf_vc_v_i_se:
10048 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
10049 case Intrinsic::riscv_sf_vc_v_xv_se:
10050 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
10051 case Intrinsic::riscv_sf_vc_v_iv_se:
10052 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
10053 case Intrinsic::riscv_sf_vc_v_vv_se:
10054 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
10055 case Intrinsic::riscv_sf_vc_v_fv_se:
10056 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
10057 case Intrinsic::riscv_sf_vc_v_xvv_se:
10058 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
10059 case Intrinsic::riscv_sf_vc_v_ivv_se:
10060 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
10061 case Intrinsic::riscv_sf_vc_v_vvv_se:
10062 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
10063 case Intrinsic::riscv_sf_vc_v_fvv_se:
10064 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
10065 case Intrinsic::riscv_sf_vc_v_xvw_se:
10066 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
10067 case Intrinsic::riscv_sf_vc_v_ivw_se:
10068 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
10069 case Intrinsic::riscv_sf_vc_v_vvw_se:
10070 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
10071 case Intrinsic::riscv_sf_vc_v_fvw_se:
10072 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
10073 }
10074
10075 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10076}
10077
10078SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10079 SelectionDAG &DAG) const {
10080 unsigned IntNo = Op.getConstantOperandVal(1);
10081 switch (IntNo) {
10082 default:
10083 break;
10084 case Intrinsic::riscv_seg2_store:
10085 case Intrinsic::riscv_seg3_store:
10086 case Intrinsic::riscv_seg4_store:
10087 case Intrinsic::riscv_seg5_store:
10088 case Intrinsic::riscv_seg6_store:
10089 case Intrinsic::riscv_seg7_store:
10090 case Intrinsic::riscv_seg8_store: {
10091 SDLoc DL(Op);
10092 static const Intrinsic::ID VssegInts[] = {
10093 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10094 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10095 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10096 Intrinsic::riscv_vsseg8};
10097 // Operands are (chain, int_id, vec*, ptr, vl)
10098 unsigned NF = Op->getNumOperands() - 4;
10099 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10100 MVT XLenVT = Subtarget.getXLenVT();
10101 MVT VT = Op->getOperand(2).getSimpleValueType();
10102 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10103 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10104 ContainerVT.getScalarSizeInBits();
10105 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10106
10107 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10108 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10109 SDValue Ptr = Op->getOperand(NF + 2);
10110
10111 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10112
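// Build the tuple value to store: convert each fixed-length source vector to
// its scalable container and insert it as tuple field i.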
10113 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10114 for (unsigned i = 0; i < NF; i++)
10115 StoredVal = DAG.getNode(
10116 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10117 convertToScalableVector(
10118 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10119 DAG.getVectorIdxConstant(i, DL));
10120
10121 SDValue Ops[] = {
10122 FixedIntrinsic->getChain(),
10123 IntID,
10124 StoredVal,
10125 Ptr,
10126 VL,
10127 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10128
10129 return DAG.getMemIntrinsicNode(
10130 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10131 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10132 }
10133 case Intrinsic::riscv_sf_vc_xv_se:
10134 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10135 case Intrinsic::riscv_sf_vc_iv_se:
10136 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10137 case Intrinsic::riscv_sf_vc_vv_se:
10138 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10139 case Intrinsic::riscv_sf_vc_fv_se:
10140 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10141 case Intrinsic::riscv_sf_vc_xvv_se:
10142 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10143 case Intrinsic::riscv_sf_vc_ivv_se:
10144 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10145 case Intrinsic::riscv_sf_vc_vvv_se:
10146 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10147 case Intrinsic::riscv_sf_vc_fvv_se:
10148 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10149 case Intrinsic::riscv_sf_vc_xvw_se:
10150 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10151 case Intrinsic::riscv_sf_vc_ivw_se:
10152 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10153 case Intrinsic::riscv_sf_vc_vvw_se:
10154 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10155 case Intrinsic::riscv_sf_vc_fvw_se:
10156 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10157 }
10158
10159 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10160}
10161
10162static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10163 switch (ISDOpcode) {
10164 default:
10165 llvm_unreachable("Unhandled reduction");
10166 case ISD::VP_REDUCE_ADD:
10167 case ISD::VECREDUCE_ADD:
10168 return RISCVISD::VECREDUCE_ADD_VL;
10169 case ISD::VP_REDUCE_UMAX:
10170 case ISD::VECREDUCE_UMAX:
10171 return RISCVISD::VECREDUCE_UMAX_VL;
10172 case ISD::VP_REDUCE_SMAX:
10173 case ISD::VECREDUCE_SMAX:
10174 return RISCVISD::VECREDUCE_SMAX_VL;
10175 case ISD::VP_REDUCE_UMIN:
10176 case ISD::VECREDUCE_UMIN:
10177 return RISCVISD::VECREDUCE_UMIN_VL;
10178 case ISD::VP_REDUCE_SMIN:
10179 case ISD::VECREDUCE_SMIN:
10180 return RISCVISD::VECREDUCE_SMIN_VL;
10181 case ISD::VP_REDUCE_AND:
10182 case ISD::VECREDUCE_AND:
10183 return RISCVISD::VECREDUCE_AND_VL;
10184 case ISD::VP_REDUCE_OR:
10185 case ISD::VECREDUCE_OR:
10186 return RISCVISD::VECREDUCE_OR_VL;
10187 case ISD::VP_REDUCE_XOR:
10188 case ISD::VECREDUCE_XOR:
10189 return RISCVISD::VECREDUCE_XOR_VL;
10190 case ISD::VP_REDUCE_FADD:
10191 return RISCVISD::VECREDUCE_FADD_VL;
10192 case ISD::VP_REDUCE_SEQ_FADD:
10193 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10194 case ISD::VP_REDUCE_FMAX:
10195 case ISD::VP_REDUCE_FMAXIMUM:
10196 return RISCVISD::VECREDUCE_FMAX_VL;
10197 case ISD::VP_REDUCE_FMIN:
10198 case ISD::VP_REDUCE_FMINIMUM:
10199 return RISCVISD::VECREDUCE_FMIN_VL;
10200 }
10201
10202}
10203
10204SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10205 SelectionDAG &DAG,
10206 bool IsVP) const {
10207 SDLoc DL(Op);
10208 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10209 MVT VecVT = Vec.getSimpleValueType();
10210 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10211 Op.getOpcode() == ISD::VECREDUCE_OR ||
10212 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10213 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10214 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10215 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10216 "Unexpected reduction lowering");
10217
10218 MVT XLenVT = Subtarget.getXLenVT();
10219
10220 MVT ContainerVT = VecVT;
10221 if (VecVT.isFixedLengthVector()) {
10222 ContainerVT = getContainerForFixedLengthVector(VecVT);
10223 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10224 }
10225
10226 SDValue Mask, VL;
10227 if (IsVP) {
10228 Mask = Op.getOperand(2);
10229 VL = Op.getOperand(3);
10230 } else {
10231 std::tie(Mask, VL) =
10232 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10233 }
10234
10235 ISD::CondCode CC;
10236 switch (Op.getOpcode()) {
10237 default:
10238 llvm_unreachable("Unhandled reduction");
10239 case ISD::VECREDUCE_AND:
10240 case ISD::VP_REDUCE_AND: {
10241 // vcpop ~x == 0
10242 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10243 if (IsVP || VecVT.isFixedLengthVector())
10244 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10245 else
10246 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10247 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10248 CC = ISD::SETEQ;
10249 break;
10250 }
10251 case ISD::VECREDUCE_OR:
10252 case ISD::VP_REDUCE_OR:
10253 // vcpop x != 0
10254 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10255 CC = ISD::SETNE;
10256 break;
10257 case ISD::VECREDUCE_XOR:
10258 case ISD::VP_REDUCE_XOR: {
10259 // ((vcpop x) & 1) != 0
10260 SDValue One = DAG.getConstant(1, DL, XLenVT);
10261 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10262 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10263 CC = ISD::SETNE;
10264 break;
10265 }
10266 }
10267
10268 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10269 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10270 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10271
10272 if (!IsVP)
10273 return SetCC;
10274
10275 // Now include the start value in the operation.
10276 // Note that we must return the start value when no elements are operated
10277 // upon. The vcpop instructions we've emitted in each case above will return
10278 // 0 for an inactive vector, and so we've already received the neutral value:
10279 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10280 // can simply include the start value.
10281 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10282 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10283}
10284
10285static bool isNonZeroAVL(SDValue AVL) {
10286 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10287 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10288 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10289 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10290}
10291
10292/// Helper to lower a reduction sequence of the form:
10293/// scalar = reduce_op vec, scalar_start
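/// For example, with a non-zero AVL an integer add reduction becomes roughly:
///   vmv.s.x    v8, a0       ; move the start value into element 0
///   vredsum.vs v8, v16, v8  ; v8[0] = start + sum of the active elements
///   vmv.x.s    a0, v8       ; read the scalar result back out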
10294static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10295 SDValue StartValue, SDValue Vec, SDValue Mask,
10296 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10297 const RISCVSubtarget &Subtarget) {
10298 const MVT VecVT = Vec.getSimpleValueType();
10299 const MVT M1VT = getLMUL1VT(VecVT);
10300 const MVT XLenVT = Subtarget.getXLenVT();
10301 const bool NonZeroAVL = isNonZeroAVL(VL);
10302
10303 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10304 // or the original VT if fractional.
10305 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10306 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10307 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10308 // be the result of the reduction operation.
10309 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10310 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10311 DAG, Subtarget);
10312 if (M1VT != InnerVT)
10313 InitialValue =
10314 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10315 InitialValue, DAG.getVectorIdxConstant(0, DL));
10316 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10317 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10318 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10319 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10320 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10321 DAG.getVectorIdxConstant(0, DL));
10322}
10323
10324SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10325 SelectionDAG &DAG) const {
10326 SDLoc DL(Op);
10327 SDValue Vec = Op.getOperand(0);
10328 EVT VecEVT = Vec.getValueType();
10329
10330 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10331
10332 // Due to ordering in legalize types we may have a vector type that needs to
10333 // be split. Do that manually so we can get down to a legal type.
10334 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10335 TargetLowering::TypeSplitVector) {
10336 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10337 VecEVT = Lo.getValueType();
10338 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10339 }
10340
10341 // TODO: The type may need to be widened rather than split. Or widened before
10342 // it can be split.
10343 if (!isTypeLegal(VecEVT))
10344 return SDValue();
10345
10346 MVT VecVT = VecEVT.getSimpleVT();
10347 MVT VecEltVT = VecVT.getVectorElementType();
10348 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10349
10350 MVT ContainerVT = VecVT;
10351 if (VecVT.isFixedLengthVector()) {
10352 ContainerVT = getContainerForFixedLengthVector(VecVT);
10353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10354 }
10355
10356 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10357
10358 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
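// These operations are idempotent (x op x == x), so the vector's first
// element can double as the start value; this avoids materializing a
// separate neutral-element scalar.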
10359 switch (BaseOpc) {
10360 case ISD::AND:
10361 case ISD::OR:
10362 case ISD::UMAX:
10363 case ISD::UMIN:
10364 case ISD::SMAX:
10365 case ISD::SMIN:
10366 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10367 DAG.getVectorIdxConstant(0, DL));
10368 }
10369 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10370 Mask, VL, DL, DAG, Subtarget);
10371}
10372
10373// Given a reduction op, this function returns the matching reduction opcode,
10374// the vector SDValue and the scalar SDValue required to lower this to a
10375// RISCVISD node.
10376static std::tuple<unsigned, SDValue, SDValue>
10377getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
10378 const RISCVSubtarget &Subtarget) {
10379 SDLoc DL(Op);
10380 auto Flags = Op->getFlags();
10381 unsigned Opcode = Op.getOpcode();
10382 switch (Opcode) {
10383 default:
10384 llvm_unreachable("Unhandled reduction");
10385 case ISD::VECREDUCE_FADD: {
10386 // Use positive zero if we can. It is cheaper to materialize.
10387 SDValue Zero =
10388 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10389 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10390 }
10391 case ISD::VECREDUCE_SEQ_FADD:
10392 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10393 Op.getOperand(0));
10394 case ISD::VECREDUCE_FMINIMUM:
10395 case ISD::VECREDUCE_FMAXIMUM:
10396 case ISD::VECREDUCE_FMIN:
10397 case ISD::VECREDUCE_FMAX: {
10398 SDValue Front =
10399 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10400 DAG.getVectorIdxConstant(0, DL));
10401 unsigned RVVOpc =
10402 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10403 ? RISCVISD::VECREDUCE_FMIN_VL
10404 : RISCVISD::VECREDUCE_FMAX_VL;
10405 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10406 }
10407 }
10408}
10409
10410SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10411 SelectionDAG &DAG) const {
10412 SDLoc DL(Op);
10413 MVT VecEltVT = Op.getSimpleValueType();
10414
10415 unsigned RVVOpcode;
10416 SDValue VectorVal, ScalarVal;
10417 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10418 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10419 MVT VecVT = VectorVal.getSimpleValueType();
10420
10421 MVT ContainerVT = VecVT;
10422 if (VecVT.isFixedLengthVector()) {
10423 ContainerVT = getContainerForFixedLengthVector(VecVT);
10424 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10425 }
10426
10427 MVT ResVT = Op.getSimpleValueType();
10428 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10429 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10430 VL, DL, DAG, Subtarget);
10431 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10432 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10433 return Res;
10434
10435 if (Op->getFlags().hasNoNaNs())
10436 return Res;
10437
10438 // Force the output to NaN if any element is NaN.
10439 SDValue IsNan =
10440 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10441 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10442 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10443 MVT XLenVT = Subtarget.getXLenVT();
10444 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10445 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10446 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10447 return DAG.getSelect(
10448 DL, ResVT, NoNaNs, Res,
10449 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10450}
10451
10452SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10453 SelectionDAG &DAG) const {
10454 SDLoc DL(Op);
10455 unsigned Opc = Op.getOpcode();
10456 SDValue Start = Op.getOperand(0);
10457 SDValue Vec = Op.getOperand(1);
10458 EVT VecEVT = Vec.getValueType();
10459 MVT XLenVT = Subtarget.getXLenVT();
10460
10461 // TODO: The type may need to be widened rather than split. Or widened before
10462 // it can be split.
10463 if (!isTypeLegal(VecEVT))
10464 return SDValue();
10465
10466 MVT VecVT = VecEVT.getSimpleVT();
10467 unsigned RVVOpcode = getRVVReductionOp(Opc);
10468
10469 if (VecVT.isFixedLengthVector()) {
10470 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10471 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10472 }
10473
10474 SDValue VL = Op.getOperand(3);
10475 SDValue Mask = Op.getOperand(2);
10476 SDValue Res =
10477 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10478 Vec, Mask, VL, DL, DAG, Subtarget);
10479 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10480 Op->getFlags().hasNoNaNs())
10481 return Res;
10482
10483 // Propagate NaNs.
10484 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10485 // Check if any of the elements in Vec is NaN.
10486 SDValue IsNaN = DAG.getNode(
10487 RISCVISD::SETCC_VL, DL, PredVT,
10488 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10489 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10490 // Check if the start value is NaN.
10491 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10492 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10493 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10494 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10495 MVT ResVT = Res.getSimpleValueType();
10496 return DAG.getSelect(
10497 DL, ResVT, NoNaNs, Res,
10498 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10499}
10500
10501SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10502 SelectionDAG &DAG) const {
10503 SDValue Vec = Op.getOperand(0);
10504 SDValue SubVec = Op.getOperand(1);
10505 MVT VecVT = Vec.getSimpleValueType();
10506 MVT SubVecVT = SubVec.getSimpleValueType();
10507
10508 SDLoc DL(Op);
10509 MVT XLenVT = Subtarget.getXLenVT();
10510 unsigned OrigIdx = Op.getConstantOperandVal(2);
10511 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10512
10513 if (OrigIdx == 0 && Vec.isUndef())
10514 return Op;
10515
10516 // We don't have the ability to slide mask vectors up indexed by their i1
10517 // elements; the smallest we can do is i8. Often we are able to bitcast to
10518 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10519 // into a scalable one, we might not necessarily have enough scalable
10520 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10521 if (SubVecVT.getVectorElementType() == MVT::i1) {
10522 if (VecVT.getVectorMinNumElements() >= 8 &&
10523 SubVecVT.getVectorMinNumElements() >= 8) {
10524 assert(OrigIdx % 8 == 0 && "Invalid index");
10525 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10526 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10527 "Unexpected mask vector lowering");
10528 OrigIdx /= 8;
10529 SubVecVT =
10530 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10531 SubVecVT.isScalableVector());
10532 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10533 VecVT.isScalableVector());
10534 Vec = DAG.getBitcast(VecVT, Vec);
10535 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10536 } else {
10537 // We can't slide this mask vector up indexed by its i1 elements.
10538 // This poses a problem when we wish to insert a scalable vector which
10539 // can't be re-expressed as a larger type. Just choose the slow path and
10540 // extend to a larger type, then truncate back down.
10541 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10542 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10543 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10544 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10545 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10546 Op.getOperand(2));
10547 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10548 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10549 }
10550 }
10551
10552 // If the subvector is a fixed-length type and we don't know VLEN
10553 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10554 // don't know which register of a LMUL group contains the specific subvector
10555 // as we only know the minimum register size. Therefore we must slide the
10556 // vector group up the full amount.
10557 const auto VLen = Subtarget.getRealVLen();
10558 if (SubVecVT.isFixedLengthVector() && !VLen) {
10559 MVT ContainerVT = VecVT;
10560 if (VecVT.isFixedLengthVector()) {
10561 ContainerVT = getContainerForFixedLengthVector(VecVT);
10562 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10563 }
10564
10565 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10566 DAG.getUNDEF(ContainerVT), SubVec,
10567 DAG.getVectorIdxConstant(0, DL));
10568
10569 SDValue Mask =
10570 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10571 // Set the vector length to only the number of elements we care about. Note
10572 // that for slideup this includes the offset.
10573 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10574 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10575
10576 // Use tail agnostic policy if we're inserting over Vec's tail.
10577 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10578 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10579 Policy = RISCVII::TAIL_AGNOSTIC;
10580
10581 // If we're inserting into the lowest elements, use a tail undisturbed
10582 // vmv.v.v.
10583 if (OrigIdx == 0) {
10584 SubVec =
10585 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10586 } else {
10587 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10588 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10589 SlideupAmt, Mask, VL, Policy);
10590 }
10591
10592 if (VecVT.isFixedLengthVector())
10593 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10594 return DAG.getBitcast(Op.getValueType(), SubVec);
10595 }
10596
10597 MVT ContainerVecVT = VecVT;
10598 if (VecVT.isFixedLengthVector()) {
10599 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10600 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10601 }
10602
10603 MVT ContainerSubVecVT = SubVecVT;
10604 if (SubVecVT.isFixedLengthVector()) {
10605 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10606 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10607 }
10608
10609 unsigned SubRegIdx;
10610 ElementCount RemIdx;
10611 // insert_subvector scales the index by vscale if the subvector is scalable,
10612 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10613 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10614 if (SubVecVT.isFixedLengthVector()) {
10615 assert(VLen);
10616 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10617 auto Decompose =
10618 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10619 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10620 SubRegIdx = Decompose.first;
10621 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10622 (OrigIdx % Vscale));
10623 } else {
10624 auto Decompose =
10625 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10626 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10627 SubRegIdx = Decompose.first;
10628 RemIdx = ElementCount::getScalable(Decompose.second);
10629 }
10630
10631 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10632 assert(isPowerOf2_64(
10633 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10634 bool ExactlyVecRegSized =
10635 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10636 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10637
10638 // 1. If the Idx has been completely eliminated and this subvector's size is
10639 // a vector register or a multiple thereof, or the surrounding elements are
10640 // undef, then this is a subvector insert which naturally aligns to a vector
10641 // register. These can easily be handled using subregister manipulation.
10642 // 2. If the subvector isn't an exact multiple of a valid register group size,
10643 // then the insertion must preserve the undisturbed elements of the register.
10644 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10645 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10646 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10647 // of that LMUL=1 type back into the larger vector (resolving to another
10648 // subregister operation). See below for how our VSLIDEUP works. We go via a
10649 // LMUL=1 type to avoid allocating a large register group to hold our
10650 // subvector.
10651 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10652 if (SubVecVT.isFixedLengthVector()) {
10653 // We may get NoSubRegister if inserting at index 0 and the subvec
10654 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10655 if (SubRegIdx == RISCV::NoSubRegister) {
10656 assert(OrigIdx == 0);
10657 return Op;
10658 }
10659
10660 // Use a insert_subvector that will resolve to an insert subreg.
10661 assert(VLen);
10662 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10663 SDValue Insert =
10664 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10665 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10666 if (VecVT.isFixedLengthVector())
10667 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10668 return Insert;
10669 }
10670 return Op;
10671 }
10672
10673 // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
10674 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10675 // (in our case undisturbed). This means we can set up a subvector insertion
10676 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10677 // size of the subvector.
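// For example, inserting a 2-element subvector at offset 2 into an 8-element
// register with a tail-undisturbed policy:
//   vd: d0 d1 d2 d3 d4 d5 d6 d7   vs: s0 s1 - - - - - -
//   vslideup.vx vd, vs, 2 with VL=4  ->  d0 d1 s0 s1 d4 d5 d6 d7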
10678 MVT InterSubVT = ContainerVecVT;
10679 SDValue AlignedExtract = Vec;
10680 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10681 if (SubVecVT.isFixedLengthVector()) {
10682 assert(VLen);
10683 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10684 }
10685 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10686 InterSubVT = getLMUL1VT(ContainerVecVT);
10687 // Extract a subvector equal to the nearest full vector register type. This
10688 // should resolve to a EXTRACT_SUBREG instruction.
10689 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10690 DAG.getVectorIdxConstant(AlignedIdx, DL));
10691 }
10692
10693 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10694 DAG.getUNDEF(InterSubVT), SubVec,
10695 DAG.getVectorIdxConstant(0, DL));
10696
10697 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10698
10699 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10700 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10701
10702 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10703 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10704 if (Subtarget.expandVScale(EndIndex) ==
10705 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10706 Policy = RISCVII::TAIL_AGNOSTIC;
10707
10708 // If we're inserting into the lowest elements, use a tail undisturbed
10709 // vmv.v.v.
10710 if (RemIdx.isZero()) {
10711 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10712 SubVec, VL);
10713 } else {
10714 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10715
10716 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10717 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10718
10719 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10720 SlideupAmt, Mask, VL, Policy);
10721 }
10722
10723 // If required, insert this subvector back into the correct vector register.
10724 // This should resolve to an INSERT_SUBREG instruction.
10725 if (ContainerVecVT.bitsGT(InterSubVT))
10726 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10727 DAG.getVectorIdxConstant(AlignedIdx, DL));
10728
10729 if (VecVT.isFixedLengthVector())
10730 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10731
10732 // We might have bitcast from a mask type: cast back to the original type if
10733 // required.
10734 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10735}
10736
10737SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10738 SelectionDAG &DAG) const {
10739 SDValue Vec = Op.getOperand(0);
10740 MVT SubVecVT = Op.getSimpleValueType();
10741 MVT VecVT = Vec.getSimpleValueType();
10742
10743 SDLoc DL(Op);
10744 MVT XLenVT = Subtarget.getXLenVT();
10745 unsigned OrigIdx = Op.getConstantOperandVal(1);
10746 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10747
10748 // With an index of 0 this is a cast-like subvector, which can be performed
10749 // with subregister operations.
10750 if (OrigIdx == 0)
10751 return Op;
10752
10753 // We don't have the ability to slide mask vectors down indexed by their i1
10754 // elements; the smallest we can do is i8. Often we are able to bitcast to
10755 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10756 // from a scalable one, we might not necessarily have enough scalable
10757 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10758 if (SubVecVT.getVectorElementType() == MVT::i1) {
10759 if (VecVT.getVectorMinNumElements() >= 8 &&
10760 SubVecVT.getVectorMinNumElements() >= 8) {
10761 assert(OrigIdx % 8 == 0 && "Invalid index");
10762 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10763 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10764 "Unexpected mask vector lowering");
10765 OrigIdx /= 8;
10766 SubVecVT =
10767 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10768 SubVecVT.isScalableVector());
10769 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10770 VecVT.isScalableVector());
10771 Vec = DAG.getBitcast(VecVT, Vec);
10772 } else {
10773 // We can't slide this mask vector down, indexed by its i1 elements.
10774 // This poses a problem when we wish to extract a scalable vector which
10775 // can't be re-expressed as a larger type. Just choose the slow path and
10776 // extend to a larger type, then truncate back down.
10777 // TODO: We could probably improve this when extracting certain fixed
10778 // from fixed, where we can extract as i8 and shift the correct element
10779 // right to reach the desired subvector?
10780 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10781 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10782 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10783 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10784 Op.getOperand(1));
10785 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10786 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10787 }
10788 }
10789
10790 const auto VLen = Subtarget.getRealVLen();
10791
10792 // If the subvector is a fixed-length type and we don't know VLEN
10793 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10794 // don't know which register of a LMUL group contains the specific subvector
10795 // as we only know the minimum register size. Therefore we must slide the
10796 // vector group down the full amount.
10797 if (SubVecVT.isFixedLengthVector() && !VLen) {
10798 MVT ContainerVT = VecVT;
10799 if (VecVT.isFixedLengthVector()) {
10800 ContainerVT = getContainerForFixedLengthVector(VecVT);
10801 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10802 }
10803
10804 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10805 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10806 if (auto ShrunkVT =
10807 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10808 ContainerVT = *ShrunkVT;
10809 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10810 DAG.getVectorIdxConstant(0, DL));
10811 }
10812
10813 SDValue Mask =
10814 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10815 // Set the vector length to only the number of elements we care about. This
10816 // avoids sliding down elements we're going to discard straight away.
10817 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10818 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10819 SDValue Slidedown =
10820 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10821 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10822 // Now we can use a cast-like subvector extract to get the result.
10823 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10824 DAG.getVectorIdxConstant(0, DL));
10825 return DAG.getBitcast(Op.getValueType(), Slidedown);
10826 }
10827
10828 if (VecVT.isFixedLengthVector()) {
10829 VecVT = getContainerForFixedLengthVector(VecVT);
10830 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10831 }
10832
10833 MVT ContainerSubVecVT = SubVecVT;
10834 if (SubVecVT.isFixedLengthVector())
10835 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10836
10837 unsigned SubRegIdx;
10838 ElementCount RemIdx;
10839 // extract_subvector scales the index by vscale if the subvector is scalable,
10840 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10841 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10842 if (SubVecVT.isFixedLengthVector()) {
10843 assert(VLen);
10844 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10845 auto Decompose =
10846 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10847 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10848 SubRegIdx = Decompose.first;
10849 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10850 (OrigIdx % Vscale));
10851 } else {
10852 auto Decompose =
10853 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10854 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10855 SubRegIdx = Decompose.first;
10856 RemIdx = ElementCount::getScalable(Decompose.second);
10857 }
10858
10859 // If the Idx has been completely eliminated then this is a subvector extract
10860 // which naturally aligns to a vector register. These can easily be handled
10861 // using subregister manipulation. We use an extract_subvector that will
10862 // resolve to an extract subreg.
10863 if (RemIdx.isZero()) {
10864 if (SubVecVT.isFixedLengthVector()) {
10865 assert(VLen);
10866 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10867 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10868 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10869 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10870 }
10871 return Op;
10872 }
10873
10874 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10875 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10876 // divide exactly.
10877 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10878 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10879
10880 // If the vector type is an LMUL-group type, extract a subvector equal to the
10881 // nearest full vector register type.
10882 MVT InterSubVT = VecVT;
10883 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10884 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10885 // we should have successfully decomposed the extract into a subregister.
10886 // We use an extract_subvector that will resolve to a subreg extract.
10887 assert(SubRegIdx != RISCV::NoSubRegister);
10888 (void)SubRegIdx;
10889 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10890 if (SubVecVT.isFixedLengthVector()) {
10891 assert(VLen);
10892 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10893 }
10894 InterSubVT = getLMUL1VT(VecVT);
10895 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10896 DAG.getConstant(Idx, DL, XLenVT));
10897 }
10898
10899 // Slide this vector register down by the desired number of elements in order
10900 // to place the desired subvector starting at element 0.
10901 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10902 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10903 if (SubVecVT.isFixedLengthVector())
10904 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10905 SDValue Slidedown =
10906 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10907 Vec, SlidedownAmt, Mask, VL);
10908
10909 // Now the vector is in the right position, extract our final subvector. This
10910 // should resolve to a COPY.
10911 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10912 DAG.getVectorIdxConstant(0, DL));
10913
10914 // We might have bitcast from a mask type: cast back to the original type if
10915 // required.
10916 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10917}
10918
10919// Widen a vector's operands to i8, then truncate its results back to the
10920// original type, typically i1. All operand and result types must be the same.
10921static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10922 SelectionDAG &DAG) {
10923 MVT VT = N.getSimpleValueType();
10924 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10925 SmallVector<SDValue, 4> WideOps;
10926 for (SDValue Op : N->ops()) {
10927 assert(Op.getSimpleValueType() == VT &&
10928 "Operands and result must be same type");
10929 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10930 }
10931
10932 unsigned NumVals = N->getNumValues();
10933
10934 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10935 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10936 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10937 SmallVector<SDValue, 4> TruncVals;
10938 for (unsigned I = 0; I < NumVals; I++) {
10939 TruncVals.push_back(
10940 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10941 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10942 }
10943
10944 if (TruncVals.size() > 1)
10945 return DAG.getMergeValues(TruncVals, DL);
10946 return TruncVals.front();
10947}
10948
10949SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10950 SelectionDAG &DAG) const {
10951 SDLoc DL(Op);
10952 MVT VecVT = Op.getSimpleValueType();
10953
10954 assert(VecVT.isScalableVector() &&
10955 "vector_deinterleave on non-scalable vector!");
10956
10957 // 1 bit element vectors need to be widened to e8
10958 if (VecVT.getVectorElementType() == MVT::i1)
10959 return widenVectorOpsToi8(Op, DL, DAG);
10960
10961 // If the VT is LMUL=8, we need to split and reassemble.
10962 if (VecVT.getSizeInBits().getKnownMinValue() ==
10963 (8 * RISCV::RVVBitsPerBlock)) {
10964 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10965 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10966 EVT SplitVT = Op0Lo.getValueType();
10967
10968 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10969 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10970 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10971 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10972
10973 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10974 ResLo.getValue(0), ResHi.getValue(0));
10975 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10976 ResHi.getValue(1));
10977 return DAG.getMergeValues({Even, Odd}, DL);
10978 }
10979
10980 // Concatenate the two vectors as one vector to deinterleave
10981 MVT ConcatVT =
10982 MVT::getVectorVT(VecVT.getVectorElementType(),
10983 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10984 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10985 Op.getOperand(0), Op.getOperand(1));
10986
10987 // We can deinterleave through vnsrl.wi if the element type is smaller than
10988 // ELEN
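// (Roughly: view each adjacent pair as one 2*SEW element; a narrowing shift
// by 0 keeps the even elements and a narrowing shift by SEW keeps the odd
// ones.)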
10989 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10990 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
10991 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
10992 return DAG.getMergeValues({Even, Odd}, DL);
10993 }
10994
10995 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
10996 // possibly mask vector, then extract the required subvector. Doing this
10997 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
10998 // creation to be rematerialized during register allocation to reduce
10999 // register pressure if needed.
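// Splatting the byte 0b01010101 and bitcasting it to a mask type produces the
// repeating 1,0,1,0,... pattern that selects the even elements; 0b10101010
// selects the odd elements.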
11000
11001 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11002
11003 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11004 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11005 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11006 DAG.getVectorIdxConstant(0, DL));
11007
11008 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11009 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11010 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11011 DAG.getVectorIdxConstant(0, DL));
11012
11013 // vcompress the even and odd elements into two separate vectors
11014 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11015 EvenMask, DAG.getUNDEF(ConcatVT));
11016 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11017 OddMask, DAG.getUNDEF(ConcatVT));
11018
11019 // Extract the result half of the gather for even and odd
11020 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11021 DAG.getVectorIdxConstant(0, DL));
11022 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11023 DAG.getVectorIdxConstant(0, DL));
11024
11025 return DAG.getMergeValues({Even, Odd}, DL);
11026}
11027
11028SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11029 SelectionDAG &DAG) const {
11030 SDLoc DL(Op);
11031 MVT VecVT = Op.getSimpleValueType();
11032
11033 assert(VecVT.isScalableVector() &&
11034 "vector_interleave on non-scalable vector!");
11035
11036 // i1 vectors need to be widened to i8
11037 if (VecVT.getVectorElementType() == MVT::i1)
11038 return widenVectorOpsToi8(Op, DL, DAG);
11039
11040 MVT XLenVT = Subtarget.getXLenVT();
11041 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11042
11043 // If the VT is LMUL=8, we need to split and reassemble.
11044 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11045 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11046 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11047 EVT SplitVT = Op0Lo.getValueType();
11048
11049 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11050 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11051 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11052 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11053
11054 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11055 ResLo.getValue(0), ResLo.getValue(1));
11056 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11057 ResHi.getValue(0), ResHi.getValue(1));
11058 return DAG.getMergeValues({Lo, Hi}, DL);
11059 }
11060
11061 SDValue Interleaved;
11062
11063 // If the element type is smaller than ELEN, then we can interleave with
11064 // vwaddu.vv and vwmaccu.vx
11065 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11066 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11067 DAG, Subtarget);
11068 } else {
11069 // Otherwise, fallback to using vrgathere16.vv
11070 MVT ConcatVT =
11071 MVT::getVectorVT(VecVT.getVectorElementType(),
11072 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11073 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11074 Op.getOperand(0), Op.getOperand(1));
11075
11076 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11077
11078 // 0 1 2 3 4 5 6 7 ...
11079 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11080
11081 // 1 1 1 1 1 1 1 1 ...
11082 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11083
11084 // 1 0 1 0 1 0 1 0 ...
11085 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11086 OddMask = DAG.getSetCC(
11087 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11088 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11089 ISD::SETNE);
11090
11091 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11092
11093 // Build up the index vector for interleaving the concatenated vector
11094 // 0 0 1 1 2 2 3 3 ...
11095 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11096 // 0 n 1 n+1 2 n+2 3 n+3 ...
11097 Idx =
11098 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11099
11100 // Then perform the interleave
11101 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11102 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11103 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11104 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11105 }
11106
11107 // Extract the two halves from the interleaved result
11108 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11109 DAG.getVectorIdxConstant(0, DL));
11110 SDValue Hi = DAG.getNode(
11111 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11112 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11113
11114 return DAG.getMergeValues({Lo, Hi}, DL);
11115}
11116
11117// Lower step_vector to the vid instruction. Any non-identity step value must
11118// be accounted for by manual expansion.
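// For example, a constant step of 4 lowers to vid.v followed by a left shift
// by 2, while a non-power-of-two step lowers to vid.v followed by a multiply
// by the splatted step value.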
11119SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11120 SelectionDAG &DAG) const {
11121 SDLoc DL(Op);
11122 MVT VT = Op.getSimpleValueType();
11123 assert(VT.isScalableVector() && "Expected scalable vector");
11124 MVT XLenVT = Subtarget.getXLenVT();
11125 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11126 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11127 uint64_t StepValImm = Op.getConstantOperandVal(0);
11128 if (StepValImm != 1) {
11129 if (isPowerOf2_64(StepValImm)) {
11130 SDValue StepVal =
11131 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11132 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11133 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11134 } else {
11135 SDValue StepVal = lowerScalarSplat(
11136 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11137 VL, VT, DL, DAG, Subtarget);
11138 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11139 }
11140 }
11141 return StepVec;
11142}
11143
11144// Implement vector_reverse using vrgather.vv with indices determined by
11145// subtracting the id of each element from (VLMAX-1). This will convert
11146// the indices like so:
11147// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11148// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
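// For example, with VLMAX = 8 the gather indices are 7 6 5 4 3 2 1 0.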
11149SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11150 SelectionDAG &DAG) const {
11151 SDLoc DL(Op);
11152 MVT VecVT = Op.getSimpleValueType();
11153 if (VecVT.getVectorElementType() == MVT::i1) {
11154 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11155 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11156 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11157 return DAG.getSetCC(DL, VecVT, Op2,
11158 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11159 }
11160
11161 MVT ContainerVT = VecVT;
11162 SDValue Vec = Op.getOperand(0);
11163 if (VecVT.isFixedLengthVector()) {
11164 ContainerVT = getContainerForFixedLengthVector(VecVT);
11165 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11166 }
11167
11168 MVT XLenVT = Subtarget.getXLenVT();
11169 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11170
11171 // On some uarchs vrgather.vv will read from every input register for each
11172 // output register, regardless of the indices. However to reverse a vector
11173 // each output register only needs to read from one register. So decompose it
11174 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
11175 // O(LMUL^2).
11176 //
11177 // vsetvli a1, zero, e64, m4, ta, ma
11178 // vrgatherei16.vv v12, v8, v16
11179 // ->
11180 // vsetvli a1, zero, e64, m1, ta, ma
11181 // vrgather.vv v15, v8, v16
11182 // vrgather.vv v14, v9, v16
11183 // vrgather.vv v13, v10, v16
11184 // vrgather.vv v12, v11, v16
11185 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11186 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11187 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11188 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11189 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11190 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11191
11192 // Fixed length vectors might not fit exactly into their container, and so
11193 // leave a gap in the front of the vector after being reversed. Slide this
11194 // away.
11195 //
11196 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11197 // 0 1 2 3 x x x x <- reverse
11198 // x x x x 0 1 2 3 <- vslidedown.vx
11199 if (VecVT.isFixedLengthVector()) {
11200 SDValue Offset = DAG.getNode(
11201 ISD::SUB, DL, XLenVT,
11202 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11203 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11204 Concat =
11205 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11206 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11207 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11208 }
11209 return Concat;
11210 }
11211
11212 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11213 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11214 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11215 unsigned MaxVLMAX =
11216 VecVT.isFixedLengthVector()
11217 ? VecVT.getVectorNumElements()
11218 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11219
11220 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11221 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11222
11223 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11224 // to use vrgatherei16.vv.
11225 if (MaxVLMAX > 256 && EltSize == 8) {
11226 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11227 // Reverse each half, then reassemble them in reverse order.
11228 // NOTE: It's also possible that, after splitting, VLMAX no longer
11229 // requires vrgatherei16.vv.
11230 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11231 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11232 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11233 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11234 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11235 // Reassemble the low and high pieces reversed.
11236 // FIXME: This is a CONCAT_VECTORS.
11237 SDValue Res =
11238 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11239 DAG.getVectorIdxConstant(0, DL));
11240 return DAG.getNode(
11241 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11242 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11243 }
11244
11245 // Just promote the int type to i16 which will double the LMUL.
11246 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11247 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11248 }
11249
11250 // At LMUL > 1, do the index computation in 16 bits to reduce register
11251 // pressure.
11252 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11253 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11254 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11255 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11256 IntVT = IntVT.changeVectorElementType(MVT::i16);
11257 }
11258
11259 // Calculate VLMAX-1 for the desired SEW.
11260 SDValue VLMinus1 = DAG.getNode(
11261 ISD::SUB, DL, XLenVT,
11262 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11263 DAG.getConstant(1, DL, XLenVT));
11264
11265 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11266 bool IsRV32E64 =
11267 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11268 SDValue SplatVL;
11269 if (!IsRV32E64)
11270 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11271 else
11272 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11273 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11274
11275 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11276 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11277 DAG.getUNDEF(IntVT), Mask, VL);
11278
11279 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11280 DAG.getUNDEF(ContainerVT), Mask, VL);
11281 if (VecVT.isFixedLengthVector())
11282 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11283 return Gather;
11284}
11285
11286SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11287 SelectionDAG &DAG) const {
11288 SDLoc DL(Op);
11289 SDValue V1 = Op.getOperand(0);
11290 SDValue V2 = Op.getOperand(1);
11291 MVT XLenVT = Subtarget.getXLenVT();
11292 MVT VecVT = Op.getSimpleValueType();
11293
11294 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11295
11296 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11297 SDValue DownOffset, UpOffset;
11298 if (ImmValue >= 0) {
11299 // The operand is a TargetConstant; we need to rebuild it as a regular
11300 // constant.
11301 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11302 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11303 } else {
11304 // The operand is a TargetConstant; we need to rebuild it as a regular
11305 // constant rather than negating the original operand.
11306 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11307 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11308 }
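// For example, with VLMAX = 8 and an immediate of 3, DownOffset = 3 and
// UpOffset = 5: the slidedown leaves V1[3..7] in elements 0..4, and the
// slideup then writes V2[0..2] into elements 5..7.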
11309
11310 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11311
11312 SDValue SlideDown =
11313 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11314 DownOffset, TrueMask, UpOffset);
11315 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11316 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11317 RISCVII::TAIL_AGNOSTIC);
11318}
11319
11320SDValue
11321RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11322 SelectionDAG &DAG) const {
11323 SDLoc DL(Op);
11324 auto *Load = cast<LoadSDNode>(Op);
11325
11326 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11327 Load->getMemoryVT(),
11328 *Load->getMemOperand()) &&
11329 "Expecting a correctly-aligned load");
11330
11331 MVT VT = Op.getSimpleValueType();
11332 MVT XLenVT = Subtarget.getXLenVT();
11333 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11334
11335 // If we know the exact VLEN and our fixed length vector completely fills
11336 // the container, use a whole register load instead.
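// For example, with an exactly known VLEN of 128, a v4i32 load fills its
// nxv2i32 (LMUL=1) container exactly, so an ordinary load of the container
// type (a whole register load) is emitted instead of a VL-bound vle.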
11337 const auto [MinVLMAX, MaxVLMAX] =
11338 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11339 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11340 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11341 MachineMemOperand *MMO = Load->getMemOperand();
11342 SDValue NewLoad =
11343 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11344 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11345 MMO->getAAInfo(), MMO->getRanges());
11346 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11347 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11348 }
11349
11350 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11351
11352 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11353 SDValue IntID = DAG.getTargetConstant(
11354 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11355 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11356 if (!IsMaskOp)
11357 Ops.push_back(DAG.getUNDEF(ContainerVT));
11358 Ops.push_back(Load->getBasePtr());
11359 Ops.push_back(VL);
11360 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11361 SDValue NewLoad =
11362 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11363 Load->getMemoryVT(), Load->getMemOperand());
11364
11365 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11366 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11367}
11368
11369SDValue
11370RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11371 SelectionDAG &DAG) const {
11372 SDLoc DL(Op);
11373 auto *Store = cast<StoreSDNode>(Op);
11374
11375 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11376 Store->getMemoryVT(),
11377 *Store->getMemOperand()) &&
11378 "Expecting a correctly-aligned store");
11379
11380 SDValue StoreVal = Store->getValue();
11381 MVT VT = StoreVal.getSimpleValueType();
11382 MVT XLenVT = Subtarget.getXLenVT();
11383
11384 // If the size is less than a byte, we need to pad with zeros to make a full byte.
11385 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11386 VT = MVT::v8i1;
11387 StoreVal =
11388 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11389 StoreVal, DAG.getVectorIdxConstant(0, DL));
11390 }
11391
11392 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11393
11394 SDValue NewValue =
11395 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11396
11397 // If we know the exact VLEN and our fixed length vector completely fills
11398 // the container, use a whole register store instead.
11399 const auto [MinVLMAX, MaxVLMAX] =
11400 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11401 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11402 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11403 MachineMemOperand *MMO = Store->getMemOperand();
11404 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11405 MMO->getPointerInfo(), MMO->getBaseAlign(),
11406 MMO->getFlags(), MMO->getAAInfo());
11407 }
11408
11409 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11410
11411 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11412 SDValue IntID = DAG.getTargetConstant(
11413 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11414 return DAG.getMemIntrinsicNode(
11415 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11416 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11417 Store->getMemoryVT(), Store->getMemOperand());
11418}
11419
11420SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11421 SelectionDAG &DAG) const {
11422 SDLoc DL(Op);
11423 MVT VT = Op.getSimpleValueType();
11424
11425 const auto *MemSD = cast<MemSDNode>(Op);
11426 EVT MemVT = MemSD->getMemoryVT();
11427 MachineMemOperand *MMO = MemSD->getMemOperand();
11428 SDValue Chain = MemSD->getChain();
11429 SDValue BasePtr = MemSD->getBasePtr();
11430
11431 SDValue Mask, PassThru, VL;
11432 bool IsExpandingLoad = false;
11433 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11434 Mask = VPLoad->getMask();
11435 PassThru = DAG.getUNDEF(VT);
11436 VL = VPLoad->getVectorLength();
11437 } else {
11438 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11439 Mask = MLoad->getMask();
11440 PassThru = MLoad->getPassThru();
11441 IsExpandingLoad = MLoad->isExpandingLoad();
11442 }
11443
11444 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11445
11446 MVT XLenVT = Subtarget.getXLenVT();
11447
11448 MVT ContainerVT = VT;
11449 if (VT.isFixedLengthVector()) {
11450 ContainerVT = getContainerForFixedLengthVector(VT);
11451 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11452 if (!IsUnmasked) {
11453 MVT MaskVT = getMaskTypeFor(ContainerVT);
11454 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11455 }
11456 }
11457
11458 if (!VL)
11459 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11460
11461 SDValue ExpandingVL;
11462 if (!IsUnmasked && IsExpandingLoad) {
11463 ExpandingVL = VL;
11464 VL =
11465 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11466 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11467 }
11468
11469 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11470 : Intrinsic::riscv_vle_mask;
11471 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11472 if (IntID == Intrinsic::riscv_vle)
11473 Ops.push_back(DAG.getUNDEF(ContainerVT));
11474 else
11475 Ops.push_back(PassThru);
11476 Ops.push_back(BasePtr);
11477 if (IntID == Intrinsic::riscv_vle_mask)
11478 Ops.push_back(Mask);
11479 Ops.push_back(VL);
11480 if (IntID == Intrinsic::riscv_vle_mask)
11481 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11482
11483 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11484
11485 SDValue Result =
11486 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11487 Chain = Result.getValue(1);
11488 if (ExpandingVL) {
11489 MVT IndexVT = ContainerVT;
11490 if (ContainerVT.isFloatingPoint())
11491 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11492
11493 MVT IndexEltVT = IndexVT.getVectorElementType();
11494 bool UseVRGATHEREI16 = false;
11495 // If the index vector is an i8 vector and the element count exceeds 256, we
11496 // should change the element type of the index vector to i16 to avoid
11497 // overflow.
11498 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11499 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11500 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11501 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11502 UseVRGATHEREI16 = true;
11503 }
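// For example, for a mask of 1 0 1 1 the load above fetches the three packed
// elements d0 d1 d2, viota produces the indices 0 1 1 2, and the masked
// vrgather below places d0, d1, d2 into the first, third and fourth lanes
// while the inactive lane keeps the passthru value.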
11504
11505 SDValue Iota =
11506 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11507 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11508 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11509 Result =
11510 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11511 : RISCVISD::VRGATHER_VV_VL,
11512 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11513 }
11514
11515 if (VT.isFixedLengthVector())
11516 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11517
11518 return DAG.getMergeValues({Result, Chain}, DL);
11519}
11520
11521SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11522 SelectionDAG &DAG) const {
11523 SDLoc DL(Op);
11524
11525 const auto *MemSD = cast<MemSDNode>(Op);
11526 EVT MemVT = MemSD->getMemoryVT();
11527 MachineMemOperand *MMO = MemSD->getMemOperand();
11528 SDValue Chain = MemSD->getChain();
11529 SDValue BasePtr = MemSD->getBasePtr();
11530 SDValue Val, Mask, VL;
11531
11532 bool IsCompressingStore = false;
11533 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11534 Val = VPStore->getValue();
11535 Mask = VPStore->getMask();
11536 VL = VPStore->getVectorLength();
11537 } else {
11538 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11539 Val = MStore->getValue();
11540 Mask = MStore->getMask();
11541 IsCompressingStore = MStore->isCompressingStore();
11542 }
11543
11544 bool IsUnmasked =
11545 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11546
11547 MVT VT = Val.getSimpleValueType();
11548 MVT XLenVT = Subtarget.getXLenVT();
11549
11550 MVT ContainerVT = VT;
11551 if (VT.isFixedLengthVector()) {
11552 ContainerVT = getContainerForFixedLengthVector(VT);
11553
11554 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11555 if (!IsUnmasked || IsCompressingStore) {
11556 MVT MaskVT = getMaskTypeFor(ContainerVT);
11557 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11558 }
11559 }
11560
11561 if (!VL)
11562 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11563
11564 if (IsCompressingStore) {
11565 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11566 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11567 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11568 VL =
11569 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11570 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11571 }
11572
11573 unsigned IntID =
11574 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11575 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11576 Ops.push_back(Val);
11577 Ops.push_back(BasePtr);
11578 if (!IsUnmasked)
11579 Ops.push_back(Mask);
11580 Ops.push_back(VL);
11581
11582 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11583 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11584}
11585
11586SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11587 SelectionDAG &DAG) const {
11588 SDLoc DL(Op);
11589 SDValue Val = Op.getOperand(0);
11590 SDValue Mask = Op.getOperand(1);
11591 SDValue Passthru = Op.getOperand(2);
11592
11593 MVT VT = Val.getSimpleValueType();
11594 MVT XLenVT = Subtarget.getXLenVT();
11595 MVT ContainerVT = VT;
11596 if (VT.isFixedLengthVector()) {
11597 ContainerVT = getContainerForFixedLengthVector(VT);
11598 MVT MaskVT = getMaskTypeFor(ContainerVT);
11599 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11600 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11601 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11602 }
11603
11604 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11605 SDValue Res =
11606 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11607 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11608 Passthru, Val, Mask, VL);
11609
11610 if (VT.isFixedLengthVector())
11611 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11612
11613 return Res;
11614}
11615
11616SDValue
11617RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 MVT InVT = Op.getOperand(0).getSimpleValueType();
11620 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11621
11622 MVT VT = Op.getSimpleValueType();
11623
11624 SDValue Op1 =
11625 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11626 SDValue Op2 =
11627 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11628
11629 SDLoc DL(Op);
11630 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11631 DAG, Subtarget);
11632 MVT MaskVT = getMaskTypeFor(ContainerVT);
11633
11634 SDValue Cmp =
11635 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11636 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11637
11638 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11639}
11640
11641SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11642 SelectionDAG &DAG) const {
11643 unsigned Opc = Op.getOpcode();
11644 SDLoc DL(Op);
11645 SDValue Chain = Op.getOperand(0);
11646 SDValue Op1 = Op.getOperand(1);
11647 SDValue Op2 = Op.getOperand(2);
11648 SDValue CC = Op.getOperand(3);
11649 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11650 MVT VT = Op.getSimpleValueType();
11651 MVT InVT = Op1.getSimpleValueType();
11652
11653 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
11654 // condition code.
11655 if (Opc == ISD::STRICT_FSETCCS) {
11656 // Expand strict_fsetccs(x, oeq) to
11657 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11658 SDVTList VTList = Op->getVTList();
11659 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11660 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11661 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11662 Op2, OLECCVal);
11663 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11664 Op1, OLECCVal);
11665 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11666 Tmp1.getValue(1), Tmp2.getValue(1));
11667 // Tmp1 and Tmp2 might be the same node.
11668 if (Tmp1 != Tmp2)
11669 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11670 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11671 }
11672
11673 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11674 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11675 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11676 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11677 Op2, OEQCCVal);
11678 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11679 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11680 }
11681 }
11682
11683 MVT ContainerInVT = InVT;
11684 if (InVT.isFixedLengthVector()) {
11685 ContainerInVT = getContainerForFixedLengthVector(InVT);
11686 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11687 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11688 }
11689 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11690
11691 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11692
11693 SDValue Res;
11694 if (Opc == ISD::STRICT_FSETCC &&
11695 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11696 CCVal == ISD::SETOLE)) {
11697 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11698 // is only active when both input elements are ordered.
11699 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11700 SDValue OrderMask1 = DAG.getNode(
11701 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11702 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11703 True, VL});
11704 SDValue OrderMask2 = DAG.getNode(
11705 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11706 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11707 True, VL});
11708 Mask =
11709 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11710 // Use Mask as the passthru operand to let the result be 0 if either of the
11711 // inputs is unordered.
11712 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11713 DAG.getVTList(MaskVT, MVT::Other),
11714 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11715 } else {
11716 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11717 : RISCVISD::STRICT_FSETCCS_VL;
11718 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11719 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11720 }
11721
11722 if (VT.isFixedLengthVector()) {
11723 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11724 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11725 }
11726 return Res;
11727}
11728
11729// Lower vector ABS to smax(X, sub(0, X)).
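// For example, abs(-5) = smax(-5, 0 - (-5)) = smax(-5, 5) = 5.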
11730SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11731 SDLoc DL(Op);
11732 MVT VT = Op.getSimpleValueType();
11733 SDValue X = Op.getOperand(0);
11734
11735 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11736 "Unexpected type for ISD::ABS");
11737
11738 MVT ContainerVT = VT;
11739 if (VT.isFixedLengthVector()) {
11740 ContainerVT = getContainerForFixedLengthVector(VT);
11741 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11742 }
11743
11744 SDValue Mask, VL;
11745 if (Op->getOpcode() == ISD::VP_ABS) {
11746 Mask = Op->getOperand(1);
11747 if (VT.isFixedLengthVector())
11748 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11749 Subtarget);
11750 VL = Op->getOperand(2);
11751 } else
11752 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11753
11754 SDValue SplatZero = DAG.getNode(
11755 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11756 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11757 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11758 DAG.getUNDEF(ContainerVT), Mask, VL);
11759 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11760 DAG.getUNDEF(ContainerVT), Mask, VL);
11761
11762 if (VT.isFixedLengthVector())
11763 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11764 return Max;
11765}
11766
11767SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11768 SDValue Op, SelectionDAG &DAG) const {
11769 SDLoc DL(Op);
11770 MVT VT = Op.getSimpleValueType();
11771 SDValue Mag = Op.getOperand(0);
11772 SDValue Sign = Op.getOperand(1);
11773 assert(Mag.getValueType() == Sign.getValueType() &&
11774 "Can only handle COPYSIGN with matching types.");
11775
11776 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11777 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11778 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11779
11780 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11781
11782 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11783 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11784
11785 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11786}
11787
11788SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11789 SDValue Op, SelectionDAG &DAG) const {
11790 MVT VT = Op.getSimpleValueType();
11791 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11792
11793 MVT I1ContainerVT =
11794 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11795
11796 SDValue CC =
11797 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11798 SDValue Op1 =
11799 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11800 SDValue Op2 =
11801 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11802
11803 SDLoc DL(Op);
11804 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11805
11806 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11807 Op2, DAG.getUNDEF(ContainerVT), VL);
11808
11809 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11810}
11811
11812SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11813 SelectionDAG &DAG) const {
11814 unsigned NewOpc = getRISCVVLOp(Op);
11815 bool HasPassthruOp = hasPassthruOp(NewOpc);
11816 bool HasMask = hasMaskOp(NewOpc);
11817
11818 MVT VT = Op.getSimpleValueType();
11819 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11820
11821 // Create list of operands by converting existing ones to scalable types.
11822 SmallVector<SDValue, 6> Ops;
11823 for (const SDValue &V : Op->op_values()) {
11824 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11825
11826 // Pass through non-vector operands.
11827 if (!V.getValueType().isVector()) {
11828 Ops.push_back(V);
11829 continue;
11830 }
11831
11832 // "cast" fixed length vector to a scalable vector.
11833 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11834 "Only fixed length vectors are supported!");
11835 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11836 }
11837
11838 SDLoc DL(Op);
11839 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11840 if (HasPassthruOp)
11841 Ops.push_back(DAG.getUNDEF(ContainerVT));
11842 if (HasMask)
11843 Ops.push_back(Mask);
11844 Ops.push_back(VL);
11845
11846 // StrictFP operations have two result values. Their lowered result should
11847 // have the same result count.
11848 if (Op->isStrictFPOpcode()) {
11849 SDValue ScalableRes =
11850 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11851 Op->getFlags());
11852 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11853 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11854 }
11855
11856 SDValue ScalableRes =
11857 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11858 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11859}
11860
11861// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11862// * Operands of each node are assumed to be in the same order.
11863// * The EVL operand is promoted from i32 to i64 on RV64.
11864// * Fixed-length vectors are converted to their scalable-vector container
11865// types.
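// For example, an ISD::VP_ADD node (x, y, mask, evl) becomes
// RISCVISD::ADD_VL (x, y, undef passthru, mask, evl), with fixed-length x and
// y first cast into their scalable container type.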
11866SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11867 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11868 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11869
11870 SDLoc DL(Op);
11871 MVT VT = Op.getSimpleValueType();
11872 SmallVector<SDValue, 4> Ops;
11873
11874 MVT ContainerVT = VT;
11875 if (VT.isFixedLengthVector())
11876 ContainerVT = getContainerForFixedLengthVector(VT);
11877
11878 for (const auto &OpIdx : enumerate(Op->ops())) {
11879 SDValue V = OpIdx.value();
11880 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11881 // Add a dummy passthru value before the mask, or, if there isn't a mask,
11882 // before the EVL.
11883 if (HasPassthruOp) {
11884 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11885 if (MaskIdx) {
11886 if (*MaskIdx == OpIdx.index())
11887 Ops.push_back(DAG.getUNDEF(ContainerVT));
11888 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11889 OpIdx.index()) {
11890 if (Op.getOpcode() == ISD::VP_MERGE) {
11891 // For VP_MERGE, copy the false operand instead of an undef value.
11892 Ops.push_back(Ops.back());
11893 } else {
11894 assert(Op.getOpcode() == ISD::VP_SELECT);
11895 // For VP_SELECT, add an undef value.
11896 Ops.push_back(DAG.getUNDEF(ContainerVT));
11897 }
11898 }
11899 }
11900 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11901 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11902 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11903 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11904 Subtarget.getXLenVT()));
11905 // Pass through operands which aren't fixed-length vectors.
11906 if (!V.getValueType().isFixedLengthVector()) {
11907 Ops.push_back(V);
11908 continue;
11909 }
11910 // "cast" fixed length vector to a scalable vector.
11911 MVT OpVT = V.getSimpleValueType();
11912 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11913 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11914 "Only fixed length vectors are supported!");
11915 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11916 }
11917
11918 if (!VT.isFixedLengthVector())
11919 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11920
11921 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11922
11923 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11924}
11925
11926SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11927 SelectionDAG &DAG) const {
11928 SDLoc DL(Op);
11929 MVT VT = Op.getSimpleValueType();
11930
11931 SDValue Src = Op.getOperand(0);
11932 // NOTE: Mask is dropped.
11933 SDValue VL = Op.getOperand(2);
11934
11935 MVT ContainerVT = VT;
11936 if (VT.isFixedLengthVector()) {
11937 ContainerVT = getContainerForFixedLengthVector(VT);
11938 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11939 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11940 }
11941
11942 MVT XLenVT = Subtarget.getXLenVT();
11943 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11944 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11945 DAG.getUNDEF(ContainerVT), Zero, VL);
11946
11947 SDValue SplatValue = DAG.getSignedConstant(
11948 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11949 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11950 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11951
11952 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11953 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11954 if (!VT.isFixedLengthVector())
11955 return Result;
11956 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11957}
11958
11959SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11960 SelectionDAG &DAG) const {
11961 SDLoc DL(Op);
11962 MVT VT = Op.getSimpleValueType();
11963
11964 SDValue Op1 = Op.getOperand(0);
11965 SDValue Op2 = Op.getOperand(1);
11966 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11967 // NOTE: Mask is dropped.
11968 SDValue VL = Op.getOperand(4);
11969
11970 MVT ContainerVT = VT;
11971 if (VT.isFixedLengthVector()) {
11972 ContainerVT = getContainerForFixedLengthVector(VT);
11973 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11974 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11975 }
11976
11977 SDValue Result;
11978 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11979
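// An i1 element only holds 0 or 1; interpreted as a signed 1-bit integer the
// bit pattern 1 reads as -1, so the signed and unsigned orderings below are
// mirror images of each other (e.g. X >s Y and X <u Y both mean X == 0 and
// Y == 1).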
11980 switch (Condition) {
11981 default:
11982 break;
11983 // X != Y --> (X^Y)
11984 case ISD::SETNE:
11985 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11986 break;
11987 // X == Y --> ~(X^Y)
11988 case ISD::SETEQ: {
11989 SDValue Temp =
11990 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11991 Result =
11992 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11993 break;
11994 }
11995 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11996 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11997 case ISD::SETGT:
11998 case ISD::SETULT: {
11999 SDValue Temp =
12000 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12001 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12002 break;
12003 }
12004 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12005 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12006 case ISD::SETLT:
12007 case ISD::SETUGT: {
12008 SDValue Temp =
12009 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12010 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12011 break;
12012 }
12013 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12014 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12015 case ISD::SETGE:
12016 case ISD::SETULE: {
12017 SDValue Temp =
12018 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12019 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12020 break;
12021 }
12022 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12023 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12024 case ISD::SETLE:
12025 case ISD::SETUGE: {
12026 SDValue Temp =
12027 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12028 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12029 break;
12030 }
12031 }
12032
12033 if (!VT.isFixedLengthVector())
12034 return Result;
12035 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12036}
12037
12038// Lower Floating-Point/Integer Type-Convert VP SDNodes
12039SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12040 SelectionDAG &DAG) const {
12041 SDLoc DL(Op);
12042
12043 SDValue Src = Op.getOperand(0);
12044 SDValue Mask = Op.getOperand(1);
12045 SDValue VL = Op.getOperand(2);
12046 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12047
12048 MVT DstVT = Op.getSimpleValueType();
12049 MVT SrcVT = Src.getSimpleValueType();
12050 if (DstVT.isFixedLengthVector()) {
12051 DstVT = getContainerForFixedLengthVector(DstVT);
12052 SrcVT = getContainerForFixedLengthVector(SrcVT);
12053 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12054 MVT MaskVT = getMaskTypeFor(DstVT);
12055 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12056 }
12057
12058 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12059 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12060
12061 SDValue Result;
12062 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12063 if (SrcVT.isInteger()) {
12064 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12065
12066 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12067 ? RISCVISD::VSEXT_VL
12068 : RISCVISD::VZEXT_VL;
12069
12070 // Do we need to do any pre-widening before converting?
12071 if (SrcEltSize == 1) {
12072 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12073 MVT XLenVT = Subtarget.getXLenVT();
12074 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12075 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12076 DAG.getUNDEF(IntVT), Zero, VL);
12077 SDValue One = DAG.getSignedConstant(
12078 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12079 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12080 DAG.getUNDEF(IntVT), One, VL);
12081 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12082 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12083 } else if (DstEltSize > (2 * SrcEltSize)) {
12084 // Widen before converting.
12085 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12086 DstVT.getVectorElementCount());
12087 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12088 }
12089
12090 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12091 } else {
12092 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12093 "Wrong input/output vector types");
12094
12095 // Convert f16 to f32 then convert f32 to i64.
12096 if (DstEltSize > (2 * SrcEltSize)) {
12097 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12098 MVT InterimFVT =
12099 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12100 Src =
12101 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12102 }
12103
12104 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12105 }
12106 } else { // Narrowing + Conversion
12107 if (SrcVT.isInteger()) {
12108 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12109 // First do a narrowing convert to an FP type half the size, then round
12110 // the FP type to a small FP type if needed.
12111
12112 MVT InterimFVT = DstVT;
12113 if (SrcEltSize > (2 * DstEltSize)) {
12114 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12115 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12116 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12117 }
12118
12119 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12120
12121 if (InterimFVT != DstVT) {
12122 Src = Result;
12123 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12124 }
12125 } else {
12126 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12127 "Wrong input/output vector types");
12128 // First do a narrowing conversion to an integer half the size, then
12129 // truncate if needed.
12130
12131 if (DstEltSize == 1) {
12132 // First convert to the same size integer, then convert to mask using
12133 // setcc.
12134 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12135 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12136 DstVT.getVectorElementCount());
12137 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12138
12139 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12140 // otherwise the conversion was undefined.
12141 MVT XLenVT = Subtarget.getXLenVT();
12142 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12143 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12144 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12145 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12146 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12147 DAG.getUNDEF(DstVT), Mask, VL});
12148 } else {
12149 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12150 DstVT.getVectorElementCount());
12151
12152 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12153
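// For example, an f64 -> i8 conversion first produces i32 elements, and the
// loop below then truncates i32 -> i16 -> i8.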
12154 while (InterimIVT != DstVT) {
12155 SrcEltSize /= 2;
12156 Src = Result;
12157 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12158 DstVT.getVectorElementCount());
12159 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12160 Src, Mask, VL);
12161 }
12162 }
12163 }
12164 }
12165
12166 MVT VT = Op.getSimpleValueType();
12167 if (!VT.isFixedLengthVector())
12168 return Result;
12169 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12170}
12171
12172SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12173 SelectionDAG &DAG) const {
12174 SDLoc DL(Op);
12175 MVT VT = Op.getSimpleValueType();
12176 MVT XLenVT = Subtarget.getXLenVT();
12177
12178 SDValue Mask = Op.getOperand(0);
12179 SDValue TrueVal = Op.getOperand(1);
12180 SDValue FalseVal = Op.getOperand(2);
12181 SDValue VL = Op.getOperand(3);
12182
12183 // Use default legalization if a vector of EVL type would be legal.
12184 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12185 VT.getVectorElementCount());
12186 if (isTypeLegal(EVLVecVT))
12187 return SDValue();
12188
12189 MVT ContainerVT = VT;
12190 if (VT.isFixedLengthVector()) {
12191 ContainerVT = getContainerForFixedLengthVector(VT);
12192 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12193 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12194 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12195 }
12196
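// RVV has no merge instruction that operates on mask registers directly, so
// the mask operands are promoted to i8 vectors, merged there, and the result
// is turned back into a mask by comparing against zero.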
12197 // Promote to a vector of i8.
12198 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12199
12200 // Promote TrueVal and FalseVal using VLMax.
12201 // FIXME: Is there a better way to do this?
12202 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12203 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12204 DAG.getUNDEF(PromotedVT),
12205 DAG.getConstant(1, DL, XLenVT), VLMax);
12206 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12207 DAG.getUNDEF(PromotedVT),
12208 DAG.getConstant(0, DL, XLenVT), VLMax);
12209 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12210 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12211 // Any element past VL uses FalseVal, so use VLMax
12212 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12213 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12214
12215 // VP_MERGE the two promoted values.
12216 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12217 TrueVal, FalseVal, FalseVal, VL);
12218
12219 // Convert back to mask.
12220 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12221 SDValue Result = DAG.getNode(
12222 RISCVISD::SETCC_VL, DL, ContainerVT,
12223 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12224 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12225
12226 if (VT.isFixedLengthVector())
12227 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12228 return Result;
12229}
12230
12231SDValue
12232RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12233 SelectionDAG &DAG) const {
12234 SDLoc DL(Op);
12235
12236 SDValue Op1 = Op.getOperand(0);
12237 SDValue Op2 = Op.getOperand(1);
12238 SDValue Offset = Op.getOperand(2);
12239 SDValue Mask = Op.getOperand(3);
12240 SDValue EVL1 = Op.getOperand(4);
12241 SDValue EVL2 = Op.getOperand(5);
12242
12243 const MVT XLenVT = Subtarget.getXLenVT();
12244 MVT VT = Op.getSimpleValueType();
12245 MVT ContainerVT = VT;
12246 if (VT.isFixedLengthVector()) {
12247 ContainerVT = getContainerForFixedLengthVector(VT);
12248 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12249 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12250 MVT MaskVT = getMaskTypeFor(ContainerVT);
12251 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12252 }
12253
12254 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12255 if (IsMaskVector) {
12256 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12257
12258 // Expand input operands
12259 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12260 DAG.getUNDEF(ContainerVT),
12261 DAG.getConstant(1, DL, XLenVT), EVL1);
12262 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12263 DAG.getUNDEF(ContainerVT),
12264 DAG.getConstant(0, DL, XLenVT), EVL1);
12265 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12266 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12267
12268 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12269 DAG.getUNDEF(ContainerVT),
12270 DAG.getConstant(1, DL, XLenVT), EVL2);
12271 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12272 DAG.getUNDEF(ContainerVT),
12273 DAG.getConstant(0, DL, XLenVT), EVL2);
12274 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12275 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12276 }
12277
12278 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12279 SDValue DownOffset, UpOffset;
12280 if (ImmValue >= 0) {
12281 // The operand is a TargetConstant; we need to rebuild it as a regular
12282 // constant.
12283 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12284 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12285 } else {
12286 // The operand is a TargetConstant; we need to rebuild it as a regular
12287 // constant rather than negating the original operand.
12288 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12289 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12290 }
12291
12292 SDValue SlideDown =
12293 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12294 Op1, DownOffset, Mask, UpOffset);
12295 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12296 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12297
12298 if (IsMaskVector) {
12299 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12300 Result = DAG.getNode(
12301 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12302 {Result, DAG.getConstant(0, DL, ContainerVT),
12303 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12304 Mask, EVL2});
12305 }
12306
12307 if (!VT.isFixedLengthVector())
12308 return Result;
12309 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12310}
12311
12312SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12313 SelectionDAG &DAG) const {
12314 SDLoc DL(Op);
12315 SDValue Val = Op.getOperand(0);
12316 SDValue Mask = Op.getOperand(1);
12317 SDValue VL = Op.getOperand(2);
12318 MVT VT = Op.getSimpleValueType();
12319
12320 MVT ContainerVT = VT;
12321 if (VT.isFixedLengthVector()) {
12322 ContainerVT = getContainerForFixedLengthVector(VT);
12323 MVT MaskVT = getMaskTypeFor(ContainerVT);
12324 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12325 }
12326
12327 SDValue Result =
12328 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12329
12330 if (!VT.isFixedLengthVector())
12331 return Result;
12332 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12333}
12334
12335SDValue
12336RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12337 SelectionDAG &DAG) const {
12338 SDLoc DL(Op);
12339 MVT VT = Op.getSimpleValueType();
12340 MVT XLenVT = Subtarget.getXLenVT();
12341
12342 SDValue Op1 = Op.getOperand(0);
12343 SDValue Mask = Op.getOperand(1);
12344 SDValue EVL = Op.getOperand(2);
12345
12346 MVT ContainerVT = VT;
12347 if (VT.isFixedLengthVector()) {
12348 ContainerVT = getContainerForFixedLengthVector(VT);
12349 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12350 MVT MaskVT = getMaskTypeFor(ContainerVT);
12351 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12352 }
12353
12354 MVT GatherVT = ContainerVT;
12355 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12356 // Check if we are working with mask vectors
12357 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12358 if (IsMaskVector) {
12359 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12360
12361 // Expand input operand
12362 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12363 DAG.getUNDEF(IndicesVT),
12364 DAG.getConstant(1, DL, XLenVT), EVL);
12365 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12366 DAG.getUNDEF(IndicesVT),
12367 DAG.getConstant(0, DL, XLenVT), EVL);
12368 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12369 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12370 }
12371
12372 unsigned EltSize = GatherVT.getScalarSizeInBits();
12373 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12374 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12375 unsigned MaxVLMAX =
12376 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12377
12378 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12379 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12380 // to use vrgatherei16.vv.
12381 // TODO: It's also possible to use vrgatherei16.vv for other types to
12382 // decrease register width for the index calculation.
12383 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12384 if (MaxVLMAX > 256 && EltSize == 8) {
12385 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12386 // Split the vector in half and reverse each half using a full register
12387 // reverse.
12388 // Swap the halves and concatenate them.
12389 // Slide the concatenated result by (VLMax - VL).
12390 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12391 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12392 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12393
12394 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12395 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12396
12397 // Reassemble the low and high pieces reversed.
12398 // NOTE: this Result is unmasked (because we do not need masks for
12399 // shuffles). If in the future this has to change, we can use a SELECT_VL
12400 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12401 SDValue Result =
12402 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12403
12404 // Slide off any elements from past EVL that were reversed into the low
12405 // elements.
12406 unsigned MinElts = GatherVT.getVectorMinNumElements();
12407 SDValue VLMax =
12408 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12409 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12410
12411 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12412 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12413
12414 if (IsMaskVector) {
12415 // Truncate Result back to a mask vector
12416 Result =
12417 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12418 {Result, DAG.getConstant(0, DL, GatherVT),
12419 DAG.getCondCode(ISD::SETNE),
12420 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12421 }
12422
12423 if (!VT.isFixedLengthVector())
12424 return Result;
12425 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12426 }
12427
12428 // Just promote the int type to i16 which will double the LMUL.
12429 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12430 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12431 }
12432
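// The gather index for lane i is (EVL - 1) - i, e.g. for EVL = 4 the indices
// are 3 2 1 0, so only the first EVL elements are reversed.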
12433 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12434 SDValue VecLen =
12435 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12436 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12437 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12438 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12439 DAG.getUNDEF(IndicesVT), Mask, EVL);
12440 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12441 DAG.getUNDEF(GatherVT), Mask, EVL);
12442
12443 if (IsMaskVector) {
12444 // Truncate Result back to a mask vector
12445 Result = DAG.getNode(
12446 RISCVISD::SETCC_VL, DL, ContainerVT,
12447 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12448 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12449 }
12450
12451 if (!VT.isFixedLengthVector())
12452 return Result;
12453 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12454}
12455
12456SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12457 SelectionDAG &DAG) const {
12458 MVT VT = Op.getSimpleValueType();
12459 if (VT.getVectorElementType() != MVT::i1)
12460 return lowerVPOp(Op, DAG);
12461
12462 // It is safe to drop mask parameter as masked-off elements are undef.
12463 SDValue Op1 = Op->getOperand(0);
12464 SDValue Op2 = Op->getOperand(1);
12465 SDValue VL = Op->getOperand(3);
12466
12467 MVT ContainerVT = VT;
12468 const bool IsFixed = VT.isFixedLengthVector();
12469 if (IsFixed) {
12470 ContainerVT = getContainerForFixedLengthVector(VT);
12471 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12472 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12473 }
12474
12475 SDLoc DL(Op);
12476 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12477 if (!IsFixed)
12478 return Val;
12479 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12480}
12481
12482SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12483 SelectionDAG &DAG) const {
12484 SDLoc DL(Op);
12485 MVT XLenVT = Subtarget.getXLenVT();
12486 MVT VT = Op.getSimpleValueType();
12487 MVT ContainerVT = VT;
12488 if (VT.isFixedLengthVector())
12489 ContainerVT = getContainerForFixedLengthVector(VT);
12490
12491 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12492
12493 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12494 // Check if the mask is known to be all ones
12495 SDValue Mask = VPNode->getMask();
12496 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12497
12498 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12499 : Intrinsic::riscv_vlse_mask,
12500 DL, XLenVT);
12501 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12502 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12503 VPNode->getStride()};
12504 if (!IsUnmasked) {
12505 if (VT.isFixedLengthVector()) {
12506 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12507 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12508 }
12509 Ops.push_back(Mask);
12510 }
12511 Ops.push_back(VPNode->getVectorLength());
12512 if (!IsUnmasked) {
12513 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12514 Ops.push_back(Policy);
12515 }
12516
12517 SDValue Result =
12518 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12519 VPNode->getMemoryVT(), VPNode->getMemOperand());
12520 SDValue Chain = Result.getValue(1);
12521
12522 if (VT.isFixedLengthVector())
12523 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12524
12525 return DAG.getMergeValues({Result, Chain}, DL);
12526}
12527
12528SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12529 SelectionDAG &DAG) const {
12530 SDLoc DL(Op);
12531 MVT XLenVT = Subtarget.getXLenVT();
12532
12533 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12534 SDValue StoreVal = VPNode->getValue();
12535 MVT VT = StoreVal.getSimpleValueType();
12536 MVT ContainerVT = VT;
12537 if (VT.isFixedLengthVector()) {
12538 ContainerVT = getContainerForFixedLengthVector(VT);
12539 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12540 }
12541
12542 // Check if the mask is known to be all ones
12543 SDValue Mask = VPNode->getMask();
12544 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12545
12546 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12547 : Intrinsic::riscv_vsse_mask,
12548 DL, XLenVT);
12549 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12550 VPNode->getBasePtr(), VPNode->getStride()};
12551 if (!IsUnmasked) {
12552 if (VT.isFixedLengthVector()) {
12553 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12554 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12555 }
12556 Ops.push_back(Mask);
12557 }
12558 Ops.push_back(VPNode->getVectorLength());
12559
12560 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12561 Ops, VPNode->getMemoryVT(),
12562 VPNode->getMemOperand());
12563}
12564
12565// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12566// matched to a RVV indexed load. The RVV indexed load instructions only
12567// support the "unsigned unscaled" addressing mode; indices are implicitly
12568// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12569// signed or scaled indexing is extended to the XLEN value type and scaled
12570// accordingly.
12571SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12572 SelectionDAG &DAG) const {
12573 SDLoc DL(Op);
12574 MVT VT = Op.getSimpleValueType();
12575
12576 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12577 EVT MemVT = MemSD->getMemoryVT();
12578 MachineMemOperand *MMO = MemSD->getMemOperand();
12579 SDValue Chain = MemSD->getChain();
12580 SDValue BasePtr = MemSD->getBasePtr();
12581
12582 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12583 SDValue Index, Mask, PassThru, VL;
12584
12585 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12586 Index = VPGN->getIndex();
12587 Mask = VPGN->getMask();
12588 PassThru = DAG.getUNDEF(VT);
12589 VL = VPGN->getVectorLength();
12590 // VP doesn't support extending loads.
12591 LoadExtType = ISD::NON_EXTLOAD;
12592 } else {
12593 // Else it must be a MGATHER.
12594 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12595 Index = MGN->getIndex();
12596 Mask = MGN->getMask();
12597 PassThru = MGN->getPassThru();
12598 LoadExtType = MGN->getExtensionType();
12599 }
12600
12601 MVT IndexVT = Index.getSimpleValueType();
12602 MVT XLenVT = Subtarget.getXLenVT();
12603
12604 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12605 "Unexpected VTs!");
12606 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12607 // Targets have to explicitly opt-in for extending vector loads.
12608 assert(LoadExtType == ISD::NON_EXTLOAD &&
12609 "Unexpected extending MGATHER/VP_GATHER");
12610
12611 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12612 // the selection of the masked intrinsics doesn't do this for us.
12613 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12614
12615 MVT ContainerVT = VT;
12616 if (VT.isFixedLengthVector()) {
12617 ContainerVT = getContainerForFixedLengthVector(VT);
12618 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12619 ContainerVT.getVectorElementCount());
12620
12621 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12622
12623 if (!IsUnmasked) {
12624 MVT MaskVT = getMaskTypeFor(ContainerVT);
12625 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12626 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12627 }
12628 }
12629
12630 if (!VL)
12631 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12632
12633 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12634 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12635 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12636 }
12637
12638 unsigned IntID =
12639 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12640 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12641 if (IsUnmasked)
12642 Ops.push_back(DAG.getUNDEF(ContainerVT));
12643 else
12644 Ops.push_back(PassThru);
12645 Ops.push_back(BasePtr);
12646 Ops.push_back(Index);
12647 if (!IsUnmasked)
12648 Ops.push_back(Mask);
12649 Ops.push_back(VL);
12650 if (!IsUnmasked)
12651 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12652
12653 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12654 SDValue Result =
12655 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12656 Chain = Result.getValue(1);
12657
12658 if (VT.isFixedLengthVector())
12659 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12660
12661 return DAG.getMergeValues({Result, Chain}, DL);
12662}
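// Worked example (illustrative): a masked.gather of <4 x i32> with <4 x i64>
// indices on RV32 takes the truncation path above (XLenVT == i32 and the
// index elements are wider than XLEN), so the indices are truncated to i32
// byte offsets and the node is emitted as riscv_vluxei or riscv_vluxei_mask.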
12663
12664// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12665// matched to a RVV indexed store. The RVV indexed store instructions only
12666// support the "unsigned unscaled" addressing mode; indices are implicitly
12667// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12668// signed or scaled indexing is extended to the XLEN value type and scaled
12669// accordingly.
12670SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12671 SelectionDAG &DAG) const {
12672 SDLoc DL(Op);
12673 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12674 EVT MemVT = MemSD->getMemoryVT();
12675 MachineMemOperand *MMO = MemSD->getMemOperand();
12676 SDValue Chain = MemSD->getChain();
12677 SDValue BasePtr = MemSD->getBasePtr();
12678
12679 [[maybe_unused]] bool IsTruncatingStore = false;
12680 SDValue Index, Mask, Val, VL;
12681
12682 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12683 Index = VPSN->getIndex();
12684 Mask = VPSN->getMask();
12685 Val = VPSN->getValue();
12686 VL = VPSN->getVectorLength();
12687 // VP doesn't support truncating stores.
12688 IsTruncatingStore = false;
12689 } else {
12690 // Else it must be a MSCATTER.
12691 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12692 Index = MSN->getIndex();
12693 Mask = MSN->getMask();
12694 Val = MSN->getValue();
12695 IsTruncatingStore = MSN->isTruncatingStore();
12696 }
12697
12698 MVT VT = Val.getSimpleValueType();
12699 MVT IndexVT = Index.getSimpleValueType();
12700 MVT XLenVT = Subtarget.getXLenVT();
12701
12702 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12703 "Unexpected VTs!");
12704 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12705 // Targets have to explicitly opt-in for extending vector loads and
12706 // truncating vector stores.
12707 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12708
12709 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12710 // the selection of the masked intrinsics doesn't do this for us.
12711 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12712
12713 MVT ContainerVT = VT;
12714 if (VT.isFixedLengthVector()) {
12715 ContainerVT = getContainerForFixedLengthVector(VT);
12716 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12717 ContainerVT.getVectorElementCount());
12718
12719 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12720 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12721
12722 if (!IsUnmasked) {
12723 MVT MaskVT = getMaskTypeFor(ContainerVT);
12724 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12725 }
12726 }
12727
12728 if (!VL)
12729 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12730
12731 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12732 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12733 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12734 }
12735
12736 unsigned IntID =
12737 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12738 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12739 Ops.push_back(Val);
12740 Ops.push_back(BasePtr);
12741 Ops.push_back(Index);
12742 if (!IsUnmasked)
12743 Ops.push_back(Mask);
12744 Ops.push_back(VL);
12745
12746 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12747 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12748}
12749
12750SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12751 SelectionDAG &DAG) const {
12752 const MVT XLenVT = Subtarget.getXLenVT();
12753 SDLoc DL(Op);
12754 SDValue Chain = Op->getOperand(0);
12755 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12756 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12757 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12758
12759 // Encoding used for rounding mode in RISC-V differs from that used in
12760 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index in
12761 // a table, which consists of a sequence of 4-bit fields, each representing a
12762 // corresponding FLT_ROUNDS mode.
12763 static const int Table =
12764 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12765 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12766 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12767 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12768 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12769
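// Worked lookup (assuming the standard llvm::RoundingMode and RISCVFPRndMode
// encodings): if frm reads RTZ (= 1), Shift below is 4, and (Table >> 4) & 7
// yields int(RoundingMode::TowardZero) == 0, the FLT_ROUNDS value for round
// toward zero.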
12770 SDValue Shift =
12771 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12772 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12773 DAG.getConstant(Table, DL, XLenVT), Shift);
12774 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12775 DAG.getConstant(7, DL, XLenVT));
12776
12777 return DAG.getMergeValues({Masked, Chain}, DL);
12778}
12779
12780SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12781 SelectionDAG &DAG) const {
12782 const MVT XLenVT = Subtarget.getXLenVT();
12783 SDLoc DL(Op);
12784 SDValue Chain = Op->getOperand(0);
12785 SDValue RMValue = Op->getOperand(1);
12786 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12787
12788 // Encoding used for rounding mode in RISC-V differs from that used in
12789 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index in
12790 // a table, which consists of a sequence of 4-bit fields, each representing a
12791 // corresponding RISC-V mode.
12792 static const unsigned Table =
12793 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12794 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12795 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12796 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12797 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12798
12799 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12800
12801 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12802 DAG.getConstant(2, DL, XLenVT));
12803 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12804 DAG.getConstant(Table, DL, XLenVT), Shift);
12805 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12806 DAG.getConstant(0x7, DL, XLenVT));
12807 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12808 RMValue);
12809}
12810
12811SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12812 SelectionDAG &DAG) const {
12813 MachineFunction &MF = DAG.getMachineFunction();
12814
12815 bool isRISCV64 = Subtarget.is64Bit();
12816 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12817
12818 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12819 return DAG.getFrameIndex(FI, PtrVT);
12820}
12821
12822// Returns the opcode of the target-specific SDNode that implements the 32-bit
12823// form of the given Opcode.
12824static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12825 switch (Opcode) {
12826 default:
12827 llvm_unreachable("Unexpected opcode");
12828 case ISD::SHL:
12829 return RISCVISD::SLLW;
12830 case ISD::SRA:
12831 return RISCVISD::SRAW;
12832 case ISD::SRL:
12833 return RISCVISD::SRLW;
12834 case ISD::SDIV:
12835 return RISCVISD::DIVW;
12836 case ISD::UDIV:
12837 return RISCVISD::DIVUW;
12838 case ISD::UREM:
12839 return RISCVISD::REMUW;
12840 case ISD::ROTL:
12841 return RISCVISD::ROLW;
12842 case ISD::ROTR:
12843 return RISCVISD::RORW;
12844 }
12845}
12846
12847// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12848// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12849// otherwise be promoted to i64, making it difficult to select the
12850 // SLLW/DIVUW/.../*W later on because the fact that the operation was
12851 // originally of type i8/i16/i32 is lost.
12852 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12853 unsigned ExtOpc = ISD::ANY_EXTEND) {
12854 SDLoc DL(N);
12855 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12856 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12857 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12858 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12859 // ReplaceNodeResults requires we maintain the same type for the return value.
12860 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12861}
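// For instance, an i32 sdiv on RV64 reaching this helper with the default
// ISD::ANY_EXTEND becomes (trunc i32 (RISCVISD::DIVW (anyext a), (anyext b))),
// which later selects to a single divw.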
12862
12863 // Converts the given 32-bit operation to an i64 operation with sign-extension
12864 // semantics, reducing the number of sign-extension instructions emitted.
12865 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12866 SDLoc DL(N);
12867 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12868 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12869 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12870 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12871 DAG.getValueType(MVT::i32));
12872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12873}
12874
12875 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12876 SmallVectorImpl<SDValue> &Results,
12877 SelectionDAG &DAG) const {
12878 SDLoc DL(N);
12879 switch (N->getOpcode()) {
12880 default:
12881 llvm_unreachable("Don't know how to custom type legalize this operation!");
12882 case ISD::STRICT_FP_TO_SINT:
12883 case ISD::STRICT_FP_TO_UINT:
12884 case ISD::FP_TO_SINT:
12885 case ISD::FP_TO_UINT: {
12886 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12887 "Unexpected custom legalisation");
12888 bool IsStrict = N->isStrictFPOpcode();
12889 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12890 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12891 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12892 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12893 TargetLowering::TypeSoftenFloat) {
12894 if (!isTypeLegal(Op0.getValueType()))
12895 return;
12896 if (IsStrict) {
12897 SDValue Chain = N->getOperand(0);
12898 // In absence of Zfh, promote f16 to f32, then convert.
12899 if (Op0.getValueType() == MVT::f16 &&
12900 !Subtarget.hasStdExtZfhOrZhinx()) {
12901 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12902 {Chain, Op0});
12903 Chain = Op0.getValue(1);
12904 }
12905 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12906 : RISCVISD::STRICT_FCVT_WU_RV64;
12907 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12908 SDValue Res = DAG.getNode(
12909 Opc, DL, VTs, Chain, Op0,
12910 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12911 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12912 Results.push_back(Res.getValue(1));
12913 return;
12914 }
12915 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
12916 // convert.
12917 if ((Op0.getValueType() == MVT::f16 &&
12918 !Subtarget.hasStdExtZfhOrZhinx()) ||
12919 Op0.getValueType() == MVT::bf16)
12920 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12921
12922 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12923 SDValue Res =
12924 DAG.getNode(Opc, DL, MVT::i64, Op0,
12925 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12927 return;
12928 }
12929 // If the FP type needs to be softened, emit a library call using the 'si'
12930 // version. If we left it to default legalization we'd end up with 'di'. If
12931 // the FP type doesn't need to be softened just let generic type
12932 // legalization promote the result type.
12933 RTLIB::Libcall LC;
12934 if (IsSigned)
12935 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12936 else
12937 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12938 MakeLibCallOptions CallOptions;
12939 EVT OpVT = Op0.getValueType();
12940 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12941 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12942 SDValue Result;
12943 std::tie(Result, Chain) =
12944 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12945 Results.push_back(Result);
12946 if (IsStrict)
12947 Results.push_back(Chain);
12948 break;
12949 }
12950 case ISD::LROUND: {
12951 SDValue Op0 = N->getOperand(0);
12952 EVT Op0VT = Op0.getValueType();
12953 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12954 TargetLowering::TypeSoftenFloat) {
12955 if (!isTypeLegal(Op0VT))
12956 return;
12957
12958 // In absence of Zfh, promote f16 to f32, then convert.
12959 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12960 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12961
12962 SDValue Res =
12963 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12964 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12965 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12966 return;
12967 }
12968 // If the FP type needs to be softened, emit a library call to lround. We'll
12969 // need to truncate the result. We assume any value that doesn't fit in i32
12970 // is allowed to return an unspecified value.
12971 RTLIB::Libcall LC =
12972 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12973 MakeLibCallOptions CallOptions;
12974 EVT OpVT = Op0.getValueType();
12975 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12976 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12977 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12978 Results.push_back(Result);
12979 break;
12980 }
12981 case ISD::READCYCLECOUNTER:
12982 case ISD::READSTEADYCOUNTER: {
12983 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12984 "has custom type legalization on riscv32");
12985
12986 SDValue LoCounter, HiCounter;
12987 MVT XLenVT = Subtarget.getXLenVT();
12988 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12989 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
12990 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
12991 } else {
12992 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
12993 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
12994 }
12995 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12996 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12997 N->getOperand(0), LoCounter, HiCounter);
12998
12999 Results.push_back(
13000 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13001 Results.push_back(RCW.getValue(2));
13002 break;
13003 }
13004 case ISD::LOAD: {
13005 if (!ISD::isNON_EXTLoad(N))
13006 return;
13007
13008 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13009 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13010 LoadSDNode *Ld = cast<LoadSDNode>(N);
13011
13012 SDLoc dl(N);
13013 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13014 Ld->getBasePtr(), Ld->getMemoryVT(),
13015 Ld->getMemOperand());
13016 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13017 Results.push_back(Res.getValue(1));
13018 return;
13019 }
13020 case ISD::MUL: {
13021 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13022 unsigned XLen = Subtarget.getXLen();
13023 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
13024 if (Size > XLen) {
13025 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13026 SDValue LHS = N->getOperand(0);
13027 SDValue RHS = N->getOperand(1);
13028 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13029
13030 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13031 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13032 // We need exactly one side to be unsigned.
13033 if (LHSIsU == RHSIsU)
13034 return;
13035
13036 auto MakeMULPair = [&](SDValue S, SDValue U) {
13037 MVT XLenVT = Subtarget.getXLenVT();
13038 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13039 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13040 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13041 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13042 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13043 };
13044
13045 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13046 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13047
13048 // The other operand should be signed, but still prefer MULH when
13049 // possible.
13050 if (RHSIsU && LHSIsS && !RHSIsS)
13051 Results.push_back(MakeMULPair(LHS, RHS));
13052 else if (LHSIsU && RHSIsS && !LHSIsS)
13053 Results.push_back(MakeMULPair(RHS, LHS));
13054
13055 return;
13056 }
13057 [[fallthrough]];
13058 }
13059 case ISD::ADD:
13060 case ISD::SUB:
13061 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13062 "Unexpected custom legalisation");
13063 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13064 break;
13065 case ISD::SHL:
13066 case ISD::SRA:
13067 case ISD::SRL:
13068 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13069 "Unexpected custom legalisation");
13070 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13071 // If we can use a BSET instruction, allow default promotion to apply.
13072 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13073 isOneConstant(N->getOperand(0)))
13074 break;
13075 Results.push_back(customLegalizeToWOp(N, DAG));
13076 break;
13077 }
13078
13079 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13080 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13081 // shift amount.
13082 if (N->getOpcode() == ISD::SHL) {
13083 SDLoc DL(N);
13084 SDValue NewOp0 =
13085 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13086 SDValue NewOp1 =
13087 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13088 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13089 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13090 DAG.getValueType(MVT::i32));
13091 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13092 }
13093
13094 break;
13095 case ISD::ROTL:
13096 case ISD::ROTR:
13097 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13098 "Unexpected custom legalisation");
13099 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13100 Subtarget.hasVendorXTHeadBb()) &&
13101 "Unexpected custom legalization");
13102 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13103 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13104 return;
13105 Results.push_back(customLegalizeToWOp(N, DAG));
13106 break;
13107 case ISD::CTTZ:
13108 case ISD::CTTZ_ZERO_UNDEF:
13109 case ISD::CTLZ:
13110 case ISD::CTLZ_ZERO_UNDEF: {
13111 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13112 "Unexpected custom legalisation");
13113
13114 SDValue NewOp0 =
13115 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13116 bool IsCTZ =
13117 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13118 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13119 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13120 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13121 return;
13122 }
13123 case ISD::SDIV:
13124 case ISD::UDIV:
13125 case ISD::UREM: {
13126 MVT VT = N->getSimpleValueType(0);
13127 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13128 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13129 "Unexpected custom legalisation");
13130 // Don't promote division/remainder by constant since we should expand those
13131 // to multiply by magic constant.
13132 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13133 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13134 !isIntDivCheap(N->getValueType(0), Attr))
13135 return;
13136
13137 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13138 // the upper 32 bits. For other types we need to sign or zero extend
13139 // based on the opcode.
13140 unsigned ExtOpc = ISD::ANY_EXTEND;
13141 if (VT != MVT::i32)
13142 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13143 : ISD::ZERO_EXTEND;
13144
13145 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13146 break;
13147 }
13148 case ISD::SADDO: {
13149 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13150 "Unexpected custom legalisation");
13151
13152 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13153 // use the default legalization.
13154 if (!isa<ConstantSDNode>(N->getOperand(1)))
13155 return;
13156
13157 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13158 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13159 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13160 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13161 DAG.getValueType(MVT::i32));
13162
13163 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13164
13165 // For an addition, the result should be less than one of the operands (LHS)
13166 // if and only if the other operand (RHS) is negative, otherwise there will
13167 // be overflow.
13168 // For a subtraction, the result should be less than one of the operands
13169 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13170 // otherwise there will be overflow.
13171 EVT OType = N->getValueType(1);
13172 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13173 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13174
13175 SDValue Overflow =
13176 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13177 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13178 Results.push_back(Overflow);
13179 return;
13180 }
13181 case ISD::UADDO:
13182 case ISD::USUBO: {
13183 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13184 "Unexpected custom legalisation");
13185 bool IsAdd = N->getOpcode() == ISD::UADDO;
13186 // Create an ADDW or SUBW.
13187 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13188 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13189 SDValue Res =
13190 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13191 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13192 DAG.getValueType(MVT::i32));
13193
13194 SDValue Overflow;
13195 if (IsAdd && isOneConstant(RHS)) {
13196 // Special case uaddo X, 1 overflowed if the addition result is 0.
13197 // The general case (X + C) < C is not necessarily beneficial. Although we
13198 // reduce the live range of X, we may introduce the materialization of
13199 // constant C, especially when the setcc result is used by a branch, since
13200 // RISC-V has no compare-with-immediate branch instructions.
13201 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13202 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13203 } else if (IsAdd && isAllOnesConstant(RHS)) {
13204 // Special case uaddo X, -1 overflowed if X != 0.
13205 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13206 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13207 } else {
13208 // Sign extend the LHS and perform an unsigned compare with the ADDW
13209 // result. Since the inputs are sign extended from i32, this is equivalent
13210 // to comparing the lower 32 bits.
13211 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13212 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13213 IsAdd ? ISD::SETULT : ISD::SETUGT);
13214 }
13215
13216 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13217 Results.push_back(Overflow);
13218 return;
13219 }
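// Sketch of the special case above (approximate final code): (uaddo i32 %x, 1)
// on RV64 becomes an addiw of the any-extended input followed by a seqz on the
// sign-extended sum, avoiding materializing a constant for the compare.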
13220 case ISD::UADDSAT:
13221 case ISD::USUBSAT: {
13222 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13223 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13224 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13225 // promotion for UADDO/USUBO.
13226 Results.push_back(expandAddSubSat(N, DAG));
13227 return;
13228 }
13229 case ISD::SADDSAT:
13230 case ISD::SSUBSAT: {
13231 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13232 "Unexpected custom legalisation");
13233 Results.push_back(expandAddSubSat(N, DAG));
13234 return;
13235 }
13236 case ISD::ABS: {
13237 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13238 "Unexpected custom legalisation");
13239
13240 if (Subtarget.hasStdExtZbb()) {
13241 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13242 // This allows us to remember that the result is sign extended. Expanding
13243 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13244 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13245 N->getOperand(0));
13246 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13247 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13248 return;
13249 }
13250
13251 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
13252 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13253
13254 // Freeze the source so we can increase its use count.
13255 Src = DAG.getFreeze(Src);
13256
13257 // Copy sign bit to all bits using the sraiw pattern.
13258 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13259 DAG.getValueType(MVT::i32));
13260 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13261 DAG.getConstant(31, DL, MVT::i64));
13262
13263 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13264 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13265
13266 // NOTE: The result is only required to be anyextended, but sext is
13267 // consistent with type legalization of sub.
13268 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13269 DAG.getValueType(MVT::i32));
13270 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13271 return;
13272 }
13273 case ISD::BITCAST: {
13274 EVT VT = N->getValueType(0);
13275 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13276 SDValue Op0 = N->getOperand(0);
13277 EVT Op0VT = Op0.getValueType();
13278 MVT XLenVT = Subtarget.getXLenVT();
13279 if (VT == MVT::i16 &&
13280 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13281 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13282 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13283 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13284 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13285 Subtarget.hasStdExtFOrZfinx()) {
13286 SDValue FPConv =
13287 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13288 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13289 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13290 Subtarget.hasStdExtDOrZdinx()) {
13291 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13292 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13293 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13294 NewReg.getValue(0), NewReg.getValue(1));
13295 Results.push_back(RetReg);
13296 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13297 isTypeLegal(Op0VT)) {
13298 // Custom-legalize bitcasts from fixed-length vector types to illegal
13299 // scalar types in order to improve codegen. Bitcast the vector to a
13300 // one-element vector type whose element type is the same as the result
13301 // type, and extract the first element.
13302 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13303 if (isTypeLegal(BVT)) {
13304 SDValue BVec = DAG.getBitcast(BVT, Op0);
13305 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13306 DAG.getVectorIdxConstant(0, DL)));
13307 }
13308 }
13309 break;
13310 }
13311 case RISCVISD::BREV8:
13312 case RISCVISD::ORC_B: {
13313 MVT VT = N->getSimpleValueType(0);
13314 MVT XLenVT = Subtarget.getXLenVT();
13315 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13316 "Unexpected custom legalisation");
13317 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13318 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13319 "Unexpected extension");
13320 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13321 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13322 // ReplaceNodeResults requires we maintain the same type for the return
13323 // value.
13324 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13325 break;
13326 }
13327 case ISD::EXTRACT_VECTOR_ELT: {
13328 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13329 // type is illegal (currently only vXi64 RV32).
13330 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13331 // transferred to the destination register. We issue two of these from the
13332 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13333 // first element.
13334 SDValue Vec = N->getOperand(0);
13335 SDValue Idx = N->getOperand(1);
13336
13337 // The vector type hasn't been legalized yet so we can't issue target
13338 // specific nodes if it needs legalization.
13339 // FIXME: We would manually legalize if it's important.
13340 if (!isTypeLegal(Vec.getValueType()))
13341 return;
13342
13343 MVT VecVT = Vec.getSimpleValueType();
13344
13345 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13346 VecVT.getVectorElementType() == MVT::i64 &&
13347 "Unexpected EXTRACT_VECTOR_ELT legalization");
13348
13349 // If this is a fixed vector, we need to convert it to a scalable vector.
13350 MVT ContainerVT = VecVT;
13351 if (VecVT.isFixedLengthVector()) {
13352 ContainerVT = getContainerForFixedLengthVector(VecVT);
13353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13354 }
13355
13356 MVT XLenVT = Subtarget.getXLenVT();
13357
13358 // Use a VL of 1 to avoid processing more elements than we need.
13359 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13360
13361 // Unless the index is known to be 0, we must slide the vector down to get
13362 // the desired element into index 0.
13363 if (!isNullConstant(Idx)) {
13364 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13365 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13366 }
13367
13368 // Extract the lower XLEN bits of the correct vector element.
13369 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13370
13371 // To extract the upper XLEN bits of the vector element, shift the first
13372 // element right by 32 bits and re-extract the lower XLEN bits.
13373 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13374 DAG.getUNDEF(ContainerVT),
13375 DAG.getConstant(32, DL, XLenVT), VL);
13376 SDValue LShr32 =
13377 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13378 DAG.getUNDEF(ContainerVT), Mask, VL);
13379
13380 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13381
13382 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13383 break;
13384 }
13385 case ISD::INTRINSIC_WO_CHAIN: {
13386 unsigned IntNo = N->getConstantOperandVal(0);
13387 switch (IntNo) {
13388 default:
13390 "Don't know how to custom type legalize this intrinsic!");
13391 case Intrinsic::experimental_get_vector_length: {
13392 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13393 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13394 return;
13395 }
13396 case Intrinsic::experimental_cttz_elts: {
13397 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13398 Results.push_back(
13399 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13400 return;
13401 }
13402 case Intrinsic::riscv_orc_b:
13403 case Intrinsic::riscv_brev8:
13404 case Intrinsic::riscv_sha256sig0:
13405 case Intrinsic::riscv_sha256sig1:
13406 case Intrinsic::riscv_sha256sum0:
13407 case Intrinsic::riscv_sha256sum1:
13408 case Intrinsic::riscv_sm3p0:
13409 case Intrinsic::riscv_sm3p1: {
13410 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13411 return;
13412 unsigned Opc;
13413 switch (IntNo) {
13414 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13415 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13416 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13417 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13418 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13419 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13420 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13421 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13422 }
13423
13424 SDValue NewOp =
13425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13426 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13427 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13428 return;
13429 }
13430 case Intrinsic::riscv_sm4ks:
13431 case Intrinsic::riscv_sm4ed: {
13432 unsigned Opc =
13433 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13434 SDValue NewOp0 =
13435 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13436 SDValue NewOp1 =
13437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13438 SDValue Res =
13439 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13440 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13441 return;
13442 }
13443 case Intrinsic::riscv_mopr: {
13444 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13445 return;
13446 SDValue NewOp =
13447 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13448 SDValue Res = DAG.getNode(
13449 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13450 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13451 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13452 return;
13453 }
13454 case Intrinsic::riscv_moprr: {
13455 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13456 return;
13457 SDValue NewOp0 =
13458 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13459 SDValue NewOp1 =
13460 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13461 SDValue Res = DAG.getNode(
13462 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13463 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13464 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13465 return;
13466 }
13467 case Intrinsic::riscv_clmul: {
13468 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13469 return;
13470
13471 SDValue NewOp0 =
13472 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13473 SDValue NewOp1 =
13474 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13475 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13476 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13477 return;
13478 }
13479 case Intrinsic::riscv_clmulh:
13480 case Intrinsic::riscv_clmulr: {
13481 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13482 return;
13483
13484 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13485 // to the full 128-bit clmul result of multiplying two xlen values.
13486 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13487 // upper 32 bits.
13488 //
13489 // The alternative is to mask the inputs to 32 bits and use clmul, but
13490 // that requires two shifts to mask each input without zext.w.
13491 // FIXME: If the inputs are known zero extended or could be freely
13492 // zero extended, the mask form would be better.
13493 SDValue NewOp0 =
13494 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13495 SDValue NewOp1 =
13496 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13497 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13498 DAG.getConstant(32, DL, MVT::i64));
13499 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13500 DAG.getConstant(32, DL, MVT::i64));
13501 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13502 : RISCVISD::CLMULR;
13503 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13504 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13505 DAG.getConstant(32, DL, MVT::i64));
13506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13507 return;
13508 }
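// Sketch of the shift trick above for clmulh: with 32-bit inputs a and b the
// carry-less product P has at most 63 bits; clmulh(a << 32, b << 32) is the
// upper 64 bits of (P << 64), i.e. P itself, and the final SRL by 32 leaves
// bits [63:32] of P, the desired 32-bit clmulh result.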
13509 case Intrinsic::riscv_vmv_x_s: {
13510 EVT VT = N->getValueType(0);
13511 MVT XLenVT = Subtarget.getXLenVT();
13512 if (VT.bitsLT(XLenVT)) {
13513 // Simple case just extract using vmv.x.s and truncate.
13514 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13515 Subtarget.getXLenVT(), N->getOperand(1));
13516 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13517 return;
13518 }
13519
13520 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13521 "Unexpected custom legalization");
13522
13523 // We need to do the move in two steps.
13524 SDValue Vec = N->getOperand(1);
13525 MVT VecVT = Vec.getSimpleValueType();
13526
13527 // First extract the lower XLEN bits of the element.
13528 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13529
13530 // To extract the upper XLEN bits of the vector element, shift the first
13531 // element right by 32 bits and re-extract the lower XLEN bits.
13532 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13533
13534 SDValue ThirtyTwoV =
13535 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13536 DAG.getConstant(32, DL, XLenVT), VL);
13537 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13538 DAG.getUNDEF(VecVT), Mask, VL);
13539 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13540
13541 Results.push_back(
13542 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13543 break;
13544 }
13545 }
13546 break;
13547 }
13548 case ISD::VECREDUCE_ADD:
13549 case ISD::VECREDUCE_AND:
13550 case ISD::VECREDUCE_OR:
13551 case ISD::VECREDUCE_XOR:
13552 case ISD::VECREDUCE_SMAX:
13553 case ISD::VECREDUCE_UMAX:
13554 case ISD::VECREDUCE_SMIN:
13555 case ISD::VECREDUCE_UMIN:
13556 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13557 Results.push_back(V);
13558 break;
13559 case ISD::VP_REDUCE_ADD:
13560 case ISD::VP_REDUCE_AND:
13561 case ISD::VP_REDUCE_OR:
13562 case ISD::VP_REDUCE_XOR:
13563 case ISD::VP_REDUCE_SMAX:
13564 case ISD::VP_REDUCE_UMAX:
13565 case ISD::VP_REDUCE_SMIN:
13566 case ISD::VP_REDUCE_UMIN:
13567 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13568 Results.push_back(V);
13569 break;
13570 case ISD::GET_ROUNDING: {
13571 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13572 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13573 Results.push_back(Res.getValue(0));
13574 Results.push_back(Res.getValue(1));
13575 break;
13576 }
13577 }
13578}
13579
13580/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13581/// which corresponds to it.
13582static unsigned getVecReduceOpcode(unsigned Opc) {
13583 switch (Opc) {
13584 default:
13585 llvm_unreachable("Unhandled binary to transform reduction");
13586 case ISD::ADD:
13587 return ISD::VECREDUCE_ADD;
13588 case ISD::UMAX:
13589 return ISD::VECREDUCE_UMAX;
13590 case ISD::SMAX:
13591 return ISD::VECREDUCE_SMAX;
13592 case ISD::UMIN:
13593 return ISD::VECREDUCE_UMIN;
13594 case ISD::SMIN:
13595 return ISD::VECREDUCE_SMIN;
13596 case ISD::AND:
13597 return ISD::VECREDUCE_AND;
13598 case ISD::OR:
13599 return ISD::VECREDUCE_OR;
13600 case ISD::XOR:
13601 return ISD::VECREDUCE_XOR;
13602 case ISD::FADD:
13603 // Note: This is the associative form of the generic reduction opcode.
13604 return ISD::VECREDUCE_FADD;
13605 }
13606}
13607
13608/// Perform two related transforms whose purpose is to incrementally recognize
13609/// an explode_vector followed by scalar reduction as a vector reduction node.
13610/// This exists to recover from a deficiency in SLP which can't handle
13611/// forests with multiple roots sharing common nodes. In some cases, one
13612/// of the trees will be vectorized, and the other will remain (unprofitably)
13613/// scalarized.
13614static SDValue
13615 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13616 const RISCVSubtarget &Subtarget) {
13617
13618 // This transform needs to run before all integer types have been legalized
13619 // to i64 (so that the vector element type matches the add type), and while
13620 // it's safe to introduce odd sized vector types.
13621 if (DAG.NewNodesMustHaveLegalTypes)
13622 return SDValue();
13623
13624 // Without V, this transform isn't useful. We could form the (illegal)
13625 // operations and let them be scalarized again, but there's really no point.
13626 if (!Subtarget.hasVInstructions())
13627 return SDValue();
13628
13629 const SDLoc DL(N);
13630 const EVT VT = N->getValueType(0);
13631 const unsigned Opc = N->getOpcode();
13632
13633 // For FADD, we only handle the case with reassociation allowed. We
13634 // could handle strict reduction order, but at the moment, there's no
13635 // known reason to, and the complexity isn't worth it.
13636 // TODO: Handle fminnum and fmaxnum here
13637 if (!VT.isInteger() &&
13638 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13639 return SDValue();
13640
13641 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13642 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13643 "Inconsistent mappings");
13644 SDValue LHS = N->getOperand(0);
13645 SDValue RHS = N->getOperand(1);
13646
13647 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13648 return SDValue();
13649
13650 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13651 std::swap(LHS, RHS);
13652
13653 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13654 !isa<ConstantSDNode>(RHS.getOperand(1)))
13655 return SDValue();
13656
13657 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13658 SDValue SrcVec = RHS.getOperand(0);
13659 EVT SrcVecVT = SrcVec.getValueType();
13660 assert(SrcVecVT.getVectorElementType() == VT);
13661 if (SrcVecVT.isScalableVector())
13662 return SDValue();
13663
13664 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13665 return SDValue();
13666
13667 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13668 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13669 // root of our reduction tree. TODO: We could extend this to any two
13670 // adjacent aligned constant indices if desired.
13671 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13672 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13673 uint64_t LHSIdx =
13674 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13675 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13676 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13677 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13678 DAG.getVectorIdxConstant(0, DL));
13679 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13680 }
13681 }
13682
13683 // Match (binop (reduce (extract_subvector V, 0),
13684 // (extract_vector_elt V, sizeof(SubVec))))
13685 // into a reduction of one more element from the original vector V.
13686 if (LHS.getOpcode() != ReduceOpc)
13687 return SDValue();
13688
13689 SDValue ReduceVec = LHS.getOperand(0);
13690 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13691 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13692 isNullConstant(ReduceVec.getOperand(1)) &&
13693 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13694 // For illegal types (e.g. 3xi32), most will be combined again into a
13695 // wider (hopefully legal) type. If this is a terminal state, we are
13696 // relying on type legalization here to produce something reasonable
13697 // and this lowering quality could probably be improved. (TODO)
13698 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13699 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13700 DAG.getVectorIdxConstant(0, DL));
13701 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13702 ReduceVec->getFlags() & N->getFlags());
13703 }
13704
13705 return SDValue();
13706}
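// Illustrative walk-through: with t0 = extractelement %v, 0 and
// t1 = extractelement %v, 1, (add t0, t1) is first rewritten to
// vecreduce_add(extract_subvector %v, lanes 0..1); a later add of
// (extractelement %v, 2) then grows the reduction to the first three lanes,
// and so on until the whole explode_vector is folded away.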
13707
13708
13709// Try to fold (<bop> x, (reduction.<bop> vec, start))
13710 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13711 const RISCVSubtarget &Subtarget) {
13712 auto BinOpToRVVReduce = [](unsigned Opc) {
13713 switch (Opc) {
13714 default:
13715 llvm_unreachable("Unhandled binary to transform reduction");
13716 case ISD::ADD:
13717 return RISCVISD::VECREDUCE_ADD_VL;
13718 case ISD::UMAX:
13719 return RISCVISD::VECREDUCE_UMAX_VL;
13720 case ISD::SMAX:
13721 return RISCVISD::VECREDUCE_SMAX_VL;
13722 case ISD::UMIN:
13723 return RISCVISD::VECREDUCE_UMIN_VL;
13724 case ISD::SMIN:
13725 return RISCVISD::VECREDUCE_SMIN_VL;
13726 case ISD::AND:
13727 return RISCVISD::VECREDUCE_AND_VL;
13728 case ISD::OR:
13729 return RISCVISD::VECREDUCE_OR_VL;
13730 case ISD::XOR:
13731 return RISCVISD::VECREDUCE_XOR_VL;
13732 case ISD::FADD:
13733 return RISCVISD::VECREDUCE_FADD_VL;
13734 case ISD::FMAXNUM:
13735 return RISCVISD::VECREDUCE_FMAX_VL;
13736 case ISD::FMINNUM:
13737 return RISCVISD::VECREDUCE_FMIN_VL;
13738 }
13739 };
13740
13741 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13742 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13743 isNullConstant(V.getOperand(1)) &&
13744 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13745 };
13746
13747 unsigned Opc = N->getOpcode();
13748 unsigned ReduceIdx;
13749 if (IsReduction(N->getOperand(0), Opc))
13750 ReduceIdx = 0;
13751 else if (IsReduction(N->getOperand(1), Opc))
13752 ReduceIdx = 1;
13753 else
13754 return SDValue();
13755
13756 // Skip if FADD disallows reassociation but the combiner needs it.
13757 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13758 return SDValue();
13759
13760 SDValue Extract = N->getOperand(ReduceIdx);
13761 SDValue Reduce = Extract.getOperand(0);
13762 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13763 return SDValue();
13764
13765 SDValue ScalarV = Reduce.getOperand(2);
13766 EVT ScalarVT = ScalarV.getValueType();
13767 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13768 ScalarV.getOperand(0)->isUndef() &&
13769 isNullConstant(ScalarV.getOperand(2)))
13770 ScalarV = ScalarV.getOperand(1);
13771
13772 // Make sure that ScalarV is a splat with VL=1.
13773 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13774 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13775 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13776 return SDValue();
13777
13778 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13779 return SDValue();
13780
13781 // Check that the scalar of ScalarV is the neutral element.
13782 // TODO: Deal with values other than the neutral element.
13783 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13784 0))
13785 return SDValue();
13786
13787 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13788 // FIXME: We might be able to improve this if operand 0 is undef.
13789 if (!isNonZeroAVL(Reduce.getOperand(5)))
13790 return SDValue();
13791
13792 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13793
13794 SDLoc DL(N);
13795 SDValue NewScalarV =
13796 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13797 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13798
13799 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13800 if (ScalarVT != ScalarV.getValueType())
13801 NewScalarV =
13802 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13803 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13804
13805 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13806 NewScalarV, Reduce.getOperand(3),
13807 Reduce.getOperand(4), Reduce.getOperand(5)};
13808 SDValue NewReduce =
13809 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13810 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13811 Extract.getOperand(1));
13812}
13813
13814// Optimize (add (shl x, c0), (shl y, c1)) ->
13815 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
13816 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13817 const RISCVSubtarget &Subtarget) {
13818 // Perform this optimization only when the Zba extension is available.
13819 if (!Subtarget.hasStdExtZba())
13820 return SDValue();
13821
13822 // Skip for vector types and larger types.
13823 EVT VT = N->getValueType(0);
13824 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13825 return SDValue();
13826
13827 // The two operand nodes must be SHL and have no other use.
13828 SDValue N0 = N->getOperand(0);
13829 SDValue N1 = N->getOperand(1);
13830 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13831 !N0->hasOneUse() || !N1->hasOneUse())
13832 return SDValue();
13833
13834 // Check c0 and c1.
13835 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13836 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13837 if (!N0C || !N1C)
13838 return SDValue();
13839 int64_t C0 = N0C->getSExtValue();
13840 int64_t C1 = N1C->getSExtValue();
13841 if (C0 <= 0 || C1 <= 0)
13842 return SDValue();
13843
13844 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13845 int64_t Bits = std::min(C0, C1);
13846 int64_t Diff = std::abs(C0 - C1);
13847 if (Diff != 1 && Diff != 2 && Diff != 3)
13848 return SDValue();
13849
13850 // Build nodes.
13851 SDLoc DL(N);
13852 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13853 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13854 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13855 DAG.getConstant(Diff, DL, VT), NS);
13856 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13857}
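// For example, (add (shl x, 5), (shl y, 6)) has Diff = 1 and Bits = 5, so it
// becomes (shl (SHL_ADD y, 1, x), 5), i.e. an sh1add followed by a single slli.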
13858
13859// Combine a constant select operand into its use:
13860//
13861// (and (select cond, -1, c), x)
13862// -> (select cond, x, (and x, c)) [AllOnes=1]
13863// (or (select cond, 0, c), x)
13864// -> (select cond, x, (or x, c)) [AllOnes=0]
13865// (xor (select cond, 0, c), x)
13866// -> (select cond, x, (xor x, c)) [AllOnes=0]
13867// (add (select cond, 0, c), x)
13868// -> (select cond, x, (add x, c)) [AllOnes=0]
13869// (sub x, (select cond, 0, c))
13870// -> (select cond, x, (sub x, c)) [AllOnes=0]
13871 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13872 SelectionDAG &DAG, bool AllOnes,
13873 const RISCVSubtarget &Subtarget) {
13874 EVT VT = N->getValueType(0);
13875
13876 // Skip vectors.
13877 if (VT.isVector())
13878 return SDValue();
13879
13880 if (!Subtarget.hasConditionalMoveFusion()) {
13881 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13882 if ((!Subtarget.hasStdExtZicond() &&
13883 !Subtarget.hasVendorXVentanaCondOps()) ||
13884 N->getOpcode() != ISD::AND)
13885 return SDValue();
13886
13887 // Maybe harmful when the condition code has multiple uses.
13888 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13889 return SDValue();
13890
13891 // Maybe harmful when VT is wider than XLen.
13892 if (VT.getSizeInBits() > Subtarget.getXLen())
13893 return SDValue();
13894 }
13895
13896 if ((Slct.getOpcode() != ISD::SELECT &&
13897 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13898 !Slct.hasOneUse())
13899 return SDValue();
13900
13901 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13902 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13903 };
13904
13905 bool SwapSelectOps;
13906 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13907 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13908 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13909 SDValue NonConstantVal;
13910 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13911 SwapSelectOps = false;
13912 NonConstantVal = FalseVal;
13913 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13914 SwapSelectOps = true;
13915 NonConstantVal = TrueVal;
13916 } else
13917 return SDValue();
13918
13919 // Slct is now known to be the desired identity constant when CC is true.
13920 TrueVal = OtherOp;
13921 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13922 // Unless SwapSelectOps says the condition should be false.
13923 if (SwapSelectOps)
13924 std::swap(TrueVal, FalseVal);
13925
13926 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13927 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13928 {Slct.getOperand(0), Slct.getOperand(1),
13929 Slct.getOperand(2), TrueVal, FalseVal});
13930
13931 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13932 {Slct.getOperand(0), TrueVal, FalseVal});
13933}
13934
13935// Attempt combineSelectAndUse on each operand of a commutative operator N.
13936 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13937 bool AllOnes,
13938 const RISCVSubtarget &Subtarget) {
13939 SDValue N0 = N->getOperand(0);
13940 SDValue N1 = N->getOperand(1);
13941 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13942 return Result;
13943 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13944 return Result;
13945 return SDValue();
13946}
13947
13948// Transform (add (mul x, c0), c1) ->
13949// (add (mul (add x, c1/c0), c0), c1%c0).
13950// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13951// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13952// to an infinite loop in DAGCombine if transformed.
13953// Or transform (add (mul x, c0), c1) ->
13954// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13955// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13956// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13957// lead to an infinite loop in DAGCombine if transformed.
13958// Or transform (add (mul x, c0), c1) ->
13959// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13960// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13961// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13962// lead to an infinite loop in DAGCombine if transformed.
13963// Or transform (add (mul x, c0), c1) ->
13964// (mul (add x, c1/c0), c0).
13965// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
13966 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13967 const RISCVSubtarget &Subtarget) {
13968 // Skip for vector types and larger types.
13969 EVT VT = N->getValueType(0);
13970 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13971 return SDValue();
13972 // The first operand node must be a MUL and has no other use.
13973 SDValue N0 = N->getOperand(0);
13974 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13975 return SDValue();
13976 // Check if c0 and c1 match above conditions.
13977 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13978 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13979 if (!N0C || !N1C)
13980 return SDValue();
13981 // If N0C has multiple uses it's possible one of the cases in
13982 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13983 // in an infinite loop.
13984 if (!N0C->hasOneUse())
13985 return SDValue();
13986 int64_t C0 = N0C->getSExtValue();
13987 int64_t C1 = N1C->getSExtValue();
13988 int64_t CA, CB;
13989 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13990 return SDValue();
13991 // Search for proper CA (non-zero) and CB that both are simm12.
13992 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13993 !isInt<12>(C0 * (C1 / C0))) {
13994 CA = C1 / C0;
13995 CB = C1 % C0;
13996 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13997 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13998 CA = C1 / C0 + 1;
13999 CB = C1 % C0 - C0;
14000 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14001 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14002 CA = C1 / C0 - 1;
14003 CB = C1 % C0 + C0;
14004 } else
14005 return SDValue();
14006 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14007 SDLoc DL(N);
14008 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
14009 DAG.getSignedConstant(CA, DL, VT));
14010 SDValue New1 =
14011 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
14012 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
14013}
14014
14015// add (zext, zext) -> zext (add (zext, zext))
14016// sub (zext, zext) -> sext (sub (zext, zext))
14017// mul (zext, zext) -> zext (mul (zext, zext))
14018// sdiv (zext, zext) -> zext (sdiv (zext, zext))
14019// udiv (zext, zext) -> zext (udiv (zext, zext))
14020// srem (zext, zext) -> zext (srem (zext, zext))
14021// urem (zext, zext) -> zext (urem (zext, zext))
14022//
14023// where the sum of the extend widths match, and the range of the bin op
14024// fits inside the width of the narrower bin op. (For profitability on rvv, we
14025// use a power of two for both inner and outer extend.)
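// For example (illustrative), with i8 sources and an i32 result:
//   add (zext i8 a to i32), (zext i8 b to i32)
// becomes
//   zext (add (zext i8 a to i16), (zext i8 b to i16)) to i32
// since the sum of two u8 values is at most 510 and always fits in i16. For
// sub the outer extend is a sext because the narrow result may be negative.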
14026static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
14027
14028 EVT VT = N->getValueType(0);
14029 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14030 return SDValue();
14031
14032 SDValue N0 = N->getOperand(0);
14033 SDValue N1 = N->getOperand(1);
14035 return SDValue();
14036 if (!N0.hasOneUse() || !N1.hasOneUse())
14037 return SDValue();
14038
14039 SDValue Src0 = N0.getOperand(0);
14040 SDValue Src1 = N1.getOperand(0);
14041 EVT SrcVT = Src0.getValueType();
14042 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14043 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14044 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14045 return SDValue();
14046
14047 LLVMContext &C = *DAG.getContext();
14049 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14050
14051 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14052 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14053
14054 // Src0 and Src1 are zero extended, so they're always positive if signed.
14055 //
14056 // sub can produce a negative from two positive operands, so it needs to be
14057 // sign extended. Other nodes produce a positive from two positive operands, so
14058 // zero extend instead.
14059 unsigned OuterExtend =
14060 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14061
14062 return DAG.getNode(
14063 OuterExtend, SDLoc(N), VT,
14064 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14065}
14066
14067// Try to turn (add (xor bool, 1) -1) into (neg bool).
14068static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14069 SDValue N0 = N->getOperand(0);
14070 SDValue N1 = N->getOperand(1);
14071 EVT VT = N->getValueType(0);
14072 SDLoc DL(N);
14073
14074 // RHS should be -1.
14075 if (!isAllOnesConstant(N1))
14076 return SDValue();
14077
14078 // Look for (xor X, 1).
14079 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14080 return SDValue();
14081
14082 // First xor input should be 0 or 1.
14083 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14084 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14085 return SDValue();
14086
14087 // Emit a negate of the setcc.
14088 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14089 N0.getOperand(0));
14090}
14091
14094 const RISCVSubtarget &Subtarget) {
14095 SelectionDAG &DAG = DCI.DAG;
14096 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14097 return V;
14098 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14099 return V;
14100 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14101 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14102 return V;
14103 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14104 return V;
14105 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14106 return V;
14107 if (SDValue V = combineBinOpOfZExt(N, DAG))
14108 return V;
14109
14110 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14111 // (select lhs, rhs, cc, x, (add x, y))
14112 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14113}
14114
14115// Try to turn a sub boolean RHS and constant LHS into an addi.
14116static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14117 SDValue N0 = N->getOperand(0);
14118 SDValue N1 = N->getOperand(1);
14119 EVT VT = N->getValueType(0);
14120 SDLoc DL(N);
14121
14122 // Require a constant LHS.
14123 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14124 if (!N0C)
14125 return SDValue();
14126
14127 // All our optimizations involve subtracting 1 from the immediate and forming
14128 // an ADDI. Make sure the new immediate is valid for an ADDI.
14129 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14130 if (!ImmValMinus1.isSignedIntN(12))
14131 return SDValue();
14132
14133 SDValue NewLHS;
14134 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14135 // (sub constant, (setcc x, y, eq/neq)) ->
14136 // (add (setcc x, y, neq/eq), constant - 1)
14137 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14138 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14139 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14140 return SDValue();
14141 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14142 NewLHS =
14143 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14144 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14145 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14146 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14147 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14148 NewLHS = N1.getOperand(0);
14149 } else
14150 return SDValue();
14151
14152 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14153 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14154}
14155
14156// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14157// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14158// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14159// valid with Y=3, while 0b0000_1000_0000_0100 is not.
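// Worked example (illustrative, i16, Y=3): for X = 0x0808 both bytes have only
// bit 3 set, so (shl X, 5) - (srl X, 3) = 0x0100 - 0x0101 = 0xFFFF (mod 2^16),
// which is orc.b(X) since both bytes are nonzero. For X = 0x0008 the result is
// 0x0100 - 0x0001 = 0x00FF, again matching orc.b(X).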
14160static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14161 const RISCVSubtarget &Subtarget) {
14162 if (!Subtarget.hasStdExtZbb())
14163 return SDValue();
14164
14165 EVT VT = N->getValueType(0);
14166
14167 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14168 return SDValue();
14169
14170 SDValue N0 = N->getOperand(0);
14171 SDValue N1 = N->getOperand(1);
14172
14173 if (N0->getOpcode() != ISD::SHL)
14174 return SDValue();
14175
14176 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14177 if (!ShAmtCLeft)
14178 return SDValue();
14179 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14180
14181 if (ShiftedAmount >= 8)
14182 return SDValue();
14183
14184 SDValue LeftShiftOperand = N0->getOperand(0);
14185 SDValue RightShiftOperand = N1;
14186
14187 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14188 if (N1->getOpcode() != ISD::SRL)
14189 return SDValue();
14190 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14191 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14192 return SDValue();
14193 RightShiftOperand = N1.getOperand(0);
14194 }
14195
14196 // At least one shift should have a single use.
14197 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14198 return SDValue();
14199
14200 if (LeftShiftOperand != RightShiftOperand)
14201 return SDValue();
14202
14203 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14204 Mask <<= ShiftedAmount;
14205 // Check that X has indeed the right shape (only the Y-th bit can be set in
14206 // every byte).
14207 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14208 return SDValue();
14209
14210 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14211}
14212
14214 const RISCVSubtarget &Subtarget) {
14215 if (SDValue V = combineSubOfBoolean(N, DAG))
14216 return V;
14217
14218 EVT VT = N->getValueType(0);
14219 SDValue N0 = N->getOperand(0);
14220 SDValue N1 = N->getOperand(1);
14221 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14222 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14223 isNullConstant(N1.getOperand(1))) {
14224 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14225 if (CCVal == ISD::SETLT) {
14226 SDLoc DL(N);
14227 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14228 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14229 DAG.getConstant(ShAmt, DL, VT));
14230 }
14231 }
14232
14233 if (SDValue V = combineBinOpOfZExt(N, DAG))
14234 return V;
14235 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14236 return V;
14237
14238 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14239 // (select lhs, rhs, cc, x, (sub x, y))
14240 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14241}
14242
14243// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14244// Legalizing setcc can introduce xors like this. Doing this transform reduces
14245// the number of xors and may allow the xor to fold into a branch condition.
14246static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14247 SDValue N0 = N->getOperand(0);
14248 SDValue N1 = N->getOperand(1);
14249 bool IsAnd = N->getOpcode() == ISD::AND;
14250
14251 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14252 return SDValue();
14253
14254 if (!N0.hasOneUse() || !N1.hasOneUse())
14255 return SDValue();
14256
14257 SDValue N01 = N0.getOperand(1);
14258 SDValue N11 = N1.getOperand(1);
14259
14260 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14261 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14262 // operation is And, allow one of the Xors to use -1.
14263 if (isOneConstant(N01)) {
14264 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14265 return SDValue();
14266 } else if (isOneConstant(N11)) {
14267 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14268 if (!(IsAnd && isAllOnesConstant(N01)))
14269 return SDValue();
14270 } else
14271 return SDValue();
14272
14273 EVT VT = N->getValueType(0);
14274
14275 SDValue N00 = N0.getOperand(0);
14276 SDValue N10 = N1.getOperand(0);
14277
14278 // The LHS of the xors needs to be 0/1.
14279 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14280 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14281 return SDValue();
14282
14283 // Invert the opcode and insert a new xor.
14284 SDLoc DL(N);
14285 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14286 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14287 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14288}
14289
14290// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14291// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14292// value to an unsigned value. This will be lowered to vmax and series of
14293// vnclipu instructions later. This can be extended to other truncated types
14294// other than i8 by replacing 256 and 255 with the equivalent constants for the
14295// type.
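// Illustrative example for vXi16 -> vXi8 (constants 256/255): an element of -5
// becomes smax(-5, 0) = 0, 300 becomes smin(smax(300, 0), 255) = 255, and 100
// stays 100, matching the original select: -5 fails the unsigned (setult X, 256)
// test and selects sext(setgt -5, 0) = 0, while 300 also fails it and selects
// sext(setgt 300, 0) = all-ones, which truncates to 255.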
14296static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14297 EVT VT = N->getValueType(0);
14298 SDValue N0 = N->getOperand(0);
14299 EVT SrcVT = N0.getValueType();
14300
14301 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14302 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14303 return SDValue();
14304
14305 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14306 return SDValue();
14307
14308 SDValue Cond = N0.getOperand(0);
14309 SDValue True = N0.getOperand(1);
14310 SDValue False = N0.getOperand(2);
14311
14312 if (Cond.getOpcode() != ISD::SETCC)
14313 return SDValue();
14314
14315 // FIXME: Support the version of this pattern with the select operands
14316 // swapped.
14317 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14318 if (CCVal != ISD::SETULT)
14319 return SDValue();
14320
14321 SDValue CondLHS = Cond.getOperand(0);
14322 SDValue CondRHS = Cond.getOperand(1);
14323
14324 if (CondLHS != True)
14325 return SDValue();
14326
14327 unsigned ScalarBits = VT.getScalarSizeInBits();
14328
14329 // FIXME: Support other constants.
14330 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14331 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14332 return SDValue();
14333
14334 if (False.getOpcode() != ISD::SIGN_EXTEND)
14335 return SDValue();
14336
14337 False = False.getOperand(0);
14338
14339 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14340 return SDValue();
14341
14342 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14343 if (!FalseRHSC || !FalseRHSC->isZero())
14344 return SDValue();
14345
14346 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14347 if (CCVal2 != ISD::SETGT)
14348 return SDValue();
14349
14350 // Emit the signed to unsigned saturation pattern.
14351 SDLoc DL(N);
14352 SDValue Max =
14353 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14354 SDValue Min =
14355 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14356 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14357 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14358}
14359
14361 const RISCVSubtarget &Subtarget) {
14362 SDValue N0 = N->getOperand(0);
14363 EVT VT = N->getValueType(0);
14364
14365 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14366 // extending X. This is safe since we only need the LSB after the shift and
14367 // shift amounts larger than 31 would produce poison. If we wait until
14368 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14369 // to use a BEXT instruction.
14370 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14371 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14372 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14373 SDLoc DL(N0);
14374 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14375 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14376 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14377 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14378 }
14379
14380 return combineTruncSelectToSMaxUSat(N, DAG);
14381}
14382
14383// Combines two comparison operations and a logic operation into one selection
14384// operation (min, max) and a logic operation. Returns the newly constructed node
14385// if the conditions for the optimization are satisfied.
14388 const RISCVSubtarget &Subtarget) {
14389 SelectionDAG &DAG = DCI.DAG;
14390
14391 SDValue N0 = N->getOperand(0);
14392 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14393 // extending X. This is safe since we only need the LSB after the shift and
14394 // shift amounts larger than 31 would produce poison. If we wait until
14395 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14396 // to use a BEXT instruction.
14397 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14398 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14399 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14400 N0.hasOneUse()) {
14401 SDLoc DL(N);
14402 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14403 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14404 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14405 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14406 DAG.getConstant(1, DL, MVT::i64));
14407 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14408 }
14409
14410 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14411 return V;
14412 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14413 return V;
14414
14415 if (DCI.isAfterLegalizeDAG())
14416 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14417 return V;
14418
14419 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14420 // (select lhs, rhs, cc, x, (and x, y))
14421 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14422}
14423
14424// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14425// FIXME: Generalize to other binary operators with same operand.
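// Illustrative sketch: the select idiom (c ? a^1 : b^1) is lowered as
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// and can be rewritten as
//   (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// which is equivalent in both the c != 0 case (a^1) and the c == 0 case (b^1),
// and needs only one xor.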
14426static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14427 SelectionDAG &DAG) {
14428 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14429
14430 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14432 !N0.hasOneUse() || !N1.hasOneUse())
14433 return SDValue();
14434
14435 // Should have the same condition.
14436 SDValue Cond = N0.getOperand(1);
14437 if (Cond != N1.getOperand(1))
14438 return SDValue();
14439
14440 SDValue TrueV = N0.getOperand(0);
14441 SDValue FalseV = N1.getOperand(0);
14442
14443 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14444 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14445 !isOneConstant(TrueV.getOperand(1)) ||
14446 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14447 return SDValue();
14448
14449 EVT VT = N->getValueType(0);
14450 SDLoc DL(N);
14451
14452 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14453 Cond);
14454 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14455 Cond);
14456 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14457 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14458}
14459
14461 const RISCVSubtarget &Subtarget) {
14462 SelectionDAG &DAG = DCI.DAG;
14463
14464 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14465 return V;
14466 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14467 return V;
14468
14469 if (DCI.isAfterLegalizeDAG())
14470 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14471 return V;
14472
14473 // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the select
14474 // idiom. We may be able to pull a common operation out of the true and false value.
14475 SDValue N0 = N->getOperand(0);
14476 SDValue N1 = N->getOperand(1);
14477 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14478 return V;
14479 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14480 return V;
14481
14482 // fold (or (select cond, 0, y), x) ->
14483 // (select cond, x, (or x, y))
14484 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14485}
14486
14488 const RISCVSubtarget &Subtarget) {
14489 SDValue N0 = N->getOperand(0);
14490 SDValue N1 = N->getOperand(1);
14491
14492 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14493// (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14494// RISCVISD::SLLW and we can't recover it to use a BSET instruction.
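// As an illustration: ~(-1 << X) is the mask (1 << X) - 1 of the X low bits,
// which is exactly what (ADDI (BSET X0, X), -1) computes: BSET with the zero
// register forms 1 << X, and the ADDI then subtracts 1.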
14495 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14496 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14497 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14498 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14499 SDLoc DL(N);
14500 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14501 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14502 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14503 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14504 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14505 }
14506
14507 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14508 // NOTE: Assumes ROL being legal means ROLW is legal.
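// For example (illustrative), with x = 3: (xor (sllw 1, 3), -1) = ~8, and
// rotating the 32-bit pattern 0xFFFFFFFE (~1) left by 3 moves its single zero
// bit to position 3, giving the same ~8 after sign extension. Both sides use
// only the low 5 bits of x.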
14509 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14510 if (N0.getOpcode() == RISCVISD::SLLW &&
14512 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14513 SDLoc DL(N);
14514 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14515 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14516 }
14517
14518 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
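// For instance (illustrative): !(5 < y) is (y <= 5), i.e. (y < 6), so
// (xor (setcc 5, y, setlt), 1) becomes (setcc y, 6, setlt) as long as 6 is
// still a simm12.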
14519 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14520 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14521 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14522 if (ConstN00 && CC == ISD::SETLT) {
14523 EVT VT = N0.getValueType();
14524 SDLoc DL(N0);
14525 const APInt &Imm = ConstN00->getAPIntValue();
14526 if ((Imm + 1).isSignedIntN(12))
14527 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14528 DAG.getConstant(Imm + 1, DL, VT), CC);
14529 }
14530 }
14531
14532 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14533 return V;
14534 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14535 return V;
14536
14537 // fold (xor (select cond, 0, y), x) ->
14538 // (select cond, x, (xor x, y))
14539 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14540}
14541
14542// Try to expand a scalar multiply to a faster sequence.
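// For example (illustrative), with Zba: X*20 = (X*5) << 2 can be emitted as
// (shl (sh2add X, X), 2), and X*11 = (X*5)*2 + X as (sh1add (sh2add X, X), X),
// each two instructions instead of a constant materialization plus a mul.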
14543static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14544 TargetLowering::DAGCombinerInfo &DCI,
14545 const RISCVSubtarget &Subtarget) {
14546
14547 EVT VT = N->getValueType(0);
14548
14549 // LI + MUL is usually smaller than the alternative sequence.
14551 return SDValue();
14552
14553 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14554 return SDValue();
14555
14556 if (VT != Subtarget.getXLenVT())
14557 return SDValue();
14558
14559 const bool HasShlAdd =
14560 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14561
14562 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14563 if (!CNode)
14564 return SDValue();
14565 uint64_t MulAmt = CNode->getZExtValue();
14566
14567 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14568 // We're adding additional uses of X here, and in principle, we should be freezing
14569 // X before doing so. However, adding freeze here causes real regressions, and no
14570 // other target properly freezes X in these cases either.
14571 SDValue X = N->getOperand(0);
14572
14573 if (HasShlAdd) {
14574 for (uint64_t Divisor : {3, 5, 9}) {
14575 if (MulAmt % Divisor != 0)
14576 continue;
14577 uint64_t MulAmt2 = MulAmt / Divisor;
14578 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14579 if (isPowerOf2_64(MulAmt2)) {
14580 SDLoc DL(N);
14581 SDValue X = N->getOperand(0);
14582 // Put the shift first if we can fold a zext into the
14583 // shift forming a slli.uw.
14584 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14585 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14586 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14587 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14588 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14589 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14590 Shl);
14591 }
14592 // Otherwise, put the shl second so that it can fold with following
14593 // instructions (e.g. sext or add).
14594 SDValue Mul359 =
14595 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14596 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14597 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14598 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14599 }
14600
14601 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14602 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14603 SDLoc DL(N);
14604 SDValue Mul359 =
14605 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14606 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14607 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14608 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14609 Mul359);
14610 }
14611 }
14612
14613 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
14614 // shXadd. First check if this is a sum of two powers of 2 because that's
14615 // easy. Then count the trailing zeros to find the first set bit.
14616 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14617 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14618 if (ScaleShift >= 1 && ScaleShift < 4) {
14619 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14620 SDLoc DL(N);
14621 SDValue Shift1 =
14622 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14623 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14624 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14625 }
14626 }
14627
14628 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14629 // This is the two instruction form; there are also three instruction
14630 // variants we could implement, e.g.:
14631 // (2^(1,2,3) * 3,5,9 + 1) << C2
14632 // 2^(C1>3) * 3,5,9 +/- 1
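 // Worked example (illustrative): MulAmt = 11 gives C = 10, TZ = 1 and
 // C >> TZ = 5, so the result is (sh1add (sh2add X, X), X),
 // i.e. (5*X << 1) + X = 11*X.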
14633 for (uint64_t Divisor : {3, 5, 9}) {
14634 uint64_t C = MulAmt - 1;
14635 if (C <= Divisor)
14636 continue;
14637 unsigned TZ = llvm::countr_zero(C);
14638 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14639 SDLoc DL(N);
14640 SDValue Mul359 =
14641 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14642 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14643 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14644 DAG.getConstant(TZ, DL, VT), X);
14645 }
14646 }
14647
14648 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14649 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14650 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14651 if (ScaleShift >= 1 && ScaleShift < 4) {
14652 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14653 SDLoc DL(N);
14654 SDValue Shift1 =
14655 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14656 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14657 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14658 DAG.getConstant(ScaleShift, DL, VT), X));
14659 }
14660 }
14661
14662 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
14663 for (uint64_t Offset : {3, 5, 9}) {
14664 if (isPowerOf2_64(MulAmt + Offset)) {
14665 SDLoc DL(N);
14666 SDValue Shift1 =
14667 DAG.getNode(ISD::SHL, DL, VT, X,
14668 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14669 SDValue Mul359 =
14670 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14671 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14672 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14673 }
14674 }
14675 }
14676
14677 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
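 // For example (illustrative): MulAmt = 112 has lowest set bit 16 and
 // 112 + 16 = 128 is a power of two, so 112*X = (X << 7) - (X << 4).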
14678 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14679 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14680 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14681 SDLoc DL(N);
14682 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14683 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14684 SDValue Shift2 =
14685 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14686 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14687 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14688 }
14689
14690 if (HasShlAdd) {
14691 for (uint64_t Divisor : {3, 5, 9}) {
14692 if (MulAmt % Divisor != 0)
14693 continue;
14694 uint64_t MulAmt2 = MulAmt / Divisor;
14695 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14696 // of 25 which happen to be quite common.
14697 for (uint64_t Divisor2 : {3, 5, 9}) {
14698 if (MulAmt2 % Divisor2 != 0)
14699 continue;
14700 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14701 if (isPowerOf2_64(MulAmt3)) {
14702 SDLoc DL(N);
14703 SDValue Mul359A =
14704 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14705 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14706 SDValue Mul359B = DAG.getNode(
14707 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14708 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14709 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14710 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14711 }
14712 }
14713 }
14714 }
14715
14716 return SDValue();
14717}
14718
14719// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14720// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14721// Same for other equivalent types with other equivalent constants.
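// Illustrative check for one i32 lane (HalfSize = 16): (srl X, 15) & 0x10001
// keeps bit 15 of the low half in bit 0 and bit 31 (the sign of the high half)
// in bit 16; multiplying by 0xffff expands each kept bit to 0xffff within its
// half, e.g. both set gives 0xffff + 0xffff0000 = 0xffffffff, which is exactly
// an arithmetic shift right by 15 of each i16 half.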
14723 EVT VT = N->getValueType(0);
14724 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14725
14726 // Do this for legal vectors unless they are i1 or i8 vectors.
14727 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14728 return SDValue();
14729
14730 if (N->getOperand(0).getOpcode() != ISD::AND ||
14731 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14732 return SDValue();
14733
14734 SDValue And = N->getOperand(0);
14735 SDValue Srl = And.getOperand(0);
14736
14737 APInt V1, V2, V3;
14738 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14739 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14741 return SDValue();
14742
14743 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14744 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14745 V3 != (HalfSize - 1))
14746 return SDValue();
14747
14748 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14749 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14750 VT.getVectorElementCount() * 2);
14751 SDLoc DL(N);
14752 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14753 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14754 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14755 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14756}
14757
14760 const RISCVSubtarget &Subtarget) {
14761 EVT VT = N->getValueType(0);
14762 if (!VT.isVector())
14763 return expandMul(N, DAG, DCI, Subtarget);
14764
14765 SDLoc DL(N);
14766 SDValue N0 = N->getOperand(0);
14767 SDValue N1 = N->getOperand(1);
14768 SDValue MulOper;
14769 unsigned AddSubOpc;
14770
14771 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14772 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14773 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14774 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14775 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14776 AddSubOpc = V->getOpcode();
14777 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14778 SDValue Opnd = V->getOperand(1);
14779 MulOper = V->getOperand(0);
14780 if (AddSubOpc == ISD::SUB)
14781 std::swap(Opnd, MulOper);
14782 if (isOneOrOneSplat(Opnd))
14783 return true;
14784 }
14785 return false;
14786 };
14787
14788 if (IsAddSubWith1(N0)) {
14789 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14790 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14791 }
14792
14793 if (IsAddSubWith1(N1)) {
14794 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14795 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14796 }
14797
14798 if (SDValue V = combineBinOpOfZExt(N, DAG))
14799 return V;
14800
14802 return V;
14803
14804 return SDValue();
14805}
14806
14807/// According to the property that indexed load/store instructions zero-extend
14808/// their indices, try to narrow the type of the index operand.
14809static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14810 if (isIndexTypeSigned(IndexType))
14811 return false;
14812
14813 if (!N->hasOneUse())
14814 return false;
14815
14816 EVT VT = N.getValueType();
14817 SDLoc DL(N);
14818
14819 // In general, what we're doing here is seeing if we can sink a truncate to
14820 // a smaller element type into the expression tree building our index.
14821 // TODO: We can generalize this and handle a bunch more cases if useful.
14822
14823 // Narrow a buildvector to the narrowest element type. This requires less
14824 // work and less register pressure at high LMUL, and creates smaller constants
14825 // which may be cheaper to materialize.
14826 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14827 KnownBits Known = DAG.computeKnownBits(N);
14828 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14829 LLVMContext &C = *DAG.getContext();
14830 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14831 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14832 N = DAG.getNode(ISD::TRUNCATE, DL,
14833 VT.changeVectorElementType(ResultVT), N);
14834 return true;
14835 }
14836 }
14837
14838 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
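 // For example (illustrative): an index (shl (zext v_i8 x to v_i64), splat 2)
 // needs at most 8 + 2 = 10 bits, so NewElen = 16 and it can be rebuilt as
 // (shl (zext x to v_i16), 2); the maximum value 255 * 4 = 1020 still fits.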
14839 if (N.getOpcode() != ISD::SHL)
14840 return false;
14841
14842 SDValue N0 = N.getOperand(0);
14843 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14845 return false;
14846 if (!N0->hasOneUse())
14847 return false;
14848
14849 APInt ShAmt;
14850 SDValue N1 = N.getOperand(1);
14851 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14852 return false;
14853
14854 SDValue Src = N0.getOperand(0);
14855 EVT SrcVT = Src.getValueType();
14856 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14857 unsigned ShAmtV = ShAmt.getZExtValue();
14858 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14859 NewElen = std::max(NewElen, 8U);
14860
14861 // Skip if NewElen is not narrower than the original extended type.
14862 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14863 return false;
14864
14865 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14866 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14867
14868 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14869 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14870 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14871 return true;
14872}
14873
14874// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14875// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14876// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14877// can become a sext.w instead of a shift pair.
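// Worked example (illustrative): (seteq (and X, 0xffffffff), 0xfffff800)
// becomes (seteq (sext_inreg X, i32), -2048); the low 32 bits of X equal
// 0xfffff800 exactly when their sign extension equals -2048, and -2048 is a
// simm12 that a single ADDI can materialize.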
14879 const RISCVSubtarget &Subtarget) {
14880 SDValue N0 = N->getOperand(0);
14881 SDValue N1 = N->getOperand(1);
14882 EVT VT = N->getValueType(0);
14883 EVT OpVT = N0.getValueType();
14884
14885 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14886 return SDValue();
14887
14888 // RHS needs to be a constant.
14889 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14890 if (!N1C)
14891 return SDValue();
14892
14893 // LHS needs to be (and X, 0xffffffff).
14894 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14895 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14896 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14897 return SDValue();
14898
14899 // Looking for an equality compare.
14900 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14901 if (!isIntEqualitySetCC(Cond))
14902 return SDValue();
14903
14904 // Don't do this if the sign bit is provably zero, it will be turned back into
14905 // an AND.
14906 APInt SignMask = APInt::getOneBitSet(64, 31);
14907 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14908 return SDValue();
14909
14910 const APInt &C1 = N1C->getAPIntValue();
14911
14912 SDLoc dl(N);
14913 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14914 // to be equal.
14915 if (C1.getActiveBits() > 32)
14916 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14917
14918 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14919 N0.getOperand(0), DAG.getValueType(MVT::i32));
14920 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14921 dl, OpVT), Cond);
14922}
14923
14924static SDValue
14925performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14926 const RISCVSubtarget &Subtarget) {
14927 SDValue Src = N->getOperand(0);
14928 EVT VT = N->getValueType(0);
14929 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14930 unsigned Opc = Src.getOpcode();
14931
14932 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14933 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14934 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14935 Subtarget.hasStdExtZfhmin())
14936 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14937 Src.getOperand(0));
14938
14939 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14940 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14941 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14942 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14943 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14944 Src.getOperand(1));
14945
14946 return SDValue();
14947}
14948
14949namespace {
14950// Forward declaration of the structure holding the necessary information to
14951// apply a combine.
14952struct CombineResult;
14953
14954enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14955/// Helper class for folding sign/zero extensions.
14956/// In particular, this class is used for the following combines:
14957/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14958/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14959/// mul | mul_vl -> vwmul(u) | vwmul_su
14960/// shl | shl_vl -> vwsll
14961/// fadd -> vfwadd | vfwadd_w
14962/// fsub -> vfwsub | vfwsub_w
14963/// fmul -> vfwmul
14964/// An object of this class represents an operand of the operation we want to
14965/// combine.
14966/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14967/// NodeExtensionHelper for `a` and one for `b`.
14968///
14969/// This class abstracts away how the extension is materialized and
14970/// how its number of users affect the combines.
14971///
14972/// In particular:
14973/// - VWADD_W is conceptually == add(op0, sext(op1))
14974/// - VWADDU_W == add(op0, zext(op1))
14975/// - VWSUB_W == sub(op0, sext(op1))
14976/// - VWSUBU_W == sub(op0, zext(op1))
14977/// - VFWADD_W == fadd(op0, fpext(op1))
14978/// - VFWSUB_W == fsub(op0, fpext(op1))
14979/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14980/// zext|sext(smaller_value).
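/// As a concrete illustration: for (MUL_VL (VSEXT_VL a), (VSEXT_VL b)) both
/// helpers report SupportsSExt, allowing the combine to (VWMUL_VL a, b); for
/// (ADD_VL x, (VZEXT_VL y)) only the RHS helper reports SupportsZExt, which
/// still allows the "_W" form (VWADDU_W_VL x, y).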
14981struct NodeExtensionHelper {
14982 /// Records if this operand is like being zero extended.
14983 bool SupportsZExt;
14984 /// Records if this operand is like being sign extended.
14985 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14986 /// instance, a splat constant (e.g., 3), would support being both sign and
14987 /// zero extended.
14988 bool SupportsSExt;
14989 /// Records if this operand is like being floating-point extended.
14990 bool SupportsFPExt;
14991 /// This boolean captures whether we care if this operand would still be
14992 /// around after the folding happens.
14993 bool EnforceOneUse;
14994 /// Original value that this NodeExtensionHelper represents.
14995 SDValue OrigOperand;
14996
14997 /// Get the value feeding the extension or the value itself.
14998 /// E.g., for zext(a), this would return a.
14999 SDValue getSource() const {
15000 switch (OrigOperand.getOpcode()) {
15001 case ISD::ZERO_EXTEND:
15002 case ISD::SIGN_EXTEND:
15003 case RISCVISD::VSEXT_VL:
15004 case RISCVISD::VZEXT_VL:
15006 return OrigOperand.getOperand(0);
15007 default:
15008 return OrigOperand;
15009 }
15010 }
15011
15012 /// Check if this instance represents a splat.
15013 bool isSplat() const {
15014 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15015 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15016 }
15017
15018 /// Get the extended opcode.
15019 unsigned getExtOpc(ExtKind SupportsExt) const {
15020 switch (SupportsExt) {
15021 case ExtKind::SExt:
15022 return RISCVISD::VSEXT_VL;
15023 case ExtKind::ZExt:
15024 return RISCVISD::VZEXT_VL;
15025 case ExtKind::FPExt:
15027 }
15028 llvm_unreachable("Unknown ExtKind enum");
15029 }
15030
15031 /// Get or create a value that can feed \p Root with the given extension \p
15032 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
15033 /// operand. \see ::getSource().
15034 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15035 const RISCVSubtarget &Subtarget,
15036 std::optional<ExtKind> SupportsExt) const {
15037 if (!SupportsExt.has_value())
15038 return OrigOperand;
15039
15040 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15041
15042 SDValue Source = getSource();
15043 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15044 if (Source.getValueType() == NarrowVT)
15045 return Source;
15046
15047 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15048 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15049 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15050 Root->getOpcode() == RISCVISD::VFMADD_VL);
15051 return Source;
15052 }
15053
15054 unsigned ExtOpc = getExtOpc(*SupportsExt);
15055
15056 // If we need an extension, we should be changing the type.
15057 SDLoc DL(OrigOperand);
15058 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15059 switch (OrigOperand.getOpcode()) {
15060 case ISD::ZERO_EXTEND:
15061 case ISD::SIGN_EXTEND:
15062 case RISCVISD::VSEXT_VL:
15063 case RISCVISD::VZEXT_VL:
15065 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15066 case ISD::SPLAT_VECTOR:
15067 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15069 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15070 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15072 Source = Source.getOperand(1);
15073 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15074 Source = Source.getOperand(0);
15075 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15076 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15077 DAG.getUNDEF(NarrowVT), Source, VL);
15078 default:
15079 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15080 // and that operand should already have the right NarrowVT so no
15081 // extension should be required at this point.
15082 llvm_unreachable("Unsupported opcode");
15083 }
15084 }
15085
15086 /// Helper function to get the narrow type for \p Root.
15087 /// The narrow type is the type of \p Root where we divided the size of each
15088 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15089 /// \pre Both the narrow type and the original type should be legal.
15090 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15091 MVT VT = Root->getSimpleValueType(0);
15092
15093 // Determine the narrow size.
15094 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15095
15096 MVT EltVT = SupportsExt == ExtKind::FPExt
15097 ? MVT::getFloatingPointVT(NarrowSize)
15098 : MVT::getIntegerVT(NarrowSize);
15099
15100 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15101 "Trying to extend something we can't represent");
15102 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15103 return NarrowVT;
15104 }
15105
15106 /// Get the opcode to materialize:
15107 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15108 static unsigned getSExtOpcode(unsigned Opcode) {
15109 switch (Opcode) {
15110 case ISD::ADD:
15111 case RISCVISD::ADD_VL:
15114 case ISD::OR:
15115 return RISCVISD::VWADD_VL;
15116 case ISD::SUB:
15117 case RISCVISD::SUB_VL:
15120 return RISCVISD::VWSUB_VL;
15121 case ISD::MUL:
15122 case RISCVISD::MUL_VL:
15123 return RISCVISD::VWMUL_VL;
15124 default:
15125 llvm_unreachable("Unexpected opcode");
15126 }
15127 }
15128
15129 /// Get the opcode to materialize:
15130 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15131 static unsigned getZExtOpcode(unsigned Opcode) {
15132 switch (Opcode) {
15133 case ISD::ADD:
15134 case RISCVISD::ADD_VL:
15137 case ISD::OR:
15138 return RISCVISD::VWADDU_VL;
15139 case ISD::SUB:
15140 case RISCVISD::SUB_VL:
15143 return RISCVISD::VWSUBU_VL;
15144 case ISD::MUL:
15145 case RISCVISD::MUL_VL:
15146 return RISCVISD::VWMULU_VL;
15147 case ISD::SHL:
15148 case RISCVISD::SHL_VL:
15149 return RISCVISD::VWSLL_VL;
15150 default:
15151 llvm_unreachable("Unexpected opcode");
15152 }
15153 }
15154
15155 /// Get the opcode to materialize:
15156 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15157 static unsigned getFPExtOpcode(unsigned Opcode) {
15158 switch (Opcode) {
15159 case RISCVISD::FADD_VL:
15161 return RISCVISD::VFWADD_VL;
15162 case RISCVISD::FSUB_VL:
15164 return RISCVISD::VFWSUB_VL;
15165 case RISCVISD::FMUL_VL:
15166 return RISCVISD::VFWMUL_VL;
15168 return RISCVISD::VFWMADD_VL;
15170 return RISCVISD::VFWMSUB_VL;
15172 return RISCVISD::VFWNMADD_VL;
15174 return RISCVISD::VFWNMSUB_VL;
15175 default:
15176 llvm_unreachable("Unexpected opcode");
15177 }
15178 }
15179
15180 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15181 /// newOpcode(a, b).
15182 static unsigned getSUOpcode(unsigned Opcode) {
15183 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15184 "SU is only supported for MUL");
15185 return RISCVISD::VWMULSU_VL;
15186 }
15187
15188 /// Get the opcode to materialize
15189 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15190 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15191 switch (Opcode) {
15192 case ISD::ADD:
15193 case RISCVISD::ADD_VL:
15194 case ISD::OR:
15195 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15197 case ISD::SUB:
15198 case RISCVISD::SUB_VL:
15199 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15201 case RISCVISD::FADD_VL:
15202 return RISCVISD::VFWADD_W_VL;
15203 case RISCVISD::FSUB_VL:
15204 return RISCVISD::VFWSUB_W_VL;
15205 default:
15206 llvm_unreachable("Unexpected opcode");
15207 }
15208 }
15209
15210 using CombineToTry = std::function<std::optional<CombineResult>(
15211 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15212 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15213 const RISCVSubtarget &)>;
15214
15215 /// Check if this node needs to be fully folded or extended for all users.
15216 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15217
15218 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15219 const RISCVSubtarget &Subtarget) {
15220 unsigned Opc = OrigOperand.getOpcode();
15221 MVT VT = OrigOperand.getSimpleValueType();
15222
15223 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15224 "Unexpected Opcode");
15225
15226 // The passthru must be undef for tail agnostic.
15227 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15228 return;
15229
15230 // Get the scalar value.
15231 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15232 : OrigOperand.getOperand(1);
15233
15234 // See if we have enough sign bits or zero bits in the scalar to use a
15235 // widening opcode by splatting to smaller element size.
15236 unsigned EltBits = VT.getScalarSizeInBits();
15237 unsigned ScalarBits = Op.getValueSizeInBits();
15238 // If we're not getting all bits from the element, we need special handling.
15239 if (ScalarBits < EltBits) {
15240 // This should only occur on RV32.
15241 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15242 !Subtarget.is64Bit() && "Unexpected splat");
15243 // vmv.v.x sign extends narrow inputs.
15244 SupportsSExt = true;
15245
15246 // If the input is positive, then sign extend is also zero extend.
15247 if (DAG.SignBitIsZero(Op))
15248 SupportsZExt = true;
15249
15250 EnforceOneUse = false;
15251 return;
15252 }
15253
15254 unsigned NarrowSize = EltBits / 2;
15255 // If the narrow type cannot be expressed with a legal VMV,
15256 // this is not a valid candidate.
15257 if (NarrowSize < 8)
15258 return;
15259
15260 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15261 SupportsSExt = true;
15262
15263 if (DAG.MaskedValueIsZero(Op,
15264 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15265 SupportsZExt = true;
15266
15267 EnforceOneUse = false;
15268 }
15269
15270 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15271 const RISCVSubtarget &Subtarget) {
15272 // Any f16 extension will need zvfh
15273 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15274 return false;
15275 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15276 // zvfbfwma
15277 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15278 Root->getOpcode() != RISCVISD::VFMADD_VL))
15279 return false;
15280 return true;
15281 }
15282
15283 /// Helper method to set the various fields of this struct based on the
15284 /// type of \p Root.
15285 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15286 const RISCVSubtarget &Subtarget) {
15287 SupportsZExt = false;
15288 SupportsSExt = false;
15289 SupportsFPExt = false;
15290 EnforceOneUse = true;
15291 unsigned Opc = OrigOperand.getOpcode();
15292 // For the nodes we handle below, we end up using their inputs directly: see
15293 // getSource(). However since they either don't have a passthru or we check
15294 // that their passthru is undef, we can safely ignore their mask and VL.
15295 switch (Opc) {
15296 case ISD::ZERO_EXTEND:
15297 case ISD::SIGN_EXTEND: {
15298 MVT VT = OrigOperand.getSimpleValueType();
15299 if (!VT.isVector())
15300 break;
15301
15302 SDValue NarrowElt = OrigOperand.getOperand(0);
15303 MVT NarrowVT = NarrowElt.getSimpleValueType();
15304 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15305 if (NarrowVT.getVectorElementType() == MVT::i1)
15306 break;
15307
15308 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15309 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15310 break;
15311 }
15312 case RISCVISD::VZEXT_VL:
15313 SupportsZExt = true;
15314 break;
15315 case RISCVISD::VSEXT_VL:
15316 SupportsSExt = true;
15317 break;
15319 MVT NarrowEltVT =
15321 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15322 break;
15323 SupportsFPExt = true;
15324 break;
15325 }
15326 case ISD::SPLAT_VECTOR:
15328 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15329 break;
15330 case RISCVISD::VFMV_V_F_VL: {
15331 MVT VT = OrigOperand.getSimpleValueType();
15332
15333 if (!OrigOperand.getOperand(0).isUndef())
15334 break;
15335
15336 SDValue Op = OrigOperand.getOperand(1);
15337 if (Op.getOpcode() != ISD::FP_EXTEND)
15338 break;
15339
15340 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15341 Subtarget))
15342 break;
15343
15344 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15345 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15346 if (NarrowSize != ScalarBits)
15347 break;
15348
15349 SupportsFPExt = true;
15350 break;
15351 }
15352 default:
15353 break;
15354 }
15355 }
15356
15357 /// Check if \p Root supports any extension folding combines.
15358 static bool isSupportedRoot(const SDNode *Root,
15359 const RISCVSubtarget &Subtarget) {
15360 switch (Root->getOpcode()) {
15361 case ISD::ADD:
15362 case ISD::SUB:
15363 case ISD::MUL: {
15364 return Root->getValueType(0).isScalableVector();
15365 }
15366 case ISD::OR: {
15367 return Root->getValueType(0).isScalableVector() &&
15368 Root->getFlags().hasDisjoint();
15369 }
15370 // Vector Widening Integer Add/Sub/Mul Instructions
15371 case RISCVISD::ADD_VL:
15372 case RISCVISD::MUL_VL:
15375 case RISCVISD::SUB_VL:
15378 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15379 case RISCVISD::FADD_VL:
15380 case RISCVISD::FSUB_VL:
15381 case RISCVISD::FMUL_VL:
15384 return true;
15385 case ISD::SHL:
15386 return Root->getValueType(0).isScalableVector() &&
15387 Subtarget.hasStdExtZvbb();
15388 case RISCVISD::SHL_VL:
15389 return Subtarget.hasStdExtZvbb();
15394 return true;
15395 default:
15396 return false;
15397 }
15398 }
15399
15400 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15401 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15402 const RISCVSubtarget &Subtarget) {
15403 assert(isSupportedRoot(Root, Subtarget) &&
15404 "Trying to build an helper with an "
15405 "unsupported root");
15406 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15408 OrigOperand = Root->getOperand(OperandIdx);
15409
15410 unsigned Opc = Root->getOpcode();
15411 switch (Opc) {
15412 // We consider
15413 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15414 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15415 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15422 if (OperandIdx == 1) {
15423 SupportsZExt =
15425 SupportsSExt =
15427 SupportsFPExt =
15429 // There's no existing extension here, so we don't have to worry about
15430 // making sure it gets removed.
15431 EnforceOneUse = false;
15432 break;
15433 }
15434 [[fallthrough]];
15435 default:
15436 fillUpExtensionSupport(Root, DAG, Subtarget);
15437 break;
15438 }
15439 }
15440
15441 /// Helper function to get the Mask and VL from \p Root.
15442 static std::pair<SDValue, SDValue>
15443 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15444 const RISCVSubtarget &Subtarget) {
15445 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15446 switch (Root->getOpcode()) {
15447 case ISD::ADD:
15448 case ISD::SUB:
15449 case ISD::MUL:
15450 case ISD::OR:
15451 case ISD::SHL: {
15452 SDLoc DL(Root);
15453 MVT VT = Root->getSimpleValueType(0);
15454 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15455 }
15456 default:
15457 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15458 }
15459 }
15460
15461 /// Helper function to check if \p N is commutative with respect to the
15462 /// foldings that are supported by this class.
15463 static bool isCommutative(const SDNode *N) {
15464 switch (N->getOpcode()) {
15465 case ISD::ADD:
15466 case ISD::MUL:
15467 case ISD::OR:
15468 case RISCVISD::ADD_VL:
15469 case RISCVISD::MUL_VL:
15472 case RISCVISD::FADD_VL:
15473 case RISCVISD::FMUL_VL:
15479 return true;
15480 case ISD::SUB:
15481 case RISCVISD::SUB_VL:
15484 case RISCVISD::FSUB_VL:
15486 case ISD::SHL:
15487 case RISCVISD::SHL_VL:
15488 return false;
15489 default:
15490 llvm_unreachable("Unexpected opcode");
15491 }
15492 }
15493
15494 /// Get a list of combine to try for folding extensions in \p Root.
15495 /// Note that each returned CombineToTry function doesn't actually modify
15496 /// anything. Instead they produce an optional CombineResult that if not None,
15497 /// need to be materialized for the combine to be applied.
15498 /// \see CombineResult::materialize.
15499 /// If the related CombineToTry function returns std::nullopt, that means the
15500 /// combine didn't match.
15501 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15502};
15503
15504/// Helper structure that holds all the necessary information to materialize a
15505/// combine that does some extension folding.
15506struct CombineResult {
15507 /// Opcode to be generated when materializing the combine.
15508 unsigned TargetOpcode;
15509 // No value means no extension is needed.
15510 std::optional<ExtKind> LHSExt;
15511 std::optional<ExtKind> RHSExt;
15512 /// Root of the combine.
15513 SDNode *Root;
15514 /// LHS of the TargetOpcode.
15515 NodeExtensionHelper LHS;
15516 /// RHS of the TargetOpcode.
15517 NodeExtensionHelper RHS;
15518
15519 CombineResult(unsigned TargetOpcode, SDNode *Root,
15520 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15521 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15522 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15523 LHS(LHS), RHS(RHS) {}
15524
15525 /// Return a value that uses TargetOpcode and that can be used to replace
15526 /// Root.
15527 /// The actual replacement is *not* done in that method.
15528 SDValue materialize(SelectionDAG &DAG,
15529 const RISCVSubtarget &Subtarget) const {
15530 SDValue Mask, VL, Passthru;
15531 std::tie(Mask, VL) =
15532 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15533 switch (Root->getOpcode()) {
15534 default:
15535 Passthru = Root->getOperand(2);
15536 break;
15537 case ISD::ADD:
15538 case ISD::SUB:
15539 case ISD::MUL:
15540 case ISD::OR:
15541 case ISD::SHL:
15542 Passthru = DAG.getUNDEF(Root->getValueType(0));
15543 break;
15544 }
15545 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15546 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15547 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15548 Passthru, Mask, VL);
15549 }
15550};
15551
15552/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15553/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15554/// are zext) and LHS and RHS can be folded into Root.
15555/// AllowExtMask defines which form `ext` can take in this pattern.
15556///
15557/// \note If the pattern can match with both zext and sext, the returned
15558/// CombineResult will feature the zext result.
15559///
15560/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15561/// can be used to apply the pattern.
15562static std::optional<CombineResult>
15563canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15564 const NodeExtensionHelper &RHS,
15565 uint8_t AllowExtMask, SelectionDAG &DAG,
15566 const RISCVSubtarget &Subtarget) {
15567 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15568 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15569 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15570 /*RHSExt=*/{ExtKind::ZExt});
15571 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15572 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15573 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15574 /*RHSExt=*/{ExtKind::SExt});
15575 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15576 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15577 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15578 /*RHSExt=*/{ExtKind::FPExt});
15579 return std::nullopt;
15580}
15581
15582/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15583/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15584/// are zext) and LHS and RHS can be folded into Root.
15585///
15586/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15587/// can be used to apply the pattern.
15588static std::optional<CombineResult>
15589canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15590 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15591 const RISCVSubtarget &Subtarget) {
15592 return canFoldToVWWithSameExtensionImpl(
15593 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15594 Subtarget);
15595}
15596
15597/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15598///
15599/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15600/// can be used to apply the pattern.
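/// For example (illustrative): add_vl WideY, (zext NarrowX) can become
/// VWADDU_W_VL WideY, NarrowX, i.e. a vwaddu.wv, so only the RHS needs to be
/// narrow and the LHS stays at the wide type.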
15601static std::optional<CombineResult>
15602canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15603 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15604 const RISCVSubtarget &Subtarget) {
15605 if (RHS.SupportsFPExt)
15606 return CombineResult(
15607 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15608 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15609
15610 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15611 // sext/zext?
15612 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15613 // purposes.
15614 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15615 return CombineResult(
15616 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15617 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15618 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15619 return CombineResult(
15620 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15621 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15622 return std::nullopt;
15623}
15624
15625/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15626///
15627/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15628/// can be used to apply the pattern.
15629static std::optional<CombineResult>
15630canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15631 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15632 const RISCVSubtarget &Subtarget) {
15633 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15634 Subtarget);
15635}
15636
15637/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15638///
15639/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15640/// can be used to apply the pattern.
15641static std::optional<CombineResult>
15642canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15643 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15644 const RISCVSubtarget &Subtarget) {
15645 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15646 Subtarget);
15647}
15648
15649/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15650///
15651/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15652/// can be used to apply the pattern.
15653static std::optional<CombineResult>
15654canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15655 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15656 const RISCVSubtarget &Subtarget) {
15657 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15658 Subtarget);
15659}
15660
15661/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15662///
15663/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15664/// can be used to apply the pattern.
15665static std::optional<CombineResult>
15666canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15667 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15668 const RISCVSubtarget &Subtarget) {
15669
15670 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15671 return std::nullopt;
15672 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15673 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15674 /*RHSExt=*/{ExtKind::ZExt});
15675}
15676
15677SmallVector<NodeExtensionHelper::CombineToTry>
15678NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15679 SmallVector<CombineToTry> Strategies;
15680 switch (Root->getOpcode()) {
15681 case ISD::ADD:
15682 case ISD::SUB:
15683 case ISD::OR:
15684 case RISCVISD::ADD_VL:
15685 case RISCVISD::SUB_VL:
15686 case RISCVISD::FADD_VL:
15687 case RISCVISD::FSUB_VL:
15688 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15689 Strategies.push_back(canFoldToVWWithSameExtension);
15690 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15691 Strategies.push_back(canFoldToVW_W);
15692 break;
15693 case RISCVISD::FMUL_VL:
15694 case RISCVISD::VFMADD_VL:
15695 case RISCVISD::VFMSUB_VL:
15696 case RISCVISD::VFNMADD_VL:
15697 case RISCVISD::VFNMSUB_VL:
15698 Strategies.push_back(canFoldToVWWithSameExtension);
15699 break;
15700 case ISD::MUL:
15701 case RISCVISD::MUL_VL:
15702 // mul -> vwmul(u)
15703 Strategies.push_back(canFoldToVWWithSameExtension);
15704 // mul -> vwmulsu
15705 Strategies.push_back(canFoldToVW_SU);
15706 break;
15707 case ISD::SHL:
15708 case RISCVISD::SHL_VL:
15709 // shl -> vwsll
15710 Strategies.push_back(canFoldToVWWithZEXT);
15711 break;
15712 case RISCVISD::VWADD_W_VL:
15713 case RISCVISD::VWSUB_W_VL:
15714 // vwadd_w|vwsub_w -> vwadd|vwsub
15715 Strategies.push_back(canFoldToVWWithSEXT);
15716 break;
15717 case RISCVISD::VWADDU_W_VL:
15718 case RISCVISD::VWSUBU_W_VL:
15719 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15720 Strategies.push_back(canFoldToVWWithZEXT);
15721 break;
15722 case RISCVISD::VFWADD_W_VL:
15723 case RISCVISD::VFWSUB_W_VL:
15724 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15725 Strategies.push_back(canFoldToVWWithFPEXT);
15726 break;
15727 default:
15728 llvm_unreachable("Unexpected opcode");
15729 }
15730 return Strategies;
15731}
15732} // End anonymous namespace.
15733
15734/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15735/// The supported combines are:
15736/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15737/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15738/// mul | mul_vl -> vwmul(u) | vwmul_su
15739/// shl | shl_vl -> vwsll
15740/// fadd_vl -> vfwadd | vfwadd_w
15741/// fsub_vl -> vfwsub | vfwsub_w
15742/// fmul_vl -> vfwmul
15743/// vwadd_w(u) -> vwadd(u)
15744/// vwsub_w(u) -> vwsub(u)
15745/// vfwadd_w -> vfwadd
15746/// vfwsub_w -> vfwsub
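/// For example (a representative case): (add (sext X), (sext Y)) on vector
/// types becomes vwadd.vv X, Y, and (add WideY, (zext X)) becomes vwaddu.wv,
/// removing the explicit extension nodes wherever the whole extension web can
/// be folded.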
15747static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15748 TargetLowering::DAGCombinerInfo &DCI,
15749 const RISCVSubtarget &Subtarget) {
15750 SelectionDAG &DAG = DCI.DAG;
15751 if (DCI.isBeforeLegalize())
15752 return SDValue();
15753
15754 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15755 return SDValue();
15756
15757 SmallVector<SDNode *> Worklist;
15758 SmallSet<SDNode *, 8> Inserted;
15759 Worklist.push_back(N);
15760 Inserted.insert(N);
15761 SmallVector<CombineResult> CombinesToApply;
15762
15763 while (!Worklist.empty()) {
15764 SDNode *Root = Worklist.pop_back_val();
15765
15766 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15767 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15768 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15769 &Inserted](const NodeExtensionHelper &Op) {
15770 if (Op.needToPromoteOtherUsers()) {
15771 for (SDUse &Use : Op.OrigOperand->uses()) {
15772 SDNode *TheUser = Use.getUser();
15773 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15774 return false;
15775 // We only support the first 2 operands of FMA.
15776 if (Use.getOperandNo() >= 2)
15777 return false;
15778 if (Inserted.insert(TheUser).second)
15779 Worklist.push_back(TheUser);
15780 }
15781 }
15782 return true;
15783 };
15784
15785 // Control the compile time by limiting the number of nodes we look at in
15786 // total.
15787 if (Inserted.size() > ExtensionMaxWebSize)
15788 return SDValue();
15789
15790 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15791 NodeExtensionHelper::getSupportedFoldings(Root);
15792
15793 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15794 bool Matched = false;
15795 for (int Attempt = 0;
15796 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15797 ++Attempt) {
15798
15799 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15800 FoldingStrategies) {
15801 std::optional<CombineResult> Res =
15802 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15803 if (Res) {
15804 Matched = true;
15805 CombinesToApply.push_back(*Res);
15806 // All the inputs that are extended need to be folded, otherwise
15807 // we would be left with both the old input (since it may still be
15808 // used) and the new one.
15809 if (Res->LHSExt.has_value())
15810 if (!AppendUsersIfNeeded(LHS))
15811 return SDValue();
15812 if (Res->RHSExt.has_value())
15813 if (!AppendUsersIfNeeded(RHS))
15814 return SDValue();
15815 break;
15816 }
15817 }
15818 std::swap(LHS, RHS);
15819 }
15820 // Right now we take an all-or-nothing approach.
15821 if (!Matched)
15822 return SDValue();
15823 }
15824 // Store the value for the replacement of the input node separately.
15825 SDValue InputRootReplacement;
15826 // We do the RAUW after we materialize all the combines, because some replaced
15827 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15828 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15829 // yet-to-be-visited CombinesToApply roots.
15830 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15831 ValuesToReplace.reserve(CombinesToApply.size());
15832 for (CombineResult Res : CombinesToApply) {
15833 SDValue NewValue = Res.materialize(DAG, Subtarget);
15834 if (!InputRootReplacement) {
15835 assert(Res.Root == N &&
15836 "First element is expected to be the current node");
15837 InputRootReplacement = NewValue;
15838 } else {
15839 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15840 }
15841 }
15842 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15843 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15844 DCI.AddToWorklist(OldNewValues.second.getNode());
15845 }
15846 return InputRootReplacement;
15847}
15848
15849// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15850// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15851// y will be the Passthru and cond will be the Mask.
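// For illustration: in lanes where cond is false the vmerge produces 0, and
// adding or subtracting 0 leaves y unchanged, which is exactly what a masked
// vwadd(u).wv/vwsub(u).wv with y as the passthru computes, so the vmerge can
// be folded into the mask and passthru operands.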
15852static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15853 unsigned Opc = N->getOpcode();
15854 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15855 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15856
15857 SDValue Y = N->getOperand(0);
15858 SDValue MergeOp = N->getOperand(1);
15859 unsigned MergeOpc = MergeOp.getOpcode();
15860
15861 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15862 return SDValue();
15863
15864 SDValue X = MergeOp->getOperand(1);
15865
15866 if (!MergeOp.hasOneUse())
15867 return SDValue();
15868
15869 // Passthru should be undef
15870 SDValue Passthru = N->getOperand(2);
15871 if (!Passthru.isUndef())
15872 return SDValue();
15873
15874 // Mask should be all ones
15875 SDValue Mask = N->getOperand(3);
15876 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15877 return SDValue();
15878
15879 // False value of MergeOp should be all zeros
15880 SDValue Z = MergeOp->getOperand(2);
15881
15882 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15883 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15884 Z = Z.getOperand(1);
15885
15886 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15887 return SDValue();
15888
15889 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15890 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15891 N->getFlags());
15892}
15893
15894static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15895 TargetLowering::DAGCombinerInfo &DCI,
15896 const RISCVSubtarget &Subtarget) {
15897 [[maybe_unused]] unsigned Opc = N->getOpcode();
15898 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15899 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15900
15901 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15902 return V;
15903
15904 return combineVWADDSUBWSelect(N, DCI.DAG);
15905}
15906
15907// Helper function for performMemPairCombine.
15908// Try to combine the memory loads/stores LSNode1 and LSNode2
15909// into a single memory pair operation.
15910static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15911 LSBaseSDNode *LSNode2, SDValue BasePtr,
15912 uint64_t Imm) {
15913 SmallPtrSet<const SDNode *, 32> Visited;
15914 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15915
15916 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15917 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15918 return SDValue();
15919
15920 MachineFunction &MF = DAG.getMachineFunction();
15921 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15922
15923 // The new operation has twice the width.
15924 MVT XLenVT = Subtarget.getXLenVT();
15925 EVT MemVT = LSNode1->getMemoryVT();
15926 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15927 MachineMemOperand *MMO = LSNode1->getMemOperand();
15928 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15929 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15930
15931 if (LSNode1->getOpcode() == ISD::LOAD) {
15932 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15933 unsigned Opcode;
15934 if (MemVT == MVT::i32)
15935 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15936 else
15937 Opcode = RISCVISD::TH_LDD;
15938
15939 SDValue Res = DAG.getMemIntrinsicNode(
15940 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15941 {LSNode1->getChain(), BasePtr,
15942 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15943 NewMemVT, NewMMO);
15944
15945 SDValue Node1 =
15946 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15947 SDValue Node2 =
15948 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15949
15950 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15951 return Node1;
15952 } else {
15953 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15954
15955 SDValue Res = DAG.getMemIntrinsicNode(
15956 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15957 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15958 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15959 NewMemVT, NewMMO);
15960
15961 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15962 return Res;
15963 }
15964}
15965
15966// Try to combine two adjacent loads/stores to a single pair instruction from
15967// the XTHeadMemPair vendor extension.
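// For example (illustrative): two simple i64 loads from base+0 and base+8
// whose offset satisfies the shifted 2-bit index constraint below can be
// merged into a single th.ldd; i32 pairs similarly map to th.lwd/th.lwud and
// stores to th.swd/th.sdd.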
15968static SDValue performMemPairCombine(SDNode *N,
15969 TargetLowering::DAGCombinerInfo &DCI) {
15970 SelectionDAG &DAG = DCI.DAG;
15971 MachineFunction &MF = DAG.getMachineFunction();
15972 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15973
15974 // Target does not support load/store pair.
15975 if (!Subtarget.hasVendorXTHeadMemPair())
15976 return SDValue();
15977
15978 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15979 EVT MemVT = LSNode1->getMemoryVT();
15980 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15981
15982 // No volatile, indexed or atomic loads/stores.
15983 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15984 return SDValue();
15985
15986 // Function to get a base + constant representation from a memory value.
15987 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15988 if (Ptr->getOpcode() == ISD::ADD)
15989 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15990 return {Ptr->getOperand(0), C1->getZExtValue()};
15991 return {Ptr, 0};
15992 };
15993
15994 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15995
15996 SDValue Chain = N->getOperand(0);
15997 for (SDUse &Use : Chain->uses()) {
15998 if (Use.getUser() != N && Use.getResNo() == 0 &&
15999 Use.getUser()->getOpcode() == N->getOpcode()) {
16000 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16001
16002 // No volatile, indexed or atomic loads/stores.
16003 if (!LSNode2->isSimple() || LSNode2->isIndexed())
16004 continue;
16005
16006 // Check if LSNode1 and LSNode2 have the same type and extension.
16007 if (LSNode1->getOpcode() == ISD::LOAD)
16008 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16009 cast<LoadSDNode>(LSNode1)->getExtensionType())
16010 continue;
16011
16012 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16013 continue;
16014
16015 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16016
16017 // Check if the base pointer is the same for both instructions.
16018 if (Base1 != Base2)
16019 continue;
16020
16021 // Check if the offsets match the XTHeadMemPair encoding constraints.
16022 bool Valid = false;
16023 if (MemVT == MVT::i32) {
16024 // Check for adjacent i32 values and a 2-bit index.
16025 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16026 Valid = true;
16027 } else if (MemVT == MVT::i64) {
16028 // Check for adjacent i64 values and a 2-bit index.
16029 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16030 Valid = true;
16031 }
16032
16033 if (!Valid)
16034 continue;
16035
16036 // Try to combine.
16037 if (SDValue Res =
16038 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16039 return Res;
16040 }
16041 }
16042
16043 return SDValue();
16044}
16045
16046// Fold
16047// (fp_to_int (froundeven X)) -> fcvt X, rne
16048// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16049// (fp_to_int (ffloor X)) -> fcvt X, rdn
16050// (fp_to_int (fceil X)) -> fcvt X, rup
16051// (fp_to_int (fround X)) -> fcvt X, rmm
16052// (fp_to_int (frint X)) -> fcvt X
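// For example (illustrative, scalar f32 with F and a legal i32 result):
// (fp_to_sint (ffloor X)) becomes a single FCVT node with the rdn static
// rounding mode, i.e. one fcvt.w.s, instead of lowering ffloor separately and
// then converting.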
16053static SDValue performFP_TO_INTCombine(SDNode *N,
16054 TargetLowering::DAGCombinerInfo &DCI,
16055 const RISCVSubtarget &Subtarget) {
16056 SelectionDAG &DAG = DCI.DAG;
16057 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16058 MVT XLenVT = Subtarget.getXLenVT();
16059
16060 SDValue Src = N->getOperand(0);
16061
16062 // Don't do this for strict-fp Src.
16063 if (Src->isStrictFPOpcode())
16064 return SDValue();
16065
16066 // Ensure the FP type is legal.
16067 if (!TLI.isTypeLegal(Src.getValueType()))
16068 return SDValue();
16069
16070 // Don't do this for f16 with Zfhmin and not Zfh.
16071 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16072 return SDValue();
16073
16074 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16075 // If the result is invalid, we didn't find a foldable instruction.
16076 if (FRM == RISCVFPRndMode::Invalid)
16077 return SDValue();
16078
16079 SDLoc DL(N);
16080 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16081 EVT VT = N->getValueType(0);
16082
16083 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16084 MVT SrcVT = Src.getSimpleValueType();
16085 MVT SrcContainerVT = SrcVT;
16086 MVT ContainerVT = VT.getSimpleVT();
16087 SDValue XVal = Src.getOperand(0);
16088
16089 // For widening and narrowing conversions, we just combine them into a
16090 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16091 // end up getting lowered to their appropriate pseudo instructions based on
16092 // their operand types.
16093 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16094 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16095 return SDValue();
16096
16097 // Make fixed-length vectors scalable first
16098 if (SrcVT.isFixedLengthVector()) {
16099 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16100 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16101 ContainerVT =
16102 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16103 }
16104
16105 auto [Mask, VL] =
16106 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16107
16108 SDValue FpToInt;
16109 if (FRM == RISCVFPRndMode::RTZ) {
16110 // Use the dedicated trunc static rounding mode if we're truncating so we
16111 // don't need to generate calls to fsrmi/fsrm
16112 unsigned Opc =
16113 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16114 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16115 } else {
16116 unsigned Opc =
16117 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16118 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16119 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16120 }
16121
16122 // If converted from fixed-length to scalable, convert back
16123 if (VT.isFixedLengthVector())
16124 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16125
16126 return FpToInt;
16127 }
16128
16129 // Only handle XLen or i32 types. Other types narrower than XLen will
16130 // eventually be legalized to XLenVT.
16131 if (VT != MVT::i32 && VT != XLenVT)
16132 return SDValue();
16133
16134 unsigned Opc;
16135 if (VT == XLenVT)
16136 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16137 else
16138 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16139
16140 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16141 DAG.getTargetConstant(FRM, DL, XLenVT));
16142 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16143}
16144
16145// Fold
16146// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16147// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16148// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16149// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16150// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16151// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
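// For example (illustrative, RV64 with D): (fp_to_sint_sat (ftrunc X), i64)
// becomes an FCVT with the rtz rounding mode wrapped in a Src != Src (SETUO)
// select, so a NaN input yields 0 as the saturating semantics require.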
16152static SDValue performFP_TO_INT_SATCombine(SDNode *N,
16153 TargetLowering::DAGCombinerInfo &DCI,
16154 const RISCVSubtarget &Subtarget) {
16155 SelectionDAG &DAG = DCI.DAG;
16156 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16157 MVT XLenVT = Subtarget.getXLenVT();
16158
16159 // Only handle XLen types. Other types narrower than XLen will eventually be
16160 // legalized to XLenVT.
16161 EVT DstVT = N->getValueType(0);
16162 if (DstVT != XLenVT)
16163 return SDValue();
16164
16165 SDValue Src = N->getOperand(0);
16166
16167 // Don't do this for strict-fp Src.
16168 if (Src->isStrictFPOpcode())
16169 return SDValue();
16170
16171 // Ensure the FP type is also legal.
16172 if (!TLI.isTypeLegal(Src.getValueType()))
16173 return SDValue();
16174
16175 // Don't do this for f16 with Zfhmin and not Zfh.
16176 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16177 return SDValue();
16178
16179 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16180
16181 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16182 if (FRM == RISCVFPRndMode::Invalid)
16183 return SDValue();
16184
16185 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16186
16187 unsigned Opc;
16188 if (SatVT == DstVT)
16189 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16190 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16191 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16192 else
16193 return SDValue();
16194 // FIXME: Support other SatVTs by clamping before or after the conversion.
16195
16196 Src = Src.getOperand(0);
16197
16198 SDLoc DL(N);
16199 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16200 DAG.getTargetConstant(FRM, DL, XLenVT));
16201
16202 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16203 // extend.
16204 if (Opc == RISCVISD::FCVT_WU_RV64)
16205 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16206
16207 // RISC-V FP-to-int conversions saturate to the destination register size, but
16208 // don't produce 0 for nan.
16209 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16210 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16211}
16212
16213// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16214// smaller than XLenVT.
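// For illustration: bswap reverses the byte order and bitreverse reverses all
// bits, so their composition keeps each byte in place while reversing the
// bits within it, which is exactly the BREV8 operation (GREVI with shamt 7).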
16215static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
16216 const RISCVSubtarget &Subtarget) {
16217 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16218
16219 SDValue Src = N->getOperand(0);
16220 if (Src.getOpcode() != ISD::BSWAP)
16221 return SDValue();
16222
16223 EVT VT = N->getValueType(0);
16224 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16225 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16226 return SDValue();
16227
16228 SDLoc DL(N);
16229 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16230}
16231
16232// Convert from one FMA opcode to another based on whether we are negating the
16233// multiply result and/or the accumulator.
16234// NOTE: Only supports RVV operations with VL.
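// For example: VFMADD_VL computes (a * b) + c, so negating the product gives
// -(a * b) + c, which is VFNMSUB_VL; negating the accumulator instead flips
// the trailing +c to -c, i.e. the ADD<->SUB pairing handled below.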
16235static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16236 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16237 if (NegMul) {
16238 // clang-format off
16239 switch (Opcode) {
16240 default: llvm_unreachable("Unexpected opcode");
16241 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16242 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16243 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16244 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16245 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16246 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16247 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16248 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16249 }
16250 // clang-format on
16251 }
16252
16253 // Negating the accumulator changes ADD<->SUB.
16254 if (NegAcc) {
16255 // clang-format off
16256 switch (Opcode) {
16257 default: llvm_unreachable("Unexpected opcode");
16258 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16259 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16260 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16261 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16262 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16263 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16264 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16265 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16266 }
16267 // clang-format on
16268 }
16269
16270 return Opcode;
16271}
16272
16273static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
16274 // Fold FNEG_VL into FMA opcodes.
16275 // The first operand of strict-fp is chain.
16276 bool IsStrict =
16277 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16278 unsigned Offset = IsStrict ? 1 : 0;
16279 SDValue A = N->getOperand(0 + Offset);
16280 SDValue B = N->getOperand(1 + Offset);
16281 SDValue C = N->getOperand(2 + Offset);
16282 SDValue Mask = N->getOperand(3 + Offset);
16283 SDValue VL = N->getOperand(4 + Offset);
16284
16285 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16286 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16287 V.getOperand(2) == VL) {
16288 // Return the negated input.
16289 V = V.getOperand(0);
16290 return true;
16291 }
16292
16293 return false;
16294 };
16295
16296 bool NegA = invertIfNegative(A);
16297 bool NegB = invertIfNegative(B);
16298 bool NegC = invertIfNegative(C);
16299
16300 // If no operands are negated, we're done.
16301 if (!NegA && !NegB && !NegC)
16302 return SDValue();
16303
16304 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16305 if (IsStrict)
16306 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16307 {N->getOperand(0), A, B, C, Mask, VL});
16308 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16309 VL);
16310}
16311
16312static SDValue performVFMADD_VLCombine(SDNode *N,
16313 TargetLowering::DAGCombinerInfo &DCI,
16314 const RISCVSubtarget &Subtarget) {
16315 SelectionDAG &DAG = DCI.DAG;
16316
16317 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
16318 return V;
16319
16320 // FIXME: Ignore strict opcodes for now.
16321 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16322 return SDValue();
16323
16324 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16325}
16326
16327static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
16328 const RISCVSubtarget &Subtarget) {
16329 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16330
16331 EVT VT = N->getValueType(0);
16332
16333 if (VT != Subtarget.getXLenVT())
16334 return SDValue();
16335
16336 if (!isa<ConstantSDNode>(N->getOperand(1)))
16337 return SDValue();
16338 uint64_t ShAmt = N->getConstantOperandVal(1);
16339
16340 SDValue N0 = N->getOperand(0);
16341
16342 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16343 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16344 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16345 unsigned ExtSize =
16346 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16347 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16348 N0.getOperand(0).hasOneUse() &&
16349 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16350 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16351 if (LShAmt < ExtSize) {
16352 unsigned Size = VT.getSizeInBits();
16353 SDLoc ShlDL(N0.getOperand(0));
16354 SDValue Shl =
16355 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16356 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16357 SDLoc DL(N);
16358 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16359 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16360 }
16361 }
16362 }
16363
16364 if (ShAmt > 32 || VT != MVT::i64)
16365 return SDValue();
16366
16367 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16368 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16369 //
16370 // Also try these folds where an add or sub is in the middle.
16371 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
16372 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
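// For illustration: with a shift amount of 30 (i.e. C == 2), the first fold
// rewrites (sra (shl X, 32), 30) as (shl (sext_inreg X, i32), 2), so X is
// sign-extended from 32 bits once and then shifted left by 2.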
16373 SDValue Shl;
16374 ConstantSDNode *AddC = nullptr;
16375
16376 // We might have an ADD or SUB between the SRA and SHL.
16377 bool IsAdd = N0.getOpcode() == ISD::ADD;
16378 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16379 // Other operand needs to be a constant we can modify.
16380 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16381 if (!AddC)
16382 return SDValue();
16383
16384 // AddC needs to have at least 32 trailing zeros.
16385 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16386 return SDValue();
16387
16388 // All users should be a shift by constant less than or equal to 32. This
16389 // ensures we'll do this optimization for each of them to produce an
16390 // add/sub+sext_inreg they can all share.
16391 for (SDNode *U : N0->users()) {
16392 if (U->getOpcode() != ISD::SRA ||
16393 !isa<ConstantSDNode>(U->getOperand(1)) ||
16394 U->getConstantOperandVal(1) > 32)
16395 return SDValue();
16396 }
16397
16398 Shl = N0.getOperand(IsAdd ? 0 : 1);
16399 } else {
16400 // Not an ADD or SUB.
16401 Shl = N0;
16402 }
16403
16404 // Look for a shift left by 32.
16405 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16406 Shl.getConstantOperandVal(1) != 32)
16407 return SDValue();
16408
16409 // If we didn't look through an add/sub, then the shl should have one use.
16410 // If we did look through an add/sub, the sext_inreg we create is free so
16411 // we're only creating 2 new instructions. It's enough to only remove the
16412 // original sra+add/sub.
16413 if (!AddC && !Shl.hasOneUse())
16414 return SDValue();
16415
16416 SDLoc DL(N);
16417 SDValue In = Shl.getOperand(0);
16418
16419 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16420 // constant.
16421 if (AddC) {
16422 SDValue ShiftedAddC =
16423 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16424 if (IsAdd)
16425 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16426 else
16427 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16428 }
16429
16430 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16431 DAG.getValueType(MVT::i32));
16432 if (ShAmt == 32)
16433 return SExt;
16434
16435 return DAG.getNode(
16436 ISD::SHL, DL, MVT::i64, SExt,
16437 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16438}
16439
16440// Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
16441// the result is used as the condition of a br_cc or select_cc we can invert,
16442// inverting the setcc is free, and Z is 0/1. The caller will invert the
16443// br_cc/select_cc.
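// For example: (and (seteq a, b), (xor z, 1)) with z known to be 0/1 becomes
// (or (setne a, b), z), its De Morgan complement; the caller compensates by
// inverting the br_cc/select_cc condition that consumes it.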
16444static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16445 bool IsAnd = Cond.getOpcode() == ISD::AND;
16446 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16447 return SDValue();
16448
16449 if (!Cond.hasOneUse())
16450 return SDValue();
16451
16452 SDValue Setcc = Cond.getOperand(0);
16453 SDValue Xor = Cond.getOperand(1);
16454 // Canonicalize setcc to LHS.
16455 if (Setcc.getOpcode() != ISD::SETCC)
16456 std::swap(Setcc, Xor);
16457 // LHS should be a setcc and RHS should be an xor.
16458 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16459 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16460 return SDValue();
16461
16462 // If the condition is an And, SimplifyDemandedBits may have changed
16463 // (xor Z, 1) to (not Z).
16464 SDValue Xor1 = Xor.getOperand(1);
16465 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16466 return SDValue();
16467
16468 EVT VT = Cond.getValueType();
16469 SDValue Xor0 = Xor.getOperand(0);
16470
16471 // The LHS of the xor needs to be 0/1.
16472 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16473 if (!DAG.MaskedValueIsZero(Xor0, Mask))
16474 return SDValue();
16475
16476 // We can only invert integer setccs.
16477 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16478 if (!SetCCOpVT.isScalarInteger())
16479 return SDValue();
16480
16481 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16482 if (ISD::isIntEqualitySetCC(CCVal)) {
16483 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16484 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16485 Setcc.getOperand(1), CCVal);
16486 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16487 // Invert (setlt 0, X) by converting to (setlt X, 1).
16488 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16489 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16490 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16491 // Invert (setlt X, 1) by converting to (setlt 0, X).
16492 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16493 DAG.getConstant(0, SDLoc(Setcc), VT),
16494 Setcc.getOperand(0), CCVal);
16495 } else
16496 return SDValue();
16497
16498 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16499 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16500}
16501
16502// Perform common combines for BR_CC and SELECT_CC conditions.
16503static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16504 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16505 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16506
16507 // Since an arithmetic right shift always preserves the sign bit,
16508 // the shift can be omitted when comparing against zero.
16509 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16510 // setge (sra X, N), 0 -> setge X, 0
16511 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16512 LHS.getOpcode() == ISD::SRA) {
16513 LHS = LHS.getOperand(0);
16514 return true;
16515 }
16516
16517 if (!ISD::isIntEqualitySetCC(CCVal))
16518 return false;
16519
16520 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16521 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16522 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16523 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16524 // If we're looking for eq 0 instead of ne 0, we need to invert the
16525 // condition.
16526 bool Invert = CCVal == ISD::SETEQ;
16527 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16528 if (Invert)
16529 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16530
16531 RHS = LHS.getOperand(1);
16532 LHS = LHS.getOperand(0);
16533 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16534
16535 CC = DAG.getCondCode(CCVal);
16536 return true;
16537 }
16538
16539 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16540 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16541 RHS = LHS.getOperand(1);
16542 LHS = LHS.getOperand(0);
16543 return true;
16544 }
16545
16546 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16547 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16548 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16549 SDValue LHS0 = LHS.getOperand(0);
16550 if (LHS0.getOpcode() == ISD::AND &&
16551 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16552 uint64_t Mask = LHS0.getConstantOperandVal(1);
16553 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16554 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16555 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16556 CC = DAG.getCondCode(CCVal);
16557
16558 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16559 LHS = LHS0.getOperand(0);
16560 if (ShAmt != 0)
16561 LHS =
16562 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16563 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16564 return true;
16565 }
16566 }
16567 }
16568
16569 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16570 // This can occur when legalizing some floating point comparisons.
16571 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16572 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16573 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16574 CC = DAG.getCondCode(CCVal);
16575 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16576 return true;
16577 }
16578
16579 if (isNullConstant(RHS)) {
16580 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16581 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16582 CC = DAG.getCondCode(CCVal);
16583 LHS = NewCond;
16584 return true;
16585 }
16586 }
16587
16588 return false;
16589}
16590
16591// Fold
16592// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16593// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16594// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16595// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
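// For example: (select C, (add Y, X), Y) becomes (add Y, (select C, X, 0)),
// since adding the identity value 0 in the false arm reproduces Y; for the
// non-commutative sub/shift cases only operand 0 of TrueVal may match Y.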
16596static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16597 SDValue TrueVal, SDValue FalseVal,
16598 bool Swapped) {
16599 bool Commutative = true;
16600 unsigned Opc = TrueVal.getOpcode();
16601 switch (Opc) {
16602 default:
16603 return SDValue();
16604 case ISD::SHL:
16605 case ISD::SRA:
16606 case ISD::SRL:
16607 case ISD::SUB:
16608 Commutative = false;
16609 break;
16610 case ISD::ADD:
16611 case ISD::OR:
16612 case ISD::XOR:
16613 break;
16614 }
16615
16616 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16617 return SDValue();
16618
16619 unsigned OpToFold;
16620 if (FalseVal == TrueVal.getOperand(0))
16621 OpToFold = 0;
16622 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16623 OpToFold = 1;
16624 else
16625 return SDValue();
16626
16627 EVT VT = N->getValueType(0);
16628 SDLoc DL(N);
16629 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16630 EVT OtherOpVT = OtherOp.getValueType();
16631 SDValue IdentityOperand =
16632 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16633 if (!Commutative)
16634 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16635 assert(IdentityOperand && "No identity operand!");
16636
16637 if (Swapped)
16638 std::swap(OtherOp, IdentityOperand);
16639 SDValue NewSel =
16640 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16641 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16642}
16643
16644// This tries to get rid of the `select` and `icmp` that are being used to
16645// handle targets that do not support `cttz(0)`/`ctlz(0)`.
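// For illustration (i32): (select (x == 0), 0, (cttz_zero_undef x)) becomes
// (and (cttz x), 31), because a fully defined cttz returns 32 for x == 0 and
// 32 & 31 == 0, so the explicit zero check is redundant.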
16646static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16647 SDValue Cond = N->getOperand(0);
16648
16649 // This represents either CTTZ or CTLZ instruction.
16650 SDValue CountZeroes;
16651
16652 SDValue ValOnZero;
16653
16654 if (Cond.getOpcode() != ISD::SETCC)
16655 return SDValue();
16656
16657 if (!isNullConstant(Cond->getOperand(1)))
16658 return SDValue();
16659
16660 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16661 if (CCVal == ISD::CondCode::SETEQ) {
16662 CountZeroes = N->getOperand(2);
16663 ValOnZero = N->getOperand(1);
16664 } else if (CCVal == ISD::CondCode::SETNE) {
16665 CountZeroes = N->getOperand(1);
16666 ValOnZero = N->getOperand(2);
16667 } else {
16668 return SDValue();
16669 }
16670
16671 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16672 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16673 CountZeroes = CountZeroes.getOperand(0);
16674
16675 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16676 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16677 CountZeroes.getOpcode() != ISD::CTLZ &&
16678 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16679 return SDValue();
16680
16681 if (!isNullConstant(ValOnZero))
16682 return SDValue();
16683
16684 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16685 if (Cond->getOperand(0) != CountZeroesArgument)
16686 return SDValue();
16687
16688 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16689 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16690 CountZeroes.getValueType(), CountZeroesArgument);
16691 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16692 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16693 CountZeroes.getValueType(), CountZeroesArgument);
16694 }
16695
16696 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16697 SDValue BitWidthMinusOne =
16698 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16699
16700 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16701 CountZeroes, BitWidthMinusOne);
16702 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16703}
16704
16705static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16706 const RISCVSubtarget &Subtarget) {
16707 SDValue Cond = N->getOperand(0);
16708 SDValue True = N->getOperand(1);
16709 SDValue False = N->getOperand(2);
16710 SDLoc DL(N);
16711 EVT VT = N->getValueType(0);
16712 EVT CondVT = Cond.getValueType();
16713
16714 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16715 return SDValue();
16716
16717 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
16718 // BEXTI, where C is a power of 2.
16719 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16720 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16721 SDValue LHS = Cond.getOperand(0);
16722 SDValue RHS = Cond.getOperand(1);
16723 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16724 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16725 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16726 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16727 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16728 return DAG.getSelect(DL, VT,
16729 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16730 False, True);
16731 }
16732 }
16733 return SDValue();
16734}
16735
16736static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16737 const RISCVSubtarget &Subtarget) {
16738 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16739 return Folded;
16740
16741 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16742 return V;
16743
16744 if (Subtarget.hasConditionalMoveFusion())
16745 return SDValue();
16746
16747 SDValue TrueVal = N->getOperand(1);
16748 SDValue FalseVal = N->getOperand(2);
16749 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16750 return V;
16751 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16752}
16753
16754/// If we have a build_vector where each lane is binop X, C, where C
16755/// is a constant (but not necessarily the same constant on all lanes),
16756/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16757/// We assume that materializing a constant build vector will be no more
16758/// expensive than performing O(n) binops.
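/// For example: build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
/// can be rewritten as (add (build_vector a, b, c, d),
/// (build_vector 1, 2, 3, 4)), replacing N scalar adds with one vector add
/// plus a constant vector.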
16759static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16760 const RISCVSubtarget &Subtarget,
16761 const RISCVTargetLowering &TLI) {
16762 SDLoc DL(N);
16763 EVT VT = N->getValueType(0);
16764
16765 assert(!VT.isScalableVector() && "unexpected build vector");
16766
16767 if (VT.getVectorNumElements() == 1)
16768 return SDValue();
16769
16770 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16771 if (!TLI.isBinOp(Opcode))
16772 return SDValue();
16773
16774 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16775 return SDValue();
16776
16777 // This BUILD_VECTOR involves an implicit truncation, and sinking
16778 // truncates through binops is non-trivial.
16779 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16780 return SDValue();
16781
16782 SmallVector<SDValue> LHSOps;
16783 SmallVector<SDValue> RHSOps;
16784 for (SDValue Op : N->ops()) {
16785 if (Op.isUndef()) {
16786 // We can't form a divide or remainder from undef.
16787 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16788 return SDValue();
16789
16790 LHSOps.push_back(Op);
16791 RHSOps.push_back(Op);
16792 continue;
16793 }
16794
16795 // TODO: We can handle operations which have a neutral RHS value
16796 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16797 // of profit in a more explicit manner.
16798 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16799 return SDValue();
16800
16801 LHSOps.push_back(Op.getOperand(0));
16802 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16803 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16804 return SDValue();
16805 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16806 // have different LHS and RHS types.
16807 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16808 return SDValue();
16809
16810 RHSOps.push_back(Op.getOperand(1));
16811 }
16812
16813 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16814 DAG.getBuildVector(VT, DL, RHSOps));
16815}
16816
16817static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
16818 const RISCVSubtarget &Subtarget,
16819 const RISCVTargetLowering &TLI) {
16820 SDValue InVec = N->getOperand(0);
16821 SDValue InVal = N->getOperand(1);
16822 SDValue EltNo = N->getOperand(2);
16823 SDLoc DL(N);
16824
16825 EVT VT = InVec.getValueType();
16826 if (VT.isScalableVector())
16827 return SDValue();
16828
16829 if (!InVec.hasOneUse())
16830 return SDValue();
16831
16832 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16833 // move the insert_vector_elts into the arms of the binop. Note that
16834 // the new RHS must be a constant.
16835 const unsigned InVecOpcode = InVec->getOpcode();
16836 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16837 InVal.hasOneUse()) {
16838 SDValue InVecLHS = InVec->getOperand(0);
16839 SDValue InVecRHS = InVec->getOperand(1);
16840 SDValue InValLHS = InVal->getOperand(0);
16841 SDValue InValRHS = InVal->getOperand(1);
16842
16843 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
16844 return SDValue();
16845 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16846 return SDValue();
16847 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16848 // have different LHS and RHS types.
16849 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16850 return SDValue();
16851 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16852 InVecLHS, InValLHS, EltNo);
16853 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16854 InVecRHS, InValRHS, EltNo);
16855 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16856 }
16857
16858 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16859 // move the insert_vector_elt to the source operand of the concat_vectors.
16860 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16861 return SDValue();
16862
16863 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16864 if (!IndexC)
16865 return SDValue();
16866 unsigned Elt = IndexC->getZExtValue();
16867
16868 EVT ConcatVT = InVec.getOperand(0).getValueType();
16869 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16870 return SDValue();
16871 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16872 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16873
16874 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16875 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16876 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16877 ConcatOp, InVal, NewIdx);
16878
16879 SmallVector<SDValue> ConcatOps;
16880 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16881 ConcatOps[ConcatOpIdx] = ConcatOp;
16882 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16883}
16884
16885// If we're concatenating a series of vector loads like
16886// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16887// Then we can turn this into a strided load by widening the vector elements
16888// vlse32 p, stride=n
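// For example (illustrative): four v4i8 loads from p, p+n, p+2n and p+3n can
// be expressed as one v4i32 load with stride n (a vlse32) whose result is
// bitcast back to v16i8; the code below builds this as a VP strided load.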
16889static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
16890 const RISCVSubtarget &Subtarget,
16891 const RISCVTargetLowering &TLI) {
16892 SDLoc DL(N);
16893 EVT VT = N->getValueType(0);
16894
16895 // Only perform this combine on legal MVTs.
16896 if (!TLI.isTypeLegal(VT))
16897 return SDValue();
16898
16899 // TODO: Potentially extend this to scalable vectors
16900 if (VT.isScalableVector())
16901 return SDValue();
16902
16903 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16904 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16905 !SDValue(BaseLd, 0).hasOneUse())
16906 return SDValue();
16907
16908 EVT BaseLdVT = BaseLd->getValueType(0);
16909
16910 // Go through the loads and check that they're strided
16911 SmallVector<LoadSDNode *> Lds;
16912 Lds.push_back(BaseLd);
16913 Align Align = BaseLd->getAlign();
16914 for (SDValue Op : N->ops().drop_front()) {
16915 auto *Ld = dyn_cast<LoadSDNode>(Op);
16916 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16917 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16918 Ld->getValueType(0) != BaseLdVT)
16919 return SDValue();
16920
16921 Lds.push_back(Ld);
16922
16923 // The common alignment is the most restrictive (smallest) of all the loads
16924 Align = std::min(Align, Ld->getAlign());
16925 }
16926
16927 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16928 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16929 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16930 // If the load ptrs can be decomposed into a common (Base + Index) with a
16931 // common constant stride, then return the constant stride.
16932 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16933 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16934 if (BIO1.equalBaseIndex(BIO2, DAG))
16935 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16936
16937 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16938 SDValue P1 = Ld1->getBasePtr();
16939 SDValue P2 = Ld2->getBasePtr();
16940 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16941 return {{P2.getOperand(1), false}};
16942 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16943 return {{P1.getOperand(1), true}};
16944
16945 return std::nullopt;
16946 };
16947
16948 // Get the distance between the first and second loads
16949 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16950 if (!BaseDiff)
16951 return SDValue();
16952
16953 // Check all the loads are the same distance apart
16954 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16955 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16956 return SDValue();
16957
16958 // TODO: At this point, we've successfully matched a generalized gather
16959 // load. Maybe we should emit that, and then move the specialized
16960 // matchers above and below into a DAG combine?
16961
16962 // Get the widened scalar type, e.g. v4i8 -> i64
16963 unsigned WideScalarBitWidth =
16964 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16965 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16966
16967 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
16968 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16969 if (!TLI.isTypeLegal(WideVecVT))
16970 return SDValue();
16971
16972 // Check that the operation is legal
16973 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16974 return SDValue();
16975
16976 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16977 SDValue Stride =
16978 std::holds_alternative<SDValue>(StrideVariant)
16979 ? std::get<SDValue>(StrideVariant)
16980 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
16981 Lds[0]->getOffset().getValueType());
16982 if (MustNegateStride)
16983 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16984
16985 SDValue AllOneMask =
16986 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16987 DAG.getConstant(1, DL, MVT::i1));
16988
16989 uint64_t MemSize;
16990 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16991 ConstStride && ConstStride->getSExtValue() >= 0)
16992 // total size = (elsize * n) + (stride - elsize) * (n-1)
16993 // = elsize + stride * (n-1)
16994 MemSize = WideScalarVT.getSizeInBits() +
16995 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16996 else
16997 // If Stride isn't constant, then we can't know how much it will load
16998 MemSize = MemoryLocation::UnknownSize;
16999
17000 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17001 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17002 Align);
17003
17004 SDValue StridedLoad = DAG.getStridedLoadVP(
17005 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17006 AllOneMask,
17007 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17008
17009 for (SDValue Ld : N->ops())
17010 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17011
17012 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17013}
17014
17015static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
17016 const RISCVSubtarget &Subtarget) {
17017
17018 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17019
17020 if (N->getValueType(0).isFixedLengthVector())
17021 return SDValue();
17022
17023 SDValue Addend = N->getOperand(0);
17024 SDValue MulOp = N->getOperand(1);
17025
17026 if (N->getOpcode() == RISCVISD::ADD_VL) {
17027 SDValue AddPassthruOp = N->getOperand(2);
17028 if (!AddPassthruOp.isUndef())
17029 return SDValue();
17030 }
17031
17032 auto IsVWMulOpc = [](unsigned Opc) {
17033 switch (Opc) {
17034 case RISCVISD::VWMUL_VL:
17035 case RISCVISD::VWMULU_VL:
17036 case RISCVISD::VWMULSU_VL:
17037 return true;
17038 default:
17039 return false;
17040 }
17041 };
17042
17043 if (!IsVWMulOpc(MulOp.getOpcode()))
17044 std::swap(Addend, MulOp);
17045
17046 if (!IsVWMulOpc(MulOp.getOpcode()))
17047 return SDValue();
17048
17049 SDValue MulPassthruOp = MulOp.getOperand(2);
17050
17051 if (!MulPassthruOp.isUndef())
17052 return SDValue();
17053
17054 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17055 const RISCVSubtarget &Subtarget) {
17056 if (N->getOpcode() == ISD::ADD) {
17057 SDLoc DL(N);
17058 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17059 Subtarget);
17060 }
17061 return std::make_pair(N->getOperand(3), N->getOperand(4));
17062 }(N, DAG, Subtarget);
17063
17064 SDValue MulMask = MulOp.getOperand(3);
17065 SDValue MulVL = MulOp.getOperand(4);
17066
17067 if (AddMask != MulMask || AddVL != MulVL)
17068 return SDValue();
17069
17070 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17071 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17072 "Unexpected opcode after VWMACC_VL");
17073 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17074 "Unexpected opcode after VWMACC_VL!");
17075 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17076 "Unexpected opcode after VWMUL_VL!");
17077 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17078 "Unexpected opcode after VWMUL_VL!");
17079
17080 SDLoc DL(N);
17081 EVT VT = N->getValueType(0);
17082 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17083 AddVL};
17084 return DAG.getNode(Opc, DL, VT, Ops);
17085}
17086
17087static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17088 ISD::MemIndexType &IndexType,
17089 RISCVTargetLowering::DAGCombinerInfo &DCI) {
17090 if (!DCI.isBeforeLegalize())
17091 return false;
17092
17093 SelectionDAG &DAG = DCI.DAG;
17094 const MVT XLenVT =
17095 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17096
17097 const EVT IndexVT = Index.getValueType();
17098
17099 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17100 // mode, so anything else must be manually legalized.
17101 if (!isIndexTypeSigned(IndexType))
17102 return false;
17103
17104 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17105 // Any index legalization should first promote to XLenVT, so we don't lose
17106 // bits when scaling. This may create an illegal index type so we let
17107 // LLVM's legalization take care of the splitting.
17108 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17109 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17110 IndexVT.changeVectorElementType(XLenVT), Index);
17111 }
17112 IndexType = ISD::UNSIGNED_SCALED;
17113 return true;
17114}
17115
17116/// Match the index vector of a scatter or gather node as the shuffle mask
17117/// which performs the rearrangement if possible. Will only match if
17118/// all lanes are touched, and thus replacing the scatter or gather with
17119/// a unit strided access and shuffle is legal.
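/// For illustration: a gather of v4i32 with an all-ones mask and constant
/// byte offsets [4, 0, 12, 8] touches each lane exactly once, so it can be
/// replaced by a unit-strided load plus a shuffle with mask [1, 0, 3, 2].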
17120static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17121 SmallVector<int> &ShuffleMask) {
17122 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17123 return false;
17124 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17125 return false;
17126
17127 const unsigned ElementSize = VT.getScalarStoreSize();
17128 const unsigned NumElems = VT.getVectorNumElements();
17129
17130 // Create the shuffle mask and check all bits active
17131 assert(ShuffleMask.empty());
17132 BitVector ActiveLanes(NumElems);
17133 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17134 // TODO: We've found an active bit of UB, and could be
17135 // more aggressive here if desired.
17136 if (Index->getOperand(i)->isUndef())
17137 return false;
17138 uint64_t C = Index->getConstantOperandVal(i);
17139 if (C % ElementSize != 0)
17140 return false;
17141 C = C / ElementSize;
17142 if (C >= NumElems)
17143 return false;
17144 ShuffleMask.push_back(C);
17145 ActiveLanes.set(C);
17146 }
17147 return ActiveLanes.all();
17148}
17149
17150/// Match the index of a gather or scatter operation as an operation
17151/// with twice the element width and half the number of elements. This is
17152/// generally profitable (if legal) because these operations are linear
17153/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
17154/// come out ahead.
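/// For example, a v4i16 access with byte offsets <0, 2, 8, 10> pairs up into a
/// v2i32 access with byte offsets <0, 8>, halving the number of indexed
/// elements (assuming the alignment and ELEN checks below pass).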
17155static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17156 Align BaseAlign, const RISCVSubtarget &ST) {
17157 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17158 return false;
17159 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17160 return false;
17161
17162 // Attempt a doubling. If we can use an element type 4x or 8x in
17163 // size, this will happen via multiple iterations of the transform.
17164 const unsigned NumElems = VT.getVectorNumElements();
17165 if (NumElems % 2 != 0)
17166 return false;
17167
17168 const unsigned ElementSize = VT.getScalarStoreSize();
17169 const unsigned WiderElementSize = ElementSize * 2;
17170 if (WiderElementSize > ST.getELen()/8)
17171 return false;
17172
17173 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17174 return false;
17175
17176 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17177 // TODO: We've found an active bit of UB, and could be
17178 // more aggressive here if desired.
17179 if (Index->getOperand(i)->isUndef())
17180 return false;
17181 // TODO: This offset check is too strict if we support fully
17182 // misaligned memory operations.
17183 uint64_t C = Index->getConstantOperandVal(i);
17184 if (i % 2 == 0) {
17185 if (C % WiderElementSize != 0)
17186 return false;
17187 continue;
17188 }
17189 uint64_t Last = Index->getConstantOperandVal(i-1);
17190 if (C != Last + ElementSize)
17191 return false;
17192 }
17193 return true;
17194}
17195
17196// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17197 // This benefits cases where X and Y are both low-precision vectors of the
17198 // same value type. Since the truncate would be lowered into n levels of
17199 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncation restriction, such
17200 // a pattern would otherwise be expanded into a series of "vsetvli" and
17201 // "vnsrl" instructions later to reach this point.
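// For example, with X and Y both v4i8, (trunc (sra (sext X to v4i32),
// (zext Y to v4i32))) can instead be done as an i8-element sra once the shift
// amount is clamped to 7, since larger shift amounts only produce sign bits.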
17202 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17203 SDValue Mask = N->getOperand(1);
17204 SDValue VL = N->getOperand(2);
17205
17206 bool IsVLMAX = isAllOnesConstant(VL) ||
17207 (isa<RegisterSDNode>(VL) &&
17208 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17209 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17210 Mask.getOperand(0) != VL)
17211 return SDValue();
17212
17213 auto IsTruncNode = [&](SDValue V) {
17214 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17215 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17216 };
17217
17218 SDValue Op = N->getOperand(0);
17219
17220 // We need to first find the innermost TRUNCATE_VECTOR_VL node
17221 // to distinguish such a pattern.
17222 while (IsTruncNode(Op)) {
17223 if (!Op.hasOneUse())
17224 return SDValue();
17225 Op = Op.getOperand(0);
17226 }
17227
17228 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17229 return SDValue();
17230
17231 SDValue N0 = Op.getOperand(0);
17232 SDValue N1 = Op.getOperand(1);
17233 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17234 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17235 return SDValue();
17236
17237 SDValue N00 = N0.getOperand(0);
17238 SDValue N10 = N1.getOperand(0);
17239 if (!N00.getValueType().isVector() ||
17240 N00.getValueType() != N10.getValueType() ||
17241 N->getValueType(0) != N10.getValueType())
17242 return SDValue();
17243
17244 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17245 SDValue SMin =
17246 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17247 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17248 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17249}
17250
17251// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17252// maximum value for the truncated type.
17253// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17254// is the signed maximum value for the truncated type and C2 is the signed
17255// minimum value.
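// For example, when truncating i16 elements to i8, (umin X, 255) matches the
// unsigned form and (smin (smax X, -128), 127) matches the signed form.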
17256 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17257 const RISCVSubtarget &Subtarget) {
17258 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17259
17260 MVT VT = N->getSimpleValueType(0);
17261
17262 SDValue Mask = N->getOperand(1);
17263 SDValue VL = N->getOperand(2);
17264
17265 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17266 APInt &SplatVal) {
17267 if (V.getOpcode() != Opc &&
17268 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17269 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17270 return SDValue();
17271
17272 SDValue Op = V.getOperand(1);
17273
17274 // Peek through conversion between fixed and scalable vectors.
17275 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17276 isNullConstant(Op.getOperand(2)) &&
17277 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17278 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17279 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17280 isNullConstant(Op.getOperand(1).getOperand(1)))
17281 Op = Op.getOperand(1).getOperand(0);
17282
17283 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17284 return V.getOperand(0);
17285
17286 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17287 Op.getOperand(2) == VL) {
17288 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17289 SplatVal =
17290 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17291 return V.getOperand(0);
17292 }
17293 }
17294
17295 return SDValue();
17296 };
17297
17298 SDLoc DL(N);
17299
17300 auto DetectUSatPattern = [&](SDValue V) {
17301 APInt LoC, HiC;
17302
17303 // Simple case, V is a UMIN.
17304 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17305 if (HiC.isMask(VT.getScalarSizeInBits()))
17306 return UMinOp;
17307
17308 // If we have an SMAX that removes negative numbers first, then we can match
17309 // SMIN instead of UMIN.
17310 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17311 if (SDValue SMaxOp =
17312 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17313 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17314 return SMinOp;
17315
17316 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17317 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17318 // first.
17319 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17320 if (SDValue SMinOp =
17321 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17322 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17323 HiC.uge(LoC))
17324 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17325 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17326 Mask, VL);
17327
17328 return SDValue();
17329 };
17330
17331 auto DetectSSatPattern = [&](SDValue V) {
17332 unsigned NumDstBits = VT.getScalarSizeInBits();
17333 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17334 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17335 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17336
17337 APInt HiC, LoC;
17338 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17339 if (SDValue SMaxOp =
17340 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17341 if (HiC == SignedMax && LoC == SignedMin)
17342 return SMaxOp;
17343
17344 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17345 if (SDValue SMinOp =
17346 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17347 if (HiC == SignedMax && LoC == SignedMin)
17348 return SMinOp;
17349
17350 return SDValue();
17351 };
17352
17353 SDValue Src = N->getOperand(0);
17354
17355 // Look through multiple layers of truncates.
17356 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17357 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17358 Src.hasOneUse())
17359 Src = Src.getOperand(0);
17360
17361 SDValue Val;
17362 unsigned ClipOpc;
17363 if ((Val = DetectUSatPattern(Src)))
17364 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17365 else if ((Val = DetectSSatPattern(Src)))
17366 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17367 else
17368 return SDValue();
17369
17370 MVT ValVT = Val.getSimpleValueType();
17371
17372 do {
17373 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17374 ValVT = ValVT.changeVectorElementType(ValEltVT);
17375 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17376 } while (ValVT != VT);
17377
17378 return Val;
17379}
17380
17381// Convert
17382// (iX ctpop (bitcast (vXi1 A)))
17383// ->
17384// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17385// FIXME: It's complicated to match all the variations of this after type
17386// legalization so we only handle the pre-type legalization pattern, but that
17387// requires the fixed vector type to be legal.
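// For example, (i32 ctpop (zext (i8 bitcast (v8i1 A)))) becomes a vcpop.m on
// the mask register holding A, with the scalar count zero-extended or
// truncated to i32.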
17388 static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17389 const RISCVSubtarget &Subtarget) {
17390 EVT VT = N->getValueType(0);
17391 if (!VT.isScalarInteger())
17392 return SDValue();
17393
17394 SDValue Src = N->getOperand(0);
17395
17396 // Peek through zero_extend. It doesn't change the count.
17397 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17398 Src = Src.getOperand(0);
17399
17400 if (Src.getOpcode() != ISD::BITCAST)
17401 return SDValue();
17402
17403 Src = Src.getOperand(0);
17404 EVT SrcEVT = Src.getValueType();
17405 if (!SrcEVT.isSimple())
17406 return SDValue();
17407
17408 MVT SrcMVT = SrcEVT.getSimpleVT();
17409 // Make sure the input is an i1 vector.
17410 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17411 return SDValue();
17412
17413 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17414 return SDValue();
17415
17416 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17417 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17418
17419 SDLoc DL(N);
17420 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17421
17422 MVT XLenVT = Subtarget.getXLenVT();
17423 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17424 return DAG.getZExtOrTrunc(Pop, DL, VT);
17425}
17426
17427 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17428 DAGCombinerInfo &DCI) const {
17429 SelectionDAG &DAG = DCI.DAG;
17430 const MVT XLenVT = Subtarget.getXLenVT();
17431 SDLoc DL(N);
17432
17433 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17434 // bits are demanded. N will be added to the Worklist if it was not deleted.
17435 // Caller should return SDValue(N, 0) if this returns true.
17436 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17437 SDValue Op = N->getOperand(OpNo);
17438 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17439 if (!SimplifyDemandedBits(Op, Mask, DCI))
17440 return false;
17441
17442 if (N->getOpcode() != ISD::DELETED_NODE)
17443 DCI.AddToWorklist(N);
17444 return true;
17445 };
17446
17447 switch (N->getOpcode()) {
17448 default:
17449 break;
17450 case RISCVISD::SplitF64: {
17451 SDValue Op0 = N->getOperand(0);
17452 // If the input to SplitF64 is just BuildPairF64 then the operation is
17453 // redundant. Instead, use BuildPairF64's operands directly.
17454 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17455 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17456
17457 if (Op0->isUndef()) {
17458 SDValue Lo = DAG.getUNDEF(MVT::i32);
17459 SDValue Hi = DAG.getUNDEF(MVT::i32);
17460 return DCI.CombineTo(N, Lo, Hi);
17461 }
17462
17463 // It's cheaper to materialise two 32-bit integers than to load a double
17464 // from the constant pool and transfer it to integer registers through the
17465 // stack.
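// For example, splitting the f64 constant 1.0 yields Lo = 0x00000000 and
// Hi = 0x3ff00000, both of which are cheap to materialise.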
17466 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17467 APInt V = C->getValueAPF().bitcastToAPInt();
17468 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17469 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17470 return DCI.CombineTo(N, Lo, Hi);
17471 }
17472
17473 // This is a target-specific version of a DAGCombine performed in
17474 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17475 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17476 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17477 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17478 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17479 break;
17480 SDValue NewSplitF64 =
17481 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17482 Op0.getOperand(0));
17483 SDValue Lo = NewSplitF64.getValue(0);
17484 SDValue Hi = NewSplitF64.getValue(1);
17485 APInt SignBit = APInt::getSignMask(32);
17486 if (Op0.getOpcode() == ISD::FNEG) {
17487 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17488 DAG.getConstant(SignBit, DL, MVT::i32));
17489 return DCI.CombineTo(N, Lo, NewHi);
17490 }
17491 assert(Op0.getOpcode() == ISD::FABS);
17492 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17493 DAG.getConstant(~SignBit, DL, MVT::i32));
17494 return DCI.CombineTo(N, Lo, NewHi);
17495 }
17496 case RISCVISD::SLLW:
17497 case RISCVISD::SRAW:
17498 case RISCVISD::SRLW:
17499 case RISCVISD::RORW:
17500 case RISCVISD::ROLW: {
17501 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
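// For example, (sllw x, (and y, 31)) can drop the AND because only bits [4:0]
// of the shift amount are demanded.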
17502 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17503 SimplifyDemandedLowBitsHelper(1, 5))
17504 return SDValue(N, 0);
17505
17506 break;
17507 }
17508 case RISCVISD::CLZW:
17509 case RISCVISD::CTZW: {
17510 // Only the lower 32 bits of the first operand are read
17511 if (SimplifyDemandedLowBitsHelper(0, 32))
17512 return SDValue(N, 0);
17513 break;
17514 }
17515 case RISCVISD::FMV_W_X_RV64: {
17516 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17517 // conversion is unnecessary and can be replaced with the
17518 // FMV_X_ANYEXTW_RV64 operand.
17519 SDValue Op0 = N->getOperand(0);
17520 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17521 return Op0.getOperand(0);
17522 break;
17523 }
17524 case RISCVISD::FMV_X_ANYEXTH:
17525 case RISCVISD::FMV_X_ANYEXTW_RV64: {
17526 SDLoc DL(N);
17527 SDValue Op0 = N->getOperand(0);
17528 MVT VT = N->getSimpleValueType(0);
17529
17530 // Constant fold.
17531 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17532 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17533 return DAG.getConstant(Val, DL, VT);
17534 }
17535
17536 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17537 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17538 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17539 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17540 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17541 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17542 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17543 assert(Op0.getOperand(0).getValueType() == VT &&
17544 "Unexpected value type!");
17545 return Op0.getOperand(0);
17546 }
17547
17548 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17549 cast<LoadSDNode>(Op0)->isSimple()) {
17550 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17551 auto *LN0 = cast<LoadSDNode>(Op0);
17552 SDValue Load =
17553 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17554 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17555 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17556 return Load;
17557 }
17558
17559 // This is a target-specific version of a DAGCombine performed in
17560 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17561 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17562 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17563 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17564 !Op0.getNode()->hasOneUse())
17565 break;
17566 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17567 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17568 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17569 if (Op0.getOpcode() == ISD::FNEG)
17570 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17571 DAG.getConstant(SignBit, DL, VT));
17572
17573 assert(Op0.getOpcode() == ISD::FABS);
17574 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17575 DAG.getConstant(~SignBit, DL, VT));
17576 }
17577 case ISD::ABS: {
17578 EVT VT = N->getValueType(0);
17579 SDValue N0 = N->getOperand(0);
17580 // abs (sext) -> zext (abs)
17581 // abs (zext) -> zext (handled elsewhere)
17582 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17583 SDValue Src = N0.getOperand(0);
17584 SDLoc DL(N);
17585 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17586 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17587 }
17588 break;
17589 }
17590 case ISD::ADD: {
17591 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17592 return V;
17593 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17594 return V;
17595 return performADDCombine(N, DCI, Subtarget);
17596 }
17597 case ISD::SUB: {
17598 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17599 return V;
17600 return performSUBCombine(N, DAG, Subtarget);
17601 }
17602 case ISD::AND:
17603 return performANDCombine(N, DCI, Subtarget);
17604 case ISD::OR: {
17605 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17606 return V;
17607 return performORCombine(N, DCI, Subtarget);
17608 }
17609 case ISD::XOR:
17610 return performXORCombine(N, DAG, Subtarget);
17611 case ISD::MUL:
17612 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17613 return V;
17614 return performMULCombine(N, DAG, DCI, Subtarget);
17615 case ISD::SDIV:
17616 case ISD::UDIV:
17617 case ISD::SREM:
17618 case ISD::UREM:
17619 if (SDValue V = combineBinOpOfZExt(N, DAG))
17620 return V;
17621 break;
17622 case ISD::FMUL: {
17623 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17624 SDValue N0 = N->getOperand(0);
17625 SDValue N1 = N->getOperand(1);
17626 if (N0->getOpcode() != ISD::FCOPYSIGN)
17627 std::swap(N0, N1);
17628 if (N0->getOpcode() != ISD::FCOPYSIGN)
17629 return SDValue();
17630 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17631 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17632 return SDValue();
17633 EVT VT = N->getValueType(0);
17634 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17635 return SDValue();
17636 SDValue Sign = N0->getOperand(1);
17637 if (Sign.getValueType() != VT)
17638 return SDValue();
17639 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17640 }
17641 case ISD::FADD:
17642 case ISD::UMAX:
17643 case ISD::UMIN:
17644 case ISD::SMAX:
17645 case ISD::SMIN:
17646 case ISD::FMAXNUM:
17647 case ISD::FMINNUM: {
17648 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17649 return V;
17650 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17651 return V;
17652 return SDValue();
17653 }
17654 case ISD::SETCC:
17655 return performSETCCCombine(N, DAG, Subtarget);
17656 case ISD::SIGN_EXTEND_INREG:
17657 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17658 case ISD::ZERO_EXTEND:
17659 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17660 // type legalization. This is safe because fp_to_uint produces poison if
17661 // it overflows.
17662 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17663 SDValue Src = N->getOperand(0);
17664 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17665 isTypeLegal(Src.getOperand(0).getValueType()))
17666 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17667 Src.getOperand(0));
17668 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17669 isTypeLegal(Src.getOperand(1).getValueType())) {
17670 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17671 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17672 Src.getOperand(0), Src.getOperand(1));
17673 DCI.CombineTo(N, Res);
17674 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17675 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17676 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17677 }
17678 }
17679 return SDValue();
17680 case RISCVISD::TRUNCATE_VECTOR_VL:
17681 if (SDValue V = combineTruncOfSraSext(N, DAG))
17682 return V;
17683 return combineTruncToVnclip(N, DAG, Subtarget);
17684 case ISD::TRUNCATE:
17685 return performTRUNCATECombine(N, DAG, Subtarget);
17686 case ISD::SELECT:
17687 return performSELECTCombine(N, DAG, Subtarget);
17688 case RISCVISD::CZERO_EQZ:
17689 case RISCVISD::CZERO_NEZ: {
17690 SDValue Val = N->getOperand(0);
17691 SDValue Cond = N->getOperand(1);
17692
17693 unsigned Opc = N->getOpcode();
17694
17695 // czero_eqz x, x -> x
17696 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17697 return Val;
17698
17699 unsigned InvOpc =
17700 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17701
17702 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17703 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17704 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17705 SDValue NewCond = Cond.getOperand(0);
17706 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17707 if (DAG.MaskedValueIsZero(NewCond, Mask))
17708 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17709 }
17710 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17711 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17712 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17713 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17714 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17715 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17716 if (ISD::isIntEqualitySetCC(CCVal))
17717 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17718 N->getValueType(0), Val, Cond.getOperand(0));
17719 }
17720 return SDValue();
17721 }
17722 case RISCVISD::SELECT_CC: {
17723 // Transform
17724 SDValue LHS = N->getOperand(0);
17725 SDValue RHS = N->getOperand(1);
17726 SDValue CC = N->getOperand(2);
17727 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17728 SDValue TrueV = N->getOperand(3);
17729 SDValue FalseV = N->getOperand(4);
17730 SDLoc DL(N);
17731 EVT VT = N->getValueType(0);
17732
17733 // If the True and False values are the same, we don't need a select_cc.
17734 if (TrueV == FalseV)
17735 return TrueV;
17736
17737 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17738 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
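// For example, (select (x < 0), 3, 7) becomes ((x >> (XLEN - 1)) & -4) + 7,
// which is 3 when x is negative (the arithmetic shift yields all ones) and 7
// otherwise (the shift yields zero).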
17739 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17740 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17741 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17742 if (CCVal == ISD::CondCode::SETGE)
17743 std::swap(TrueV, FalseV);
17744
17745 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17746 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17747 // Only handle simm12; constants outside this range would have to be
17748 // materialised in registers anyway.
17749 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17750 isInt<12>(TrueSImm - FalseSImm)) {
17751 SDValue SRA =
17752 DAG.getNode(ISD::SRA, DL, VT, LHS,
17753 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17754 SDValue AND =
17755 DAG.getNode(ISD::AND, DL, VT, SRA,
17756 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17757 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17758 }
17759
17760 if (CCVal == ISD::CondCode::SETGE)
17761 std::swap(TrueV, FalseV);
17762 }
17763
17764 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17765 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17766 {LHS, RHS, CC, TrueV, FalseV});
17767
17768 if (!Subtarget.hasConditionalMoveFusion()) {
17769 // (select c, -1, y) -> -c | y
17770 if (isAllOnesConstant(TrueV)) {
17771 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17772 SDValue Neg = DAG.getNegative(C, DL, VT);
17773 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17774 }
17775 // (select c, y, -1) -> -!c | y
17776 if (isAllOnesConstant(FalseV)) {
17777 SDValue C =
17778 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17779 SDValue Neg = DAG.getNegative(C, DL, VT);
17780 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17781 }
17782
17783 // (select c, 0, y) -> -!c & y
17784 if (isNullConstant(TrueV)) {
17785 SDValue C =
17786 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17787 SDValue Neg = DAG.getNegative(C, DL, VT);
17788 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17789 }
17790 // (select c, y, 0) -> -c & y
17791 if (isNullConstant(FalseV)) {
17792 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17793 SDValue Neg = DAG.getNegative(C, DL, VT);
17794 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17795 }
17796 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17797 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
17798 if (((isOneConstant(FalseV) && LHS == TrueV &&
17799 CCVal == ISD::CondCode::SETNE) ||
17800 (isOneConstant(TrueV) && LHS == FalseV &&
17801 CCVal == ISD::CondCode::SETEQ)) &&
17802 isNullConstant(RHS)) {
17803 // LHS is used by both the setcc and the add; freeze it to be safe.
17804 LHS = DAG.getFreeze(LHS);
17805 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17806 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17807 }
17808 }
17809
17810 // If both true/false are an xor with 1, pull through the select.
17811 // This can occur after op legalization if both operands are setccs that
17812 // require an xor to invert.
17813 // FIXME: Generalize to other binary ops with identical operand?
17814 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17815 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17816 isOneConstant(TrueV.getOperand(1)) &&
17817 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17818 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17819 TrueV.getOperand(0), FalseV.getOperand(0));
17820 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17821 }
17822
17823 return SDValue();
17824 }
17825 case RISCVISD::BR_CC: {
17826 SDValue LHS = N->getOperand(1);
17827 SDValue RHS = N->getOperand(2);
17828 SDValue CC = N->getOperand(3);
17829 SDLoc DL(N);
17830
17831 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17832 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17833 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17834
17835 return SDValue();
17836 }
17837 case ISD::BITREVERSE:
17838 return performBITREVERSECombine(N, DAG, Subtarget);
17839 case ISD::FP_TO_SINT:
17840 case ISD::FP_TO_UINT:
17841 return performFP_TO_INTCombine(N, DCI, Subtarget);
17842 case ISD::FP_TO_SINT_SAT:
17843 case ISD::FP_TO_UINT_SAT:
17844 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17845 case ISD::FCOPYSIGN: {
17846 EVT VT = N->getValueType(0);
17847 if (!VT.isVector())
17848 break;
17849 // There is a form of VFSGNJ which injects the negated sign of its second
17850 // operand. Try and bubble any FNEG up after the extend/round to produce
17851 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
17852 // TRUNC=1 (a truncating round).
17853 SDValue In2 = N->getOperand(1);
17854 // Avoid cases where the extend/round has multiple uses, as duplicating
17855 // those is typically more expensive than removing a fneg.
17856 if (!In2.hasOneUse())
17857 break;
17858 if (In2.getOpcode() != ISD::FP_EXTEND &&
17859 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17860 break;
17861 In2 = In2.getOperand(0);
17862 if (In2.getOpcode() != ISD::FNEG)
17863 break;
17864 SDLoc DL(N);
17865 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17866 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17867 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17868 }
17869 case ISD::MGATHER: {
17870 const auto *MGN = cast<MaskedGatherSDNode>(N);
17871 const EVT VT = N->getValueType(0);
17872 SDValue Index = MGN->getIndex();
17873 SDValue ScaleOp = MGN->getScale();
17874 ISD::MemIndexType IndexType = MGN->getIndexType();
17875 assert(!MGN->isIndexScaled() &&
17876 "Scaled gather/scatter should not be formed");
17877
17878 SDLoc DL(N);
17879 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17880 return DAG.getMaskedGather(
17881 N->getVTList(), MGN->getMemoryVT(), DL,
17882 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17883 MGN->getBasePtr(), Index, ScaleOp},
17884 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17885
17886 if (narrowIndex(Index, IndexType, DAG))
17887 return DAG.getMaskedGather(
17888 N->getVTList(), MGN->getMemoryVT(), DL,
17889 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17890 MGN->getBasePtr(), Index, ScaleOp},
17891 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17892
17893 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17894 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17895 // The sequence will be XLenVT, not the type of Index. Tell
17896 // isSimpleVIDSequence this so we avoid overflow.
17897 if (std::optional<VIDSequence> SimpleVID =
17898 isSimpleVIDSequence(Index, Subtarget.getXLen());
17899 SimpleVID && SimpleVID->StepDenominator == 1) {
17900 const int64_t StepNumerator = SimpleVID->StepNumerator;
17901 const int64_t Addend = SimpleVID->Addend;
17902
17903 // Note: We don't need to check alignment here since (by assumption
17904 // from the existence of the gather), our offsets must be sufficiently
17905 // aligned.
17906
17907 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17908 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17909 assert(IndexType == ISD::UNSIGNED_SCALED);
17910 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17911 DAG.getSignedConstant(Addend, DL, PtrVT));
17912
17913 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17914 VT.getVectorElementCount());
17915 SDValue StridedLoad = DAG.getStridedLoadVP(
17916 VT, DL, MGN->getChain(), BasePtr,
17917 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
17918 EVL, MGN->getMemOperand());
17919 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17920 StridedLoad, MGN->getPassThru(), EVL);
17921 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17922 DL);
17923 }
17924 }
17925
17926 SmallVector<int> ShuffleMask;
17927 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17928 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17929 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17930 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17931 MGN->getMask(), DAG.getUNDEF(VT),
17932 MGN->getMemoryVT(), MGN->getMemOperand(),
17933 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17934 SDValue Shuffle =
17935 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17936 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17937 }
17938
17939 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17940 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17941 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
17942 SmallVector<SDValue> NewIndices;
17943 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17944 NewIndices.push_back(Index.getOperand(i));
17945 EVT IndexVT = Index.getValueType()
17946 .getHalfNumVectorElementsVT(*DAG.getContext());
17947 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17948
17949 unsigned ElementSize = VT.getScalarStoreSize();
17950 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17951 auto EltCnt = VT.getVectorElementCount();
17952 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17953 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17954 EltCnt.divideCoefficientBy(2));
17955 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17956 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17957 EltCnt.divideCoefficientBy(2));
17958 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17959
17960 SDValue Gather =
17961 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17962 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17963 Index, ScaleOp},
17964 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17965 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17966 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17967 }
17968 break;
17969 }
17970 case ISD::MSCATTER: {
17971 const auto *MSN = cast<MaskedScatterSDNode>(N);
17972 SDValue Index = MSN->getIndex();
17973 SDValue ScaleOp = MSN->getScale();
17974 ISD::MemIndexType IndexType = MSN->getIndexType();
17975 assert(!MSN->isIndexScaled() &&
17976 "Scaled gather/scatter should not be formed");
17977
17978 SDLoc DL(N);
17979 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17980 return DAG.getMaskedScatter(
17981 N->getVTList(), MSN->getMemoryVT(), DL,
17982 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17983 Index, ScaleOp},
17984 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17985
17986 if (narrowIndex(Index, IndexType, DAG))
17987 return DAG.getMaskedScatter(
17988 N->getVTList(), MSN->getMemoryVT(), DL,
17989 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17990 Index, ScaleOp},
17991 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17992
17993 EVT VT = MSN->getValue()->getValueType(0);
17994 SmallVector<int> ShuffleMask;
17995 if (!MSN->isTruncatingStore() &&
17996 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17997 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17998 DAG.getUNDEF(VT), ShuffleMask);
17999 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18000 DAG.getUNDEF(XLenVT), MSN->getMask(),
18001 MSN->getMemoryVT(), MSN->getMemOperand(),
18002 ISD::UNINDEXED, false);
18003 }
18004 break;
18005 }
18006 case ISD::VP_GATHER: {
18007 const auto *VPGN = cast<VPGatherSDNode>(N);
18008 SDValue Index = VPGN->getIndex();
18009 SDValue ScaleOp = VPGN->getScale();
18010 ISD::MemIndexType IndexType = VPGN->getIndexType();
18011 assert(!VPGN->isIndexScaled() &&
18012 "Scaled gather/scatter should not be formed");
18013
18014 SDLoc DL(N);
18015 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18016 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18017 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18018 ScaleOp, VPGN->getMask(),
18019 VPGN->getVectorLength()},
18020 VPGN->getMemOperand(), IndexType);
18021
18022 if (narrowIndex(Index, IndexType, DAG))
18023 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18024 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18025 ScaleOp, VPGN->getMask(),
18026 VPGN->getVectorLength()},
18027 VPGN->getMemOperand(), IndexType);
18028
18029 break;
18030 }
18031 case ISD::VP_SCATTER: {
18032 const auto *VPSN = cast<VPScatterSDNode>(N);
18033 SDValue Index = VPSN->getIndex();
18034 SDValue ScaleOp = VPSN->getScale();
18035 ISD::MemIndexType IndexType = VPSN->getIndexType();
18036 assert(!VPSN->isIndexScaled() &&
18037 "Scaled gather/scatter should not be formed");
18038
18039 SDLoc DL(N);
18040 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18041 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18042 {VPSN->getChain(), VPSN->getValue(),
18043 VPSN->getBasePtr(), Index, ScaleOp,
18044 VPSN->getMask(), VPSN->getVectorLength()},
18045 VPSN->getMemOperand(), IndexType);
18046
18047 if (narrowIndex(Index, IndexType, DAG))
18048 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18049 {VPSN->getChain(), VPSN->getValue(),
18050 VPSN->getBasePtr(), Index, ScaleOp,
18051 VPSN->getMask(), VPSN->getVectorLength()},
18052 VPSN->getMemOperand(), IndexType);
18053 break;
18054 }
18055 case RISCVISD::SHL_VL:
18056 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18057 return V;
18058 [[fallthrough]];
18059 case RISCVISD::SRA_VL:
18060 case RISCVISD::SRL_VL: {
18061 SDValue ShAmt = N->getOperand(1);
18062 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18063 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18064 SDLoc DL(N);
18065 SDValue VL = N->getOperand(4);
18066 EVT VT = N->getValueType(0);
18067 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18068 ShAmt.getOperand(1), VL);
18069 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18070 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18071 }
18072 break;
18073 }
18074 case ISD::SRA:
18075 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18076 return V;
18077 [[fallthrough]];
18078 case ISD::SRL:
18079 case ISD::SHL: {
18080 if (N->getOpcode() == ISD::SHL) {
18081 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18082 return V;
18083 }
18084 SDValue ShAmt = N->getOperand(1);
18085 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18086 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18087 SDLoc DL(N);
18088 EVT VT = N->getValueType(0);
18089 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18090 ShAmt.getOperand(1),
18091 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18092 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18093 }
18094 break;
18095 }
18096 case RISCVISD::ADD_VL:
18097 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18098 return V;
18099 return combineToVWMACC(N, DAG, Subtarget);
18100 case RISCVISD::VWADD_W_VL:
18101 case RISCVISD::VWADDU_W_VL:
18102 case RISCVISD::VWSUB_W_VL:
18103 case RISCVISD::VWSUBU_W_VL:
18104 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18105 case RISCVISD::SUB_VL:
18106 case RISCVISD::MUL_VL:
18107 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18108 case RISCVISD::VFMADD_VL:
18109 case RISCVISD::VFNMADD_VL:
18110 case RISCVISD::VFMSUB_VL:
18111 case RISCVISD::VFNMSUB_VL:
18112 case RISCVISD::STRICT_VFMADD_VL:
18113 case RISCVISD::STRICT_VFNMADD_VL:
18114 case RISCVISD::STRICT_VFMSUB_VL:
18115 case RISCVISD::STRICT_VFNMSUB_VL:
18116 return performVFMADD_VLCombine(N, DCI, Subtarget);
18117 case RISCVISD::FADD_VL:
18118 case RISCVISD::FSUB_VL:
18119 case RISCVISD::FMUL_VL:
18120 case RISCVISD::VFWADD_W_VL:
18121 case RISCVISD::VFWSUB_W_VL:
18122 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18123 case ISD::LOAD:
18124 case ISD::STORE: {
18125 if (DCI.isAfterLegalizeDAG())
18126 if (SDValue V = performMemPairCombine(N, DCI))
18127 return V;
18128
18129 if (N->getOpcode() != ISD::STORE)
18130 break;
18131
18132 auto *Store = cast<StoreSDNode>(N);
18133 SDValue Chain = Store->getChain();
18134 EVT MemVT = Store->getMemoryVT();
18135 SDValue Val = Store->getValue();
18136 SDLoc DL(N);
18137
18138 bool IsScalarizable =
18139 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18140 Store->isSimple() &&
18141 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18142 isPowerOf2_64(MemVT.getSizeInBits()) &&
18143 MemVT.getSizeInBits() <= Subtarget.getXLen();
18144
18145 // If sufficiently aligned we can scalarize stores of constant vectors of
18146 // any power-of-two size up to XLen bits, provided that they aren't too
18147 // expensive to materialize.
18148 // vsetivli zero, 2, e8, m1, ta, ma
18149 // vmv.v.i v8, 4
18150 // vse64.v v8, (a0)
18151 // ->
18152 // li a1, 1028
18153 // sh a1, 0(a0)
18154 if (DCI.isBeforeLegalize() && IsScalarizable &&
18155 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18156 // Get the constant vector bits
18157 APInt NewC(Val.getValueSizeInBits(), 0);
18158 uint64_t EltSize = Val.getScalarValueSizeInBits();
18159 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18160 if (Val.getOperand(i).isUndef())
18161 continue;
18162 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18163 i * EltSize);
18164 }
18165 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18166
18167 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18168 true) <= 2 &&
18169 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18170 NewVT, *Store->getMemOperand())) {
18171 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18172 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18173 Store->getPointerInfo(), Store->getOriginalAlign(),
18174 Store->getMemOperand()->getFlags());
18175 }
18176 }
18177
18178 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18179 // vsetivli zero, 2, e16, m1, ta, ma
18180 // vle16.v v8, (a0)
18181 // vse16.v v8, (a1)
18182 if (auto *L = dyn_cast<LoadSDNode>(Val);
18183 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18184 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18185 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18186 L->getMemoryVT() == MemVT) {
18187 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18188 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18189 NewVT, *Store->getMemOperand()) &&
18190 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18191 NewVT, *L->getMemOperand())) {
18192 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18193 L->getPointerInfo(), L->getOriginalAlign(),
18194 L->getMemOperand()->getFlags());
18195 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18196 Store->getPointerInfo(), Store->getOriginalAlign(),
18197 Store->getMemOperand()->getFlags());
18198 }
18199 }
18200
18201 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18202 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18203 // any illegal types.
18204 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18205 (DCI.isAfterLegalizeDAG() &&
18206 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18207 isNullConstant(Val.getOperand(1)))) {
18208 SDValue Src = Val.getOperand(0);
18209 MVT VecVT = Src.getSimpleValueType();
18210 // VecVT should be scalable and memory VT should match the element type.
18211 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18212 MemVT == VecVT.getVectorElementType()) {
18213 SDLoc DL(N);
18214 MVT MaskVT = getMaskTypeFor(VecVT);
18215 return DAG.getStoreVP(
18216 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18217 DAG.getConstant(1, DL, MaskVT),
18218 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18219 Store->getMemOperand(), Store->getAddressingMode(),
18220 Store->isTruncatingStore(), /*IsCompress*/ false);
18221 }
18222 }
18223
18224 break;
18225 }
18226 case ISD::SPLAT_VECTOR: {
18227 EVT VT = N->getValueType(0);
18228 // Only perform this combine on legal MVT types.
18229 if (!isTypeLegal(VT))
18230 break;
18231 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18232 DAG, Subtarget))
18233 return Gather;
18234 break;
18235 }
18236 case ISD::BUILD_VECTOR:
18237 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18238 return V;
18239 break;
18240 case ISD::CONCAT_VECTORS:
18241 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18242 return V;
18243 break;
18244 case ISD::INSERT_VECTOR_ELT:
18245 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18246 return V;
18247 break;
18248 case RISCVISD::VFMV_V_F_VL: {
18249 const MVT VT = N->getSimpleValueType(0);
18250 SDValue Passthru = N->getOperand(0);
18251 SDValue Scalar = N->getOperand(1);
18252 SDValue VL = N->getOperand(2);
18253
18254 // If VL is 1, we can use vfmv.s.f.
18255 if (isOneConstant(VL))
18256 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18257 break;
18258 }
18259 case RISCVISD::VMV_V_X_VL: {
18260 const MVT VT = N->getSimpleValueType(0);
18261 SDValue Passthru = N->getOperand(0);
18262 SDValue Scalar = N->getOperand(1);
18263 SDValue VL = N->getOperand(2);
18264
18265 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18266 // scalar input.
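// For example, splatting an i64 scalar into a vector of i8 elements only
// needs the low 8 bits of the scalar, so wider computations feeding it can
// often be narrowed.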
18267 unsigned ScalarSize = Scalar.getValueSizeInBits();
18268 unsigned EltWidth = VT.getScalarSizeInBits();
18269 if (ScalarSize > EltWidth && Passthru.isUndef())
18270 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18271 return SDValue(N, 0);
18272
18273 // If VL is 1 and the scalar value won't benefit from immediate, we can
18274 // use vmv.s.x.
18275 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18276 if (isOneConstant(VL) &&
18277 (!Const || Const->isZero() ||
18278 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18279 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18280
18281 break;
18282 }
18283 case RISCVISD::VFMV_S_F_VL: {
18284 SDValue Src = N->getOperand(1);
18285 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18286 // into an undef vector.
18287 // TODO: Could use a vslide or vmv.v.v for non-undef.
18288 if (N->getOperand(0).isUndef() &&
18289 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18290 isNullConstant(Src.getOperand(1)) &&
18291 Src.getOperand(0).getValueType().isScalableVector()) {
18292 EVT VT = N->getValueType(0);
18293 EVT SrcVT = Src.getOperand(0).getValueType();
18295 // Widths match, just return the original vector.
18296 if (SrcVT == VT)
18297 return Src.getOperand(0);
18298 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18299 }
18300 [[fallthrough]];
18301 }
18302 case RISCVISD::VMV_S_X_VL: {
18303 const MVT VT = N->getSimpleValueType(0);
18304 SDValue Passthru = N->getOperand(0);
18305 SDValue Scalar = N->getOperand(1);
18306 SDValue VL = N->getOperand(2);
18307
18308 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18309 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18310 return Scalar.getOperand(0);
18311
18312 // Use M1 or smaller to avoid over-constraining register allocation.
18313 const MVT M1VT = getLMUL1VT(VT);
18314 if (M1VT.bitsLT(VT)) {
18315 SDValue M1Passthru =
18316 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18317 DAG.getVectorIdxConstant(0, DL));
18318 SDValue Result =
18319 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18320 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18321 DAG.getVectorIdxConstant(0, DL));
18322 return Result;
18323 }
18324
18325 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18326 // higher would involve overly constraining the register allocator for
18327 // no purpose.
18328 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18329 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18330 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18331 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18332
18333 break;
18334 }
18335 case RISCVISD::VMV_X_S: {
18336 SDValue Vec = N->getOperand(0);
18337 MVT VecVT = N->getOperand(0).getSimpleValueType();
18338 const MVT M1VT = getLMUL1VT(VecVT);
18339 if (M1VT.bitsLT(VecVT)) {
18340 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18341 DAG.getVectorIdxConstant(0, DL));
18342 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18343 }
18344 break;
18345 }
18346 case ISD::INTRINSIC_VOID:
18347 case ISD::INTRINSIC_W_CHAIN:
18348 case ISD::INTRINSIC_WO_CHAIN: {
18349 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18350 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18351 switch (IntNo) {
18352 // By default we do not combine any intrinsic.
18353 default:
18354 return SDValue();
18355 case Intrinsic::riscv_vcpop:
18356 case Intrinsic::riscv_vcpop_mask:
18357 case Intrinsic::riscv_vfirst:
18358 case Intrinsic::riscv_vfirst_mask: {
18359 SDValue VL = N->getOperand(2);
18360 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18361 IntNo == Intrinsic::riscv_vfirst_mask)
18362 VL = N->getOperand(3);
18363 if (!isNullConstant(VL))
18364 return SDValue();
18365 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18366 SDLoc DL(N);
18367 EVT VT = N->getValueType(0);
18368 if (IntNo == Intrinsic::riscv_vfirst ||
18369 IntNo == Intrinsic::riscv_vfirst_mask)
18370 return DAG.getAllOnesConstant(DL, VT);
18371 return DAG.getConstant(0, DL, VT);
18372 }
18373 }
18374 }
18375 case ISD::BITCAST: {
18377 SDValue N0 = N->getOperand(0);
18378 EVT VT = N->getValueType(0);
18379 EVT SrcVT = N0.getValueType();
18380 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18381 unsigned NF = VT.getRISCVVectorTupleNumFields();
18382 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18383 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18384 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18385
18386 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18387
18388 SDValue Result = DAG.getUNDEF(VT);
18389 for (unsigned i = 0; i < NF; ++i)
18390 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18391 DAG.getVectorIdxConstant(i, DL));
18392 return Result;
18393 }
18394 // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
18395 // type, widen both sides to avoid a trip through memory.
18396 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18397 VT.isScalarInteger()) {
18398 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18399 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18400 Ops[0] = N0;
18401 SDLoc DL(N);
18402 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18403 N0 = DAG.getBitcast(MVT::i8, N0);
18404 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18405 }
18406
18407 return SDValue();
18408 }
18409 case ISD::CTPOP:
18410 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18411 return V;
18412 break;
18413 }
18414
18415 return SDValue();
18416}
18417
18418 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18419 EVT XVT, unsigned KeptBits) const {
18420 // For vectors, we don't have a preference.
18421 if (XVT.isVector())
18422 return false;
18423
18424 if (XVT != MVT::i32 && XVT != MVT::i64)
18425 return false;
18426
18427 // We can use sext.w for RV64 or an srai 31 on RV32.
18428 if (KeptBits == 32 || KeptBits == 64)
18429 return true;
18430
18431 // With Zbb we can use sext.h/sext.b.
18432 return Subtarget.hasStdExtZbb() &&
18433 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18434 KeptBits == 16);
18435}
18436
18437 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18438 const SDNode *N, CombineLevel Level) const {
18439 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18440 N->getOpcode() == ISD::SRL) &&
18441 "Expected shift op");
18442
18443 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18444 // materialised in fewer instructions than `(OP _, c1)`:
18445 //
18446 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18447 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
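// For example, (shl (add x, 1), 2) -> (add (shl x, 2), 4) is desirable
// because the shifted constant 4 still fits in an ADDI immediate and is
// therefore free to materialise.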
18448 SDValue N0 = N->getOperand(0);
18449 EVT Ty = N0.getValueType();
18450
18451 // LD/ST will optimize constant offset extraction, so when AddNode is used
18452 // by an LD/ST, the folding optimization performed above can still be
18453 // completed.
18454 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18455 for (SDNode *Use : X->users()) {
18456 // This use is the one we're on right now. Skip it
18457 if (Use == User || Use->getOpcode() == ISD::SELECT)
18458 continue;
18459 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18460 return false;
18461 }
18462 return true;
18463 };
18464
18465 if (Ty.isScalarInteger() &&
18466 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18467 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18468 return isUsedByLdSt(N0.getNode(), N);
18469
18470 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18471 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18472
18473 // Bail if we might break a sh{1,2,3}add pattern.
18474 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18475 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18476 N->user_begin()->getOpcode() == ISD::ADD &&
18477 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18478 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18479 return false;
18480
18481 if (C1 && C2) {
18482 const APInt &C1Int = C1->getAPIntValue();
18483 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18484
18485 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18486 // and the combine should happen, to potentially allow further combines
18487 // later.
18488 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18489 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18490 return true;
18491
18492 // We can materialise `c1` in an add immediate, so it's "free", and the
18493 // combine should be prevented.
18494 if (C1Int.getSignificantBits() <= 64 &&
18495 isLegalAddImmediate(C1Int.getSExtValue()))
18496 return false;
18497
18498 // Neither constant will fit into an immediate, so find materialisation
18499 // costs.
18500 int C1Cost =
18501 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18502 /*CompressionCost*/ true);
18503 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18504 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18505 /*CompressionCost*/ true);
18506
18507 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18508 // combine should be prevented.
18509 if (C1Cost < ShiftedC1Cost)
18510 return false;
18511 }
18512 }
18513
18514 if (!N0->hasOneUse())
18515 return false;
18516
18517 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18518 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18519 !N0->getOperand(0)->hasOneUse())
18520 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18521
18522 return true;
18523}
18524
18525 bool RISCVTargetLowering::targetShrinkDemandedConstant(
18526 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18527 TargetLoweringOpt &TLO) const {
18528 // Delay this optimization as late as possible.
18529 if (!TLO.LegalOps)
18530 return false;
18531
18532 EVT VT = Op.getValueType();
18533 if (VT.isVector())
18534 return false;
18535
18536 unsigned Opcode = Op.getOpcode();
18537 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18538 return false;
18539
18540 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18541 if (!C)
18542 return false;
18543
18544 const APInt &Mask = C->getAPIntValue();
18545
18546 // Clear all non-demanded bits initially.
18547 APInt ShrunkMask = Mask & DemandedBits;
18548
18549 // Try to make a smaller immediate by setting undemanded bits.
18550
18551 APInt ExpandedMask = Mask | ~DemandedBits;
18552
18553 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18554 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18555 };
18556 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18557 if (NewMask == Mask)
18558 return true;
18559 SDLoc DL(Op);
18560 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18561 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18562 Op.getOperand(0), NewC);
18563 return TLO.CombineTo(Op, NewOp);
18564 };
18565
18566 // If the shrunk mask fits in sign extended 12 bits, let the target
18567 // independent code apply it.
18568 if (ShrunkMask.isSignedIntN(12))
18569 return false;
18570
18571 // And has a few special cases for zext.
18572 if (Opcode == ISD::AND) {
18573 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18574 // otherwise use SLLI + SRLI.
18575 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18576 if (IsLegalMask(NewMask))
18577 return UseMask(NewMask);
18578
18579 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18580 if (VT == MVT::i64) {
18581 APInt NewMask = APInt(64, 0xffffffff);
18582 if (IsLegalMask(NewMask))
18583 return UseMask(NewMask);
18584 }
18585 }
18586
18587 // For the remaining optimizations, we need to be able to make a negative
18588 // number through a combination of mask and undemanded bits.
18589 if (!ExpandedMask.isNegative())
18590 return false;
18591
18592 // Determine the fewest number of bits needed to represent the negative number.
18593 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18594
18595 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18596 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18597 // If we can't create a simm12, we shouldn't change opaque constants.
18598 APInt NewMask = ShrunkMask;
18599 if (MinSignedBits <= 12)
18600 NewMask.setBitsFrom(11);
18601 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18602 NewMask.setBitsFrom(31);
18603 else
18604 return false;
18605
18606 // Check that our new mask is a subset of the demanded mask.
18607 assert(IsLegalMask(NewMask));
18608 return UseMask(NewMask);
18609}
18610
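// Compute the result of a generalized bit-reverse (GREV) or generalized
// OR-combine (GORC) of x with control value ShAmt. For example, ShAmt == 7
// swaps adjacent bits, then bit pairs, then nibbles within each byte, i.e.
// brev8; with IsGORC each stage also ORs in the original value, so any set
// bit propagates through its whole byte (orc.b).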
18611static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18612 static const uint64_t GREVMasks[] = {
18613 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18614 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18615
18616 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18617 unsigned Shift = 1 << Stage;
18618 if (ShAmt & Shift) {
18619 uint64_t Mask = GREVMasks[Stage];
18620 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18621 if (IsGORC)
18622 Res |= x;
18623 x = Res;
18624 }
18625 }
18626
18627 return x;
18628}
18629
18630 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18631 KnownBits &Known,
18632 const APInt &DemandedElts,
18633 const SelectionDAG &DAG,
18634 unsigned Depth) const {
18635 unsigned BitWidth = Known.getBitWidth();
18636 unsigned Opc = Op.getOpcode();
18637 assert((Opc >= ISD::BUILTIN_OP_END ||
18638 Opc == ISD::INTRINSIC_WO_CHAIN ||
18639 Opc == ISD::INTRINSIC_W_CHAIN ||
18640 Opc == ISD::INTRINSIC_VOID) &&
18641 "Should use MaskedValueIsZero if you don't know whether Op"
18642 " is a target node!");
18643
18644 Known.resetAll();
18645 switch (Opc) {
18646 default: break;
18647 case RISCVISD::SELECT_CC: {
18648 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18649 // If we don't know any bits, early out.
18650 if (Known.isUnknown())
18651 break;
18652 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18653
18654 // Only known if known in both the LHS and RHS.
18655 Known = Known.intersectWith(Known2);
18656 break;
18657 }
18658 case RISCVISD::CZERO_EQZ:
18659 case RISCVISD::CZERO_NEZ:
18660 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18661 // Result is either all zero or operand 0. We can propagate zeros, but not
18662 // ones.
18663 Known.One.clearAllBits();
18664 break;
18665 case RISCVISD::REMUW: {
18666 KnownBits Known2;
18667 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18668 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18669 // We only care about the lower 32 bits.
18670 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18671 // Restore the original width by sign extending.
18672 Known = Known.sext(BitWidth);
18673 break;
18674 }
18675 case RISCVISD::DIVUW: {
18676 KnownBits Known2;
18677 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18678 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18679 // We only care about the lower 32 bits.
18680 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18681 // Restore the original width by sign extending.
18682 Known = Known.sext(BitWidth);
18683 break;
18684 }
18685 case RISCVISD::SLLW: {
18686 KnownBits Known2;
18687 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18688 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18689 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18690 // Restore the original width by sign extending.
18691 Known = Known.sext(BitWidth);
18692 break;
18693 }
18694 case RISCVISD::CTZW: {
18695 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18696 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18697 unsigned LowBits = llvm::bit_width(PossibleTZ);
18698 Known.Zero.setBitsFrom(LowBits);
18699 break;
18700 }
18701 case RISCVISD::CLZW: {
18702 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18703 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18704 unsigned LowBits = llvm::bit_width(PossibleLZ);
18705 Known.Zero.setBitsFrom(LowBits);
18706 break;
18707 }
18708 case RISCVISD::BREV8:
18709 case RISCVISD::ORC_B: {
18710 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18711 // control value of 7 is equivalent to brev8 and orc.b.
18712 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18713 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18714 // To compute zeros, we need to invert the value and invert it back after.
18715 Known.Zero =
18716 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18717 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18718 break;
18719 }
18720 case RISCVISD::READ_VLENB: {
18721 // We can use the minimum and maximum VLEN values to bound VLENB. We
18722 // know VLEN must be a power of two.
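    // Illustrative example (not part of the original source): with a minimum
    // VLEN of 128 and a maximum of 65536, VLENB is a power of two in
    // [16, 8192], so the low 4 bits and bits 14 and above are known zero; if
    // the minimum and maximum are both 128, bit 4 is additionally known one.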
18723 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18724 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18725 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18726 Known.Zero.setLowBits(Log2_32(MinVLenB));
18727 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18728 if (MaxVLenB == MinVLenB)
18729 Known.One.setBit(Log2_32(MinVLenB));
18730 break;
18731 }
18732 case RISCVISD::FCLASS: {
18733 // fclass will only set one of the low 10 bits.
18734 Known.Zero.setBitsFrom(10);
18735 break;
18736 }
18737 case ISD::INTRINSIC_W_CHAIN:
18738 case ISD::INTRINSIC_WO_CHAIN: {
18739 unsigned IntNo =
18740 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18741 switch (IntNo) {
18742 default:
18743 // We can't do anything for most intrinsics.
18744 break;
18745 case Intrinsic::riscv_vsetvli:
18746 case Intrinsic::riscv_vsetvlimax: {
18747 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18748 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18749 RISCVII::VLMUL VLMUL =
18750 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18751 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18752 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18753 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18754 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
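      // Illustrative example (not part of the original source): with a
      // maximum VLEN of 512, SEW=32 and LMUL=2, MaxVL is 512/32*2 = 32, so
      // bits 6 and above of the result are known zero.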
18755
18756 // The result of vsetvli must not be larger than AVL.
18757 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18758 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18759
18760 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18761 if (BitWidth > KnownZeroFirstBit)
18762 Known.Zero.setBitsFrom(KnownZeroFirstBit);
18763 break;
18764 }
18765 }
18766 break;
18767 }
18768 }
18769}
18770
18771unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18772 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18773 unsigned Depth) const {
18774 switch (Op.getOpcode()) {
18775 default:
18776 break;
18777 case RISCVISD::SELECT_CC: {
18778 unsigned Tmp =
18779 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18780 if (Tmp == 1) return 1; // Early out.
18781 unsigned Tmp2 =
18782 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18783 return std::min(Tmp, Tmp2);
18784 }
18785 case RISCVISD::CZERO_EQZ:
18786 case RISCVISD::CZERO_NEZ:
18787 // Output is either all zero or operand 0. We can propagate sign bit count
18788 // from operand 0.
18789 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18790 case RISCVISD::ABSW: {
18791 // We expand this at isel to negw+max. The result will have 33 sign bits
18792 // if the input has at least 33 sign bits.
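    // Illustrative example (not part of the original source): for the input
    // 0xffffffff80000000 (sign-extended INT32_MIN) the expansion yields
    // 0xffffffff80000000 again, which has exactly 33 sign bits.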
18793 unsigned Tmp =
18794 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18795 if (Tmp < 33) return 1;
18796 return 33;
18797 }
18798 case RISCVISD::SLLW:
18799 case RISCVISD::SRAW:
18800 case RISCVISD::SRLW:
18801 case RISCVISD::DIVW:
18802 case RISCVISD::DIVUW:
18803 case RISCVISD::REMUW:
18804 case RISCVISD::ROLW:
18805 case RISCVISD::RORW:
18810 // TODO: As the result is sign-extended, this is conservatively correct. A
18811 // more precise answer could be calculated for SRAW depending on known
18812 // bits in the shift amount.
18813 return 33;
18814 case RISCVISD::VMV_X_S: {
18815 // The number of sign bits of the scalar result is computed by obtaining the
18816 // element type of the input vector operand, subtracting its width from the
18817 // XLEN, and then adding one (sign bit within the element type). If the
18818 // element type is wider than XLen, the least-significant XLEN bits are
18819 // taken.
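    // Illustrative example (not part of the original source): on RV64 an i8
    // element gives 64 - 8 + 1 = 57 known sign bits.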
18820 unsigned XLen = Subtarget.getXLen();
18821 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
18822 if (EltBits <= XLen)
18823 return XLen - EltBits + 1;
18824 break;
18825 }
18826 case ISD::INTRINSIC_W_CHAIN: {
18827 unsigned IntNo = Op.getConstantOperandVal(1);
18828 switch (IntNo) {
18829 default:
18830 break;
18831 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
18832 case Intrinsic::riscv_masked_atomicrmw_add_i64:
18833 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
18834 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
18835 case Intrinsic::riscv_masked_atomicrmw_max_i64:
18836 case Intrinsic::riscv_masked_atomicrmw_min_i64:
18837 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
18838 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
18839 case Intrinsic::riscv_masked_cmpxchg_i64:
18840 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
18841 // narrow atomic operation. These are implemented using atomic
18842 // operations at the minimum supported atomicrmw/cmpxchg width whose
18843 // result is then sign extended to XLEN. With +A, the minimum width is
18844 // 32 for both 64 and 32.
18845 assert(Subtarget.getXLen() == 64);
18847 assert(Subtarget.hasStdExtA());
18848 return 33;
18849 }
18850 break;
18851 }
18852 }
18853
18854 return 1;
18855}
18856
18857bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
18858 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18859 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
18860
18861 // TODO: Add more target nodes.
18862 switch (Op.getOpcode()) {
18863 case RISCVISD::SELECT_CC:
18864 // Integer select_cc cannot create poison.
18865 // TODO: What are the FP poison semantics?
18866 // TODO: This instruction blocks poison from the unselected operand, can
18867 // we do anything with that?
18868 return !Op.getValueType().isInteger();
18869 }
18870 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
18871 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
18872}
18873
18874const Constant *
18875RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
18876 assert(Ld && "Unexpected null LoadSDNode");
18877 if (!ISD::isNormalLoad(Ld))
18878 return nullptr;
18879
18880 SDValue Ptr = Ld->getBasePtr();
18881
18882 // Only constant pools with no offset are supported.
18883 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
18884 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
18885 if (!CNode || CNode->isMachineConstantPoolEntry() ||
18886 CNode->getOffset() != 0)
18887 return nullptr;
18888
18889 return CNode;
18890 };
18891
18892 // Simple case, LLA.
18893 if (Ptr.getOpcode() == RISCVISD::LLA) {
18894 auto *CNode = GetSupportedConstantPool(Ptr);
18895 if (!CNode || CNode->getTargetFlags() != 0)
18896 return nullptr;
18897
18898 return CNode->getConstVal();
18899 }
18900
18901 // Look for a HI and ADD_LO pair.
18902 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
18903 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
18904 return nullptr;
18905
18906 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
18907 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
18908
18909 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
18910 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
18911 return nullptr;
18912
18913 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
18914 return nullptr;
18915
18916 return CNodeLo->getConstVal();
18917}
18918
18919static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
18920 MachineBasicBlock *BB) {
18921 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
18922
18923 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
18924 // Should the count have wrapped while it was being read, we need to try
18925 // again.
18926 // For example:
18927 // ```
18928 // read:
18929 // csrrs x3, counterh # load high word of counter
18930 // csrrs x2, counter # load low word of counter
18931 // csrrs x4, counterh # load high word of counter
18932 // bne x3, x4, read # check if high word reads match, otherwise try again
18933 // ```
18934
18935 MachineFunction &MF = *BB->getParent();
18936 const BasicBlock *LLVMBB = BB->getBasicBlock();
18938 MachineFunction::iterator It = ++BB->getIterator();
18939 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
18940 MF.insert(It, LoopMBB);
18941
18942 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
18943 MF.insert(It, DoneMBB);
18944
18945 // Transfer the remainder of BB and its successor edges to DoneMBB.
18946 DoneMBB->splice(DoneMBB->begin(), BB,
18947 std::next(MachineBasicBlock::iterator(MI)), BB->end());
18948 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
18949
18950 BB->addSuccessor(LoopMBB);
18951
18952 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18953 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18954 Register LoReg = MI.getOperand(0).getReg();
18955 Register HiReg = MI.getOperand(1).getReg();
18956 int64_t LoCounter = MI.getOperand(2).getImm();
18957 int64_t HiCounter = MI.getOperand(3).getImm();
18958 DebugLoc DL = MI.getDebugLoc();
18959
18961 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
18962 .addImm(HiCounter)
18963 .addReg(RISCV::X0);
18964 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
18965 .addImm(LoCounter)
18966 .addReg(RISCV::X0);
18967 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
18968 .addImm(HiCounter)
18969 .addReg(RISCV::X0);
18970
18971 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
18972 .addReg(HiReg)
18973 .addReg(ReadAgainReg)
18974 .addMBB(LoopMBB);
18975
18976 LoopMBB->addSuccessor(LoopMBB);
18977 LoopMBB->addSuccessor(DoneMBB);
18978
18979 MI.eraseFromParent();
18980
18981 return DoneMBB;
18982}
18983
18984static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
18985 MachineBasicBlock *BB,
18986 const RISCVSubtarget &Subtarget) {
18987 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18988
18989 MachineFunction &MF = *BB->getParent();
18990 DebugLoc DL = MI.getDebugLoc();
18993 Register LoReg = MI.getOperand(0).getReg();
18994 Register HiReg = MI.getOperand(1).getReg();
18995 Register SrcReg = MI.getOperand(2).getReg();
18996
18997 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18998 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18999
19000 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19001 RI, Register());
19003 MachineMemOperand *MMOLo =
19007 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19008 .addFrameIndex(FI)
19009 .addImm(0)
19010 .addMemOperand(MMOLo);
19011 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19012 .addFrameIndex(FI)
19013 .addImm(4)
19014 .addMemOperand(MMOHi);
19015 MI.eraseFromParent(); // The pseudo instruction is gone now.
19016 return BB;
19017}
19018
19019static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
19020 MachineBasicBlock *BB,
19021 const RISCVSubtarget &Subtarget) {
19022 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19023 "Unexpected instruction");
19024
19025 MachineFunction &MF = *BB->getParent();
19026 DebugLoc DL = MI.getDebugLoc();
19029 Register DstReg = MI.getOperand(0).getReg();
19030 Register LoReg = MI.getOperand(1).getReg();
19031 Register HiReg = MI.getOperand(2).getReg();
19032
19033 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19034 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19035
19037 MachineMemOperand *MMOLo =
19041 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19042 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19043 .addFrameIndex(FI)
19044 .addImm(0)
19045 .addMemOperand(MMOLo);
19046 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19047 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19048 .addFrameIndex(FI)
19049 .addImm(4)
19050 .addMemOperand(MMOHi);
19051 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19052 MI.eraseFromParent(); // The pseudo instruction is gone now.
19053 return BB;
19054}
19055
19056static bool isSelectPseudo(MachineInstr &MI) {
19057 switch (MI.getOpcode()) {
19058 default:
19059 return false;
19060 case RISCV::Select_GPR_Using_CC_GPR:
19061 case RISCV::Select_GPR_Using_CC_Imm:
19062 case RISCV::Select_FPR16_Using_CC_GPR:
19063 case RISCV::Select_FPR16INX_Using_CC_GPR:
19064 case RISCV::Select_FPR32_Using_CC_GPR:
19065 case RISCV::Select_FPR32INX_Using_CC_GPR:
19066 case RISCV::Select_FPR64_Using_CC_GPR:
19067 case RISCV::Select_FPR64INX_Using_CC_GPR:
19068 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19069 return true;
19070 }
19071}
19072
19073static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
19074 unsigned RelOpcode, unsigned EqOpcode,
19075 const RISCVSubtarget &Subtarget) {
19076 DebugLoc DL = MI.getDebugLoc();
19077 Register DstReg = MI.getOperand(0).getReg();
19078 Register Src1Reg = MI.getOperand(1).getReg();
19079 Register Src2Reg = MI.getOperand(2).getReg();
19081 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19083
19084 // Save the current FFLAGS.
19085 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19086
19087 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19088 .addReg(Src1Reg)
19089 .addReg(Src2Reg);
19092
19093 // Restore the FFLAGS.
19094 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19095 .addReg(SavedFFlags, RegState::Kill);
19096
19097 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19098 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19099 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19100 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19103
19104 // Erase the pseudoinstruction.
19105 MI.eraseFromParent();
19106 return BB;
19107}
19108
19109static MachineBasicBlock *
19110EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19111 MachineBasicBlock *ThisMBB,
19112 const RISCVSubtarget &Subtarget) {
19113 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19114 // Without this, custom-inserter would have generated:
19115 //
19116 // A
19117 // | \
19118 // | B
19119 // | /
19120 // C
19121 // | \
19122 // | D
19123 // | /
19124 // E
19125 //
19126 // A: X = ...; Y = ...
19127 // B: empty
19128 // C: Z = PHI [X, A], [Y, B]
19129 // D: empty
19130 // E: PHI [X, C], [Z, D]
19131 //
19132 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19133 //
19134 // A
19135 // | \
19136 // | C
19137 // | /|
19138 // |/ |
19139 // | |
19140 // | D
19141 // | /
19142 // E
19143 //
19144 // A: X = ...; Y = ...
19145 // D: empty
19146 // E: PHI [X, A], [X, C], [Y, D]
19147
19148 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19149 const DebugLoc &DL = First.getDebugLoc();
19150 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19151 MachineFunction *F = ThisMBB->getParent();
19152 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19153 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19154 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19155 MachineFunction::iterator It = ++ThisMBB->getIterator();
19156 F->insert(It, FirstMBB);
19157 F->insert(It, SecondMBB);
19158 F->insert(It, SinkMBB);
19159
19160 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19161 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19163 ThisMBB->end());
19164 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19165
19166 // Fallthrough block for ThisMBB.
19167 ThisMBB->addSuccessor(FirstMBB);
19168 // Fallthrough block for FirstMBB.
19169 FirstMBB->addSuccessor(SecondMBB);
19170 ThisMBB->addSuccessor(SinkMBB);
19171 FirstMBB->addSuccessor(SinkMBB);
19172 // This is fallthrough.
19173 SecondMBB->addSuccessor(SinkMBB);
19174
19175 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19176 Register FLHS = First.getOperand(1).getReg();
19177 Register FRHS = First.getOperand(2).getReg();
19178 // Insert appropriate branch.
19179 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19180 .addReg(FLHS)
19181 .addReg(FRHS)
19182 .addMBB(SinkMBB);
19183
19184 Register SLHS = Second.getOperand(1).getReg();
19185 Register SRHS = Second.getOperand(2).getReg();
19186 Register Op1Reg4 = First.getOperand(4).getReg();
19187 Register Op1Reg5 = First.getOperand(5).getReg();
19188
19189 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19190 // Insert appropriate branch.
19191 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19192 .addReg(SLHS)
19193 .addReg(SRHS)
19194 .addMBB(SinkMBB);
19195
19196 Register DestReg = Second.getOperand(0).getReg();
19197 Register Op2Reg4 = Second.getOperand(4).getReg();
19198 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19199 .addReg(Op2Reg4)
19200 .addMBB(ThisMBB)
19201 .addReg(Op1Reg4)
19202 .addMBB(FirstMBB)
19203 .addReg(Op1Reg5)
19204 .addMBB(SecondMBB);
19205
19206 // Now remove the Select_FPRX_s.
19207 First.eraseFromParent();
19208 Second.eraseFromParent();
19209 return SinkMBB;
19210}
19211
19212static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19213 MachineBasicBlock *BB,
19214 const RISCVSubtarget &Subtarget) {
19215 // To "insert" Select_* instructions, we actually have to insert the triangle
19216 // control-flow pattern. The incoming instructions know the destination vreg
19217 // to set, the condition code register to branch on, the true/false values to
19218 // select between, and the condcode to use to select the appropriate branch.
19219 //
19220 // We produce the following control flow:
19221 // HeadMBB
19222 // | \
19223 // | IfFalseMBB
19224 // | /
19225 // TailMBB
19226 //
19227 // When we find a sequence of selects we attempt to optimize their emission
19228 // by sharing the control flow. Currently we only handle cases where we have
19229 // multiple selects with the exact same condition (same LHS, RHS and CC).
19230 // The selects may be interleaved with other instructions if the other
19231 // instructions meet some requirements we deem safe:
19232 // - They are not pseudo instructions.
19233 // - They are debug instructions, or otherwise:
19234 // - They do not have side effects, do not access memory, and their inputs do
19235 // not depend on the results of the select pseudo-instructions.
19236 // The TrueV/FalseV operands of the selects cannot depend on the result of
19237 // previous selects in the sequence.
19238 // These conditions could be further relaxed. See the X86 target for a
19239 // related approach and more information.
19240 //
19241 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19242 // is checked here and handled by a separate function -
19243 // EmitLoweredCascadedSelect.
19244
19245 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19246 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19247 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19248 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19249 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19250 Next->getOperand(5).isKill())
19251 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19252
19253 Register LHS = MI.getOperand(1).getReg();
19254 Register RHS;
19255 if (MI.getOperand(2).isReg())
19256 RHS = MI.getOperand(2).getReg();
19257 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19258
19259 SmallVector<MachineInstr *, 4> SelectDebugValues;
19260 SmallSet<Register, 4> SelectDests;
19261 SelectDests.insert(MI.getOperand(0).getReg());
19262
19263 MachineInstr *LastSelectPseudo = &MI;
19264 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19265 SequenceMBBI != E; ++SequenceMBBI) {
19266 if (SequenceMBBI->isDebugInstr())
19267 continue;
19268 if (isSelectPseudo(*SequenceMBBI)) {
19269 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19270 !SequenceMBBI->getOperand(2).isReg() ||
19271 SequenceMBBI->getOperand(2).getReg() != RHS ||
19272 SequenceMBBI->getOperand(3).getImm() != CC ||
19273 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19274 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19275 break;
19276 LastSelectPseudo = &*SequenceMBBI;
19277 SequenceMBBI->collectDebugValues(SelectDebugValues);
19278 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19279 continue;
19280 }
19281 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19282 SequenceMBBI->mayLoadOrStore() ||
19283 SequenceMBBI->usesCustomInsertionHook())
19284 break;
19285 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19286 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19287 }))
19288 break;
19289 }
19290
19291 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19292 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19293 DebugLoc DL = MI.getDebugLoc();
19295
19296 MachineBasicBlock *HeadMBB = BB;
19297 MachineFunction *F = BB->getParent();
19298 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19299 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19300
19301 F->insert(I, IfFalseMBB);
19302 F->insert(I, TailMBB);
19303
19304 // Set the call frame size on entry to the new basic blocks.
19305 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19306 IfFalseMBB->setCallFrameSize(CallFrameSize);
19307 TailMBB->setCallFrameSize(CallFrameSize);
19308
19309 // Transfer debug instructions associated with the selects to TailMBB.
19310 for (MachineInstr *DebugInstr : SelectDebugValues) {
19311 TailMBB->push_back(DebugInstr->removeFromParent());
19312 }
19313
19314 // Move all instructions after the sequence to TailMBB.
19315 TailMBB->splice(TailMBB->end(), HeadMBB,
19316 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19317 // Update machine-CFG edges by transferring all successors of the current
19318 // block to the new block which will contain the Phi nodes for the selects.
19319 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19320 // Set the successors for HeadMBB.
19321 HeadMBB->addSuccessor(IfFalseMBB);
19322 HeadMBB->addSuccessor(TailMBB);
19323
19324 // Insert appropriate branch.
19325 if (MI.getOperand(2).isImm())
19326 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19327 .addReg(LHS)
19328 .addImm(MI.getOperand(2).getImm())
19329 .addMBB(TailMBB);
19330 else
19331 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19332 .addReg(LHS)
19333 .addReg(RHS)
19334 .addMBB(TailMBB);
19335
19336 // IfFalseMBB just falls through to TailMBB.
19337 IfFalseMBB->addSuccessor(TailMBB);
19338
19339 // Create PHIs for all of the select pseudo-instructions.
19340 auto SelectMBBI = MI.getIterator();
19341 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19342 auto InsertionPoint = TailMBB->begin();
19343 while (SelectMBBI != SelectEnd) {
19344 auto Next = std::next(SelectMBBI);
19345 if (isSelectPseudo(*SelectMBBI)) {
19346 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19347 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19348 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19349 .addReg(SelectMBBI->getOperand(4).getReg())
19350 .addMBB(HeadMBB)
19351 .addReg(SelectMBBI->getOperand(5).getReg())
19352 .addMBB(IfFalseMBB);
19353 SelectMBBI->eraseFromParent();
19354 }
19355 SelectMBBI = Next;
19356 }
19357
19358 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19359 return TailMBB;
19360}
19361
19362// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19363static const RISCV::RISCVMaskedPseudoInfo *
19364lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19365 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
19366 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19367 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19368 const RISCV::RISCVMaskedPseudoInfo *Masked =
19369 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19370 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19371 return Masked;
19372}
19373
19374static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19375 MachineBasicBlock *BB,
19376 unsigned CVTXOpc) {
19377 DebugLoc DL = MI.getDebugLoc();
19378
19380
19382 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19383
19384 // Save the old value of FFLAGS.
19385 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19386
19387 assert(MI.getNumOperands() == 7);
19388
19389 // Emit a VFCVT_X_F
19390 const TargetRegisterInfo *TRI =
19392 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19393 Register Tmp = MRI.createVirtualRegister(RC);
19394 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19395 .add(MI.getOperand(1))
19396 .add(MI.getOperand(2))
19397 .add(MI.getOperand(3))
19398 .add(MachineOperand::CreateImm(7)) // frm = DYN
19399 .add(MI.getOperand(4))
19400 .add(MI.getOperand(5))
19401 .add(MI.getOperand(6))
19402 .add(MachineOperand::CreateReg(RISCV::FRM,
19403 /*IsDef*/ false,
19404 /*IsImp*/ true));
19405
19406 // Emit a VFCVT_F_X
19407 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19408 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19409 // There is no E8 variant for VFCVT_F_X.
19410 assert(Log2SEW >= 4);
19411 unsigned CVTFOpc =
19412 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19413 ->MaskedPseudo;
19414
19415 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19416 .add(MI.getOperand(0))
19417 .add(MI.getOperand(1))
19418 .addReg(Tmp)
19419 .add(MI.getOperand(3))
19420 .add(MachineOperand::CreateImm(7)) // frm = DYN
19421 .add(MI.getOperand(4))
19422 .add(MI.getOperand(5))
19423 .add(MI.getOperand(6))
19424 .add(MachineOperand::CreateReg(RISCV::FRM,
19425 /*IsDef*/ false,
19426 /*IsImp*/ true));
19427
19428 // Restore FFLAGS.
19429 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19430 .addReg(SavedFFLAGS, RegState::Kill);
19431
19432 // Erase the pseudoinstruction.
19433 MI.eraseFromParent();
19434 return BB;
19435}
19436
19437static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19438 const RISCVSubtarget &Subtarget) {
19439 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19440 const TargetRegisterClass *RC;
19441 switch (MI.getOpcode()) {
19442 default:
19443 llvm_unreachable("Unexpected opcode");
19444 case RISCV::PseudoFROUND_H:
19445 CmpOpc = RISCV::FLT_H;
19446 F2IOpc = RISCV::FCVT_W_H;
19447 I2FOpc = RISCV::FCVT_H_W;
19448 FSGNJOpc = RISCV::FSGNJ_H;
19449 FSGNJXOpc = RISCV::FSGNJX_H;
19450 RC = &RISCV::FPR16RegClass;
19451 break;
19452 case RISCV::PseudoFROUND_H_INX:
19453 CmpOpc = RISCV::FLT_H_INX;
19454 F2IOpc = RISCV::FCVT_W_H_INX;
19455 I2FOpc = RISCV::FCVT_H_W_INX;
19456 FSGNJOpc = RISCV::FSGNJ_H_INX;
19457 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19458 RC = &RISCV::GPRF16RegClass;
19459 break;
19460 case RISCV::PseudoFROUND_S:
19461 CmpOpc = RISCV::FLT_S;
19462 F2IOpc = RISCV::FCVT_W_S;
19463 I2FOpc = RISCV::FCVT_S_W;
19464 FSGNJOpc = RISCV::FSGNJ_S;
19465 FSGNJXOpc = RISCV::FSGNJX_S;
19466 RC = &RISCV::FPR32RegClass;
19467 break;
19468 case RISCV::PseudoFROUND_S_INX:
19469 CmpOpc = RISCV::FLT_S_INX;
19470 F2IOpc = RISCV::FCVT_W_S_INX;
19471 I2FOpc = RISCV::FCVT_S_W_INX;
19472 FSGNJOpc = RISCV::FSGNJ_S_INX;
19473 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19474 RC = &RISCV::GPRF32RegClass;
19475 break;
19476 case RISCV::PseudoFROUND_D:
19477 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19478 CmpOpc = RISCV::FLT_D;
19479 F2IOpc = RISCV::FCVT_L_D;
19480 I2FOpc = RISCV::FCVT_D_L;
19481 FSGNJOpc = RISCV::FSGNJ_D;
19482 FSGNJXOpc = RISCV::FSGNJX_D;
19483 RC = &RISCV::FPR64RegClass;
19484 break;
19485 case RISCV::PseudoFROUND_D_INX:
19486 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19487 CmpOpc = RISCV::FLT_D_INX;
19488 F2IOpc = RISCV::FCVT_L_D_INX;
19489 I2FOpc = RISCV::FCVT_D_L_INX;
19490 FSGNJOpc = RISCV::FSGNJ_D_INX;
19491 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19492 RC = &RISCV::GPRRegClass;
19493 break;
19494 }
19495
19496 const BasicBlock *BB = MBB->getBasicBlock();
19497 DebugLoc DL = MI.getDebugLoc();
19499
19501 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19502 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19503
19504 F->insert(I, CvtMBB);
19505 F->insert(I, DoneMBB);
19506 // Move all instructions after the sequence to DoneMBB.
19507 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19508 MBB->end());
19509 // Update machine-CFG edges by transferring all successors of the current
19510 // block to the new block which will contain the Phi nodes for the selects.
19511 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
19512 // Set the successors for MBB.
19513 MBB->addSuccessor(CvtMBB);
19514 MBB->addSuccessor(DoneMBB);
19515
19516 Register DstReg = MI.getOperand(0).getReg();
19517 Register SrcReg = MI.getOperand(1).getReg();
19518 Register MaxReg = MI.getOperand(2).getReg();
19519 int64_t FRM = MI.getOperand(3).getImm();
19520
19521 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19523
19524 Register FabsReg = MRI.createVirtualRegister(RC);
19525 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19526
19527 // Compare the FP value to the max value.
19528 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19529 auto MIB =
19530 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19533
19534 // Insert branch.
19535 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19536 .addReg(CmpReg)
19537 .addReg(RISCV::X0)
19538 .addMBB(DoneMBB);
19539
19540 CvtMBB->addSuccessor(DoneMBB);
19541
19542 // Convert to integer.
19543 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19544 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19547
19548 // Convert back to FP.
19549 Register I2FReg = MRI.createVirtualRegister(RC);
19550 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19553
19554 // Restore the sign bit.
19555 Register CvtReg = MRI.createVirtualRegister(RC);
19556 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19557
19558 // Merge the results.
19559 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19560 .addReg(SrcReg)
19561 .addMBB(MBB)
19562 .addReg(CvtReg)
19563 .addMBB(CvtMBB);
19564
19565 MI.eraseFromParent();
19566 return DoneMBB;
19567}
19568
19569MachineBasicBlock *
19570RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19571 MachineBasicBlock *BB) const {
19572 switch (MI.getOpcode()) {
19573 default:
19574 llvm_unreachable("Unexpected instr type to insert");
19575 case RISCV::ReadCounterWide:
19576 assert(!Subtarget.is64Bit() &&
19577 "ReadCounterWide is only to be used on riscv32");
19578 return emitReadCounterWidePseudo(MI, BB);
19579 case RISCV::Select_GPR_Using_CC_GPR:
19580 case RISCV::Select_GPR_Using_CC_Imm:
19581 case RISCV::Select_FPR16_Using_CC_GPR:
19582 case RISCV::Select_FPR16INX_Using_CC_GPR:
19583 case RISCV::Select_FPR32_Using_CC_GPR:
19584 case RISCV::Select_FPR32INX_Using_CC_GPR:
19585 case RISCV::Select_FPR64_Using_CC_GPR:
19586 case RISCV::Select_FPR64INX_Using_CC_GPR:
19587 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19588 return emitSelectPseudo(MI, BB, Subtarget);
19589 case RISCV::BuildPairF64Pseudo:
19590 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19591 case RISCV::SplitF64Pseudo:
19592 return emitSplitF64Pseudo(MI, BB, Subtarget);
19593 case RISCV::PseudoQuietFLE_H:
19594 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19595 case RISCV::PseudoQuietFLE_H_INX:
19596 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19597 case RISCV::PseudoQuietFLT_H:
19598 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19599 case RISCV::PseudoQuietFLT_H_INX:
19600 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19601 case RISCV::PseudoQuietFLE_S:
19602 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19603 case RISCV::PseudoQuietFLE_S_INX:
19604 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19605 case RISCV::PseudoQuietFLT_S:
19606 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19607 case RISCV::PseudoQuietFLT_S_INX:
19608 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19609 case RISCV::PseudoQuietFLE_D:
19610 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19611 case RISCV::PseudoQuietFLE_D_INX:
19612 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19613 case RISCV::PseudoQuietFLE_D_IN32X:
19614 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19615 Subtarget);
19616 case RISCV::PseudoQuietFLT_D:
19617 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19618 case RISCV::PseudoQuietFLT_D_INX:
19619 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19620 case RISCV::PseudoQuietFLT_D_IN32X:
19621 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19622 Subtarget);
19623
19624 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19625 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19626 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19627 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19628 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19629 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19630 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19631 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19632 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19633 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19634 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19635 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19636 case RISCV::PseudoFROUND_H:
19637 case RISCV::PseudoFROUND_H_INX:
19638 case RISCV::PseudoFROUND_S:
19639 case RISCV::PseudoFROUND_S_INX:
19640 case RISCV::PseudoFROUND_D:
19641 case RISCV::PseudoFROUND_D_INX:
19642 case RISCV::PseudoFROUND_D_IN32X:
19643 return emitFROUND(MI, BB, Subtarget);
19644 case TargetOpcode::STATEPOINT:
19645 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
19646 // while the jal call instruction (to which the statepoint will be lowered
19647 // at the end) has an implicit def. This def is early-clobber as it will be
19648 // set at the moment of the call and earlier than any use is read.
19649 // Add this implicit dead def here as a workaround.
19650 MI.addOperand(*MI.getMF(),
19651 MachineOperand::CreateReg(
19652 RISCV::X1, /*isDef*/ true,
19653 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19654 /*isUndef*/ false, /*isEarlyClobber*/ true));
19655 [[fallthrough]];
19656 case TargetOpcode::STACKMAP:
19657 case TargetOpcode::PATCHPOINT:
19658 if (!Subtarget.is64Bit())
19659 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19660 "supported on 64-bit targets");
19661 return emitPatchPoint(MI, BB);
19662 }
19663}
19664
19665void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19666 SDNode *Node) const {
19667 // Add FRM dependency to any instructions with dynamic rounding mode.
19668 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19669 if (Idx < 0) {
19670 // Vector pseudos have FRM index indicated by TSFlags.
19671 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19672 if (Idx < 0)
19673 return;
19674 }
19675 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19676 return;
19677 // If the instruction already reads FRM, don't add another read.
19678 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19679 return;
19680 MI.addOperand(
19681 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19682}
19683
19684void RISCVTargetLowering::analyzeInputArgs(
19685 MachineFunction &MF, CCState &CCInfo,
19686 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19687 RISCVCCAssignFn Fn) const {
19688 unsigned NumArgs = Ins.size();
19689 FunctionType *FType = MF.getFunction().getFunctionType();
19690
19691 for (unsigned i = 0; i != NumArgs; ++i) {
19692 MVT ArgVT = Ins[i].VT;
19693 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19694
19695 Type *ArgTy = nullptr;
19696 if (IsRet)
19697 ArgTy = FType->getReturnType();
19698 else if (Ins[i].isOrigArg())
19699 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19700
19701 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19702 /*IsFixed=*/true, IsRet, ArgTy)) {
19703 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19704 << ArgVT << '\n');
19705 llvm_unreachable(nullptr);
19706 }
19707 }
19708}
19709
19710void RISCVTargetLowering::analyzeOutputArgs(
19711 MachineFunction &MF, CCState &CCInfo,
19712 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19713 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19714 unsigned NumArgs = Outs.size();
19715
19716 for (unsigned i = 0; i != NumArgs; i++) {
19717 MVT ArgVT = Outs[i].VT;
19718 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19719 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19720
19721 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19722 Outs[i].IsFixed, IsRet, OrigTy)) {
19723 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19724 << ArgVT << "\n");
19725 llvm_unreachable(nullptr);
19726 }
19727 }
19728}
19729
19730// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19731// values.
19732static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19733 const CCValAssign &VA, const SDLoc &DL,
19734 const RISCVSubtarget &Subtarget) {
19735 if (VA.needsCustom()) {
19736 if (VA.getLocVT().isInteger() &&
19737 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19738 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19739 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19740 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19741 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19742 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19743 llvm_unreachable("Unexpected Custom handling.");
19744 }
19745
19746 switch (VA.getLocInfo()) {
19747 default:
19748 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19749 case CCValAssign::Full:
19750 break;
19751 case CCValAssign::BCvt:
19752 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19753 break;
19754 }
19755 return Val;
19756}
19757
19758// The caller is responsible for loading the full value if the argument is
19759// passed with CCValAssign::Indirect.
19760static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19761 const CCValAssign &VA, const SDLoc &DL,
19762 const ISD::InputArg &In,
19763 const RISCVTargetLowering &TLI) {
19766 EVT LocVT = VA.getLocVT();
19767 SDValue Val;
19768 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19769 Register VReg = RegInfo.createVirtualRegister(RC);
19770 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19771 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19772
19773 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19774 if (In.isOrigArg()) {
19775 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19776 if (OrigArg->getType()->isIntegerTy()) {
19777 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19778 // An input zero extended from i31 can also be considered sign extended.
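      // Illustrative example (not part of the original source): an i8
      // argument passed zeroext has bits 8 and above known zero, so bit 31 is
      // clear and the value is also sign extended from 32 bits.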
19779 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19780 (BitWidth < 32 && In.Flags.isZExt())) {
19782 RVFI->addSExt32Register(VReg);
19783 }
19784 }
19785 }
19786
19787 if (VA.getLocInfo() == CCValAssign::Indirect)
19788 return Val;
19789
19790 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19791}
19792
19793static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19794 const CCValAssign &VA, const SDLoc &DL,
19795 const RISCVSubtarget &Subtarget) {
19796 EVT LocVT = VA.getLocVT();
19797
19798 if (VA.needsCustom()) {
19799 if (LocVT.isInteger() &&
19800 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19801 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19802 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19803 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19804 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19805 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19806 llvm_unreachable("Unexpected Custom handling.");
19807 }
19808
19809 switch (VA.getLocInfo()) {
19810 default:
19811 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19812 case CCValAssign::Full:
19813 break;
19814 case CCValAssign::BCvt:
19815 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19816 break;
19817 }
19818 return Val;
19819}
19820
19821// The caller is responsible for loading the full value if the argument is
19822// passed with CCValAssign::Indirect.
19823static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19824 const CCValAssign &VA, const SDLoc &DL) {
19826 MachineFrameInfo &MFI = MF.getFrameInfo();
19827 EVT LocVT = VA.getLocVT();
19828 EVT ValVT = VA.getValVT();
19830 if (VA.getLocInfo() == CCValAssign::Indirect) {
19831 // When the value is a scalable vector, we store a pointer to the scalable
19832 // vector value on the stack. The ValVT will then be the pointer type
19833 // instead of the scalable vector type.
19834 ValVT = LocVT;
19835 }
19836 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19837 /*IsImmutable=*/true);
19838 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19839 SDValue Val;
19840
19842 switch (VA.getLocInfo()) {
19843 default:
19844 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19845 case CCValAssign::Full:
19847 case CCValAssign::BCvt:
19848 break;
19849 }
19850 Val = DAG.getExtLoad(
19851 ExtType, DL, LocVT, Chain, FIN,
19853 return Val;
19854}
19855
19856static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19857 const CCValAssign &VA,
19858 const CCValAssign &HiVA,
19859 const SDLoc &DL) {
19860 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19861 "Unexpected VA");
19863 MachineFrameInfo &MFI = MF.getFrameInfo();
19865
19866 assert(VA.isRegLoc() && "Expected register VA assignment");
19867
19868 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19869 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19870 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19871 SDValue Hi;
19872 if (HiVA.isMemLoc()) {
19873 // Second half of f64 is passed on the stack.
19874 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19875 /*IsImmutable=*/true);
19876 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19877 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19879 } else {
19880 // Second half of f64 is passed in another GPR.
19881 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19882 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19883 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19884 }
19885 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19886}
19887
19888// Transform physical registers into virtual registers.
19889SDValue RISCVTargetLowering::LowerFormalArguments(
19890 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19891 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19892 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19893
19895
19896 switch (CallConv) {
19897 default:
19898 report_fatal_error("Unsupported calling convention");
19899 case CallingConv::C:
19900 case CallingConv::Fast:
19902 case CallingConv::GRAAL:
19904 break;
19905 case CallingConv::GHC:
19906 if (Subtarget.hasStdExtE())
19907 report_fatal_error("GHC calling convention is not supported on RVE!");
19908 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19909 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19910 "(Zdinx/D) instruction set extensions");
19911 }
19912
19913 const Function &Func = MF.getFunction();
19914 if (Func.hasFnAttribute("interrupt")) {
19915 if (!Func.arg_empty())
19917 "Functions with the interrupt attribute cannot have arguments!");
19918
19919 StringRef Kind =
19920 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19921
19922 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19924 "Function interrupt attribute argument not supported!");
19925 }
19926
19927 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19928 MVT XLenVT = Subtarget.getXLenVT();
19929 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19930 // Used with varargs to accumulate store chains.
19931 std::vector<SDValue> OutChains;
19932
19933 // Assign locations to all of the incoming arguments.
19935 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19936
19937 if (CallConv == CallingConv::GHC)
19939 else
19940 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19941 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
19942 : CC_RISCV);
19943
19944 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19945 CCValAssign &VA = ArgLocs[i];
19946 SDValue ArgValue;
19947 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19948 // case.
19949 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19950 assert(VA.needsCustom());
19951 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19952 } else if (VA.isRegLoc())
19953 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19954 else
19955 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19956
19957 if (VA.getLocInfo() == CCValAssign::Indirect) {
19958 // If the original argument was split and passed by reference (e.g. i128
19959 // on RV32), we need to load all parts of it here (using the same
19960 // address). Vectors may be partly split to registers and partly to the
19961 // stack, in which case the base address is partly offset and subsequent
19962 // stores are relative to that.
19963 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19965 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19966 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19967 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19968 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19969 CCValAssign &PartVA = ArgLocs[i + 1];
19970 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19971 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19972 if (PartVA.getValVT().isScalableVector())
19973 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19974 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19975 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19977 ++i;
19978 ++InsIdx;
19979 }
19980 continue;
19981 }
19982 InVals.push_back(ArgValue);
19983 }
19984
19985 if (any_of(ArgLocs,
19986 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19987 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19988
19989 if (IsVarArg) {
19990 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19991 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19992 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19993 MachineFrameInfo &MFI = MF.getFrameInfo();
19994 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19996
19997 // Size of the vararg save area. For now, the varargs save area is either
19998 // zero or large enough to hold a0-a7.
19999 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20000 int FI;
20001
20002 // If all registers are allocated, then all varargs must be passed on the
20003 // stack and we don't need to save any argregs.
20004 if (VarArgsSaveSize == 0) {
20005 int VaArgOffset = CCInfo.getStackSize();
20006 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20007 } else {
20008 int VaArgOffset = -VarArgsSaveSize;
20009 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20010
20011 // If saving an odd number of registers, create an extra stack slot to
20012 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20013 // that offsets to even-numbered registers remain 2*XLEN-aligned.
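      // Illustrative example (not part of the original source): if the first
      // three GPR argument registers hold named arguments (Idx == 3), a3-a7
      // are saved (20 bytes on RV32); since Idx is odd an extra 4-byte slot is
      // created and the save area grows to 24 bytes, keeping it 2*XLEN
      // (8-byte) aligned.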
20014 if (Idx % 2) {
20016 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20017 VarArgsSaveSize += XLenInBytes;
20018 }
20019
20020 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20021
20022 // Copy the integer registers that may have been used for passing varargs
20023 // to the vararg save area.
20024 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20025 const Register Reg = RegInfo.createVirtualRegister(RC);
20026 RegInfo.addLiveIn(ArgRegs[I], Reg);
20027 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20028 SDValue Store = DAG.getStore(
20029 Chain, DL, ArgValue, FIN,
20030 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20031 OutChains.push_back(Store);
20032 FIN =
20033 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20034 }
20035 }
20036
20037 // Record the frame index of the first variable argument,
20038 // which is needed by VASTART.
20039 RVFI->setVarArgsFrameIndex(FI);
20040 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20041 }
20042
20043 // All stores are grouped in one node to allow the matching between
20044 // the size of Ins and InVals. This only happens for vararg functions.
20045 if (!OutChains.empty()) {
20046 OutChains.push_back(Chain);
20047 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20048 }
20049
20050 return Chain;
20051}
20052
20053/// isEligibleForTailCallOptimization - Check whether the call is eligible
20054/// for tail call optimization.
20055/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20056bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20057 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20058 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20059
20060 auto CalleeCC = CLI.CallConv;
20061 auto &Outs = CLI.Outs;
20062 auto &Caller = MF.getFunction();
20063 auto CallerCC = Caller.getCallingConv();
20064
20065 // Exception-handling functions need a special set of instructions to
20066 // indicate a return to the hardware. Tail-calling another function would
20067 // probably break this.
20068 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20069 // should be expanded as new function attributes are introduced.
20070 if (Caller.hasFnAttribute("interrupt"))
20071 return false;
20072
20073 // Do not tail call opt if the stack is used to pass parameters.
20074 if (CCInfo.getStackSize() != 0)
20075 return false;
20076
20077 // Do not tail call opt if any parameters need to be passed indirectly.
20078 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20079 // passed indirectly. So the address of the value will be passed in a
20080 // register, or if not available, then the address is put on the stack. In
20081 // order to pass indirectly, space on the stack often needs to be allocated
20082 // in order to store the value. In this case the CCInfo.getStackSize()
20083 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
20084 // are passed CCValAssign::Indirect.
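  // Illustrative example (not part of the original source): an i128 argument
  // on RV32 is passed CCValAssign::Indirect; the caller must allocate a stack
  // slot for the value and pass its address, so the caller's frame cannot
  // simply be reused for a tail call even when getStackSize() is 0.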
20085 for (auto &VA : ArgLocs)
20086 if (VA.getLocInfo() == CCValAssign::Indirect)
20087 return false;
20088
20089 // Do not tail call opt if either caller or callee uses struct return
20090 // semantics.
20091 auto IsCallerStructRet = Caller.hasStructRetAttr();
20092 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20093 if (IsCallerStructRet || IsCalleeStructRet)
20094 return false;
20095
20096 // The callee has to preserve all registers the caller needs to preserve.
20097 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20098 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20099 if (CalleeCC != CallerCC) {
20100 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20101 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20102 return false;
20103 }
20104
20105 // Byval parameters hand the function a pointer directly into the stack area
20106 // we want to reuse during a tail call. Working around this *is* possible
20107 // but less efficient and uglier in LowerCall.
20108 for (auto &Arg : Outs)
20109 if (Arg.Flags.isByVal())
20110 return false;
20111
20112 return true;
20113}
20114
20115static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20116 return DAG.getDataLayout().getPrefTypeAlign(
20117 VT.getTypeForEVT(*DAG.getContext()));
20118}
20119
20120// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20121// and output parameter nodes.
20122SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20123 SmallVectorImpl<SDValue> &InVals) const {
20124 SelectionDAG &DAG = CLI.DAG;
20125 SDLoc &DL = CLI.DL;
20127 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20129 SDValue Chain = CLI.Chain;
20130 SDValue Callee = CLI.Callee;
20131 bool &IsTailCall = CLI.IsTailCall;
20132 CallingConv::ID CallConv = CLI.CallConv;
20133 bool IsVarArg = CLI.IsVarArg;
20134 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20135 MVT XLenVT = Subtarget.getXLenVT();
20136
20138
20139 // Analyze the operands of the call, assigning locations to each operand.
20141 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20142
20143 if (CallConv == CallingConv::GHC) {
20144 if (Subtarget.hasStdExtE())
20145 report_fatal_error("GHC calling convention is not supported on RVE!");
20146 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20147 } else
20148 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20149 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20150 : CC_RISCV);
20151
20152 // Check if it's really possible to do a tail call.
20153 if (IsTailCall)
20154 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20155
20156 if (IsTailCall)
20157 ++NumTailCalls;
20158 else if (CLI.CB && CLI.CB->isMustTailCall())
20159 report_fatal_error("failed to perform tail call elimination on a call "
20160 "site marked musttail");
20161
20162 // Get a count of how many bytes are to be pushed on the stack.
20163 unsigned NumBytes = ArgCCInfo.getStackSize();
20164
20165 // Create local copies for byval args
20166 SmallVector<SDValue, 8> ByValArgs;
20167 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20168 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20169 if (!Flags.isByVal())
20170 continue;
20171
20172 SDValue Arg = OutVals[i];
20173 unsigned Size = Flags.getByValSize();
20174 Align Alignment = Flags.getNonZeroByValAlign();
20175
20176 int FI =
20177 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20178 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20179 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20180
20181 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20182 /*IsVolatile=*/false,
20183 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20185 ByValArgs.push_back(FIPtr);
20186 }
20187
20188 if (!IsTailCall)
20189 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20190
20191 // Copy argument values to their designated locations.
20193 SmallVector<SDValue, 8> MemOpChains;
20194 SDValue StackPtr;
20195 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20196 ++i, ++OutIdx) {
20197 CCValAssign &VA = ArgLocs[i];
20198 SDValue ArgValue = OutVals[OutIdx];
20199 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20200
20201 // Handle passing f64 on RV32D with a soft float ABI as a special case.
20202 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20203 assert(VA.isRegLoc() && "Expected register VA assignment");
20204 assert(VA.needsCustom());
20205 SDValue SplitF64 = DAG.getNode(
20206 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20207 SDValue Lo = SplitF64.getValue(0);
20208 SDValue Hi = SplitF64.getValue(1);
20209
20210 Register RegLo = VA.getLocReg();
20211 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20212
20213 // Get the CCValAssign for the Hi part.
20214 CCValAssign &HiVA = ArgLocs[++i];
20215
20216 if (HiVA.isMemLoc()) {
20217 // Second half of f64 is passed on the stack.
20218 if (!StackPtr.getNode())
20219 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20221 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20222 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20223 // Emit the store.
20224 MemOpChains.push_back(DAG.getStore(
20225 Chain, DL, Hi, Address,
20227 } else {
20228 // Second half of f64 is passed in another GPR.
20229 Register RegHigh = HiVA.getLocReg();
20230 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20231 }
20232 continue;
20233 }
20234
20235 // Promote the value if needed.
20236 // For now, only handle fully promoted and indirect arguments.
20237 if (VA.getLocInfo() == CCValAssign::Indirect) {
20238 // Store the argument in a stack slot and pass its address.
20239 Align StackAlign =
20240 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20241 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20242 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20243 // If the original argument was split (e.g. i128), we need
20244 // to store the required parts of it here (and pass just one address).
20245 // Vectors may be partly split to registers and partly to the stack, in
20246 // which case the base address is partly offset and subsequent stores are
20247 // relative to that.
20248 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20249 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20250 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20251 // Calculate the total size to store. We don't have access to what we're
20252 // actually storing other than performing the loop and collecting the
20253 // info.
20254 SmallVector<std::pair<SDValue, SDValue>> Parts;
20255 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20256 SDValue PartValue = OutVals[OutIdx + 1];
20257 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20258 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20259 EVT PartVT = PartValue.getValueType();
20260 if (PartVT.isScalableVector())
20261 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20262 StoredSize += PartVT.getStoreSize();
20263 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20264 Parts.push_back(std::make_pair(PartValue, Offset));
20265 ++i;
20266 ++OutIdx;
20267 }
20268 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20269 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20270 MemOpChains.push_back(
20271 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20273 for (const auto &Part : Parts) {
20274 SDValue PartValue = Part.first;
20275 SDValue PartOffset = Part.second;
20276 SDValue Address =
20277 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20278 MemOpChains.push_back(
20279 DAG.getStore(Chain, DL, PartValue, Address,
20281 }
20282 ArgValue = SpillSlot;
20283 } else {
20284 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20285 }
20286
20287 // Use local copy if it is a byval arg.
20288 if (Flags.isByVal())
20289 ArgValue = ByValArgs[j++];
20290
20291 if (VA.isRegLoc()) {
20292 // Queue up the argument copies and emit them at the end.
20293 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20294 } else {
20295 assert(VA.isMemLoc() && "Argument not register or memory");
20296 assert(!IsTailCall && "Tail call not allowed if stack is used "
20297 "for passing parameters");
20298
20299 // Work out the address of the stack slot.
20300 if (!StackPtr.getNode())
20301 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20302 SDValue Address =
20303 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20304 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20305
20306 // Emit the store.
20307 MemOpChains.push_back(
20308 DAG.getStore(Chain, DL, ArgValue, Address,
20310 }
20311 }
20312
20313 // Join the stores, which are independent of one another.
20314 if (!MemOpChains.empty())
20315 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20316
20317 SDValue Glue;
20318
20319 // Build a sequence of copy-to-reg nodes, chained and glued together.
20320 for (auto &Reg : RegsToPass) {
20321 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20322 Glue = Chain.getValue(1);
20323 }
20324
20325 // Validate that none of the argument registers have been marked as
20326 // reserved; if any have, report an error. Do the same for the return address
20327 // if this is not a tail call.
20328 validateCCReservedRegs(RegsToPass, MF);
20329 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20330 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20331 MF.getFunction(),
20332 "Return address register required, but has been reserved."});
20333
20334 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20335 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20336 // split it and then direct call can be matched by PseudoCALL.
20337 bool CalleeIsLargeExternalSymbol = false;
20338 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20339 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20340 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20341 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20342 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20343 CalleeIsLargeExternalSymbol = true;
20344 }
20345 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20346 const GlobalValue *GV = S->getGlobal();
20347 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20348 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20349 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20350 }
20351
20352 // The first call operand is the chain and the second is the target address.
20353 SmallVector<SDValue, 8> Ops;
20354 Ops.push_back(Chain);
20355 Ops.push_back(Callee);
20356
20357 // Add argument registers to the end of the list so that they are
20358 // known live into the call.
20359 for (auto &Reg : RegsToPass)
20360 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20361
20362 // Add a register mask operand representing the call-preserved registers.
20363 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20364 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20365 assert(Mask && "Missing call preserved mask for calling convention");
20366 Ops.push_back(DAG.getRegisterMask(Mask));
20367
20368 // Glue the call to the argument copies, if any.
20369 if (Glue.getNode())
20370 Ops.push_back(Glue);
20371
20372 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20373 "Unexpected CFI type for a direct call");
20374
20375 // Emit the call.
20376 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20377
20378 // Use a software-guarded branch for large code model non-indirect calls.
20379 // A tail call to an external symbol will have a null CLI.CB, so we need
20380 // another way to determine the call site type.
20381 bool NeedSWGuarded = false;
20382 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20383 Subtarget.hasStdExtZicfilp() &&
20384 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20385 NeedSWGuarded = true;
20386
20387 if (IsTailCall) {
20388 MF.getFrameInfo().setHasTailCall();
20389 unsigned CallOpc =
20390 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20391 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20392 if (CLI.CFIType)
20393 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20394 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20395 return Ret;
20396 }
20397
20398 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20399 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20400 if (CLI.CFIType)
20401 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20402 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20403 Glue = Chain.getValue(1);
20404
20405 // Mark the end of the call, which is glued to the call itself.
20406 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20407 Glue = Chain.getValue(1);
20408
20409 // Assign locations to each value returned by this call.
20410 SmallVector<CCValAssign, 16> RVLocs;
20411 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20412 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20413
20414 // Copy all of the result registers out of their specified physreg.
20415 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20416 auto &VA = RVLocs[i];
20417 // Copy the value out
20418 SDValue RetValue =
20419 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20420 // Glue the RetValue to the end of the call sequence
20421 Chain = RetValue.getValue(1);
20422 Glue = RetValue.getValue(2);
20423
20424 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20425 assert(VA.needsCustom());
20426 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20427 MVT::i32, Glue);
20428 Chain = RetValue2.getValue(1);
20429 Glue = RetValue2.getValue(2);
20430 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20431 RetValue2);
20432 } else
20433 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20434
20435 InVals.push_back(RetValue);
20436 }
20437
20438 return Chain;
20439}
20440
20441 bool RISCVTargetLowering::CanLowerReturn(
20442 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20443 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20444 SmallVector<CCValAssign, 16> RVLocs;
20445 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20446
20447 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20448 MVT VT = Outs[i].VT;
20449 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20450 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20451 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20452 return false;
20453 }
20454 return true;
20455}
20456
20457 SDValue
20458 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20459 bool IsVarArg,
20460 const SmallVectorImpl<ISD::OutputArg> &Outs,
20461 const SmallVectorImpl<SDValue> &OutVals,
20462 const SDLoc &DL, SelectionDAG &DAG) const {
20463 MachineFunction &MF = DAG.getMachineFunction();
20464 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20465
20466 // Stores the assignment of the return value to a location.
20467 SmallVector<CCValAssign, 16> RVLocs;
20468
20469 // Info about the registers and stack slot.
20470 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20471 *DAG.getContext());
20472
20473 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20474 nullptr, CC_RISCV);
20475
20476 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20477 report_fatal_error("GHC functions return void only");
20478
20479 SDValue Glue;
20480 SmallVector<SDValue, 4> RetOps(1, Chain);
20481
20482 // Copy the result values into the output registers.
20483 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20484 SDValue Val = OutVals[OutIdx];
20485 CCValAssign &VA = RVLocs[i];
20486 assert(VA.isRegLoc() && "Can only return in registers!");
20487
20488 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20489 // Handle returning f64 on RV32D with a soft float ABI.
20490 assert(VA.isRegLoc() && "Expected return via registers");
20491 assert(VA.needsCustom());
20492 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20493 DAG.getVTList(MVT::i32, MVT::i32), Val);
20494 SDValue Lo = SplitF64.getValue(0);
20495 SDValue Hi = SplitF64.getValue(1);
20496 Register RegLo = VA.getLocReg();
20497 Register RegHi = RVLocs[++i].getLocReg();
20498
20499 if (STI.isRegisterReservedByUser(RegLo) ||
20500 STI.isRegisterReservedByUser(RegHi))
20501 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20502 MF.getFunction(),
20503 "Return value register required, but has been reserved."});
20504
20505 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20506 Glue = Chain.getValue(1);
20507 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20508 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20509 Glue = Chain.getValue(1);
20510 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20511 } else {
20512 // Handle a 'normal' return.
20513 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20514 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20515
20516 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20517 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20518 MF.getFunction(),
20519 "Return value register required, but has been reserved."});
20520
20521 // Guarantee that all emitted copies are stuck together.
20522 Glue = Chain.getValue(1);
20523 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20524 }
20525 }
20526
20527 RetOps[0] = Chain; // Update chain.
20528
20529 // Add the glue node if we have it.
20530 if (Glue.getNode()) {
20531 RetOps.push_back(Glue);
20532 }
20533
20534 if (any_of(RVLocs,
20535 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20536 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20537
20538 unsigned RetOpc = RISCVISD::RET_GLUE;
20539 // Interrupt service routines use different return instructions.
20540 const Function &Func = DAG.getMachineFunction().getFunction();
20541 if (Func.hasFnAttribute("interrupt")) {
20542 if (!Func.getReturnType()->isVoidTy())
20543 report_fatal_error(
20544 "Functions with the interrupt attribute must have void return type!");
20545
20547 StringRef Kind =
20548 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20549
20550 if (Kind == "supervisor")
20551 RetOpc = RISCVISD::SRET_GLUE;
20552 else
20553 RetOpc = RISCVISD::MRET_GLUE;
20554 }
20555
20556 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20557}
20558
20559void RISCVTargetLowering::validateCCReservedRegs(
20560 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20561 MachineFunction &MF) const {
20562 const Function &F = MF.getFunction();
20563 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20564
20565 if (llvm::any_of(Regs, [&STI](auto Reg) {
20566 return STI.isRegisterReservedByUser(Reg.first);
20567 }))
20568 F.getContext().diagnose(DiagnosticInfoUnsupported{
20569 F, "Argument register required, but has been reserved."});
20570}
20571
20572// Check if the result of the node is only used as a return value, as
20573// otherwise we can't perform a tail-call.
20574 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20575 if (N->getNumValues() != 1)
20576 return false;
20577 if (!N->hasNUsesOfValue(1, 0))
20578 return false;
20579
20580 SDNode *Copy = *N->user_begin();
20581
20582 if (Copy->getOpcode() == ISD::BITCAST) {
20583 return isUsedByReturnOnly(Copy, Chain);
20584 }
20585
20586 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20587 // with soft float ABIs.
20588 if (Copy->getOpcode() != ISD::CopyToReg) {
20589 return false;
20590 }
20591
20592 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20593 // isn't safe to perform a tail call.
20594 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20595 return false;
20596
20597 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20598 bool HasRet = false;
20599 for (SDNode *Node : Copy->users()) {
20600 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20601 return false;
20602 HasRet = true;
20603 }
20604 if (!HasRet)
20605 return false;
20606
20607 Chain = Copy->getOperand(0);
20608 return true;
20609}
20610
20611 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20612 return CI->isTailCall();
20613}
20614
20615const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20616#define NODE_NAME_CASE(NODE) \
20617 case RISCVISD::NODE: \
20618 return "RISCVISD::" #NODE;
20619 // clang-format off
20620 switch ((RISCVISD::NodeType)Opcode) {
20621 case RISCVISD::FIRST_NUMBER:
20622 break;
20623 NODE_NAME_CASE(RET_GLUE)
20624 NODE_NAME_CASE(SRET_GLUE)
20625 NODE_NAME_CASE(MRET_GLUE)
20626 NODE_NAME_CASE(CALL)
20627 NODE_NAME_CASE(TAIL)
20628 NODE_NAME_CASE(SELECT_CC)
20629 NODE_NAME_CASE(BR_CC)
20630 NODE_NAME_CASE(BuildGPRPair)
20631 NODE_NAME_CASE(SplitGPRPair)
20632 NODE_NAME_CASE(BuildPairF64)
20633 NODE_NAME_CASE(SplitF64)
20634 NODE_NAME_CASE(ADD_LO)
20635 NODE_NAME_CASE(HI)
20636 NODE_NAME_CASE(LLA)
20637 NODE_NAME_CASE(ADD_TPREL)
20638 NODE_NAME_CASE(MULHSU)
20639 NODE_NAME_CASE(SHL_ADD)
20640 NODE_NAME_CASE(SLLW)
20641 NODE_NAME_CASE(SRAW)
20642 NODE_NAME_CASE(SRLW)
20643 NODE_NAME_CASE(DIVW)
20644 NODE_NAME_CASE(DIVUW)
20645 NODE_NAME_CASE(REMUW)
20646 NODE_NAME_CASE(ROLW)
20647 NODE_NAME_CASE(RORW)
20648 NODE_NAME_CASE(CLZW)
20649 NODE_NAME_CASE(CTZW)
20650 NODE_NAME_CASE(ABSW)
20651 NODE_NAME_CASE(FMV_H_X)
20652 NODE_NAME_CASE(FMV_X_ANYEXTH)
20653 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20654 NODE_NAME_CASE(FMV_W_X_RV64)
20655 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20656 NODE_NAME_CASE(FCVT_X)
20657 NODE_NAME_CASE(FCVT_XU)
20658 NODE_NAME_CASE(FCVT_W_RV64)
20659 NODE_NAME_CASE(FCVT_WU_RV64)
20660 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20661 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20662 NODE_NAME_CASE(FROUND)
20663 NODE_NAME_CASE(FCLASS)
20664 NODE_NAME_CASE(FSGNJX)
20665 NODE_NAME_CASE(FMAX)
20666 NODE_NAME_CASE(FMIN)
20667 NODE_NAME_CASE(FLI)
20668 NODE_NAME_CASE(READ_COUNTER_WIDE)
20669 NODE_NAME_CASE(BREV8)
20670 NODE_NAME_CASE(ORC_B)
20671 NODE_NAME_CASE(ZIP)
20672 NODE_NAME_CASE(UNZIP)
20673 NODE_NAME_CASE(CLMUL)
20674 NODE_NAME_CASE(CLMULH)
20675 NODE_NAME_CASE(CLMULR)
20676 NODE_NAME_CASE(MOPR)
20677 NODE_NAME_CASE(MOPRR)
20678 NODE_NAME_CASE(SHA256SIG0)
20679 NODE_NAME_CASE(SHA256SIG1)
20680 NODE_NAME_CASE(SHA256SUM0)
20681 NODE_NAME_CASE(SHA256SUM1)
20682 NODE_NAME_CASE(SM4KS)
20683 NODE_NAME_CASE(SM4ED)
20684 NODE_NAME_CASE(SM3P0)
20685 NODE_NAME_CASE(SM3P1)
20686 NODE_NAME_CASE(TH_LWD)
20687 NODE_NAME_CASE(TH_LWUD)
20688 NODE_NAME_CASE(TH_LDD)
20689 NODE_NAME_CASE(TH_SWD)
20690 NODE_NAME_CASE(TH_SDD)
20691 NODE_NAME_CASE(VMV_V_V_VL)
20692 NODE_NAME_CASE(VMV_V_X_VL)
20693 NODE_NAME_CASE(VFMV_V_F_VL)
20694 NODE_NAME_CASE(VMV_X_S)
20695 NODE_NAME_CASE(VMV_S_X_VL)
20696 NODE_NAME_CASE(VFMV_S_F_VL)
20697 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20698 NODE_NAME_CASE(READ_VLENB)
20699 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20700 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20701 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20702 NODE_NAME_CASE(VSLIDEUP_VL)
20703 NODE_NAME_CASE(VSLIDE1UP_VL)
20704 NODE_NAME_CASE(VSLIDEDOWN_VL)
20705 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20706 NODE_NAME_CASE(VFSLIDE1UP_VL)
20707 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20708 NODE_NAME_CASE(VID_VL)
20709 NODE_NAME_CASE(VFNCVT_ROD_VL)
20710 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20711 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20712 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20713 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20714 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20715 NODE_NAME_CASE(VECREDUCE_AND_VL)
20716 NODE_NAME_CASE(VECREDUCE_OR_VL)
20717 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20718 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20719 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20720 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20721 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20722 NODE_NAME_CASE(ADD_VL)
20723 NODE_NAME_CASE(AND_VL)
20724 NODE_NAME_CASE(MUL_VL)
20725 NODE_NAME_CASE(OR_VL)
20726 NODE_NAME_CASE(SDIV_VL)
20727 NODE_NAME_CASE(SHL_VL)
20728 NODE_NAME_CASE(SREM_VL)
20729 NODE_NAME_CASE(SRA_VL)
20730 NODE_NAME_CASE(SRL_VL)
20731 NODE_NAME_CASE(ROTL_VL)
20732 NODE_NAME_CASE(ROTR_VL)
20733 NODE_NAME_CASE(SUB_VL)
20734 NODE_NAME_CASE(UDIV_VL)
20735 NODE_NAME_CASE(UREM_VL)
20736 NODE_NAME_CASE(XOR_VL)
20737 NODE_NAME_CASE(AVGFLOORS_VL)
20738 NODE_NAME_CASE(AVGFLOORU_VL)
20739 NODE_NAME_CASE(AVGCEILS_VL)
20740 NODE_NAME_CASE(AVGCEILU_VL)
20741 NODE_NAME_CASE(SADDSAT_VL)
20742 NODE_NAME_CASE(UADDSAT_VL)
20743 NODE_NAME_CASE(SSUBSAT_VL)
20744 NODE_NAME_CASE(USUBSAT_VL)
20745 NODE_NAME_CASE(FADD_VL)
20746 NODE_NAME_CASE(FSUB_VL)
20747 NODE_NAME_CASE(FMUL_VL)
20748 NODE_NAME_CASE(FDIV_VL)
20749 NODE_NAME_CASE(FNEG_VL)
20750 NODE_NAME_CASE(FABS_VL)
20751 NODE_NAME_CASE(FSQRT_VL)
20752 NODE_NAME_CASE(FCLASS_VL)
20753 NODE_NAME_CASE(VFMADD_VL)
20754 NODE_NAME_CASE(VFNMADD_VL)
20755 NODE_NAME_CASE(VFMSUB_VL)
20756 NODE_NAME_CASE(VFNMSUB_VL)
20757 NODE_NAME_CASE(VFWMADD_VL)
20758 NODE_NAME_CASE(VFWNMADD_VL)
20759 NODE_NAME_CASE(VFWMSUB_VL)
20760 NODE_NAME_CASE(VFWNMSUB_VL)
20761 NODE_NAME_CASE(FCOPYSIGN_VL)
20762 NODE_NAME_CASE(SMIN_VL)
20763 NODE_NAME_CASE(SMAX_VL)
20764 NODE_NAME_CASE(UMIN_VL)
20765 NODE_NAME_CASE(UMAX_VL)
20766 NODE_NAME_CASE(BITREVERSE_VL)
20767 NODE_NAME_CASE(BSWAP_VL)
20768 NODE_NAME_CASE(CTLZ_VL)
20769 NODE_NAME_CASE(CTTZ_VL)
20770 NODE_NAME_CASE(CTPOP_VL)
20771 NODE_NAME_CASE(VFMIN_VL)
20772 NODE_NAME_CASE(VFMAX_VL)
20773 NODE_NAME_CASE(MULHS_VL)
20774 NODE_NAME_CASE(MULHU_VL)
20775 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20776 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20777 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20778 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20779 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20780 NODE_NAME_CASE(SINT_TO_FP_VL)
20781 NODE_NAME_CASE(UINT_TO_FP_VL)
20782 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20783 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20784 NODE_NAME_CASE(FP_EXTEND_VL)
20785 NODE_NAME_CASE(FP_ROUND_VL)
20786 NODE_NAME_CASE(STRICT_FADD_VL)
20787 NODE_NAME_CASE(STRICT_FSUB_VL)
20788 NODE_NAME_CASE(STRICT_FMUL_VL)
20789 NODE_NAME_CASE(STRICT_FDIV_VL)
20790 NODE_NAME_CASE(STRICT_FSQRT_VL)
20791 NODE_NAME_CASE(STRICT_VFMADD_VL)
20792 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20793 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20794 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20795 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20796 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20797 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20798 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20799 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20800 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20801 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20802 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20803 NODE_NAME_CASE(STRICT_FSETCC_VL)
20804 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20805 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20806 NODE_NAME_CASE(VWMUL_VL)
20807 NODE_NAME_CASE(VWMULU_VL)
20808 NODE_NAME_CASE(VWMULSU_VL)
20809 NODE_NAME_CASE(VWADD_VL)
20810 NODE_NAME_CASE(VWADDU_VL)
20811 NODE_NAME_CASE(VWSUB_VL)
20812 NODE_NAME_CASE(VWSUBU_VL)
20813 NODE_NAME_CASE(VWADD_W_VL)
20814 NODE_NAME_CASE(VWADDU_W_VL)
20815 NODE_NAME_CASE(VWSUB_W_VL)
20816 NODE_NAME_CASE(VWSUBU_W_VL)
20817 NODE_NAME_CASE(VWSLL_VL)
20818 NODE_NAME_CASE(VFWMUL_VL)
20819 NODE_NAME_CASE(VFWADD_VL)
20820 NODE_NAME_CASE(VFWSUB_VL)
20821 NODE_NAME_CASE(VFWADD_W_VL)
20822 NODE_NAME_CASE(VFWSUB_W_VL)
20823 NODE_NAME_CASE(VWMACC_VL)
20824 NODE_NAME_CASE(VWMACCU_VL)
20825 NODE_NAME_CASE(VWMACCSU_VL)
20826 NODE_NAME_CASE(SETCC_VL)
20827 NODE_NAME_CASE(VMERGE_VL)
20828 NODE_NAME_CASE(VMAND_VL)
20829 NODE_NAME_CASE(VMOR_VL)
20830 NODE_NAME_CASE(VMXOR_VL)
20831 NODE_NAME_CASE(VMCLR_VL)
20832 NODE_NAME_CASE(VMSET_VL)
20833 NODE_NAME_CASE(VRGATHER_VX_VL)
20834 NODE_NAME_CASE(VRGATHER_VV_VL)
20835 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20836 NODE_NAME_CASE(VSEXT_VL)
20837 NODE_NAME_CASE(VZEXT_VL)
20838 NODE_NAME_CASE(VCPOP_VL)
20839 NODE_NAME_CASE(VFIRST_VL)
20840 NODE_NAME_CASE(READ_CSR)
20841 NODE_NAME_CASE(WRITE_CSR)
20842 NODE_NAME_CASE(SWAP_CSR)
20843 NODE_NAME_CASE(CZERO_EQZ)
20844 NODE_NAME_CASE(CZERO_NEZ)
20845 NODE_NAME_CASE(SW_GUARDED_BRIND)
20846 NODE_NAME_CASE(SW_GUARDED_CALL)
20847 NODE_NAME_CASE(SW_GUARDED_TAIL)
20848 NODE_NAME_CASE(TUPLE_INSERT)
20849 NODE_NAME_CASE(TUPLE_EXTRACT)
20850 NODE_NAME_CASE(SF_VC_XV_SE)
20851 NODE_NAME_CASE(SF_VC_IV_SE)
20852 NODE_NAME_CASE(SF_VC_VV_SE)
20853 NODE_NAME_CASE(SF_VC_FV_SE)
20854 NODE_NAME_CASE(SF_VC_XVV_SE)
20855 NODE_NAME_CASE(SF_VC_IVV_SE)
20856 NODE_NAME_CASE(SF_VC_VVV_SE)
20857 NODE_NAME_CASE(SF_VC_FVV_SE)
20858 NODE_NAME_CASE(SF_VC_XVW_SE)
20859 NODE_NAME_CASE(SF_VC_IVW_SE)
20860 NODE_NAME_CASE(SF_VC_VVW_SE)
20861 NODE_NAME_CASE(SF_VC_FVW_SE)
20862 NODE_NAME_CASE(SF_VC_V_X_SE)
20863 NODE_NAME_CASE(SF_VC_V_I_SE)
20864 NODE_NAME_CASE(SF_VC_V_XV_SE)
20865 NODE_NAME_CASE(SF_VC_V_IV_SE)
20866 NODE_NAME_CASE(SF_VC_V_VV_SE)
20867 NODE_NAME_CASE(SF_VC_V_FV_SE)
20868 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20869 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20870 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20871 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20872 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20873 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20874 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20875 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20876 }
20877 // clang-format on
20878 return nullptr;
20879#undef NODE_NAME_CASE
20880}
20881
20882/// getConstraintType - Given a constraint letter, return the type of
20883/// constraint it is for this target.
20884 RISCVTargetLowering::ConstraintType
20885 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20886 if (Constraint.size() == 1) {
20887 switch (Constraint[0]) {
20888 default:
20889 break;
20890 case 'f':
20891 case 'R':
20892 return C_RegisterClass;
20893 case 'I':
20894 case 'J':
20895 case 'K':
20896 return C_Immediate;
20897 case 'A':
20898 return C_Memory;
20899 case 's':
20900 case 'S': // A symbolic address
20901 return C_Other;
20902 }
20903 } else {
20904 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
20905 return C_RegisterClass;
20906 if (Constraint == "cr" || Constraint == "cf")
20907 return C_RegisterClass;
20908 }
20909 return TargetLowering::getConstraintType(Constraint);
20910}
20911
20912std::pair<unsigned, const TargetRegisterClass *>
20913 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20914 StringRef Constraint,
20915 MVT VT) const {
20916 // First, see if this is a constraint that directly corresponds to a RISC-V
20917 // register class.
20918 if (Constraint.size() == 1) {
20919 switch (Constraint[0]) {
20920 case 'r':
20921 // TODO: Support fixed vectors up to XLen for P extension?
20922 if (VT.isVector())
20923 break;
20924 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20925 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20926 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20927 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20928 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20929 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20930 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20931 case 'f':
20932 if (VT == MVT::f16) {
20933 if (Subtarget.hasStdExtZfhmin())
20934 return std::make_pair(0U, &RISCV::FPR16RegClass);
20935 if (Subtarget.hasStdExtZhinxmin())
20936 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20937 } else if (VT == MVT::f32) {
20938 if (Subtarget.hasStdExtF())
20939 return std::make_pair(0U, &RISCV::FPR32RegClass);
20940 if (Subtarget.hasStdExtZfinx())
20941 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20942 } else if (VT == MVT::f64) {
20943 if (Subtarget.hasStdExtD())
20944 return std::make_pair(0U, &RISCV::FPR64RegClass);
20945 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20946 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20947 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20948 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20949 }
20950 break;
20951 case 'R':
20952 if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
20953 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20954 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20955 default:
20956 break;
20957 }
20958 } else if (Constraint == "vr") {
20959 for (const auto *RC :
20960 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
20961 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
20962 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
20963 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
20964 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
20965 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
20966 &RISCV::VRN2M4RegClass}) {
20967 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20968 return std::make_pair(0U, RC);
20969 }
20970 } else if (Constraint == "vd") {
20971 for (const auto *RC :
20972 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
20973 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
20974 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
20975 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
20976 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
20977 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
20978 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
20979 &RISCV::VRN2M4NoV0RegClass}) {
20980 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20981 return std::make_pair(0U, RC);
20982 }
20983 } else if (Constraint == "vm") {
20984 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20985 return std::make_pair(0U, &RISCV::VMV0RegClass);
20986 } else if (Constraint == "cr") {
20987 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20988 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20989 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20990 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20991 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20992 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20993 if (!VT.isVector())
20994 return std::make_pair(0U, &RISCV::GPRCRegClass);
20995 } else if (Constraint == "cf") {
20996 if (VT == MVT::f16) {
20997 if (Subtarget.hasStdExtZfhmin())
20998 return std::make_pair(0U, &RISCV::FPR16CRegClass);
20999 if (Subtarget.hasStdExtZhinxmin())
21000 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21001 } else if (VT == MVT::f32) {
21002 if (Subtarget.hasStdExtF())
21003 return std::make_pair(0U, &RISCV::FPR32CRegClass);
21004 if (Subtarget.hasStdExtZfinx())
21005 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21006 } else if (VT == MVT::f64) {
21007 if (Subtarget.hasStdExtD())
21008 return std::make_pair(0U, &RISCV::FPR64CRegClass);
21009 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21010 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21011 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21012 return std::make_pair(0U, &RISCV::GPRCRegClass);
21013 }
21014 }
21015
21016 // Clang will correctly decode the usage of register name aliases into their
21017 // official names. However, other frontends like `rustc` do not. This allows
21018 // users of these frontends to use the ABI names for registers in LLVM-style
21019 // register constraints.
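// For example (illustrative), an LLVM-level constraint string such as
// "={a0},{a1}" is resolved by the table below to RISCV::X10/X11, so the ABI
// names work even when the frontend does not translate them itself.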
21020 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
21021 .Case("{zero}", RISCV::X0)
21022 .Case("{ra}", RISCV::X1)
21023 .Case("{sp}", RISCV::X2)
21024 .Case("{gp}", RISCV::X3)
21025 .Case("{tp}", RISCV::X4)
21026 .Case("{t0}", RISCV::X5)
21027 .Case("{t1}", RISCV::X6)
21028 .Case("{t2}", RISCV::X7)
21029 .Cases("{s0}", "{fp}", RISCV::X8)
21030 .Case("{s1}", RISCV::X9)
21031 .Case("{a0}", RISCV::X10)
21032 .Case("{a1}", RISCV::X11)
21033 .Case("{a2}", RISCV::X12)
21034 .Case("{a3}", RISCV::X13)
21035 .Case("{a4}", RISCV::X14)
21036 .Case("{a5}", RISCV::X15)
21037 .Case("{a6}", RISCV::X16)
21038 .Case("{a7}", RISCV::X17)
21039 .Case("{s2}", RISCV::X18)
21040 .Case("{s3}", RISCV::X19)
21041 .Case("{s4}", RISCV::X20)
21042 .Case("{s5}", RISCV::X21)
21043 .Case("{s6}", RISCV::X22)
21044 .Case("{s7}", RISCV::X23)
21045 .Case("{s8}", RISCV::X24)
21046 .Case("{s9}", RISCV::X25)
21047 .Case("{s10}", RISCV::X26)
21048 .Case("{s11}", RISCV::X27)
21049 .Case("{t3}", RISCV::X28)
21050 .Case("{t4}", RISCV::X29)
21051 .Case("{t5}", RISCV::X30)
21052 .Case("{t6}", RISCV::X31)
21053 .Default(RISCV::NoRegister);
21054 if (XRegFromAlias != RISCV::NoRegister)
21055 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21056
21057 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21058 // TableGen record rather than the AsmName to choose registers for InlineAsm
21059 // constraints, plus we want to match those names to the widest floating point
21060 // register type available, manually select floating point registers here.
21061 //
21062 // The second case is the ABI name of the register, so that frontends can also
21063 // use the ABI names in register constraint lists.
21064 if (Subtarget.hasStdExtF()) {
21065 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21066 .Cases("{f0}", "{ft0}", RISCV::F0_F)
21067 .Cases("{f1}", "{ft1}", RISCV::F1_F)
21068 .Cases("{f2}", "{ft2}", RISCV::F2_F)
21069 .Cases("{f3}", "{ft3}", RISCV::F3_F)
21070 .Cases("{f4}", "{ft4}", RISCV::F4_F)
21071 .Cases("{f5}", "{ft5}", RISCV::F5_F)
21072 .Cases("{f6}", "{ft6}", RISCV::F6_F)
21073 .Cases("{f7}", "{ft7}", RISCV::F7_F)
21074 .Cases("{f8}", "{fs0}", RISCV::F8_F)
21075 .Cases("{f9}", "{fs1}", RISCV::F9_F)
21076 .Cases("{f10}", "{fa0}", RISCV::F10_F)
21077 .Cases("{f11}", "{fa1}", RISCV::F11_F)
21078 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21079 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21080 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21081 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21082 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21083 .Cases("{f17}", "{fa7}", RISCV::F17_F)
21084 .Cases("{f18}", "{fs2}", RISCV::F18_F)
21085 .Cases("{f19}", "{fs3}", RISCV::F19_F)
21086 .Cases("{f20}", "{fs4}", RISCV::F20_F)
21087 .Cases("{f21}", "{fs5}", RISCV::F21_F)
21088 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21089 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21090 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21091 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21092 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21093 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21094 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21095 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21096 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21097 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21098 .Default(RISCV::NoRegister);
21099 if (FReg != RISCV::NoRegister) {
21100 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21101 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21102 unsigned RegNo = FReg - RISCV::F0_F;
21103 unsigned DReg = RISCV::F0_D + RegNo;
21104 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21105 }
21106 if (VT == MVT::f32 || VT == MVT::Other)
21107 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21108 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21109 unsigned RegNo = FReg - RISCV::F0_F;
21110 unsigned HReg = RISCV::F0_H + RegNo;
21111 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21112 }
21113 }
21114 }
21115
21116 if (Subtarget.hasVInstructions()) {
21117 Register VReg = StringSwitch<Register>(Constraint.lower())
21118 .Case("{v0}", RISCV::V0)
21119 .Case("{v1}", RISCV::V1)
21120 .Case("{v2}", RISCV::V2)
21121 .Case("{v3}", RISCV::V3)
21122 .Case("{v4}", RISCV::V4)
21123 .Case("{v5}", RISCV::V5)
21124 .Case("{v6}", RISCV::V6)
21125 .Case("{v7}", RISCV::V7)
21126 .Case("{v8}", RISCV::V8)
21127 .Case("{v9}", RISCV::V9)
21128 .Case("{v10}", RISCV::V10)
21129 .Case("{v11}", RISCV::V11)
21130 .Case("{v12}", RISCV::V12)
21131 .Case("{v13}", RISCV::V13)
21132 .Case("{v14}", RISCV::V14)
21133 .Case("{v15}", RISCV::V15)
21134 .Case("{v16}", RISCV::V16)
21135 .Case("{v17}", RISCV::V17)
21136 .Case("{v18}", RISCV::V18)
21137 .Case("{v19}", RISCV::V19)
21138 .Case("{v20}", RISCV::V20)
21139 .Case("{v21}", RISCV::V21)
21140 .Case("{v22}", RISCV::V22)
21141 .Case("{v23}", RISCV::V23)
21142 .Case("{v24}", RISCV::V24)
21143 .Case("{v25}", RISCV::V25)
21144 .Case("{v26}", RISCV::V26)
21145 .Case("{v27}", RISCV::V27)
21146 .Case("{v28}", RISCV::V28)
21147 .Case("{v29}", RISCV::V29)
21148 .Case("{v30}", RISCV::V30)
21149 .Case("{v31}", RISCV::V31)
21150 .Default(RISCV::NoRegister);
21151 if (VReg != RISCV::NoRegister) {
21152 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21153 return std::make_pair(VReg, &RISCV::VMRegClass);
21154 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21155 return std::make_pair(VReg, &RISCV::VRRegClass);
21156 for (const auto *RC :
21157 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21158 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21159 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21160 return std::make_pair(VReg, RC);
21161 }
21162 }
21163 }
21164 }
21165
21166 std::pair<Register, const TargetRegisterClass *> Res =
21167 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
21168
21169 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21170 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21171 // Subtarget into account.
21172 if (Res.second == &RISCV::GPRF16RegClass ||
21173 Res.second == &RISCV::GPRF32RegClass ||
21174 Res.second == &RISCV::GPRPairRegClass)
21175 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21176
21177 return Res;
21178}
21179
21180 InlineAsm::ConstraintCode
21181 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
21182 // Currently only support length 1 constraints.
21183 if (ConstraintCode.size() == 1) {
21184 switch (ConstraintCode[0]) {
21185 case 'A':
21186 return InlineAsm::ConstraintCode::A;
21187 default:
21188 break;
21189 }
21190 }
21191
21192 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21193}
21194
21196 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21197 SelectionDAG &DAG) const {
21198 // Currently only support length 1 constraints.
21199 if (Constraint.size() == 1) {
21200 switch (Constraint[0]) {
21201 case 'I':
21202 // Validate & create a 12-bit signed immediate operand.
21203 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21204 uint64_t CVal = C->getSExtValue();
21205 if (isInt<12>(CVal))
21206 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21207 Subtarget.getXLenVT()));
21208 }
21209 return;
21210 case 'J':
21211 // Validate & create an integer zero operand.
21212 if (isNullConstant(Op))
21213 Ops.push_back(
21214 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21215 return;
21216 case 'K':
21217 // Validate & create a 5-bit unsigned immediate operand.
21218 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21219 uint64_t CVal = C->getZExtValue();
21220 if (isUInt<5>(CVal))
21221 Ops.push_back(
21222 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21223 }
21224 return;
21225 case 'S':
21227 return;
21228 default:
21229 break;
21230 }
21231 }
21232 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21233}
21234
21235 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21236 Instruction *Inst,
21237 AtomicOrdering Ord) const {
21238 if (Subtarget.hasStdExtZtso()) {
21239 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21240 return Builder.CreateFence(Ord);
21241 return nullptr;
21242 }
21243
21244 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21245 return Builder.CreateFence(Ord);
21246 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21247 return Builder.CreateFence(AtomicOrdering::Release);
21248 return nullptr;
21249}
21250
21251 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21252 Instruction *Inst,
21253 AtomicOrdering Ord) const {
21254 if (Subtarget.hasStdExtZtso()) {
21255 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21256 return Builder.CreateFence(Ord);
21257 return nullptr;
21258 }
21259
21260 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21261 return Builder.CreateFence(AtomicOrdering::Acquire);
21262 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21263 Ord == AtomicOrdering::SequentiallyConsistent)
21264 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
21265 return nullptr;
21266}
21267
21268 TargetLowering::AtomicExpansionKind
21269 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21270 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21271 // point operations can't be used in an lr/sc sequence without breaking the
21272 // forward-progress guarantee.
21273 if (AI->isFloatingPointOperation() ||
21279
21280 // Don't expand forced atomics, we want to have __sync libcalls instead.
21281 if (Subtarget.hasForcedAtomics())
21282 return AtomicExpansionKind::None;
21283
21284 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21285 if (AI->getOperation() == AtomicRMWInst::Nand) {
21286 if (Subtarget.hasStdExtZacas() &&
21287 (Size >= 32 || Subtarget.hasStdExtZabha()))
21289 if (Size < 32)
21291 }
21292
21293 if (Size < 32 && !Subtarget.hasStdExtZabha())
21295
21297}
21298
21299 static Intrinsic::ID
21300 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
21301 if (XLen == 32) {
21302 switch (BinOp) {
21303 default:
21304 llvm_unreachable("Unexpected AtomicRMW BinOp");
21305 case AtomicRMWInst::Xchg:
21306 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21307 case AtomicRMWInst::Add:
21308 return Intrinsic::riscv_masked_atomicrmw_add_i32;
21309 case AtomicRMWInst::Sub:
21310 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21311 case AtomicRMWInst::Nand:
21312 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21313 case AtomicRMWInst::Max:
21314 return Intrinsic::riscv_masked_atomicrmw_max_i32;
21315 case AtomicRMWInst::Min:
21316 return Intrinsic::riscv_masked_atomicrmw_min_i32;
21317 case AtomicRMWInst::UMax:
21318 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21319 case AtomicRMWInst::UMin:
21320 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21321 }
21322 }
21323
21324 if (XLen == 64) {
21325 switch (BinOp) {
21326 default:
21327 llvm_unreachable("Unexpected AtomicRMW BinOp");
21328 case AtomicRMWInst::Xchg:
21329 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21330 case AtomicRMWInst::Add:
21331 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21332 case AtomicRMWInst::Sub:
21333 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21334 case AtomicRMWInst::Nand:
21335 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21336 case AtomicRMWInst::Max:
21337 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21338 case AtomicRMWInst::Min:
21339 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21340 case AtomicRMWInst::UMax:
21341 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21342 case AtomicRMWInst::UMin:
21343 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21344 }
21345 }
21346
21347 llvm_unreachable("Unexpected XLen\n");
21348}
21349
21350 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
21351 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21352 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21353 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21354 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21355 // mask, as this produces better code than the LR/SC loop emitted by
21356 // int_riscv_masked_atomicrmw_xchg.
21357 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21358 isa<ConstantInt>(AI->getValOperand())) {
21359 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21360 if (CVal->isZero())
21361 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21362 Builder.CreateNot(Mask, "Inv_Mask"),
21363 AI->getAlign(), Ord);
21364 if (CVal->isMinusOne())
21365 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21366 AI->getAlign(), Ord);
21367 }
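// For illustration: an i8 "atomicrmw xchg ptr, 0" therefore becomes an AND of
// the aligned word with ~Mask (clearing just that byte lane), and an xchg
// with -1 becomes an OR with Mask, so no LR/SC loop is needed in either case.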
21368
21369 unsigned XLen = Subtarget.getXLen();
21370 Value *Ordering =
21371 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21372 Type *Tys[] = {AlignedAddr->getType()};
21374 AI->getModule(),
21376
21377 if (XLen == 64) {
21378 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21379 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21380 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21381 }
21382
21383 Value *Result;
21384
21385 // Must pass the shift amount needed to sign extend the loaded value prior
21386 // to performing a signed comparison for min/max. ShiftAmt is the number of
21387 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21388 // is the number of bits to left+right shift the value in order to
21389 // sign-extend.
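// For example, on RV32 an i8 field at bit offset 16 (ShiftAmt = 16,
// ValWidth = 8) gets 32 - 8 - 16 = 8: shifting the loaded word left by 8
// places the field's sign bit at bit 31, and an arithmetic shift right by 8
// moves it back with the upper bits sign-extended.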
21390 if (AI->getOperation() == AtomicRMWInst::Min ||
21391 AI->getOperation() == AtomicRMWInst::Max) {
21392 const DataLayout &DL = AI->getDataLayout();
21393 unsigned ValWidth =
21394 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21395 Value *SextShamt =
21396 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21397 Result = Builder.CreateCall(LrwOpScwLoop,
21398 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21399 } else {
21400 Result =
21401 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21402 }
21403
21404 if (XLen == 64)
21405 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21406 return Result;
21407}
21408
21409 TargetLowering::AtomicExpansionKind
21410 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
21411 AtomicCmpXchgInst *CI) const {
21412 // Don't expand forced atomics, we want to have __sync libcalls instead.
21413 if (Subtarget.hasForcedAtomics())
21414 return AtomicExpansionKind::None;
21415
21416 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
21417 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21418 (Size == 8 || Size == 16))
21419 return AtomicExpansionKind::MaskedIntrinsic;
21420 return AtomicExpansionKind::None;
21421}
21422
21423 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
21424 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21425 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21426 unsigned XLen = Subtarget.getXLen();
21427 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21428 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21429 if (XLen == 64) {
21430 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21431 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21432 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21433 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21434 }
21435 Type *Tys[] = {AlignedAddr->getType()};
21436 Value *Result = Builder.CreateIntrinsic(
21437 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21438 if (XLen == 64)
21439 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21440 return Result;
21441}
21442
21444 EVT DataVT) const {
21445 // We have indexed loads for all supported EEW types. Indices are always
21446 // zero extended.
21447 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21448 isTypeLegal(Extend.getValueType()) &&
21449 isTypeLegal(Extend.getOperand(0).getValueType()) &&
21450 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21451}
21452
21454 EVT VT) const {
21455 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21456 return false;
21457
21458 switch (FPVT.getSimpleVT().SimpleTy) {
21459 case MVT::f16:
21460 return Subtarget.hasStdExtZfhmin();
21461 case MVT::f32:
21462 return Subtarget.hasStdExtF();
21463 case MVT::f64:
21464 return Subtarget.hasStdExtD();
21465 default:
21466 return false;
21467 }
21468}
21469
21470 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
21471 // If we are using the small code model, we can reduce size of jump table
21472 // entry to 4 bytes.
21473 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21474 getTargetMachine().getCodeModel() == CodeModel::Small) {
21475 return MachineJumpTableInfo::EK_Custom32;
21476 }
21477 return TargetLoweringBase::getJumpTableEncoding();
21478}
21479
21480 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21481 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21482 unsigned uid, MCContext &Ctx) const {
21483 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21484 getTargetMachine().getCodeModel() == CodeModel::Small);
21485 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21486}
21487
21489 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21490 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21491 // a power of two as well.
21492 // FIXME: This doesn't work for zve32, but that's already broken
21493 // elsewhere for the same reason.
21494 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21495 static_assert(RISCV::RVVBitsPerBlock == 64,
21496 "RVVBitsPerBlock changed, audit needed");
21497 return true;
21498}
21499
21501 SDValue &Offset,
21503 SelectionDAG &DAG) const {
21504 // Target does not support indexed loads.
21505 if (!Subtarget.hasVendorXTHeadMemIdx())
21506 return false;
21507
21508 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21509 return false;
21510
21511 Base = Op->getOperand(0);
21512 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21513 int64_t RHSC = RHS->getSExtValue();
21514 if (Op->getOpcode() == ISD::SUB)
21515 RHSC = -(uint64_t)RHSC;
21516
21517 // The constants that can be encoded in the THeadMemIdx instructions
21518 // are of the form (sign_extend(imm5) << imm2).
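// For example, offsets 12 (= 3 << 2) and -64 (= -16 << 2) are encodable,
// while 17 and 128 are not.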
21519 bool isLegalIndexedOffset = false;
21520 for (unsigned i = 0; i < 4; i++)
21521 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21522 isLegalIndexedOffset = true;
21523 break;
21524 }
21525
21526 if (!isLegalIndexedOffset)
21527 return false;
21528
21529 Offset = Op->getOperand(1);
21530 return true;
21531 }
21532
21533 return false;
21534}
21535
21537 SDValue &Offset,
21539 SelectionDAG &DAG) const {
21540 EVT VT;
21541 SDValue Ptr;
21542 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21543 VT = LD->getMemoryVT();
21544 Ptr = LD->getBasePtr();
21545 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21546 VT = ST->getMemoryVT();
21547 Ptr = ST->getBasePtr();
21548 } else
21549 return false;
21550
21551 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21552 return false;
21553
21554 AM = ISD::PRE_INC;
21555 return true;
21556}
21557
21559 SDValue &Base,
21560 SDValue &Offset,
21562 SelectionDAG &DAG) const {
21563 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21564 if (Op->getOpcode() != ISD::ADD)
21565 return false;
21566
21567 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21568 Base = LS->getBasePtr();
21569 else
21570 return false;
21571
21572 if (Base == Op->getOperand(0))
21573 Offset = Op->getOperand(1);
21574 else if (Base == Op->getOperand(1))
21575 Offset = Op->getOperand(0);
21576 else
21577 return false;
21578
21579 AM = ISD::POST_INC;
21580 return true;
21581 }
21582
21583 EVT VT;
21584 SDValue Ptr;
21585 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21586 VT = LD->getMemoryVT();
21587 Ptr = LD->getBasePtr();
21588 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21589 VT = ST->getMemoryVT();
21590 Ptr = ST->getBasePtr();
21591 } else
21592 return false;
21593
21594 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21595 return false;
21596 // Post-indexing updates the base, so it's not a valid transform
21597 // if that's not the same as the load's pointer.
21598 if (Ptr != Base)
21599 return false;
21600
21601 AM = ISD::POST_INC;
21602 return true;
21603}
21604
21606 EVT VT) const {
21607 EVT SVT = VT.getScalarType();
21608
21609 if (!SVT.isSimple())
21610 return false;
21611
21612 switch (SVT.getSimpleVT().SimpleTy) {
21613 case MVT::f16:
21614 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21615 : Subtarget.hasStdExtZfhOrZhinx();
21616 case MVT::f32:
21617 return Subtarget.hasStdExtFOrZfinx();
21618 case MVT::f64:
21619 return Subtarget.hasStdExtDOrZdinx();
21620 default:
21621 break;
21622 }
21623
21624 return false;
21625}
21626
21628 // Zacas will use amocas.w which does not require extension.
21629 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21630}
21631
21633 const Constant *PersonalityFn) const {
21634 return RISCV::X10;
21635}
21636
21638 const Constant *PersonalityFn) const {
21639 return RISCV::X11;
21640}
21641
21643 // Return false to suppress the unnecessary extensions if the LibCall
21644 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21645 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21646 Type.getSizeInBits() < Subtarget.getXLen()))
21647 return false;
21648
21649 return true;
21650}
21651
21653 bool IsSigned) const {
21654 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21655 return true;
21656
21657 return IsSigned;
21658}
21659
21660 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
21661 SDValue C) const {
21662 // Check integral scalar types.
21663 if (!VT.isScalarInteger())
21664 return false;
21665
21666 // Omit the optimization if the subtarget has the Zmmul extension and the
21667 // data size exceeds XLen.
21668 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21669 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21670 return false;
21671
21672 auto *ConstNode = cast<ConstantSDNode>(C);
21673 const APInt &Imm = ConstNode->getAPIntValue();
21674
21675 // Break the MUL to a SLLI and an ADD/SUB.
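// For example, x * 9 becomes (x << 3) + x and x * 7 becomes (x << 3) - x.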
21676 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21677 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21678 return true;
21679
21680 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
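// For example, with Zba, x * 4100 (4100 - 4 = 1 << 12) can become
// (SH2ADD x, (SLLI x, 12)).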
21681 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21682 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21683 (Imm - 8).isPowerOf2()))
21684 return true;
21685
21686 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21687 // a pair of LUI/ADDI.
21688 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21689 ConstNode->hasOneUse()) {
21690 APInt ImmS = Imm.ashr(Imm.countr_zero());
21691 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21692 (1 - ImmS).isPowerOf2())
21693 return true;
21694 }
21695
21696 return false;
21697}
21698
21700 SDValue ConstNode) const {
21701 // Let the DAGCombiner decide for vectors.
21702 EVT VT = AddNode.getValueType();
21703 if (VT.isVector())
21704 return true;
21705
21706 // Let the DAGCombiner decide for larger types.
21707 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21708 return true;
21709
21710 // It is worse if c1 is simm12 while c1*c2 is not.
21711 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21712 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21713 const APInt &C1 = C1Node->getAPIntValue();
21714 const APInt &C2 = C2Node->getAPIntValue();
21715 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21716 return false;
21717
21718 // Default to true and let the DAGCombiner decide.
21719 return true;
21720}
21721
21723 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21724 unsigned *Fast) const {
21725 if (!VT.isVector()) {
21726 if (Fast)
21727 *Fast = Subtarget.enableUnalignedScalarMem();
21728 return Subtarget.enableUnalignedScalarMem();
21729 }
21730
21731 // All vector implementations must support element alignment
21732 EVT ElemVT = VT.getVectorElementType();
21733 if (Alignment >= ElemVT.getStoreSize()) {
21734 if (Fast)
21735 *Fast = 1;
21736 return true;
21737 }
21738
21739 // Note: We lower an unmasked unaligned vector access to an equally sized
21740 // e8 element type access. Given this, we effectively support all unmasked
21741 // misaligned accesses. TODO: Work through the codegen implications of
21742 // allowing such accesses to be formed, and considered fast.
21743 if (Fast)
21744 *Fast = Subtarget.enableUnalignedVectorMem();
21745 return Subtarget.enableUnalignedVectorMem();
21746}
21747
21748
21750 const AttributeList &FuncAttributes) const {
21751 if (!Subtarget.hasVInstructions())
21752 return MVT::Other;
21753
21754 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21755 return MVT::Other;
21756
21757 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21758 // has an expansion threshold, and we want the number of hardware memory
21759 // operations to correspond roughly to that threshold. LMUL>1 operations
21760 // are typically expanded linearly internally, and thus correspond to more
21761 // than one actual memory operation. Note that store merging and load
21762 // combining will typically form larger LMUL operations from the LMUL1
21763 // operations emitted here, and that's okay because combining isn't
21764 // introducing new memory operations; it's just merging existing ones.
21765 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21766 if (Op.size() < MinVLenInBytes)
21767 // TODO: Figure out short memops. For the moment, do the default thing
21768 // which ends up using scalar sequences.
21769 return MVT::Other;
21770
21771 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21772 // fixed vectors.
21773 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21774 return MVT::Other;
21775
21776 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21777 // a large scalar constant and instead use vmv.v.x/i to do the
21778 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21779 // maximize the chance we can encode the size in the vsetvli.
21780 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21781 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21782
21783 // Do we have sufficient alignment for our preferred VT? If not, revert
21784 // to largest size allowed by our alignment criteria.
21785 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21786 Align RequiredAlign(PreferredVT.getStoreSize());
21787 if (Op.isFixedDstAlign())
21788 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21789 if (Op.isMemcpy())
21790 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21791 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21792 }
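// For example, with VLEN = 128 (MinVLenInBytes = 16), ELEN = 64 and
// sufficient alignment, this returns v2i64 for a memcpy and v16i8 for a
// non-zero memset.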
21793 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21794}
21795
21797 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21798 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21799 bool IsABIRegCopy = CC.has_value();
21800 EVT ValueVT = Val.getValueType();
21801
21802 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21803 if ((ValueVT == PairVT ||
21804 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21805 ValueVT == MVT::f64)) &&
21806 NumParts == 1 && PartVT == MVT::Untyped) {
21807 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
21808 MVT XLenVT = Subtarget.getXLenVT();
21809 if (ValueVT == MVT::f64)
21810 Val = DAG.getBitcast(MVT::i64, Val);
21811 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
21812 // Always creating an MVT::Untyped part, so always use
21813 // RISCVISD::BuildGPRPair.
21814 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
21815 return true;
21816 }
21817
21818 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21819 PartVT == MVT::f32) {
21820 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21821 // nan, and cast to f32.
21822 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21823 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21824 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21825 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21826 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21827 Parts[0] = Val;
21828 return true;
21829 }
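// For illustration: an f16 value of 1.0 (bit pattern 0x3C00) is handed off in
// the f32 part as 0xFFFF3C00, i.e. NaN-boxed into the upper bits.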
21830
21831 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
21832#ifndef NDEBUG
21833 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
21834 [[maybe_unused]] unsigned ValLMUL =
21836 ValNF * RISCV::RVVBitsPerBlock);
21837 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
21838 [[maybe_unused]] unsigned PartLMUL =
21840 PartNF * RISCV::RVVBitsPerBlock);
21841 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
21842 "RISC-V vector tuple type only accepts same register class type "
21843 "TUPLE_INSERT");
21844#endif
21845
21846 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
21847 Val, DAG.getVectorIdxConstant(0, DL));
21848 Parts[0] = Val;
21849 return true;
21850 }
21851
21852 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21853 LLVMContext &Context = *DAG.getContext();
21854 EVT ValueEltVT = ValueVT.getVectorElementType();
21855 EVT PartEltVT = PartVT.getVectorElementType();
21856 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21857 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21858 if (PartVTBitSize % ValueVTBitSize == 0) {
21859 assert(PartVTBitSize >= ValueVTBitSize);
21860 // If the element types are different, bitcast to the same element type of
21861 // PartVT first.
21862 // For example, suppose we want to copy a <vscale x 1 x i8> value to
21863 // <vscale x 4 x i16>.
21864 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by inserting a
21865 // subvector, then we can bitcast to <vscale x 4 x i16>.
21866 if (ValueEltVT != PartEltVT) {
21867 if (PartVTBitSize > ValueVTBitSize) {
21868 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21869 assert(Count != 0 && "The number of elements should not be zero.");
21870 EVT SameEltTypeVT =
21871 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21872 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21873 DAG.getUNDEF(SameEltTypeVT), Val,
21874 DAG.getVectorIdxConstant(0, DL));
21875 }
21876 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21877 } else {
21878 Val =
21879 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21880 Val, DAG.getVectorIdxConstant(0, DL));
21881 }
21882 Parts[0] = Val;
21883 return true;
21884 }
21885 }
21886
21887 return false;
21888}
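// A small worked example of the [b]f16 -> f32 ABI boxing above (illustrative
// only): the f16 value 1.0 has the bit pattern 0x3C00; after the any-extend
// and the OR with 0xFFFF0000 the f32 part carries the bits 0xFFFF3C00, i.e.
// the half value sits in the low 16 bits underneath an all-ones upper half
// (a NaN pattern), which joinRegisterPartsIntoValue below simply truncates
// away again.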
21889
21890 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
21891 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21892 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21893 bool IsABIRegCopy = CC.has_value();
21894
21895 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21896 if ((ValueVT == PairVT ||
21897 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21898 ValueVT == MVT::f64)) &&
21899 NumParts == 1 && PartVT == MVT::Untyped) {
21900 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
21901 MVT XLenVT = Subtarget.getXLenVT();
21902
21903 SDValue Val = Parts[0];
21904 // Always starting with an MVT::Untyped part, so always use
21905 // RISCVISD::SplitGPRPair
21906 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
21907 Val);
21908 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
21909 Val.getValue(1));
21910 if (ValueVT == MVT::f64)
21911 Val = DAG.getBitcast(ValueVT, Val);
21912 return Val;
21913 }
21914
21915 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21916 PartVT == MVT::f32) {
21917 SDValue Val = Parts[0];
21918
21919 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21920 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21921 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21922 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21923 return Val;
21924 }
21925
21926 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21927 LLVMContext &Context = *DAG.getContext();
21928 SDValue Val = Parts[0];
21929 EVT ValueEltVT = ValueVT.getVectorElementType();
21930 EVT PartEltVT = PartVT.getVectorElementType();
21931 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21932 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21933 if (PartVTBitSize % ValueVTBitSize == 0) {
21934 assert(PartVTBitSize >= ValueVTBitSize);
21935 EVT SameEltTypeVT = ValueVT;
21936 // If the element types are different, bitcast the part to a vector with
21937 // ValueVT's element type first.
21938 // For example, to copy a <vscale x 1 x i8> value out of a
21939 // <vscale x 4 x i16> part,
21940 // we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>,
21941 // and then extract <vscale x 1 x i8> from it.
21942 if (ValueEltVT != PartEltVT) {
21943 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21944 assert(Count != 0 && "The number of elements should not be zero.");
21945 SameEltTypeVT =
21946 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21947 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21948 }
21949 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21950 DAG.getVectorIdxConstant(0, DL));
21951 return Val;
21952 }
21953 }
21954 return SDValue();
21955}
21956
21957 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21958 // When aggressively optimizing for code size, we prefer to use a div
21959 // instruction, as it is usually smaller than the alternative sequence.
21960 // TODO: Add vector division?
21961 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21962 return OptSize && !VT.isVector();
21963}
21964
21965 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21966 // Scalarizing a splat that feeds zero_extend or sign_extend can prevent it
21967 // from matching a widening instruction in some situations.
21968 unsigned Opc = N->getOpcode();
21969 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21970 return false;
21971 return true;
21972}
21973
21974static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21975 Module *M = IRB.GetInsertBlock()->getModule();
21976 Function *ThreadPointerFunc =
21977 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
21978 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21979 IRB.CreateCall(ThreadPointerFunc), Offset);
21980}
21981
21982 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21983 // Fuchsia provides a fixed TLS slot for the stack cookie.
21984 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21985 if (Subtarget.isTargetFuchsia())
21986 return useTpOffset(IRB, -0x10);
21987
21988 // Android provides a fixed TLS slot for the stack cookie. See the definition
21989 // of TLS_SLOT_STACK_GUARD in
21990 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21991 if (Subtarget.isTargetAndroid())
21992 return useTpOffset(IRB, -0x18);
21993
21994 Module *M = IRB.GetInsertBlock()->getModule();
21995
21996 if (M->getStackProtectorGuard() == "tls") {
21997 // Users must specify the offset explicitly
21998 int Offset = M->getStackProtectorGuardOffset();
21999 return useTpOffset(IRB, Offset);
22000 }
22001
22002 return TargetLowering::getIRStackGuard(IRB);
22003}
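// Roughly, the IR produced through useTpOffset for the fixed-slot targets
// above looks like the following (shape assumed; exact intrinsic mangling may
// differ):
//   %tp = call ptr @llvm.thread.pointer()
//   %guard.slot = getelementptr i8, ptr %tp, i32 -16   ; Fuchsia, -0x10
// with -24 (-0x18) used on Android and the module's
// stack-protector-guard-offset used when the "tls" guard mode is requested.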
22004
22005 bool RISCVTargetLowering::isLegalInterleavedAccessType(
22006 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
22007 const DataLayout &DL) const {
22008 EVT VT = getValueType(DL, VTy);
22009 // Don't lower vlseg/vsseg for vector types that can't be split.
22010 if (!isTypeLegal(VT))
22011 return false;
22012
22013 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
22014 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
22015 Alignment))
22016 return false;
22017
22018 MVT ContainerVT = VT.getSimpleVT();
22019
22020 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22021 if (!Subtarget.useRVVForFixedLengthVectors())
22022 return false;
22023 // Sometimes the interleaved access pass picks up splats as interleaves of
22024 // one element. Don't lower these.
22025 if (FVTy->getNumElements() < 2)
22026 return false;
22027
22028 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
22029 } else {
22030 // The intrinsics for scalable vectors are not overloaded on pointer type
22031 // and can only handle the default address space.
22032 if (AddrSpace)
22033 return false;
22034 }
22035
22036 // Need to make sure that EMUL * NFIELDS ≤ 8
22037 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22038 if (Fractional)
22039 return true;
22040 return Factor * LMUL <= 8;
22041}
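// A quick sanity check of the EMUL * NFIELDS <= 8 rule (illustrative): a
// <vscale x 4 x i32> field has a known minimum size of 128 bits, i.e. LMUL=2
// at 64 bits per register block, so Factor=4 needs 4 * 2 = 8 vector registers
// and is accepted, while Factor=5 would need 10 and is rejected; fractional
// LMULs always satisfy the rule.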
22042
22043 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
22044 Align Alignment) const {
22045 if (!Subtarget.hasVInstructions())
22046 return false;
22047
22048 // Only support fixed vectors if we know the minimum vector size.
22049 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
22050 return false;
22051
22052 EVT ScalarType = DataType.getScalarType();
22053 if (!isLegalElementTypeForRVV(ScalarType))
22054 return false;
22055
22056 if (!Subtarget.enableUnalignedVectorMem() &&
22057 Alignment < ScalarType.getStoreSize())
22058 return false;
22059
22060 return true;
22061}
22062
22063 static const Intrinsic::ID FixedVlsegIntrIds[] = {
22064 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22065 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22066 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22067 Intrinsic::riscv_seg8_load};
22068
22069/// Lower an interleaved load into a vlsegN intrinsic.
22070///
22071/// E.g. Lower an interleaved load (Factor = 2):
22072/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22073/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
22074/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
22075///
22076/// Into:
22077/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22078/// %ptr, i64 4)
22079/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22080/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
22081 bool RISCVTargetLowering::lowerInterleavedLoad(
22082 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
22083 ArrayRef<unsigned> Indices, unsigned Factor) const {
22084 assert(Indices.size() == Shuffles.size());
22085
22086 IRBuilder<> Builder(LI);
22087
22088 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22089 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22090 LI->getPointerAddressSpace(),
22091 LI->getDataLayout()))
22092 return false;
22093
22094 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22095
22096 // If the segment load is going to be performed segment at a time anyways
22097 // and there's only one element used, use a strided load instead. This
22098 // will be equally fast, and create less vector register pressure.
22099 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22100 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22101 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22102 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22103 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22104 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22105 Value *VL = Builder.getInt32(VTy->getNumElements());
22106
22107 CallInst *CI =
22108 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22109 {VTy, BasePtr->getType(), Stride->getType()},
22110 {BasePtr, Stride, Mask, VL});
22111 CI->addParamAttr(
22112 0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
22113 Shuffles[0]->replaceAllUsesWith(CI);
22114 return true;
22115 };
22116
22117 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22118
22119 CallInst *VlsegN = Builder.CreateIntrinsic(
22120 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22121 {LI->getPointerOperand(), VL});
22122
22123 for (unsigned i = 0; i < Shuffles.size(); i++) {
22124 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22125 Shuffles[i]->replaceAllUsesWith(SubVec);
22126 }
22127
22128 return true;
22129}
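// For illustration, the single-index fallback above turns a factor-4
// interleaved load of i32 where only lane 1 of each segment is used into
// roughly the following (exact intrinsic mangling assumed):
//   %base = getelementptr i8, ptr %ptr, i64 4    ; Indices[0] * sizeof(i32)
//   %v = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(
//            ptr %base, i64 16, <4 x i1> splat (i1 true), i32 4)
// i.e. offset = index * element size, stride = Factor * element size, and
// VL = the number of elements in the extracted fixed vector.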
22130
22131 static const Intrinsic::ID FixedVssegIntrIds[] = {
22132 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22133 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22134 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22135 Intrinsic::riscv_seg8_store};
22136
22137/// Lower an interleaved store into a vssegN intrinsic.
22138///
22139/// E.g. Lower an interleaved store (Factor = 3):
22140/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22141/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22142/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22143///
22144/// Into:
22145/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22146/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22147/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22148/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22149/// %ptr, i32 4)
22150///
22151/// Note that the new shufflevectors will be removed and we'll only generate one
22152/// vsseg3 instruction in CodeGen.
22153 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
22154 ShuffleVectorInst *SVI,
22155 unsigned Factor) const {
22156 IRBuilder<> Builder(SI);
22157 auto Mask = SVI->getShuffleMask();
22158 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22159 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22160 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22161 ShuffleVTy->getNumElements() / Factor);
22162 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22163 SI->getPointerAddressSpace(),
22164 SI->getDataLayout()))
22165 return false;
22166
22167 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22168
22169 unsigned Index;
22170 // If the segment store only has one active lane (i.e. the interleave is
22171 // just a spread shuffle), we can use a strided store instead. This will
22172 // be equally fast, and create less vector register pressure.
22173 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22174 isSpreadMask(Mask, Factor, Index)) {
22175 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22176 Value *Data = SVI->getOperand(0);
22177 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22178 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22179 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22180 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22181 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22182 Value *VL = Builder.getInt32(VTy->getNumElements());
22183
22184 CallInst *CI = Builder.CreateIntrinsic(
22185 Intrinsic::experimental_vp_strided_store,
22186 {Data->getType(), BasePtr->getType(), Stride->getType()},
22187 {Data, BasePtr, Stride, Mask, VL});
22188 CI->addParamAttr(
22189 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22190
22191 return true;
22192 }
22193
22194 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22195 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22196 {VTy, SI->getPointerOperandType(), XLenTy});
22197
22199
22200 for (unsigned i = 0; i < Factor; i++) {
22201 Value *Shuffle = Builder.CreateShuffleVector(
22202 SVI->getOperand(0), SVI->getOperand(1),
22203 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22204 Ops.push_back(Shuffle);
22205 }
22206 // This VL should be OK (should be executable in one vsseg instruction,
22207 // potentially under larger LMULs) because we checked that the fixed vector
22208 // type fits in isLegalInterleavedAccessType
22209 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22210 Ops.append({SI->getPointerOperand(), VL});
22211
22212 Builder.CreateCall(VssegNFunc, Ops);
22213
22214 return true;
22215}
22216
22217 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
22218 IntrinsicInst *DI, LoadInst *LI,
22219 SmallVectorImpl<Instruction *> &DeadInsts) const {
22220 assert(LI->isSimple());
22221 IRBuilder<> Builder(LI);
22222
22223 // Only deinterleave2 supported at present.
22224 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
22225 return false;
22226
22227 const unsigned Factor = 2;
22228
22229 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
22230 const DataLayout &DL = LI->getDataLayout();
22231
22232 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22233 LI->getPointerAddressSpace(), DL))
22234 return false;
22235
22236 Value *Return;
22237 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22238
22239 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22240 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22241 Return =
22242 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22243 {ResVTy, LI->getPointerOperandType(), XLenTy},
22244 {LI->getPointerOperand(), VL});
22245 } else {
22246 static const Intrinsic::ID IntrIds[] = {
22247 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22248 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22249 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22250 Intrinsic::riscv_vlseg8};
22251
22252 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22253 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22254 Type *VecTupTy = TargetExtType::get(
22255 LI->getContext(), "riscv.vector.tuple",
22256 ScalableVectorType::get(Type::getInt8Ty(LI->getContext()),
22257 NumElts * SEW / 8),
22258 Factor);
22259
22260 Value *VL = Constant::getAllOnesValue(XLenTy);
22261
22262 Value *Vlseg = Builder.CreateIntrinsic(
22263 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22264 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22265 ConstantInt::get(XLenTy, Log2_64(SEW))});
22266
22267 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22268 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22269 for (unsigned i = 0; i < Factor; ++i) {
22270 Value *VecExtract = Builder.CreateIntrinsic(
22271 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22272 {Vlseg, Builder.getInt32(i)});
22273 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22274 }
22275 }
22276
22277 DI->replaceAllUsesWith(Return);
22278
22279 return true;
22280}
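// For illustration (types assumed): deinterleaving a factor-2 load of
// <vscale x 4 x i32> takes the scalable path above with SEW = 32 and
// NumElts = 4, so the segment load produces a
// target("riscv.vector.tuple", <vscale x 16 x i8>, 2) value (4 * 32 / 8 = 16
// bytes per field per vscale), is issued as riscv.vlseg2 with an all-ones
// (VLMAX) VL, and the two result vectors are recovered with
// riscv.tuple.extract.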
22281
22282 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
22283 IntrinsicInst *II, StoreInst *SI,
22284 SmallVectorImpl<Instruction *> &DeadInsts) const {
22285 assert(SI->isSimple());
22286 IRBuilder<> Builder(SI);
22287
22288 // Only interleave2 supported at present.
22289 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
22290 return false;
22291
22292 const unsigned Factor = 2;
22293
22294 VectorType *InVTy = cast<VectorType>(II->getArgOperand(0)->getType());
22295 const DataLayout &DL = SI->getDataLayout();
22296
22297 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22298 SI->getPointerAddressSpace(), DL))
22299 return false;
22300
22301 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22302
22303 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22304 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22305 Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
22306 {InVTy, SI->getPointerOperandType(), XLenTy},
22307 {II->getArgOperand(0), II->getArgOperand(1),
22308 SI->getPointerOperand(), VL});
22309 } else {
22310 static const Intrinsic::ID IntrIds[] = {
22311 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22312 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22313 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22314 Intrinsic::riscv_vsseg8};
22315
22316 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22317 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22318 Type *VecTupTy = TargetExtType::get(
22319 SI->getContext(), "riscv.vector.tuple",
22320 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22321 NumElts * SEW / 8),
22322 Factor);
22323
22324 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22325 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22326
22327 Value *VL = Constant::getAllOnesValue(XLenTy);
22328
22329 Value *StoredVal = PoisonValue::get(VecTupTy);
22330 for (unsigned i = 0; i < Factor; ++i)
22331 StoredVal = Builder.CreateIntrinsic(
22332 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22333 {StoredVal, II->getArgOperand(i), Builder.getInt32(i)});
22334
22335 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22336 ConstantInt::get(XLenTy, Log2_64(SEW))});
22337 }
22338
22339 return true;
22340}
22341
22342 MachineInstr *
22343 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
22344 MachineBasicBlock::iterator &MBBI,
22345 const TargetInstrInfo *TII) const {
22346 assert(MBBI->isCall() && MBBI->getCFIType() &&
22347 "Invalid call instruction for a KCFI check");
22348 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22349 MBBI->getOpcode()));
22350
22351 MachineOperand &Target = MBBI->getOperand(0);
22352 Target.setIsRenamable(false);
22353
22354 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22355 .addReg(Target.getReg())
22356 .addImm(MBBI->getCFIType())
22357 .getInstr();
22358}
22359
22360#define GET_REGISTER_MATCHER
22361#include "RISCVGenAsmMatcher.inc"
22362
22363 Register
22364 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
22365 const MachineFunction &MF) const {
22366 Register Reg = MatchRegisterAltName(RegName);
22367 if (Reg == RISCV::NoRegister)
22368 Reg = MatchRegisterName(RegName);
22369 if (Reg == RISCV::NoRegister)
22370 report_fatal_error(
22371 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22372 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22373 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22374 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22375 StringRef(RegName) + "\"."));
22376 return Reg;
22377}
22378
22379 MachineMemOperand::Flags
22380 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
22381 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22382
22383 if (NontemporalInfo == nullptr)
22384 return MachineMemOperand::MONone;
22385
22386 // 1 -> __RISCV_NTLH_ALL (the default value)
22387 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22388 // 3 -> __RISCV_NTLH_ALL_PRIVATE
22389 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22390 // 5 -> __RISCV_NTLH_ALL
22391 int NontemporalLevel = 5;
22392 const MDNode *RISCVNontemporalInfo =
22393 I.getMetadata("riscv-nontemporal-domain");
22394 if (RISCVNontemporalInfo != nullptr)
22395 NontemporalLevel =
22396 cast<ConstantInt>(
22397 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22398 ->getValue())
22399 ->getZExtValue();
22400
22401 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22402 "RISC-V target doesn't support this non-temporal domain.");
22403
22404 NontemporalLevel -= 2;
22405 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
22406 if (NontemporalLevel & 0b1)
22407 Flags |= MONontemporalBit0;
22408 if (NontemporalLevel & 0b10)
22409 Flags |= MONontemporalBit1;
22410
22411 return Flags;
22412}
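// For illustration: a load tagged with !nontemporal and
// !riscv-nontemporal-domain !{i32 3} (__RISCV_NTLH_ALL_PRIVATE) gives
// NontemporalLevel - 2 == 1, so only MONontemporalBit0 is set, while the
// default level 5 (__RISCV_NTLH_ALL) yields 3 and sets both
// MONontemporalBit0 and MONontemporalBit1.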
22413
22414 MachineMemOperand::Flags
22415 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
22416
22417 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22418 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
22419 TargetFlags |= (NodeFlags & MONontemporalBit0);
22420 TargetFlags |= (NodeFlags & MONontemporalBit1);
22421 return TargetFlags;
22422}
22423
22424 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
22425 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22426 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22427}
22428
22429 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
22430 if (VT.isScalableVector())
22431 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22432 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22433 return true;
22434 return Subtarget.hasStdExtZbb() &&
22435 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22436}
22437
22438 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
22439 ISD::CondCode Cond) const {
22440 return isCtpopFast(VT) ? 0 : 1;
22441}
22442
22443 bool RISCVTargetLowering::shouldInsertFencesForAtomic(
22444 const Instruction *I) const {
22445 if (Subtarget.hasStdExtZalasr()) {
22446 if (Subtarget.hasStdExtZtso()) {
22447 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22448 // should be lowered to plain load/store. The easiest way to do this is
22449 // to say we should insert fences for them, and the fence insertion code
22450 // will just not insert any fences
22451 auto *LI = dyn_cast<LoadInst>(I);
22452 auto *SI = dyn_cast<StoreInst>(I);
22453 if ((LI &&
22454 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22455 (SI &&
22456 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22457 // Here, this is a load or store which is seq_cst, and needs a .aq or
22458 // .rl therefore we shouldn't try to insert fences
22459 return false;
22460 }
22461 // Here, we are a TSO inst that isn't a seq_cst load/store
22462 return isa<LoadInst>(I) || isa<StoreInst>(I);
22463 }
22464 return false;
22465 }
22466 // Note that one specific case requires fence insertion for an
22467 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22468 // than this hook due to limitations in the interface here.
22469 return isa<LoadInst>(I) || isa<StoreInst>(I);
22470}
22471
22472 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
22473
22474 // GISel support is in progress or complete for these opcodes.
22475 unsigned Op = Inst.getOpcode();
22476 if (Op == Instruction::Add || Op == Instruction::Sub ||
22477 Op == Instruction::And || Op == Instruction::Or ||
22478 Op == Instruction::Xor || Op == Instruction::InsertElement ||
22479 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22480 Op == Instruction::Freeze || Op == Instruction::Store)
22481 return false;
22482
22483 if (Inst.getType()->isScalableTy())
22484 return true;
22485
22486 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22487 if (Inst.getOperand(i)->getType()->isScalableTy() &&
22488 !isa<ReturnInst>(&Inst))
22489 return true;
22490
22491 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22492 if (AI->getAllocatedType()->isScalableTy())
22493 return true;
22494 }
22495
22496 return false;
22497}
22498
22499SDValue
22500RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22501 SelectionDAG &DAG,
22502 SmallVectorImpl<SDNode *> &Created) const {
22503 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
22504 if (isIntDivCheap(N->getValueType(0), Attr))
22505 return SDValue(N, 0); // Lower SDIV as SDIV
22506
22507 // Only perform this transform if short forward branch opt is supported.
22508 if (!Subtarget.hasShortForwardBranchOpt())
22509 return SDValue();
22510 EVT VT = N->getValueType(0);
22511 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22512 return SDValue();
22513
22514 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22515 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22516 return SDValue();
22517 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22518}
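// Sketch of what buildSDIVPow2WithCMov selects here (conceptual only; the
// generic code decides the exact node sequence): for x s/ 2^k it computes
//   t = (x < 0) ? x + (2^k - 1) : x;   result = t >> k   (arithmetic shift)
// e.g. x = -13, k = 2: (-13 + 3) >> 2 = -10 >> 2 = -3, matching the
// truncating division -13 / 4. Requiring 2^k - 1 < 2048 keeps the bias
// representable in a single addi/addiw immediate.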
22519
22520bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22521 EVT VT, const APInt &AndMask) const {
22522 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22523 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22525}
22526
22527unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22528 return Subtarget.getMinimumJumpTableEntries();
22529}
22530
22531 SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
22532 SDValue Value, SDValue Addr,
22533 int JTI,
22534 SelectionDAG &DAG) const {
22535 if (Subtarget.hasStdExtZicfilp()) {
22536 // When Zicfilp is enabled, we need to use a software-guarded branch for the
22537 // jump table branch.
22538 SDValue Chain = Value;
22539 // Jump table debug info is only needed if CodeView is enabled.
22540 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
22541 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22542 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22543 }
22544 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22545}
22546
22547 // If an output pattern produces multiple instructions, tablegen may pick an
22548 // arbitrary type from an instruction's destination register class to use for the
22549// VT of that MachineSDNode. This VT may be used to look up the representative
22550// register class. If the type isn't legal, the default implementation will
22551// not find a register class.
22552//
22553// Some integer types smaller than XLen are listed in the GPR register class to
22554// support isel patterns for GISel, but are not legal in SelectionDAG. The
22555// arbitrary type tablegen picks may be one of these smaller types.
22556//
22557// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22558// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22559std::pair<const TargetRegisterClass *, uint8_t>
22560RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22561 MVT VT) const {
22562 switch (VT.SimpleTy) {
22563 default:
22564 break;
22565 case MVT::i8:
22566 case MVT::i16:
22567 case MVT::i32:
22569 case MVT::bf16:
22570 case MVT::f16:
22572 }
22573
22575}
22576
22577 namespace llvm::RISCVVIntrinsicsTable {
22578
22579#define GET_RISCVVIntrinsicsTable_IMPL
22580#include "RISCVGenSearchableTables.inc"
22581
22582} // namespace llvm::RISCVVIntrinsicsTable
22583
22584 bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
22585
22586 // If the function specifically requests inline stack probes, emit them.
22587 if (MF.getFunction().hasFnAttribute("probe-stack"))
22588 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22589 "inline-asm";
22590
22591 return false;
22592}
22593
22594 unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
22595 Align StackAlign) const {
22596 // The default stack probe size is 4096 if the function has no
22597 // stack-probe-size attribute.
22598 const Function &Fn = MF.getFunction();
22599 unsigned StackProbeSize =
22600 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22601 // Round down to the stack alignment.
22602 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
22603 return StackProbeSize ? StackProbeSize : StackAlign.value();
22604}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1111
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:63
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
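As a rough, hedged illustration of how CCState and CCValAssign fit together (this fragment is not code from this file; CC_SomeTarget, CallConv, IsVarArg, MF, Context and Ins are assumed to exist in the surrounding lowering hook):
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Context);
CCInfo.AnalyzeFormalArguments(Ins, CC_SomeTarget); // CC_SomeTarget: hypothetical CCAssignFn
for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // Argument arrived in VA.getLocReg(); copy it out of the physical register.
  } else if (VA.isMemLoc()) {
    // Argument arrived on the stack at offset VA.getLocMemOffset().
  }
}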
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1886
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2561
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2554
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1841
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2044
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1986
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition: IRBuilder.h:866
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1756
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1386
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2532
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2448
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1861
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2018
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2704
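A small sketch of the IRBuilder calls listed above; the function is illustrative only and assumes the caller provides a builder positioned at the desired insertion point.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
// Emit a release fence followed by an atomic add of Inc into *Ptr.
static Value *emitFencedAtomicAdd(IRBuilderBase &Builder, Value *Ptr, Value *Inc) {
  Builder.CreateFence(AtomicOrdering::Release);
  return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Inc, MaybeAlign(),
                                 AtomicOrdering::Monotonic);
}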
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:68
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the vector's number of elements is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
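For reference, a few of the MVT factory and query methods above in isolation (the header path llvm/CodeGenTypes/MachineValueType.h is assumed; it has moved between LLVM releases):
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;
// Build a fixed-length and a scalable vector type and query them.
static void mvtExamples() {
  MVT Fixed = MVT::getVectorVT(MVT::i32, 4);            // v4i32
  MVT Scalable = MVT::getScalableVectorVT(MVT::i64, 2); // nxv2i64
  (void)Fixed.getScalarSizeInBits();        // 32
  (void)Scalable.isScalableVector();        // true
  (void)Fixed.getHalfNumVectorElementsVT(); // v2i32
}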
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
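These MachineInstrBuilder helpers are normally used through the BuildMI chain pattern; the sketch below is schematic (the descriptor Desc and the operand list are placeholders, since a real opcode dictates its own operand shape):
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
// Build "DstReg = <Desc> SrcReg, 0, <frame index FI>" before MBBI.
static void buildExample(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                         const DebugLoc &DL, const MCInstrDesc &Desc,
                         Register DstReg, Register SrcReg, int FI) {
  BuildMI(MBB, MBBI, DL, Desc, DstReg)
      .addReg(SrcReg)
      .addImm(0)
      .addFrameIndex(FI);
}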
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:404
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type SrcTy to type DstTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a mask-and-compare-with-zero chain, i.e. (and %val, #mask) followed by (icmp eq ..., 0), into a single test.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
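To show the general shape of these TargetLowering hooks without quoting this file's implementations, here is a hypothetical, free-standing policy function modeled on isLegalICmpImmediate; the 12-bit range is an assumption in the style of RISC-V I-type immediates, not a statement about this backend.
#include "llvm/Support/MathExtras.h"
// Hypothetical policy: treat immediates that fit a signed 12-bit field as legal.
static bool isLegalICmpImmediateSketch(int64_t Imm) {
  return llvm::isInt<12>(Imm);
}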
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if this node is an UNDEF value.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:397
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:505
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:906
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
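A compact sketch of how a custom-lowering routine typically drives the SelectionDAG factory methods listed above; the opcode choices are arbitrary and the MVT::i1 condition type is an assumption (real code would use getSetCCResultType):
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Given Op with one integer operand X, compute (X == 1) ? X + 1 : X.
static SDValue lowerExampleSketch(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue X = Op.getOperand(0);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, One);
  SDValue Cond = DAG.getSetCC(DL, MVT::i1, X, One, ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond, Sum, X);
}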
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
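A sketch of how these mask classifiers are queried (the mask values are made up to show the call shape):
  int Mask[] = {0, 2, 4, 6};                 // even elements of an 8-element vector
  unsigned Index;
  if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, /*Factor=*/2, Index)) {
    // Mask picks every 2nd element starting at Index (0 here).
  }
  bool IsRev = ShuffleVectorInst::isReverseMask(Mask, /*NumSrcElts=*/8);  // false for this mask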
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
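A short sketch of typical use of these containers (sizes and values are illustrative):
  SmallVector<int, 8> Mask;
  Mask.reserve(4);
  for (int i = 0; i != 4; ++i)
    Mask.push_back(i);                        // stays in inline storage, no heap allocation
  SmallSet<int, 4> Seen;
  for (int M : Mask)
    if (Seen.insert(M).second) {
      // First time this mask element has been seen.
    }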
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
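Typical use of StringSwitch (a sketch; the strings and return values are illustrative, loosely modelled on RISC-V rounding-mode names):
  static int parseRoundingMode(StringRef Arg) {
    return StringSwitch<int>(Arg)
        .Case("rne", 0)
        .Case("rtz", 1)
        .Cases("rdn", "rdown", 2)
        .Default(-1);
  }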
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
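Inside a TargetLowering subclass constructor this is typically written as below (a sketch only; the opcode/type/action triples are illustrative, not a statement of what this backend actually configures):
  setOperationAction(ISD::BSWAP, MVT::i64, Legal);        // natively supported
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);   // let the legalizer rewrite it
  setOperationAction(ISD::FP_TO_SINT, MVT::f32, Custom);  // handled in LowerOperation()
  setOperationAction({ISD::SMIN, ISD::SMAX}, MVT::i32, Promote);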
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:740
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:384
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
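A sketch of how these fixed-or-scalable quantities behave (values are illustrative):
  TypeSize Fixed = TypeSize::getFixed(128);            // exactly 128 bits
  TypeSize Scalable = TypeSize::getScalable(64);       // 64 * vscale bits
  uint64_t MinBits = Scalable.getKnownMinValue();      // 64
  TypeSize Halved = Scalable.divideCoefficientBy(2);   // 32 * vscale bits
  bool Known = TypeSize::isKnownLE(Halved, Scalable);  // true: comparison is scalability-aware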
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1259
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
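For instance (a sketch; the starting condition code is arbitrary):
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i64);      // SETGE: !(X < Y)
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);    // SETGT: Y > X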
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1568
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1568
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1555
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1651
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns an f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
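A sketch of how the integer-materialization helpers above are queried (the enclosing namespace is assumed to be RISCVMatInt, STI is assumed to be an in-scope MCSubtargetInfo reference, and the constant is arbitrary):
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(0x12345678, STI);
  size_t NumInsts = Seq.size();     // instructions needed to materialize the value
  int Cost = RISCVMatInt::getIntMatCost(APInt(64, 0x12345678), /*Size=*/8, STI,
                                        /*CompressionCost=*/false, /*FreeZeroes=*/false);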
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
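And a companion sketch for the SEW/LMUL encoding helpers just above (the enclosing namespace is assumed to be RISCVVType, as in RISCVBaseInfo.h):
  unsigned VSEW = RISCVVType::encodeSEW(32);                    // SEW=32 -> vtype field encoding
  unsigned SEW = RISCVVType::decodeVSEW(VSEW);                  // back to 32
  RISCVII::VLMUL LMul = RISCVVType::encodeLMUL(2, /*Fractional=*/false);
  auto [Mul, Fractional] = RISCVVType::decodeVLMUL(LMul);       // {2, false}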
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:555
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
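A few of the arithmetic helpers listed above, evaluated on small constants (a sketch; expected results shown in comments):
  bool P2 = llvm::isPowerOf2_64(64);          // true
  unsigned L = llvm::Log2_64(64);             // 6
  uint64_t Up = llvm::PowerOf2Ceil(100);      // 128
  uint64_t Ceil = llvm::divideCeil(10u, 3u);  // 4
  uint64_t Down = llvm::alignDown(13u, 4u);   // 12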
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
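For example (a sketch):
  // Mask selecting elements 4..7 followed by two undef lanes: {4, 5, 6, 7, -1, -1}.
  SmallVector<int, 16> Mask =
      llvm::createSequentialMask(/*Start=*/4, /*NumInts=*/4, /*NumUndefs=*/2);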
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
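A sketch of constructing EVTs with these factories (Ctx is assumed to be an in-scope LLVMContext):
  EVT I8 = EVT::getIntegerVT(Ctx, 8);
  EVT I32 = EVT::getIntegerVT(Ctx, 32);
  EVT V16I8 = EVT::getVectorVT(Ctx, I8, 16);                       // fixed <16 x i8>
  EVT NXV8I8 = EVT::getVectorVT(Ctx, I8, 8, /*IsScalable=*/true);  // scalable <vscale x 8 x i8>
  bool Wider = I32.bitsGT(I8);                                     // true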
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
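For example (a sketch; MF and FI are assumed to be an in-scope MachineFunction and frame index):
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo PlusEight = PtrInfo.getWithOffset(8);   // same slot, 8 bytes further in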
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)