1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
57static cl::opt<unsigned> ExtensionMaxWebSize(
 58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
69static cl::opt<unsigned> NumRepeatedDivisors(
 70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
 76 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
 77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
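// These are hidden llvm options; with DEBUG_TYPE "riscv-lower" they can be
// overridden on the command line, e.g. -riscv-lower-ext-max-web-size=32
// (passed to llc directly, or via -mllvm when driving clang).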
80
81RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
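 // For example, with ELEN=32 this gives MinElts = 2, so single-element types
 // such as nxv1i8 (mf8) and nxv1i16 (mf4) are skipped.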
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
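 // E.g. nxv8i32 has a known minimum size of 256 bits = 4 * RVVBitsPerBlock,
 // so it is assigned to VRM4 (an LMUL=4 register group).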
 182 if (Size <= RISCV::RVVBitsPerBlock)
 183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
 228 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
 229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
 232 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
 233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32bit case is efficient on 64bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
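 // E.g. an nxv2i8 -> nxv2f64 conversion (element sizes 8 and 64) cannot be
 // done in one instruction; it is staged through an intermediate width such
 // as i32/f32.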
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
859 Custom);
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
 923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT fits
 924 // in the integer range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
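 // E.g. for the vector-vector form, SETOGT is expanded to SETOLT with the
 // operands swapped and matched with vmflt.vv, since RVV has no vmfgt.vv
 // encoding.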
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
1070 // Sets common actions for f16 and bf16 for when there's only
1071 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
 1109 // Custom-split nxv32f16/nxv32bf16 since nxv32f32 is not legal.
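 // (Promoting them to f32 would require nxv32f32, i.e. an LMUL=16 register
 // group, which does not exist.)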
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
 1164 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
 1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
 1231 // The operations below differ between mask vectors and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
 1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
 1309 // fits in the integer range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
 1320 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
 1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
 1400 // Don't promote bf16 vector operations to f32 if the f32 vector type is
 1401 // not legal.
 1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
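 // XTHeadMemIdx provides pre-/post-increment loads and stores (e.g.
 // th.lwia/th.lwib), so the corresponding indexed modes are legal.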
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1531 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1534 if (Subtarget.hasVendorXTHeadMemPair())
1536 if (Subtarget.useRVVForFixedLengthVectors())
1538
1539 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1540 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1541
1542 // Disable strict node mutation.
1543 IsStrictFPEnabled = true;
1544 EnableExtLdPromotion = true;
1545
1546 // Let the subtarget decide if a predictable select is more expensive than the
1547 // corresponding branch. This information is used in CGP/SelectOpt to decide
1548 // when to convert selects into branches.
1549 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1550
1551 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1552 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1553
1555 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1556 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1557
1559 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1560 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1561
1562 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1563 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1564}
1565
1567 LLVMContext &Context,
1568 EVT VT) const {
1569 if (!VT.isVector())
1570 return getPointerTy(DL);
1571 if (Subtarget.hasVInstructions() &&
1572 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1573 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1575}
1576
1577MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1578 return Subtarget.getXLenVT();
1579}
1580
1581// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1582bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1583 unsigned VF,
1584 bool IsScalable) const {
1585 if (!Subtarget.hasVInstructions())
1586 return true;
1587
1588 if (!IsScalable)
1589 return true;
1590
1591 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1592 return true;
1593
 1594 // Don't allow VF=1 if those types aren't legal.
1595 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1596 return true;
1597
1598 // VLEN=32 support is incomplete.
1599 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1600 return true;
1601
1602 // The maximum VF is for the smallest element width with LMUL=8.
1603 // VF must be a power of 2.
1604 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
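 // With RVVBitsPerBlock = 64 this gives MaxVF = 8 * 8 = 64, i.e. 64 i8
 // elements at LMUL=8.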
1605 return VF > MaxVF || !isPowerOf2_32(VF);
1606}
1607
1609 return !Subtarget.hasVInstructions() ||
1610 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1611}
1612
1614 const CallInst &I,
1615 MachineFunction &MF,
1616 unsigned Intrinsic) const {
1617 auto &DL = I.getDataLayout();
1618
1619 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1620 bool IsUnitStrided, bool UsePtrVal = false) {
1622 // We can't use ptrVal if the intrinsic can access memory before the
1623 // pointer. This means we can't use it for strided or indexed intrinsics.
1624 if (UsePtrVal)
1625 Info.ptrVal = I.getArgOperand(PtrOp);
1626 else
1627 Info.fallbackAddressSpace =
1628 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1629 Type *MemTy;
1630 if (IsStore) {
1631 // Store value is the first operand.
1632 MemTy = I.getArgOperand(0)->getType();
1633 } else {
 1634 // Use the return type. If it's a segment load, the return type is a struct.
1635 MemTy = I.getType();
1636 if (MemTy->isStructTy())
1637 MemTy = MemTy->getStructElementType(0);
1638 }
1639 if (!IsUnitStrided)
1640 MemTy = MemTy->getScalarType();
1641
1642 Info.memVT = getValueType(DL, MemTy);
1643 if (MemTy->isTargetExtTy()) {
1644 // RISC-V vector tuple type's alignment type should be its element type.
1645 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1646 MemTy = Type::getIntNTy(
1647 MemTy->getContext(),
1648 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1649 ->getZExtValue());
1650 Info.align = DL.getABITypeAlign(MemTy);
1651 } else {
1652 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1653 }
1655 Info.flags |=
1657 return true;
1658 };
1659
1660 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1662
1664 switch (Intrinsic) {
1665 default:
1666 return false;
1667 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1668 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1669 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1670 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1671 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1672 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1675 case Intrinsic::riscv_masked_cmpxchg_i32:
1677 Info.memVT = MVT::i32;
1678 Info.ptrVal = I.getArgOperand(0);
1679 Info.offset = 0;
1680 Info.align = Align(4);
1683 return true;
1684 case Intrinsic::riscv_seg2_load:
1685 case Intrinsic::riscv_seg3_load:
1686 case Intrinsic::riscv_seg4_load:
1687 case Intrinsic::riscv_seg5_load:
1688 case Intrinsic::riscv_seg6_load:
1689 case Intrinsic::riscv_seg7_load:
1690 case Intrinsic::riscv_seg8_load:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1692 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1693 case Intrinsic::riscv_seg2_store:
1694 case Intrinsic::riscv_seg3_store:
1695 case Intrinsic::riscv_seg4_store:
1696 case Intrinsic::riscv_seg5_store:
1697 case Intrinsic::riscv_seg6_store:
1698 case Intrinsic::riscv_seg7_store:
1699 case Intrinsic::riscv_seg8_store:
1700 // Operands are (vec, ..., vec, ptr, vl)
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1704 case Intrinsic::riscv_vle:
1705 case Intrinsic::riscv_vle_mask:
1706 case Intrinsic::riscv_vleff:
1707 case Intrinsic::riscv_vleff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ true,
1711 /*UsePtrVal*/ true);
1712 case Intrinsic::riscv_vse:
1713 case Intrinsic::riscv_vse_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ true,
1717 /*UsePtrVal*/ true);
1718 case Intrinsic::riscv_vlse:
1719 case Intrinsic::riscv_vlse_mask:
1720 case Intrinsic::riscv_vloxei:
1721 case Intrinsic::riscv_vloxei_mask:
1722 case Intrinsic::riscv_vluxei:
1723 case Intrinsic::riscv_vluxei_mask:
1724 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1725 /*IsStore*/ false,
1726 /*IsUnitStrided*/ false);
1727 case Intrinsic::riscv_vsse:
1728 case Intrinsic::riscv_vsse_mask:
1729 case Intrinsic::riscv_vsoxei:
1730 case Intrinsic::riscv_vsoxei_mask:
1731 case Intrinsic::riscv_vsuxei:
1732 case Intrinsic::riscv_vsuxei_mask:
1733 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1734 /*IsStore*/ true,
1735 /*IsUnitStrided*/ false);
1736 case Intrinsic::riscv_vlseg2:
1737 case Intrinsic::riscv_vlseg3:
1738 case Intrinsic::riscv_vlseg4:
1739 case Intrinsic::riscv_vlseg5:
1740 case Intrinsic::riscv_vlseg6:
1741 case Intrinsic::riscv_vlseg7:
1742 case Intrinsic::riscv_vlseg8:
1743 case Intrinsic::riscv_vlseg2ff:
1744 case Intrinsic::riscv_vlseg3ff:
1745 case Intrinsic::riscv_vlseg4ff:
1746 case Intrinsic::riscv_vlseg5ff:
1747 case Intrinsic::riscv_vlseg6ff:
1748 case Intrinsic::riscv_vlseg7ff:
1749 case Intrinsic::riscv_vlseg8ff:
1750 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1751 /*IsStore*/ false,
1752 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1753 case Intrinsic::riscv_vlseg2_mask:
1754 case Intrinsic::riscv_vlseg3_mask:
1755 case Intrinsic::riscv_vlseg4_mask:
1756 case Intrinsic::riscv_vlseg5_mask:
1757 case Intrinsic::riscv_vlseg6_mask:
1758 case Intrinsic::riscv_vlseg7_mask:
1759 case Intrinsic::riscv_vlseg8_mask:
1760 case Intrinsic::riscv_vlseg2ff_mask:
1761 case Intrinsic::riscv_vlseg3ff_mask:
1762 case Intrinsic::riscv_vlseg4ff_mask:
1763 case Intrinsic::riscv_vlseg5ff_mask:
1764 case Intrinsic::riscv_vlseg6ff_mask:
1765 case Intrinsic::riscv_vlseg7ff_mask:
1766 case Intrinsic::riscv_vlseg8ff_mask:
1767 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1768 /*IsStore*/ false,
1769 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1770 case Intrinsic::riscv_vlsseg2:
1771 case Intrinsic::riscv_vlsseg3:
1772 case Intrinsic::riscv_vlsseg4:
1773 case Intrinsic::riscv_vlsseg5:
1774 case Intrinsic::riscv_vlsseg6:
1775 case Intrinsic::riscv_vlsseg7:
1776 case Intrinsic::riscv_vlsseg8:
1777 case Intrinsic::riscv_vloxseg2:
1778 case Intrinsic::riscv_vloxseg3:
1779 case Intrinsic::riscv_vloxseg4:
1780 case Intrinsic::riscv_vloxseg5:
1781 case Intrinsic::riscv_vloxseg6:
1782 case Intrinsic::riscv_vloxseg7:
1783 case Intrinsic::riscv_vloxseg8:
1784 case Intrinsic::riscv_vluxseg2:
1785 case Intrinsic::riscv_vluxseg3:
1786 case Intrinsic::riscv_vluxseg4:
1787 case Intrinsic::riscv_vluxseg5:
1788 case Intrinsic::riscv_vluxseg6:
1789 case Intrinsic::riscv_vluxseg7:
1790 case Intrinsic::riscv_vluxseg8:
1791 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1792 /*IsStore*/ false,
1793 /*IsUnitStrided*/ false);
1794 case Intrinsic::riscv_vlsseg2_mask:
1795 case Intrinsic::riscv_vlsseg3_mask:
1796 case Intrinsic::riscv_vlsseg4_mask:
1797 case Intrinsic::riscv_vlsseg5_mask:
1798 case Intrinsic::riscv_vlsseg6_mask:
1799 case Intrinsic::riscv_vlsseg7_mask:
1800 case Intrinsic::riscv_vlsseg8_mask:
1801 case Intrinsic::riscv_vloxseg2_mask:
1802 case Intrinsic::riscv_vloxseg3_mask:
1803 case Intrinsic::riscv_vloxseg4_mask:
1804 case Intrinsic::riscv_vloxseg5_mask:
1805 case Intrinsic::riscv_vloxseg6_mask:
1806 case Intrinsic::riscv_vloxseg7_mask:
1807 case Intrinsic::riscv_vloxseg8_mask:
1808 case Intrinsic::riscv_vluxseg2_mask:
1809 case Intrinsic::riscv_vluxseg3_mask:
1810 case Intrinsic::riscv_vluxseg4_mask:
1811 case Intrinsic::riscv_vluxseg5_mask:
1812 case Intrinsic::riscv_vluxseg6_mask:
1813 case Intrinsic::riscv_vluxseg7_mask:
1814 case Intrinsic::riscv_vluxseg8_mask:
1815 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1816 /*IsStore*/ false,
1817 /*IsUnitStrided*/ false);
1818 case Intrinsic::riscv_vsseg2:
1819 case Intrinsic::riscv_vsseg3:
1820 case Intrinsic::riscv_vsseg4:
1821 case Intrinsic::riscv_vsseg5:
1822 case Intrinsic::riscv_vsseg6:
1823 case Intrinsic::riscv_vsseg7:
1824 case Intrinsic::riscv_vsseg8:
1825 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1826 /*IsStore*/ true,
1827 /*IsUnitStrided*/ false);
1828 case Intrinsic::riscv_vsseg2_mask:
1829 case Intrinsic::riscv_vsseg3_mask:
1830 case Intrinsic::riscv_vsseg4_mask:
1831 case Intrinsic::riscv_vsseg5_mask:
1832 case Intrinsic::riscv_vsseg6_mask:
1833 case Intrinsic::riscv_vsseg7_mask:
1834 case Intrinsic::riscv_vsseg8_mask:
1835 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1836 /*IsStore*/ true,
1837 /*IsUnitStrided*/ false);
1838 case Intrinsic::riscv_vssseg2:
1839 case Intrinsic::riscv_vssseg3:
1840 case Intrinsic::riscv_vssseg4:
1841 case Intrinsic::riscv_vssseg5:
1842 case Intrinsic::riscv_vssseg6:
1843 case Intrinsic::riscv_vssseg7:
1844 case Intrinsic::riscv_vssseg8:
1845 case Intrinsic::riscv_vsoxseg2:
1846 case Intrinsic::riscv_vsoxseg3:
1847 case Intrinsic::riscv_vsoxseg4:
1848 case Intrinsic::riscv_vsoxseg5:
1849 case Intrinsic::riscv_vsoxseg6:
1850 case Intrinsic::riscv_vsoxseg7:
1851 case Intrinsic::riscv_vsoxseg8:
1852 case Intrinsic::riscv_vsuxseg2:
1853 case Intrinsic::riscv_vsuxseg3:
1854 case Intrinsic::riscv_vsuxseg4:
1855 case Intrinsic::riscv_vsuxseg5:
1856 case Intrinsic::riscv_vsuxseg6:
1857 case Intrinsic::riscv_vsuxseg7:
1858 case Intrinsic::riscv_vsuxseg8:
1859 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1860 /*IsStore*/ true,
1861 /*IsUnitStrided*/ false);
1862 case Intrinsic::riscv_vssseg2_mask:
1863 case Intrinsic::riscv_vssseg3_mask:
1864 case Intrinsic::riscv_vssseg4_mask:
1865 case Intrinsic::riscv_vssseg5_mask:
1866 case Intrinsic::riscv_vssseg6_mask:
1867 case Intrinsic::riscv_vssseg7_mask:
1868 case Intrinsic::riscv_vssseg8_mask:
1869 case Intrinsic::riscv_vsoxseg2_mask:
1870 case Intrinsic::riscv_vsoxseg3_mask:
1871 case Intrinsic::riscv_vsoxseg4_mask:
1872 case Intrinsic::riscv_vsoxseg5_mask:
1873 case Intrinsic::riscv_vsoxseg6_mask:
1874 case Intrinsic::riscv_vsoxseg7_mask:
1875 case Intrinsic::riscv_vsoxseg8_mask:
1876 case Intrinsic::riscv_vsuxseg2_mask:
1877 case Intrinsic::riscv_vsuxseg3_mask:
1878 case Intrinsic::riscv_vsuxseg4_mask:
1879 case Intrinsic::riscv_vsuxseg5_mask:
1880 case Intrinsic::riscv_vsuxseg6_mask:
1881 case Intrinsic::riscv_vsuxseg7_mask:
1882 case Intrinsic::riscv_vsuxseg8_mask:
1883 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1884 /*IsStore*/ true,
1885 /*IsUnitStrided*/ false);
1886 }
1887}
1888
1890 const AddrMode &AM, Type *Ty,
1891 unsigned AS,
1892 Instruction *I) const {
1893 // No global is ever allowed as a base.
1894 if (AM.BaseGV)
1895 return false;
1896
1897 // None of our addressing modes allows a scalable offset
1898 if (AM.ScalableOffset)
1899 return false;
1900
1901 // RVV instructions only support register addressing.
1902 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1903 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1904
1905 // Require a 12-bit signed offset.
1906 if (!isInt<12>(AM.BaseOffs))
1907 return false;
1908
1909 switch (AM.Scale) {
1910 case 0: // "r+i" or just "i", depending on HasBaseReg.
1911 break;
1912 case 1:
1913 if (!AM.HasBaseReg) // allow "r+i".
1914 break;
1915 return false; // disallow "r+r" or "r+r+i".
1916 default:
1917 return false;
1918 }
1919
1920 return true;
1921}
1922
1924 return isInt<12>(Imm);
1925}
1926
1928 return isInt<12>(Imm);
1929}
1930
1931// On RV32, 64-bit integers are split into their high and low parts and held
1932// in two different registers, so the trunc is free since the low register can
1933// just be used.
1934// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1935// isTruncateFree?
1937 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1938 return false;
1939 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1940 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1941 return (SrcBits == 64 && DestBits == 32);
1942}
1943
1945 // We consider i64->i32 free on RV64 since we have good selection of W
1946 // instructions that make promoting operations back to i64 free in many cases.
1947 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1948 !DstVT.isInteger())
1949 return false;
1950 unsigned SrcBits = SrcVT.getSizeInBits();
1951 unsigned DestBits = DstVT.getSizeInBits();
1952 return (SrcBits == 64 && DestBits == 32);
1953}
1954
1956 EVT SrcVT = Val.getValueType();
 1957 // A truncate of a vector SRL/SRA result is free: it folds into vnsrl/vnsra.
1958 if (Subtarget.hasVInstructions() &&
1959 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1960 SrcVT.isVector() && VT2.isVector()) {
1961 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1962 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1963 if (SrcBits == DestBits * 2) {
1964 return true;
1965 }
1966 }
1967 return TargetLowering::isTruncateFree(Val, VT2);
1968}
1969
1971 // Zexts are free if they can be combined with a load.
1972 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1973 // poorly with type legalization of compares preferring sext.
1974 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1975 EVT MemVT = LD->getMemoryVT();
1976 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1977 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1978 LD->getExtensionType() == ISD::ZEXTLOAD))
1979 return true;
1980 }
1981
1982 return TargetLowering::isZExtFree(Val, VT2);
1983}
1984
1986 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1987}
1988
1990 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1991}
1992
1994 return Subtarget.hasStdExtZbb() ||
1995 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1996}
1997
1999 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2004 const Instruction &AndI) const {
2005 // We expect to be able to match a bit extraction instruction if the Zbs
2006 // extension is supported and the mask is a power of two. However, we
2007 // conservatively return false if the mask would fit in an ANDI instruction,
2008 // on the basis that it's possible the sinking+duplication of the AND in
2009 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2010 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2011 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2012 return false;
2013 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2014 if (!Mask)
2015 return false;
2016 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2017}
2018
2020 EVT VT = Y.getValueType();
2021
2022 // FIXME: Support vectors once we have tests.
2023 if (VT.isVector())
2024 return false;
2025
2026 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2027 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2028}
2029
2031 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2032 if (Subtarget.hasStdExtZbs())
2033 return X.getValueType().isScalarInteger();
2034 auto *C = dyn_cast<ConstantSDNode>(Y);
2035 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2036 if (Subtarget.hasVendorXTHeadBs())
2037 return C != nullptr;
2038 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
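 // (1 << Y) must fit in ANDI's 12-bit signed immediate, so Y is capped at 10.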
2039 return C && C->getAPIntValue().ule(10);
2040}
2041
2043 EVT VT) const {
2044 // Only enable for rvv.
2045 if (!VT.isVector() || !Subtarget.hasVInstructions())
2046 return false;
2047
2048 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2049 return false;
2050
2051 return true;
2052}
2053
2055 Type *Ty) const {
2056 assert(Ty->isIntegerTy());
2057
2058 unsigned BitSize = Ty->getIntegerBitWidth();
2059 if (BitSize > Subtarget.getXLen())
2060 return false;
2061
2062 // Fast path, assume 32-bit immediates are cheap.
2063 int64_t Val = Imm.getSExtValue();
2064 if (isInt<32>(Val))
2065 return true;
2066
 2067 // A constant pool entry may be more aligned than the load we're trying to
2068 // replace. If we don't support unaligned scalar mem, prefer the constant
2069 // pool.
2070 // TODO: Can the caller pass down the alignment?
2071 if (!Subtarget.enableUnalignedScalarMem())
2072 return true;
2073
2074 // Prefer to keep the load if it would require many instructions.
2075 // This uses the same threshold we use for constant pools but doesn't
2076 // check useConstantPoolForLargeInts.
2077 // TODO: Should we keep the load only when we're definitely going to emit a
2078 // constant pool?
2079
2081 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2082}
2083
2087 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2088 SelectionDAG &DAG) const {
2089 // One interesting pattern that we'd want to form is 'bit extract':
2090 // ((1 >> Y) & 1) ==/!= 0
2091 // But we also need to be careful not to try to reverse that fold.
2092
2093 // Is this '((1 >> Y) & 1)'?
2094 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2095 return false; // Keep the 'bit extract' pattern.
2096
2097 // Will this be '((1 >> Y) & 1)' after the transform?
2098 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2099 return true; // Do form the 'bit extract' pattern.
2100
2101 // If 'X' is a constant, and we transform, then we will immediately
 2102 // try to undo the fold, thus causing an endless combine loop.
2103 // So only do the transform if X is not a constant. This matches the default
2104 // implementation of this function.
2105 return !XC;
2106}
2107
2109 unsigned Opc = VecOp.getOpcode();
2110
2111 // Assume target opcodes can't be scalarized.
2112 // TODO - do we have any exceptions?
2113 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2114 return false;
2115
2116 // If the vector op is not supported, try to convert to scalar.
2117 EVT VecVT = VecOp.getValueType();
2118 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2119 return true;
2120
2121 // If the vector op is supported, but the scalar op is not, the transform may
2122 // not be worthwhile.
 2123 // Permit converting a vector binary operation to a scalar binary operation
 2124 // that is custom-lowered with an illegal type.
2125 EVT ScalarVT = VecVT.getScalarType();
2126 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2127 isOperationCustom(Opc, ScalarVT);
2128}
2129
2131 const GlobalAddressSDNode *GA) const {
2132 // In order to maximise the opportunity for common subexpression elimination,
2133 // keep a separate ADD node for the global address offset instead of folding
2134 // it in the global address node. Later peephole optimisations may choose to
2135 // fold it back in when profitable.
2136 return false;
2137}
2138
2139// Returns 0-31 if the fli instruction is available for the type and this is
 2140 // a legal FP immediate for the type. Returns -1 otherwise.
2142 if (!Subtarget.hasStdExtZfa())
2143 return -1;
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return -1;
2157
2158 return RISCVLoadFPImm::getLoadFPImm(Imm);
2159}
2160
2162 bool ForCodeSize) const {
2163 bool IsLegalVT = false;
2164 if (VT == MVT::f16)
2165 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2166 else if (VT == MVT::f32)
2167 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2168 else if (VT == MVT::f64)
2169 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2170 else if (VT == MVT::bf16)
2171 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2172
2173 if (!IsLegalVT)
2174 return false;
2175
2176 if (getLegalZfaFPImm(Imm, VT) >= 0)
2177 return true;
2178
2179 // Cannot create a 64-bit floating-point immediate value for RV32.
2180 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2181 // td can handle +0.0 or -0.0 already.
2182 // -0.0 can be created by fmv + fneg.
2183 return Imm.isZero();
2184 }
2185
2186 // Special case: fmv + fneg
2187 if (Imm.isNegZero())
2188 return true;
2189
2190 // Building an integer and then converting requires a fmv at the end of
2191 // the integer sequence. The fmv is not required for Zfinx.
2192 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2193 const int Cost =
2194 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2195 Subtarget.getXLen(), Subtarget);
2196 return Cost <= FPImmCost;
2197}
2198
2199// TODO: This is very conservative.
2201 unsigned Index) const {
2203 return false;
2204
2205 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2206 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2207 return false;
2208
2209 EVT EltVT = ResVT.getVectorElementType();
2210 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2211
2212 // The smallest type we can slide is i8.
2213 // TODO: We can extract index 0 from a mask vector without a slide.
2214 if (EltVT == MVT::i1)
2215 return false;
2216
2217 unsigned ResElts = ResVT.getVectorNumElements();
2218 unsigned SrcElts = SrcVT.getVectorNumElements();
2219
2220 unsigned MinVLen = Subtarget.getRealMinVLen();
2221 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2222
2223 // If we're extracting only data from the first VLEN bits of the source
2224 // then we can always do this with an m1 vslidedown.vx. Restricting the
2225 // Index ensures we can use a vslidedown.vi.
2226 // TODO: We can generalize this when the exact VLEN is known.
2227 if (Index + ResElts <= MinVLMAX && Index < 31)
2228 return true;
2229
2230 // Conservatively only handle extracting half of a vector.
2231 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2232 // the upper half of a vector until we have more test coverage.
2233 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2234 // a cheap extract. However, this case is important in practice for
2235 // shuffled extracts of longer vectors. How should we resolve this?
2236 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2237}
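A minimal model of the two "cheap extract" cases above, assuming the element type is not i1. MinVLMAX is derived from an assumed minimum VLEN; the function and parameter names are hypothetical.

// Cheap if the extract reads only the first MinVLMAX elements (with an index
// small enough for vslidedown.vi), or takes exactly the low or high half.
static bool isExtractCheapModel(unsigned ResElts, unsigned SrcElts,
                                unsigned Index, unsigned MinVLenBits,
                                unsigned EltBits) {
  unsigned MinVLMAX = MinVLenBits / EltBits;
  if (Index + ResElts <= MinVLMAX && Index < 31)
    return true;
  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
}
// With VLEN >= 128 (MinVLMAX = 4 for i32): extracting v2i32 at index 2 from
// v8i32 is cheap, as is extracting the upper v4i32 half of v8i32 (index 4).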
2238
2241 EVT VT) const {
2242 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2243 // We might still end up using a GPR but that will be decided based on ABI.
2244 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2245 !Subtarget.hasStdExtZfhminOrZhinxmin())
2246 return MVT::f32;
2247
2249
2250 return PartVT;
2251}
2252
2253unsigned
2255 std::optional<MVT> RegisterVT) const {
2256 // Pair inline assembly operand
2257 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2258 *RegisterVT == MVT::Untyped)
2259 return 1;
2260
2261 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2262}
2263
2266 EVT VT) const {
2267 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2268 // We might still end up using a GPR but that will be decided based on ABI.
2269 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2270 !Subtarget.hasStdExtZfhminOrZhinxmin())
2271 return 1;
2272
2274}
2275
2277 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2278 unsigned &NumIntermediates, MVT &RegisterVT) const {
2280 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2281
2282 return NumRegs;
2283}
2284
2285// Changes the condition code and swaps operands if necessary, so the SetCC
2286// operation matches one of the comparisons supported directly by branches
2287// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2288// with 1/-1.
2289static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2290 ISD::CondCode &CC, SelectionDAG &DAG) {
2291 // If this is a single bit test that can't be handled by ANDI, shift the
2292 // bit to be tested to the MSB and perform a signed compare with 0.
2293 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2294 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2295 isa<ConstantSDNode>(LHS.getOperand(1))) {
2296 uint64_t Mask = LHS.getConstantOperandVal(1);
2297 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2298 unsigned ShAmt = 0;
2299 if (isPowerOf2_64(Mask)) {
2301 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2302 } else {
2303 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2304 }
2305
2306 LHS = LHS.getOperand(0);
2307 if (ShAmt != 0)
2308 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2309 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2310 return;
2311 }
2312 }
2313
2314 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2315 int64_t C = RHSC->getSExtValue();
2316 switch (CC) {
2317 default: break;
2318 case ISD::SETGT:
2319 // Convert X > -1 to X >= 0.
2320 if (C == -1) {
2321 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2322 CC = ISD::SETGE;
2323 return;
2324 }
2325 break;
2326 case ISD::SETLT:
2327 // Convert X < 1 to 0 >= X.
2328 if (C == 1) {
2329 RHS = LHS;
2330 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2331 CC = ISD::SETGE;
2332 return;
2333 }
2334 break;
2335 }
2336 }
2337
2338 switch (CC) {
2339 default:
2340 break;
2341 case ISD::SETGT:
2342 case ISD::SETLE:
2343 case ISD::SETUGT:
2344 case ISD::SETULE:
2346 std::swap(LHS, RHS);
2347 break;
2348 }
2349}
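Scalar sanity checks (not from this file) for the rewrites above; RISC-V branches compare for eq/ne/lt/ge only, so GT/LE forms are either converted or have their operands swapped.

#include <cassert>
#include <cstdint>

int main() {
  int64_t X = -7, Y = 3;
  // X > -1 is rewritten as X >= 0 (a free comparison against zero).
  assert((X > -1) == (X >= 0));
  // X < 1 is rewritten as 0 >= X.
  assert((X < 1) == (0 >= X));
  // SETGT/SETUGT/SETLE/SETULE swap operands: X > Y becomes Y < X.
  assert((X > Y) == (Y < X));
}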
2350
2352 if (VT.isRISCVVectorTuple()) {
2353 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2354 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2355 return RISCVII::LMUL_F8;
2356 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2357 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2358 return RISCVII::LMUL_F4;
2359 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2360 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2361 return RISCVII::LMUL_F2;
2362 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2363 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2364 return RISCVII::LMUL_1;
2365 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2366 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2367 return RISCVII::LMUL_2;
2368 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2369 return RISCVII::LMUL_4;
2370 llvm_unreachable("Invalid vector tuple type LMUL.");
2371 }
2372
2373 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2374 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2375 if (VT.getVectorElementType() == MVT::i1)
2376 KnownSize *= 8;
2377
2378 switch (KnownSize) {
2379 default:
2380 llvm_unreachable("Invalid LMUL.");
2381 case 8:
2382 return RISCVII::LMUL_F8;
2383 case 16:
2384 return RISCVII::LMUL_F4;
2385 case 32:
2386 return RISCVII::LMUL_F2;
2387 case 64:
2388 return RISCVII::LMUL_1;
2389 case 128:
2390 return RISCVII::LMUL_2;
2391 case 256:
2392 return RISCVII::LMUL_4;
2393 case 512:
2394 return RISCVII::LMUL_8;
2395 }
2396}
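The switch above maps a scalable type's known minimum size to a register-group size (LMUL), with one vector register block assumed to be RVVBitsPerBlock = 64 bits. A plain-C++ restatement, purely for illustration:

#include <string>

static std::string lmulForKnownMinBits(unsigned KnownBits, bool IsI1Element) {
  if (IsI1Element)
    KnownBits *= 8; // i1 vectors are scaled up first, as in the code above
  switch (KnownBits) {
  case 8:   return "mf8";
  case 16:  return "mf4";
  case 32:  return "mf2";
  case 64:  return "m1";
  case 128: return "m2";
  case 256: return "m4";
  case 512: return "m8";
  default:  return "invalid";
  }
}
// e.g. nxv4i32 has a known minimum size of 128 bits -> "m2";
//      nxv1i8 (8 bits) -> "mf8"; nxv8i1 (8 bits, scaled to 64) -> "m1".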
2397
2399 switch (LMul) {
2400 default:
2401 llvm_unreachable("Invalid LMUL.");
2402 case RISCVII::VLMUL::LMUL_F8:
2403 case RISCVII::VLMUL::LMUL_F4:
2404 case RISCVII::VLMUL::LMUL_F2:
2405 case RISCVII::VLMUL::LMUL_1:
2406 return RISCV::VRRegClassID;
2407 case RISCVII::VLMUL::LMUL_2:
2408 return RISCV::VRM2RegClassID;
2409 case RISCVII::VLMUL::LMUL_4:
2410 return RISCV::VRM4RegClassID;
2411 case RISCVII::VLMUL::LMUL_8:
2412 return RISCV::VRM8RegClassID;
2413 }
2414}
2415
2416unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2417 RISCVII::VLMUL LMUL = getLMUL(VT);
2418 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2419 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2420 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2421 LMUL == RISCVII::VLMUL::LMUL_1) {
2422 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm1_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2427 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm2_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2432 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm4_0 + Index;
2435 }
2436 llvm_unreachable("Invalid vector type.");
2437}
2438
2440 if (VT.isRISCVVectorTuple()) {
2441 unsigned NF = VT.getRISCVVectorTupleNumFields();
2442 unsigned RegsPerField =
2443 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2444 (NF * RISCV::RVVBitsPerBlock));
2445 switch (RegsPerField) {
2446 case 1:
2447 if (NF == 2)
2448 return RISCV::VRN2M1RegClassID;
2449 if (NF == 3)
2450 return RISCV::VRN3M1RegClassID;
2451 if (NF == 4)
2452 return RISCV::VRN4M1RegClassID;
2453 if (NF == 5)
2454 return RISCV::VRN5M1RegClassID;
2455 if (NF == 6)
2456 return RISCV::VRN6M1RegClassID;
2457 if (NF == 7)
2458 return RISCV::VRN7M1RegClassID;
2459 if (NF == 8)
2460 return RISCV::VRN8M1RegClassID;
2461 break;
2462 case 2:
2463 if (NF == 2)
2464 return RISCV::VRN2M2RegClassID;
2465 if (NF == 3)
2466 return RISCV::VRN3M2RegClassID;
2467 if (NF == 4)
2468 return RISCV::VRN4M2RegClassID;
2469 break;
2470 case 4:
2471 assert(NF == 2);
2472 return RISCV::VRN2M4RegClassID;
2473 default:
2474 break;
2475 }
2476 llvm_unreachable("Invalid vector tuple type RegClass.");
2477 }
2478
2479 if (VT.getVectorElementType() == MVT::i1)
2480 return RISCV::VRRegClassID;
2481 return getRegClassIDForLMUL(getLMUL(VT));
2482}
2483
2484// Attempt to decompose a subvector insert/extract between VecVT and
2485// SubVecVT via subregister indices. Returns the subregister index that
2486// can perform the subvector insert/extract with the given element index, as
2487// well as the index corresponding to any leftover subvectors that must be
2488// further inserted/extracted within the register class for SubVecVT.
2489std::pair<unsigned, unsigned>
2491 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2492 const RISCVRegisterInfo *TRI) {
2493 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2494 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2495 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2496 "Register classes not ordered");
2497 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2498 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2499
2500 // If VecVT is a vector tuple type, either it's the tuple type with the same
2501 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2502 if (VecVT.isRISCVVectorTuple()) {
2503 if (VecRegClassID == SubRegClassID)
2504 return {RISCV::NoSubRegister, 0};
2505
2506 assert(SubVecVT.isScalableVector() &&
2507 "Only allow scalable vector subvector.");
2508 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2509 "Invalid vector tuple insert/extract for vector and subvector with "
2510 "different LMUL.");
2511 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2512 }
2513
2514 // Try to compose a subregister index that takes us from the incoming
2515 // LMUL>1 register class down to the outgoing one. At each step we halve
2516 // the LMUL:
2517 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2518 // Note that this is not guaranteed to find a subregister index, such as
2519 // when we are extracting from one VR type to another.
2520 unsigned SubRegIdx = RISCV::NoSubRegister;
2521 for (const unsigned RCID :
2522 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2523 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2524 VecVT = VecVT.getHalfNumVectorElementsVT();
2525 bool IsHi =
2526 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2527 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2528 getSubregIndexByMVT(VecVT, IsHi));
2529 if (IsHi)
2530 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2531 }
2532 return {SubRegIdx, InsertExtractIdx};
2533}
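The halving loop above, traced in plain arithmetic for the nxv16i32@12 -> nxv2i32 example from the comment. This is a sketch of the index bookkeeping only; real subregister composition goes through TRI.

#include <cstdio>

static void traceDecomposition(unsigned VecElts, unsigned SubElts,
                               unsigned Index) {
  while (VecElts > SubElts) {
    VecElts /= 2; // halve the LMUL
    bool IsHi = Index >= VecElts;
    std::printf("take the %s half of %u elements\n", IsHi ? "high" : "low",
                VecElts * 2);
    if (IsHi)
      Index -= VecElts;
  }
  std::printf("leftover index within the final register group: %u\n", Index);
}

int main() { traceDecomposition(/*nxv16i32*/ 16, /*nxv2i32*/ 2, /*Idx*/ 12); }
// Prints high/high/low, matching sub_vrm4_1 -> sub_vrm2_1 -> sub_vrm1_0, with
// a leftover index of 0.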
2534
2535// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2536// stores for those types.
2537bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2538 return !Subtarget.useRVVForFixedLengthVectors() ||
2539 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2540}
2541
2543 if (!ScalarTy.isSimple())
2544 return false;
2545 switch (ScalarTy.getSimpleVT().SimpleTy) {
2546 case MVT::iPTR:
2547 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2548 case MVT::i8:
2549 case MVT::i16:
2550 case MVT::i32:
2551 return true;
2552 case MVT::i64:
2553 return Subtarget.hasVInstructionsI64();
2554 case MVT::f16:
2555 return Subtarget.hasVInstructionsF16Minimal();
2556 case MVT::bf16:
2557 return Subtarget.hasVInstructionsBF16Minimal();
2558 case MVT::f32:
2559 return Subtarget.hasVInstructionsF32();
2560 case MVT::f64:
2561 return Subtarget.hasVInstructionsF64();
2562 default:
2563 return false;
2564 }
2565}
2566
2567
2568unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2569 return NumRepeatedDivisors;
2570}
2571
2573 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2574 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2575 "Unexpected opcode");
2576 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2577 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2579 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2580 if (!II)
2581 return SDValue();
2582 return Op.getOperand(II->VLOperand + 1 + HasChain);
2583}
2584
2586 const RISCVSubtarget &Subtarget) {
2587 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2588 if (!Subtarget.useRVVForFixedLengthVectors())
2589 return false;
2590
2591 // We only support a set of vector types with a consistent maximum fixed size
2592 // across all supported vector element types to avoid legalization issues.
2593 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2594 // fixed-length vector type we support is 1024 bytes.
2595 if (VT.getFixedSizeInBits() > 1024 * 8)
2596 return false;
2597
2598 unsigned MinVLen = Subtarget.getRealMinVLen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601
2602 // Don't use RVV for vectors we cannot scalarize if required.
2603 switch (EltVT.SimpleTy) {
2604 // i1 is supported but has different rules.
2605 default:
2606 return false;
2607 case MVT::i1:
2608 // Masks can only use a single register.
2609 if (VT.getVectorNumElements() > MinVLen)
2610 return false;
2611 MinVLen /= 8;
2612 break;
2613 case MVT::i8:
2614 case MVT::i16:
2615 case MVT::i32:
2616 break;
2617 case MVT::i64:
2618 if (!Subtarget.hasVInstructionsI64())
2619 return false;
2620 break;
2621 case MVT::f16:
2622 if (!Subtarget.hasVInstructionsF16Minimal())
2623 return false;
2624 break;
2625 case MVT::bf16:
2626 if (!Subtarget.hasVInstructionsBF16Minimal())
2627 return false;
2628 break;
2629 case MVT::f32:
2630 if (!Subtarget.hasVInstructionsF32())
2631 return false;
2632 break;
2633 case MVT::f64:
2634 if (!Subtarget.hasVInstructionsF64())
2635 return false;
2636 break;
2637 }
2638
2639 // Reject elements larger than ELEN.
2640 if (EltVT.getSizeInBits() > Subtarget.getELen())
2641 return false;
2642
2643 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2644 // Don't use RVV for types that don't fit.
2645 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2646 return false;
2647
2648 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2649 // the base fixed length RVV support in place.
2650 if (!VT.isPow2VectorType())
2651 return false;
2652
2653 return true;
2654}
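A simplified standalone version of the gating logic above, assuming the element type itself is supported (and is not i1, which has its own single-register rule). The function and parameter names are hypothetical.

static bool fixedVectorFitsRVVModel(unsigned VTBits, unsigned NumElts,
                                    unsigned EltBits, unsigned MinVLen,
                                    unsigned ELen, unsigned MaxLMUL) {
  if (VTBits > 1024 * 8)
    return false;                            // overall fixed-length size cap
  if (EltBits > ELen)
    return false;                            // element wider than ELEN
  unsigned LMul = (VTBits + MinVLen - 1) / MinVLen; // divideCeil
  if (LMul > MaxLMUL)
    return false;                            // register group would be too big
  return (NumElts & (NumElts - 1)) == 0;     // power-of-two element counts only
}
// e.g. v16i32 (512 bits) with VLEN=128, ELEN=64, MaxLMUL=8 needs LMUL=4 and is
// accepted; v3i32 is rejected by the power-of-two check.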
2655
2656bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2657 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2658}
2659
2660// Return the largest legal scalable vector type that matches VT's element type.
2662 const RISCVSubtarget &Subtarget) {
2663 // This may be called before legal types are setup.
2664 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2665 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2666 "Expected legal fixed length vector!");
2667
2668 unsigned MinVLen = Subtarget.getRealMinVLen();
2669 unsigned MaxELen = Subtarget.getELen();
2670
2671 MVT EltVT = VT.getVectorElementType();
2672 switch (EltVT.SimpleTy) {
2673 default:
2674 llvm_unreachable("unexpected element type for RVV container");
2675 case MVT::i1:
2676 case MVT::i8:
2677 case MVT::i16:
2678 case MVT::i32:
2679 case MVT::i64:
2680 case MVT::bf16:
2681 case MVT::f16:
2682 case MVT::f32:
2683 case MVT::f64: {
2684 // We prefer to use LMUL=1 for VLEN-sized types. Use fractional LMULs for
2685 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2686 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2687 unsigned NumElts =
2689 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2690 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2691 return MVT::getScalableVectorVT(EltVT, NumElts);
2692 }
2693 }
2694}
2695
2697 const RISCVSubtarget &Subtarget) {
2699 Subtarget);
2700}
2701
2703 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2704}
2705
2706// Grow V to consume an entire RVV register.
2708 const RISCVSubtarget &Subtarget) {
2709 assert(VT.isScalableVector() &&
2710 "Expected to convert into a scalable vector!");
2711 assert(V.getValueType().isFixedLengthVector() &&
2712 "Expected a fixed length vector operand!");
2713 SDLoc DL(V);
2714 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2715 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2716}
2717
2718// Shrink V so it's just big enough to maintain a VT's worth of data.
2720 const RISCVSubtarget &Subtarget) {
2722 "Expected to convert into a fixed length vector!");
2723 assert(V.getValueType().isScalableVector() &&
2724 "Expected a scalable vector operand!");
2725 SDLoc DL(V);
2726 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2727 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2728}
2729
2730/// Return the mask type suitable for masking the provided
2731/// vector type. This is simply an i1 element type vector of the same
2732/// (possibly scalable) length.
2733static MVT getMaskTypeFor(MVT VecVT) {
2734 assert(VecVT.isVector());
2736 return MVT::getVectorVT(MVT::i1, EC);
2737}
2738
2739/// Creates an all ones mask suitable for masking a vector of type VecTy with
2740/// vector length VL.
2741static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2742 SelectionDAG &DAG) {
2743 MVT MaskVT = getMaskTypeFor(VecVT);
2744 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2745}
2746
2747static std::pair<SDValue, SDValue>
2749 const RISCVSubtarget &Subtarget) {
2750 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2751 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2752 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2753 return {Mask, VL};
2754}
2755
2756static std::pair<SDValue, SDValue>
2757getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2758 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2761 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2762 return {Mask, VL};
2763}
2764
2765// Gets the two common "VL" operands: an all-ones mask and the vector length.
2766// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2767// fixed-length, ContainerVT is the scalable vector type it is contained in;
2768// if VecVT is scalable, then ContainerVT should be the same as VecVT.
2769static std::pair<SDValue, SDValue>
2770getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2771 const RISCVSubtarget &Subtarget) {
2772 if (VecVT.isFixedLengthVector())
2773 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2774 Subtarget);
2775 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2776 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2777}
2778
2780 SelectionDAG &DAG) const {
2781 assert(VecVT.isScalableVector() && "Expected scalable vector");
2782 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2783 VecVT.getVectorElementCount());
2784}
2785
2786std::pair<unsigned, unsigned>
2788 const RISCVSubtarget &Subtarget) {
2789 assert(VecVT.isScalableVector() && "Expected scalable vector");
2790
2791 unsigned EltSize = VecVT.getScalarSizeInBits();
2792 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2793
2794 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2795 unsigned MaxVLMAX =
2796 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2797
2798 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2799 unsigned MinVLMAX =
2800 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2801
2802 return std::make_pair(MinVLMAX, MaxVLMAX);
2803}
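The bounds above follow the usual RVV relation VLMAX = LMUL x VLEN / SEW, evaluated at the minimum and maximum VLEN. A sketch with LMUL expressed through the type's minimum size in bits (one register block assumed to be 64 bits); the helper name is hypothetical.

#include <utility>

static std::pair<unsigned, unsigned>
vlmaxBounds(unsigned EltBits, unsigned MinTypeBits, unsigned MinVLen,
            unsigned MaxVLen) {
  auto VLMax = [&](unsigned VLen) {
    // (VLEN / SEW) * LMUL, with LMUL = MinTypeBits / 64.
    return (VLen / EltBits) * MinTypeBits / 64;
  };
  return {VLMax(MinVLen), VLMax(MaxVLen)};
}
// e.g. nxv4i32 (SEW=32, minimum size 128 bits, i.e. LMUL=2) with VLEN in
// [128, 512] gives {MinVLMAX, MaxVLMAX} = {8, 32}.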
2804
2805// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2806// of either are (currently) supported. This can get us into an infinite loop
2807// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2808// as a ..., etc.
2809// Until either (or both) of these can reliably lower any node, reporting that
2810// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2811// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2812// which is not desirable.
2814 EVT VT, unsigned DefinedValues) const {
2815 return false;
2816}
2817
2819 // TODO: Here we assume the reciprocal throughput of LMUL_1 is 1; in
2820 // practice it is implementation-defined.
2821 if (!VT.isVector())
2823 unsigned DLenFactor = Subtarget.getDLenFactor();
2824 unsigned Cost;
2825 if (VT.isScalableVector()) {
2826 unsigned LMul;
2827 bool Fractional;
2828 std::tie(LMul, Fractional) =
2830 if (Fractional)
2831 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2832 else
2833 Cost = (LMul * DLenFactor);
2834 } else {
2835 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2836 }
2837 return Cost;
2838}
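The cost model above in plain arithmetic: with DLEN = VLEN / DLenFactor (DLenFactor is 2 when DLEN is half of VLEN), scalable types scale with their register-group size and fixed-length types with how many DLEN chunks they occupy. A sketch, not the TTI interface; names are hypothetical.

// Cost of one LMUL-scaled operation for a scalable type. For fractional LMUL,
// 'LMul' holds the divisor (2 for mf2, 4 for mf4, ...).
static unsigned scalableOpCost(unsigned LMul, bool Fractional,
                               unsigned DLenFactor) {
  if (Fractional)
    return LMul <= DLenFactor ? DLenFactor / LMul : 1;
  return LMul * DLenFactor;
}

static unsigned fixedOpCost(unsigned VTBits, unsigned MinVLen,
                            unsigned DLenFactor) {
  unsigned DLen = MinVLen / DLenFactor;
  return (VTBits + DLen - 1) / DLen; // divideCeil(VTBits, DLEN)
}
// With VLEN=128 and DLEN=64 (DLenFactor=2): an m4 op costs 8, an mf2 op costs
// 1, and a 256-bit fixed-length op costs 4.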
2839
2840
2841/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2842/// is generally quadratic in the number of vregs implied by LMUL. Note that
2843/// the operands (index and possibly mask) are handled separately.
2845 return getLMULCost(VT) * getLMULCost(VT);
2846}
2847
2848/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2849/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2855/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2856/// for the type VT. (This does not cover the vslide1up or vslide1down
2857/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2860 return getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2868 return getLMULCost(VT);
2869}
2870
2872 const RISCVSubtarget &Subtarget) {
2873 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2874 // bf16 conversions are always promoted to f32.
2875 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2876 Op.getValueType() == MVT::bf16) {
2877 bool IsStrict = Op->isStrictFPOpcode();
2878
2879 SDLoc DL(Op);
2880 if (IsStrict) {
2881 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2882 {Op.getOperand(0), Op.getOperand(1)});
2883 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2884 {Op.getValueType(), MVT::Other},
2885 {Val.getValue(1), Val.getValue(0),
2886 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2887 }
2888 return DAG.getNode(
2889 ISD::FP_ROUND, DL, Op.getValueType(),
2890 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2892 }
2893
2894 // Other operations are legal.
2895 return Op;
2896}
2897
2899 const RISCVSubtarget &Subtarget) {
2900 // RISC-V FP-to-int conversions saturate to the destination register size, but
2901 // don't produce 0 for nan. We can use a conversion instruction and fix the
2902 // nan case with a compare and a select.
2903 SDValue Src = Op.getOperand(0);
2904
2905 MVT DstVT = Op.getSimpleValueType();
2906 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2907
2908 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2909
2910 if (!DstVT.isVector()) {
2911 // For bf16 or for f16 in the absence of Zfh, promote to f32, then saturate
2912 // the result.
2913 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2914 Src.getValueType() == MVT::bf16) {
2915 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2916 }
2917
2918 unsigned Opc;
2919 if (SatVT == DstVT)
2920 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2921 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2922 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2923 else
2924 return SDValue();
2925 // FIXME: Support other SatVTs by clamping before or after the conversion.
2926
2927 SDLoc DL(Op);
2928 SDValue FpToInt = DAG.getNode(
2929 Opc, DL, DstVT, Src,
2931
2932 if (Opc == RISCVISD::FCVT_WU_RV64)
2933 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2934
2935 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2936 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2938 }
2939
2940 // Vectors.
2941
2942 MVT DstEltVT = DstVT.getVectorElementType();
2943 MVT SrcVT = Src.getSimpleValueType();
2944 MVT SrcEltVT = SrcVT.getVectorElementType();
2945 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2946 unsigned DstEltSize = DstEltVT.getSizeInBits();
2947
2948 // Only handle saturating to the destination type.
2949 if (SatVT != DstEltVT)
2950 return SDValue();
2951
2952 MVT DstContainerVT = DstVT;
2953 MVT SrcContainerVT = SrcVT;
2954 if (DstVT.isFixedLengthVector()) {
2955 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2956 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2957 assert(DstContainerVT.getVectorElementCount() ==
2958 SrcContainerVT.getVectorElementCount() &&
2959 "Expected same element count");
2960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2961 }
2962
2963 SDLoc DL(Op);
2964
2965 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2966
2967 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2968 {Src, Src, DAG.getCondCode(ISD::SETNE),
2969 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2970
2971 // Need to widen by more than 1 step, promote the FP type, then do a widening
2972 // convert.
2973 if (DstEltSize > (2 * SrcEltSize)) {
2974 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2975 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2976 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2977 }
2978
2979 MVT CvtContainerVT = DstContainerVT;
2980 MVT CvtEltVT = DstEltVT;
2981 if (SrcEltSize > (2 * DstEltSize)) {
2982 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2983 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2984 }
2985
2986 unsigned RVVOpc =
2988 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2989
2990 while (CvtContainerVT != DstContainerVT) {
2991 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2992 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2993 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2994 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2996 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2997 }
2998
2999 SDValue SplatZero = DAG.getNode(
3000 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3001 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3002 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3003 Res, DAG.getUNDEF(DstContainerVT), VL);
3004
3005 if (DstVT.isFixedLengthVector())
3006 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3007
3008 return Res;
3009}
3010
3012 const RISCVSubtarget &Subtarget) {
3013 bool IsStrict = Op->isStrictFPOpcode();
3014 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3015
3016 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3017 // bf16 conversions are always promoted to f32.
3018 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3019 SrcVal.getValueType() == MVT::bf16) {
3020 SDLoc DL(Op);
3021 if (IsStrict) {
3022 SDValue Ext =
3023 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3024 {Op.getOperand(0), SrcVal});
3025 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3026 {Ext.getValue(1), Ext.getValue(0)});
3027 }
3028 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3029 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3030 }
3031
3032 // Other operations are legal.
3033 return Op;
3034}
3035
3037 switch (Opc) {
3038 case ISD::FROUNDEVEN:
3040 case ISD::VP_FROUNDEVEN:
3041 return RISCVFPRndMode::RNE;
3042 case ISD::FTRUNC:
3043 case ISD::STRICT_FTRUNC:
3044 case ISD::VP_FROUNDTOZERO:
3045 return RISCVFPRndMode::RTZ;
3046 case ISD::FFLOOR:
3047 case ISD::STRICT_FFLOOR:
3048 case ISD::VP_FFLOOR:
3049 return RISCVFPRndMode::RDN;
3050 case ISD::FCEIL:
3051 case ISD::STRICT_FCEIL:
3052 case ISD::VP_FCEIL:
3053 return RISCVFPRndMode::RUP;
3054 case ISD::FROUND:
3055 case ISD::STRICT_FROUND:
3056 case ISD::VP_FROUND:
3057 return RISCVFPRndMode::RMM;
3058 case ISD::FRINT:
3059 case ISD::VP_FRINT:
3060 return RISCVFPRndMode::DYN;
3061 }
3062
3064}
3065
3066// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3067// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3068// the integer domain and back. Taking care to avoid converting values that are
3069// nan or already correct.
3070static SDValue
3072 const RISCVSubtarget &Subtarget) {
3073 MVT VT = Op.getSimpleValueType();
3074 assert(VT.isVector() && "Unexpected type");
3075
3076 SDLoc DL(Op);
3077
3078 SDValue Src = Op.getOperand(0);
3079
3080 MVT ContainerVT = VT;
3081 if (VT.isFixedLengthVector()) {
3082 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3083 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3084 }
3085
3086 SDValue Mask, VL;
3087 if (Op->isVPOpcode()) {
3088 Mask = Op.getOperand(1);
3089 if (VT.isFixedLengthVector())
3090 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3091 Subtarget);
3092 VL = Op.getOperand(2);
3093 } else {
3094 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3095 }
3096
3097 // Freeze the source since we are increasing the number of uses.
3098 Src = DAG.getFreeze(Src);
3099
3100 // We do the conversion on the absolute value and fix the sign at the end.
3101 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3102
3103 // Determine the largest integer that can be represented exactly. This and
3104 // values larger than it don't have any fractional bits so don't need to
3105 // be converted.
3106 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3107 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3108 APFloat MaxVal = APFloat(FltSem);
3109 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3110 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3111 SDValue MaxValNode =
3112 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3113 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3114 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3115
3116 // If abs(Src) was larger than MaxVal or nan, keep it.
3117 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3118 Mask =
3119 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3120 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3121 Mask, Mask, VL});
3122
3123 // Truncate to integer and convert back to FP.
3124 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3125 MVT XLenVT = Subtarget.getXLenVT();
3126 SDValue Truncated;
3127
3128 switch (Op.getOpcode()) {
3129 default:
3130 llvm_unreachable("Unexpected opcode");
3131 case ISD::FRINT:
3132 case ISD::VP_FRINT:
3133 case ISD::FCEIL:
3134 case ISD::VP_FCEIL:
3135 case ISD::FFLOOR:
3136 case ISD::VP_FFLOOR:
3137 case ISD::FROUND:
3138 case ISD::FROUNDEVEN:
3139 case ISD::VP_FROUND:
3140 case ISD::VP_FROUNDEVEN:
3141 case ISD::VP_FROUNDTOZERO: {
3144 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3145 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3146 break;
3147 }
3148 case ISD::FTRUNC:
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3150 Mask, VL);
3151 break;
3152 case ISD::FNEARBYINT:
3153 case ISD::VP_FNEARBYINT:
3154 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3155 Mask, VL);
3156 break;
3157 }
3158
3159 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3160 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3161 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3162 Mask, VL);
3163
3164 // Restore the original sign so that -0.0 is preserved.
3165 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3166 Src, Src, Mask, VL);
3167
3168 if (!VT.isFixedLengthVector())
3169 return Truncated;
3170
3171 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3172}
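The MaxVal splat above is 2^(P-1), where P is the format's precision (24 for f32, 53 for f64): at or beyond that magnitude every representable value is already an integer, so only lanes below it are converted. A standalone check of that threshold:

#include <cassert>
#include <cmath>
#include <limits>

template <typename T> static T exactIntegerThreshold() {
  return std::ldexp(T(1), std::numeric_limits<T>::digits - 1);
}

int main() {
  assert(exactIntegerThreshold<float>() == 8388608.0f);          // 2^23
  assert(exactIntegerThreshold<double>() == 4503599627370496.0); // 2^52
  // Just below the threshold, values with fractional parts still exist.
  float BelowThreshold = std::nextafter(8388608.0f, 0.0f);       // 8388607.5
  assert(BelowThreshold != std::floor(BelowThreshold));
}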
3173
3174// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3175// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3176// to qNaN and converting the new source to integer and back to FP.
3177static SDValue
3179 const RISCVSubtarget &Subtarget) {
3180 SDLoc DL(Op);
3181 MVT VT = Op.getSimpleValueType();
3182 SDValue Chain = Op.getOperand(0);
3183 SDValue Src = Op.getOperand(1);
3184
3185 MVT ContainerVT = VT;
3186 if (VT.isFixedLengthVector()) {
3187 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3188 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3189 }
3190
3191 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3192
3193 // Freeze the source since we are increasing the number of uses.
3194 Src = DAG.getFreeze(Src);
3195
3196 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3197 MVT MaskVT = Mask.getSimpleValueType();
3199 DAG.getVTList(MaskVT, MVT::Other),
3200 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3201 DAG.getUNDEF(MaskVT), Mask, VL});
3202 Chain = Unorder.getValue(1);
3204 DAG.getVTList(ContainerVT, MVT::Other),
3205 {Chain, Src, Src, Src, Unorder, VL});
3206 Chain = Src.getValue(1);
3207
3208 // We do the conversion on the absolute value and fix the sign at the end.
3209 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3210
3211 // Determine the largest integer that can be represented exactly. This and
3212 // values larger than it don't have any fractional bits so don't need to
3213 // be converted.
3214 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3215 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3216 APFloat MaxVal = APFloat(FltSem);
3217 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3218 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3219 SDValue MaxValNode =
3220 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3221 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3222 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3223
3224 // If abs(Src) was larger than MaxVal or nan, keep it.
3225 Mask = DAG.getNode(
3226 RISCVISD::SETCC_VL, DL, MaskVT,
3227 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3228
3229 // Truncate to integer and convert back to FP.
3230 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3231 MVT XLenVT = Subtarget.getXLenVT();
3232 SDValue Truncated;
3233
3234 switch (Op.getOpcode()) {
3235 default:
3236 llvm_unreachable("Unexpected opcode");
3237 case ISD::STRICT_FCEIL:
3238 case ISD::STRICT_FFLOOR:
3239 case ISD::STRICT_FROUND:
3243 Truncated = DAG.getNode(
3244 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3245 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3246 break;
3247 }
3248 case ISD::STRICT_FTRUNC:
3249 Truncated =
3251 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3252 break;
3255 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3256 Mask, VL);
3257 break;
3258 }
3259 Chain = Truncated.getValue(1);
3260
3261 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3262 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3263 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3264 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3265 Truncated, Mask, VL);
3266 Chain = Truncated.getValue(1);
3267 }
3268
3269 // Restore the original sign so that -0.0 is preserved.
3270 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3271 Src, Src, Mask, VL);
3272
3273 if (VT.isFixedLengthVector())
3274 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3275 return DAG.getMergeValues({Truncated, Chain}, DL);
3276}
3277
3278static SDValue
3280 const RISCVSubtarget &Subtarget) {
3281 MVT VT = Op.getSimpleValueType();
3282 if (VT.isVector())
3283 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3284
3285 if (DAG.shouldOptForSize())
3286 return SDValue();
3287
3288 SDLoc DL(Op);
3289 SDValue Src = Op.getOperand(0);
3290
3291 // Create an integer the size of the mantissa with the MSB set. This and all
3292 // values larger than it don't have any fractional bits so don't need to be
3293 // converted.
3294 const fltSemantics &FltSem = VT.getFltSemantics();
3295 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3296 APFloat MaxVal = APFloat(FltSem);
3297 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3298 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3299 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3300
3302 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3303 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3304}
3305
3306// Expand vector LRINT and LLRINT by converting to the integer domain.
3308 const RISCVSubtarget &Subtarget) {
3309 MVT VT = Op.getSimpleValueType();
3310 assert(VT.isVector() && "Unexpected type");
3311
3312 SDLoc DL(Op);
3313 SDValue Src = Op.getOperand(0);
3314 MVT ContainerVT = VT;
3315
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3322 SDValue Truncated = DAG.getNode(
3323 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3325 VL);
3326
3327 if (!VT.isFixedLengthVector())
3328 return Truncated;
3329
3330 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3331}
3332
3333static SDValue
3335 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3336 SDValue Offset, SDValue Mask, SDValue VL,
3338 if (Passthru.isUndef())
3340 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3341 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3342 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3343}
3344
3345static SDValue
3346getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3347 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3348 SDValue VL,
3350 if (Passthru.isUndef())
3352 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3353 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3354 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3355}
3356
3357static MVT getLMUL1VT(MVT VT) {
3359 "Unexpected vector MVT");
3363}
3364
3368 int64_t Addend;
3369};
3370
3371static std::optional<APInt> getExactInteger(const APFloat &APF,
3373 // We will use a SINT_TO_FP to materialize this constant so we should use a
3374 // signed APSInt here.
3375 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3376 // We use an arbitrary rounding mode here. If a floating-point is an exact
3377 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3378 // the rounding mode changes the output value, then it is not an exact
3379 // integer.
3381 bool IsExact;
3382 // If it is out of signed integer range, it will return an invalid operation.
3383 // If it is not an exact integer, IsExact is false.
3384 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3386 !IsExact)
3387 return std::nullopt;
3388 return ValInt.extractBits(BitWidth, 0);
3389}
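A plain-C++ approximation (not the APFloat-based helper above) of "is this floating-point value exactly an integer that fits in BitWidth signed bits":

#include <cmath>
#include <cstdint>
#include <optional>

static std::optional<int64_t> exactInteger(double V, unsigned BitWidth) {
  if (!std::isfinite(V) || std::nearbyint(V) != V)
    return std::nullopt;                      // not an exact integer
  double Lo = -std::ldexp(1.0, BitWidth - 1); // -2^(BitWidth-1)
  double Hi = std::ldexp(1.0, BitWidth - 1);  //  2^(BitWidth-1)
  if (V < Lo || V >= Hi)
    return std::nullopt;                      // out of signed range
  return static_cast<int64_t>(V);
}
// exactInteger(4.0, 8) -> 4; exactInteger(2.5, 8) -> nullopt (not exact);
// exactInteger(300.0, 8) -> nullopt (does not fit in a signed 8-bit value).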
3390
3391// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3392// to the (non-zero) step S and start value X. This can be then lowered as the
3393// RVV sequence (VID * S) + X, for example.
3394// The step S is represented as an integer numerator divided by a positive
3395// denominator. Note that the implementation currently only identifies
3396// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3397// cannot detect 2/3, for example.
3398// Note that this method will also match potentially unappealing index
3399// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3400// determine whether this is worth generating code for.
3401//
3402// EltSizeInBits is the size of the type that the sequence will be calculated
3403// in, i.e. SEW for build_vectors or XLEN for address calculations.
3404static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3405 unsigned EltSizeInBits) {
3406 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3407 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3408 return std::nullopt;
3409 bool IsInteger = Op.getValueType().isInteger();
3410
3411 std::optional<unsigned> SeqStepDenom;
3412 std::optional<APInt> SeqStepNum;
3413 std::optional<APInt> SeqAddend;
3414 std::optional<std::pair<APInt, unsigned>> PrevElt;
3415 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3416
3417 // First extract the ops into a list of constant integer values. This may not
3418 // be possible for floats if they're not all representable as integers.
3420 const unsigned OpSize = Op.getScalarValueSizeInBits();
3421 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3422 if (Elt.isUndef()) {
3423 Elts[Idx] = std::nullopt;
3424 continue;
3425 }
3426 if (IsInteger) {
3427 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3428 } else {
3429 auto ExactInteger =
3430 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3431 if (!ExactInteger)
3432 return std::nullopt;
3433 Elts[Idx] = *ExactInteger;
3434 }
3435 }
3436
3437 for (auto [Idx, Elt] : enumerate(Elts)) {
3438 // Assume undef elements match the sequence; we just have to be careful
3439 // when interpolating across them.
3440 if (!Elt)
3441 continue;
3442
3443 if (PrevElt) {
3444 // Calculate the step since the last non-undef element, and ensure
3445 // it's consistent across the entire sequence.
3446 unsigned IdxDiff = Idx - PrevElt->second;
3447 APInt ValDiff = *Elt - PrevElt->first;
3448
3449 // A zero value difference means that we're somewhere in the middle
3450 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3451 // step change before evaluating the sequence.
3452 if (ValDiff == 0)
3453 continue;
3454
3455 int64_t Remainder = ValDiff.srem(IdxDiff);
3456 // Normalize the step if it's greater than 1.
3457 if (Remainder != ValDiff.getSExtValue()) {
3458 // The difference must cleanly divide the element span.
3459 if (Remainder != 0)
3460 return std::nullopt;
3461 ValDiff = ValDiff.sdiv(IdxDiff);
3462 IdxDiff = 1;
3463 }
3464
3465 if (!SeqStepNum)
3466 SeqStepNum = ValDiff;
3467 else if (ValDiff != SeqStepNum)
3468 return std::nullopt;
3469
3470 if (!SeqStepDenom)
3471 SeqStepDenom = IdxDiff;
3472 else if (IdxDiff != *SeqStepDenom)
3473 return std::nullopt;
3474 }
3475
3476 // Record this non-undef element for later.
3477 if (!PrevElt || PrevElt->first != *Elt)
3478 PrevElt = std::make_pair(*Elt, Idx);
3479 }
3480
3481 // We need to have logged a step for this to count as a legal index sequence.
3482 if (!SeqStepNum || !SeqStepDenom)
3483 return std::nullopt;
3484
3485 // Loop back through the sequence and validate elements we might have skipped
3486 // while waiting for a valid step. While doing this, log any sequence addend.
3487 for (auto [Idx, Elt] : enumerate(Elts)) {
3488 if (!Elt)
3489 continue;
3490 APInt ExpectedVal =
3491 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3492 *SeqStepNum)
3493 .sdiv(*SeqStepDenom);
3494
3495 APInt Addend = *Elt - ExpectedVal;
3496 if (!SeqAddend)
3497 SeqAddend = Addend;
3498 else if (Addend != SeqAddend)
3499 return std::nullopt;
3500 }
3501
3502 assert(SeqAddend && "Must have an addend if we have a step");
3503
3504 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3505 SeqAddend->getSExtValue()};
3506}
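A much-simplified detector (no undef handling, integer steps only) for the elt[i] = i * Step + Addend pattern matched above, showing what a VID-based lowering consumes; the helper name is hypothetical.

#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

static std::optional<std::pair<int64_t, int64_t>>
matchSimpleVID(const std::vector<int64_t> &Elts) {
  if (Elts.size() < 2)
    return std::nullopt;
  int64_t Step = Elts[1] - Elts[0];
  if (Step == 0)
    return std::nullopt;                  // splats are handled elsewhere
  for (size_t I = 2; I < Elts.size(); ++I)
    if (Elts[I] - Elts[I - 1] != Step)
      return std::nullopt;
  return std::make_pair(Step, Elts[0]);   // {Step, Addend}
}
// {1, 3, 5, 7} -> {2, 1}: lowered as (vid.v * 2) + 1.
// {0, 50939494} also matches; as noted above, profitability is left to the
// caller.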
3507
3508// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3509// and lower it as a VRGATHER_VX_VL from the source vector.
3510static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3511 SelectionDAG &DAG,
3512 const RISCVSubtarget &Subtarget) {
3513 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3514 return SDValue();
3515 SDValue Vec = SplatVal.getOperand(0);
3516 // Don't perform this optimization for i1 vectors, or if the element types
3517 // are different.
3518 // FIXME: Support i1 vectors, maybe by promoting to i8?
3519 MVT EltTy = VT.getVectorElementType();
3520 if (EltTy == MVT::i1 ||
3522 return SDValue();
3523 SDValue Idx = SplatVal.getOperand(1);
3524 // The index must be a legal type.
3525 if (Idx.getValueType() != Subtarget.getXLenVT())
3526 return SDValue();
3527
3528 // Check that Index lies within VT
3529 // TODO: Can we check if the Index is constant and known in-bounds?
3531 return SDValue();
3532
3533 MVT ContainerVT = VT;
3534 if (VT.isFixedLengthVector())
3535 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3536
3537 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3538 DAG.getUNDEF(ContainerVT), Vec,
3539 DAG.getVectorIdxConstant(0, DL));
3540
3541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3542
3543 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3544 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3545
3546 if (!VT.isFixedLengthVector())
3547 return Gather;
3548
3549 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3550}
3551
3552/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3553/// which constitute a large proportion of the elements. In such cases we can
3554/// splat a vector with the dominant element and make up the shortfall with
3555/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3556/// Note that this includes vectors of 2 elements by association. The
3557/// upper-most element is the "dominant" one, allowing us to use a splat to
3558/// "insert" the upper element, and an insert of the lower element at position
3559/// 0, which improves codegen.
3561 const RISCVSubtarget &Subtarget) {
3562 MVT VT = Op.getSimpleValueType();
3563 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3564
3565 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3566
3567 SDLoc DL(Op);
3568 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3569
3570 MVT XLenVT = Subtarget.getXLenVT();
3571 unsigned NumElts = Op.getNumOperands();
3572
3573 SDValue DominantValue;
3574 unsigned MostCommonCount = 0;
3575 DenseMap<SDValue, unsigned> ValueCounts;
3576 unsigned NumUndefElts =
3577 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3578
3579 // Track the number of scalar loads we know we'd be inserting, estimated as
3580 // any non-zero floating-point constant. Other kinds of element are either
3581 // already in registers or are materialized on demand. The threshold at which
3582 // a vector load is more desirable than several scalar materializion and
3583 // vector-insertion instructions is not known.
3584 unsigned NumScalarLoads = 0;
3585
3586 for (SDValue V : Op->op_values()) {
3587 if (V.isUndef())
3588 continue;
3589
3590 unsigned &Count = ValueCounts[V];
3591 if (0 == Count)
3592 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3593 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3594
3595 // Is this value dominant? In case of a tie, prefer the highest element as
3596 // it's cheaper to insert near the beginning of a vector than it is at the
3597 // end.
3598 if (++Count >= MostCommonCount) {
3599 DominantValue = V;
3600 MostCommonCount = Count;
3601 }
3602 }
3603
3604 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3605 unsigned NumDefElts = NumElts - NumUndefElts;
3606 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3607
3608 // Don't perform this optimization when optimizing for size, since
3609 // materializing elements and inserting them tends to cause code bloat.
3610 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3611 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3612 ((MostCommonCount > DominantValueCountThreshold) ||
3613 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3614 // Start by splatting the most common element.
3615 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3616
3617 DenseSet<SDValue> Processed{DominantValue};
3618
3619 // We can handle an insert into the last element (of a splat) via
3620 // v(f)slide1down. This is slightly better than the vslideup insert
3621 // lowering as it avoids the need for a vector group temporary. It
3622 // is also better than using vmerge.vx as it avoids the need to
3623 // materialize the mask in a vector register.
3624 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3625 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3626 LastOp != DominantValue) {
3627 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3628 auto OpCode =
3630 if (!VT.isFloatingPoint())
3631 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3632 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3633 LastOp, Mask, VL);
3634 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3635 Processed.insert(LastOp);
3636 }
3637
3638 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3639 for (const auto &OpIdx : enumerate(Op->ops())) {
3640 const SDValue &V = OpIdx.value();
3641 if (V.isUndef() || !Processed.insert(V).second)
3642 continue;
3643 if (ValueCounts[V] == 1) {
3644 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3645 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3646 } else {
3647 // Blend in all instances of this value using a VSELECT, using a
3648 // mask where each bit signals whether that element is the one
3649 // we're after.
3651 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3652 return DAG.getConstant(V == V1, DL, XLenVT);
3653 });
3654 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3655 DAG.getBuildVector(SelMaskTy, DL, Ops),
3656 DAG.getSplatBuildVector(VT, DL, V), Vec);
3657 }
3658 }
3659
3660 return Vec;
3661 }
3662
3663 return SDValue();
3664}
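The counting logic above, reduced to a standalone sketch: pick the most common element (ties go to the later operand, matching the '>=' update) and see how many lanes would still need individual inserts after splatting it.

#include <map>
#include <utility>
#include <vector>

static std::pair<int, unsigned> pickDominant(const std::vector<int> &Elts) {
  std::map<int, unsigned> Counts;
  int Dominant = Elts.front();
  unsigned Best = 0;
  for (int V : Elts) {
    if (++Counts[V] >= Best) {
      Dominant = V;
      Best = Counts[V];
    }
  }
  return {Dominant, static_cast<unsigned>(Elts.size()) - Best};
}
// {3, 3, 7, 3} -> {3, 1}: splat 3, then a single insert of 7 at lane 2.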
3665
3667 const RISCVSubtarget &Subtarget) {
3668 MVT VT = Op.getSimpleValueType();
3669 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3670
3671 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3672
3673 SDLoc DL(Op);
3674 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3675
3676 MVT XLenVT = Subtarget.getXLenVT();
3677 unsigned NumElts = Op.getNumOperands();
3678
3679 if (VT.getVectorElementType() == MVT::i1) {
3680 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3681 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3682 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3683 }
3684
3685 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3686 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3687 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3688 }
3689
3690 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3691 // scalar integer chunks whose bit-width depends on the number of mask
3692 // bits and XLEN.
3693 // First, determine the most appropriate scalar integer type to use. This
3694 // is at most XLenVT, but may be shrunk to a smaller vector element type
3695 // according to the size of the final vector - use i8 chunks rather than
3696 // XLenVT if we're producing a v8i1. This results in more consistent
3697 // codegen across RV32 and RV64.
3698 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3699 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3700 // If we have to use more than one INSERT_VECTOR_ELT then this
3701 // optimization is likely to increase code size; avoid performing it in
3702 // such a case. We can use a load from a constant pool in this case.
3703 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3704 return SDValue();
3705 // Now we can create our integer vector type. Note that it may be larger
3706 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3707 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3708 MVT IntegerViaVecVT =
3709 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3710 IntegerViaVecElts);
3711
3712 uint64_t Bits = 0;
3713 unsigned BitPos = 0, IntegerEltIdx = 0;
3714 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3715
3716 for (unsigned I = 0; I < NumElts;) {
3717 SDValue V = Op.getOperand(I);
3718 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3719 Bits |= ((uint64_t)BitValue << BitPos);
3720 ++BitPos;
3721 ++I;
3722
3723 // Once we accumulate enough bits to fill our scalar type or process the
3724 // last element, insert into our vector and clear our accumulated data.
3725 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3726 if (NumViaIntegerBits <= 32)
3727 Bits = SignExtend64<32>(Bits);
3728 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3729 Elts[IntegerEltIdx] = Elt;
3730 Bits = 0;
3731 BitPos = 0;
3732 IntegerEltIdx++;
3733 }
3734 }
3735
3736 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3737
3738 if (NumElts < NumViaIntegerBits) {
3739 // If we're producing a smaller vector than our minimum legal integer
3740 // type, bitcast to the equivalent (known-legal) mask type, and extract
3741 // our final mask.
3742 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3743 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3744 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3745 DAG.getConstant(0, DL, XLenVT));
3746 } else {
3747 // Else we must have produced an integer type with the same size as the
3748 // mask type; bitcast for the final result.
3749 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3750 Vec = DAG.getBitcast(VT, Vec);
3751 }
3752
3753 return Vec;
3754 }
3755
3756 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3757 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3759 if (!VT.isFloatingPoint())
3760 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3761 Splat =
3762 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3763 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3764 }
3765
3766 // Try and match index sequences, which we can lower to the vid instruction
3767 // with optional modifications. An all-undef vector is matched by
3768 // getSplatValue, above.
3769 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3770 int64_t StepNumerator = SimpleVID->StepNumerator;
3771 unsigned StepDenominator = SimpleVID->StepDenominator;
3772 int64_t Addend = SimpleVID->Addend;
3773
3774 assert(StepNumerator != 0 && "Invalid step");
3775 bool Negate = false;
3776 int64_t SplatStepVal = StepNumerator;
3777 unsigned StepOpcode = ISD::MUL;
3778 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3779 // anyway as the shift of 63 won't fit in uimm5.
3780 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3781 isPowerOf2_64(std::abs(StepNumerator))) {
3782 Negate = StepNumerator < 0;
3783 StepOpcode = ISD::SHL;
3784 SplatStepVal = Log2_64(std::abs(StepNumerator));
3785 }
3786
3787 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3788 // threshold since it's the immediate value many RVV instructions accept.
3789 // There is no vmul.vi instruction so ensure multiply constant can fit in
3790 // a single addi instruction.
3791 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3792 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3793 isPowerOf2_32(StepDenominator) &&
3794 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3795 MVT VIDVT =
3796 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3797 MVT VIDContainerVT =
3798 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3799 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3800 // Convert right out of the scalable type so we can use standard ISD
3801 // nodes for the rest of the computation. If we used scalable types with
3802 // these, we'd lose the fixed-length vector info and generate worse
3803 // vsetvli code.
3804 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3805 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3806 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3807 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3808 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3809 }
3810 if (StepDenominator != 1) {
3811 SDValue SplatStep =
3812 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3813 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3814 }
3815 if (Addend != 0 || Negate) {
3816 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3817 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3818 VID);
3819 }
3820 if (VT.isFloatingPoint()) {
3821 // TODO: Use vfwcvt to reduce register pressure.
3822 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3823 }
3824 return VID;
3825 }
3826 }
3827
3828 // For very small build_vectors, use a single scalar insert of a constant.
3829 // TODO: Base this on constant rematerialization cost, not size.
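// Illustrative sketch (not from the original source): a constant v4i8
// <1, 2, 3, 4> fits in 32 bits and can be materialized as the single i32
// scalar 0x04030201, inserted into lane 0 of an i32 vector and bitcast back
// to v4i8.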
3830 const unsigned EltBitSize = VT.getScalarSizeInBits();
3831 if (VT.getSizeInBits() <= 32 &&
3832 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3833 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3834 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3835 "Unexpected sequence type");
3836 // If we can use the original VL with the modified element type, this
3837 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3838 // be moved into InsertVSETVLI?
3839 unsigned ViaVecLen =
3840 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3841 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3842
3843 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3844 uint64_t SplatValue = 0;
3845 // Construct the amalgamated value at this larger vector type.
3846 for (const auto &OpIdx : enumerate(Op->op_values())) {
3847 const auto &SeqV = OpIdx.value();
3848 if (!SeqV.isUndef())
3849 SplatValue |=
3850 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3851 }
3852
3853 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3854 // achieve better constant materialization.
3855 // On RV32, we need to sign-extend to use getSignedConstant.
3856 if (ViaIntVT == MVT::i32)
3857 SplatValue = SignExtend64<32>(SplatValue);
3858
3859 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3860 DAG.getUNDEF(ViaVecVT),
3861 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3862 DAG.getVectorIdxConstant(0, DL));
3863 if (ViaVecLen != 1)
3864 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3865 MVT::getVectorVT(ViaIntVT, 1), Vec,
3866 DAG.getConstant(0, DL, XLenVT));
3867 return DAG.getBitcast(VT, Vec);
3868 }
3869
3870
3871 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3872 // when re-interpreted as a vector with a larger element type. For example,
3873 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3874 // could be instead splat as
3875 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3876 // TODO: This optimization could also work on non-constant splats, but it
3877 // would require bit-manipulation instructions to construct the splat value.
3878 SmallVector<SDValue> Sequence;
3879 const auto *BV = cast<BuildVectorSDNode>(Op);
3880 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3881 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3882 BV->getRepeatedSequence(Sequence) &&
3883 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3884 unsigned SeqLen = Sequence.size();
3885 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3886 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3887 ViaIntVT == MVT::i64) &&
3888 "Unexpected sequence type");
3889
3890 // If we can use the original VL with the modified element type, this
3891 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3892 // be moved into InsertVSETVLI?
3893 const unsigned RequiredVL = NumElts / SeqLen;
3894 const unsigned ViaVecLen =
3895 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3896 NumElts : RequiredVL;
3897 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3898
3899 unsigned EltIdx = 0;
3900 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3901 uint64_t SplatValue = 0;
3902 // Construct the amalgamated value which can be splatted as this larger
3903 // vector type.
3904 for (const auto &SeqV : Sequence) {
3905 if (!SeqV.isUndef())
3906 SplatValue |=
3907 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3908 EltIdx++;
3909 }
3910
3911 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3912 // achieve better constant materialization.
3913 // On RV32, we need to sign-extend to use getSignedConstant.
3914 if (ViaIntVT == MVT::i32)
3915 SplatValue = SignExtend64<32>(SplatValue);
3916
3917 // Since we can't introduce illegal i64 types at this stage, we can only
3918 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3919 // way we can use RVV instructions to splat.
3920 assert((ViaIntVT.bitsLE(XLenVT) ||
3921 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3922 "Unexpected bitcast sequence");
3923 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3924 SDValue ViaVL =
3925 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3926 MVT ViaContainerVT =
3927 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3928 SDValue Splat =
3929 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3930 DAG.getUNDEF(ViaContainerVT),
3931 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3932 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3933 if (ViaVecLen != RequiredVL)
3934 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3935 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3936 DAG.getConstant(0, DL, XLenVT));
3937 return DAG.getBitcast(VT, Splat);
3938 }
3939 }
3940
3941 // If the number of signbits allows, see if we can lower as a <N x i8>.
3942 // Our main goal here is to reduce LMUL (and thus work) required to
3943 // build the constant, but we will also narrow if the resulting
3944 // narrow vector is known to materialize cheaply.
3945 // TODO: We really should be costing the smaller vector. There are
3946 // profitable cases this misses.
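// Illustrative sketch (not from the original source): on a VLEN=128
// subtarget a v16i16 constant occupies two registers (m2); if every element
// fits in 8 signed bits it is instead built as a v16i8 (m1) and widened with
// a vsext.vf2, halving the LMUL of the expensive build step.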
3947 if (EltBitSize > 8 && VT.isInteger() &&
3948 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3949 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3950 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3951 DL, Op->ops());
3952 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3953 Source, DAG, Subtarget);
3954 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3955 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3956 }
3957
3958 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3959 return Res;
3960
3961 // For constant vectors, use generic constant pool lowering. Otherwise,
3962 // we'd have to materialize constants in GPRs just to move them into the
3963 // vector.
3964 return SDValue();
3965}
3966
3967static unsigned getPACKOpcode(unsigned DestBW,
3968 const RISCVSubtarget &Subtarget) {
3969 switch (DestBW) {
3970 default:
3971 llvm_unreachable("Unsupported pack size");
3972 case 16:
3973 return RISCV::PACKH;
3974 case 32:
3975 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3976 case 64:
3977 assert(Subtarget.is64Bit());
3978 return RISCV::PACK;
3979 }
3980}
3981
3982/// Double the element size of the build vector to reduce the number
3983/// of vslide1down in the build vector chain. In the worst case, this
3984/// trades three scalar operations for 1 vector operation. Scalar
3985/// operations are generally lower latency, and for out-of-order cores
3986/// we also benefit from additional parallelism.
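/// For example (an illustrative sketch, not from the original source), a
/// v8i16 build_vector on RV64 becomes a v4i32 build_vector whose lanes each
/// pack a pair of adjacent i16 elements (via PACKW when Zbkb is available,
/// otherwise with an and/shift/or sequence).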
3987 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3988 const RISCVSubtarget &Subtarget) {
3989 SDLoc DL(Op);
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992 MVT ElemVT = VT.getVectorElementType();
3993 if (!ElemVT.isInteger())
3994 return SDValue();
3995
3996 // TODO: Relax these architectural restrictions, possibly with costing
3997 // of the actual instructions required.
3998 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3999 return SDValue();
4000
4001 unsigned NumElts = VT.getVectorNumElements();
4002 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4003 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4004 NumElts % 2 != 0)
4005 return SDValue();
4006
4007 // Produce [B,A] packed into a type twice as wide. Note that all
4008 // scalars are XLenVT, possibly masked (see below).
4009 MVT XLenVT = Subtarget.getXLenVT();
4010 SDValue Mask = DAG.getConstant(
4011 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4012 auto pack = [&](SDValue A, SDValue B) {
4013 // Bias the scheduling of the inserted operations to near the
4014 // definition of the element - this tends to reduce register
4015 // pressure overall.
4016 SDLoc ElemDL(B);
4017 if (Subtarget.hasStdExtZbkb())
4018 // Note that we're relying on the high bits of the result being
4019 // don't care. For PACKW, the result is *sign* extended.
4020 return SDValue(
4021 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4022 ElemDL, XLenVT, A, B),
4023 0);
4024
4025 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4026 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4027 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4028 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4029 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4030 SDNodeFlags::Disjoint);
4031 };
4032
4033 SmallVector<SDValue> NewOperands;
4034 NewOperands.reserve(NumElts / 2);
4035 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4036 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4037 assert(NumElts == NewOperands.size() * 2);
4038 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4039 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4040 return DAG.getNode(ISD::BITCAST, DL, VT,
4041 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4042}
4043
4044 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4045 const RISCVSubtarget &Subtarget) {
4046 MVT VT = Op.getSimpleValueType();
4047 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4048
4049 MVT EltVT = VT.getVectorElementType();
4050 MVT XLenVT = Subtarget.getXLenVT();
4051
4052 SDLoc DL(Op);
4053
4054 // Proper support for f16 requires Zvfh. bf16 always requires special
4055 // handling. We need to cast the scalar to integer and create an integer
4056 // build_vector.
4057 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4058 MVT IVT = VT.changeVectorElementType(MVT::i16);
4059 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4060 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4061 SDValue Elem = Op.getOperand(I);
4062 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4063 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4064 // Called by LegalizeDAG, we need to use XLenVT operations since we
4065 // can't create illegal types.
4066 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4067 // Manually constant fold so the integer build_vector can be lowered
4068 // better. Waiting for DAGCombine will be too late.
4069 APInt V =
4070 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4071 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4072 } else {
4073 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4074 }
4075 } else {
4076 // Called by scalar type legalizer, we can use i16.
4077 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4078 }
4079 }
4080 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4081 return DAG.getBitcast(VT, Res);
4082 }
4083
4084 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4085 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4086 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4087
4088 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4089
4090 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4091
4092 if (VT.getVectorElementType() == MVT::i1) {
4093 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4094 // vector type, we have a legal equivalently-sized i8 type, so we can use
4095 // that.
4096 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4097 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4098
4099 SDValue WideVec;
4100 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4101 // For a splat, perform a scalar truncate before creating the wider
4102 // vector.
4103 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4104 DAG.getConstant(1, DL, Splat.getValueType()));
4105 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4106 } else {
4107 SmallVector<SDValue, 8> Ops(Op->op_values());
4108 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4109 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4110 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4111 }
4112
4113 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4114 }
4115
4116 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4117 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4118 return Gather;
4119 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4120 : RISCVISD::VMV_V_X_VL;
4121 if (!VT.isFloatingPoint())
4122 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4123 Splat =
4124 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4125 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4126 }
4127
4128 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4129 return Res;
4130
4131 // If we're compiling for an exact VLEN value, we can split our work per
4132 // register in the register group.
4133 if (const auto VLen = Subtarget.getRealVLen();
4134 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4135 MVT ElemVT = VT.getVectorElementType();
4136 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4137 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4138 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4139 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4140 assert(M1VT == getLMUL1VT(M1VT));
4141
4142 // The following semantically builds up a fixed length concat_vector
4143 // of the component build_vectors. We eagerly lower to scalable and
4144 // insert_subvector here to avoid DAG combining it back to a large
4145 // build_vector.
4146 SmallVector<SDValue> BuildVectorOps(Op->ops());
4147 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4148 SDValue Vec = DAG.getUNDEF(ContainerVT);
4149 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4150 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4151 SDValue SubBV =
4152 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4153 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4154 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4155 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4156 DAG.getVectorIdxConstant(InsertIdx, DL));
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159 }
4160
4161 // If we're about to resort to vslide1down (or stack usage), pack our
4162 // elements into the widest scalar type we can. This will force a VL/VTYPE
4163 // toggle, but reduces the critical path, the number of vslide1down ops
4164 // required, and possibly enables scalar folds of the values.
4165 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4166 return Res;
4167
4168 // For m1 vectors, if we have non-undef values in both halves of our vector,
4169 // split the vector into low and high halves, build them separately, then
4170 // use a vselect to combine them. For long vectors, this cuts the critical
4171 // path of the vslide1down sequence in half, and gives us an opportunity
4172 // to special case each half independently. Note that we don't change the
4173 // length of the sub-vectors here, so if both fallback to the generic
4174 // vslide1down path, we should be able to fold the vselect into the final
4175 // vslidedown (for the undef tail) for the first half w/ masking.
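// Illustrative sketch (not from the original source): a fully-defined v8i8
// build_vector of a, b, ..., h is emitted as
//   A = build_vector a, b, c, d, undef, undef, undef, undef
//   B = build_vector undef, undef, undef, undef, e, f, g, h
//   result = vselect <1, 1, 1, 1, 0, 0, 0, 0>, A, B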
4176 unsigned NumElts = VT.getVectorNumElements();
4177 unsigned NumUndefElts =
4178 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4179 unsigned NumDefElts = NumElts - NumUndefElts;
4180 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4181 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4182 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4183 SmallVector<SDValue> MaskVals;
4184 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4185 SubVecAOps.reserve(NumElts);
4186 SubVecBOps.reserve(NumElts);
4187 for (unsigned i = 0; i < NumElts; i++) {
4188 SDValue Elem = Op->getOperand(i);
4189 if (i < NumElts / 2) {
4190 SubVecAOps.push_back(Elem);
4191 SubVecBOps.push_back(UndefElem);
4192 } else {
4193 SubVecAOps.push_back(UndefElem);
4194 SubVecBOps.push_back(Elem);
4195 }
4196 bool SelectMaskVal = (i < NumElts / 2);
4197 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4198 }
4199 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4200 MaskVals.size() == NumElts);
4201
4202 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4203 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4204 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4205 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4206 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4207 }
4208
4209 // Cap the cost at a value linear to the number of elements in the vector.
4210 // The default lowering is to use the stack. The vector store + scalar loads
4211 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4212 // being (at least) linear in LMUL. As a result, using the vslidedown
4213 // lowering for every element ends up being VL*LMUL.
4214 // TODO: Should we be directly costing the stack alternative? Doing so might
4215 // give us a more accurate upper bound.
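// Worked example (illustrative, not from the original source): a
// fully-defined v16i32 with VLEN=128 is m4, so the budget is 16 * 2 = 32
// while the slide sequence would cost 16 * 4 = 64; we give up and let the
// default stack-based lowering handle it.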
4216 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4217
4218 // TODO: unify with TTI getSlideCost.
4219 InstructionCost PerSlideCost = 1;
4220 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4221 default: break;
4222 case RISCVII::VLMUL::LMUL_2:
4223 PerSlideCost = 2;
4224 break;
4225 case RISCVII::VLMUL::LMUL_4:
4226 PerSlideCost = 4;
4227 break;
4228 case RISCVII::VLMUL::LMUL_8:
4229 PerSlideCost = 8;
4230 break;
4231 }
4232
4233 // TODO: Should we be using the build instseq then cost + evaluate scheme
4234 // we use for integer constants here?
4235 unsigned UndefCount = 0;
4236 for (const SDValue &V : Op->ops()) {
4237 if (V.isUndef()) {
4238 UndefCount++;
4239 continue;
4240 }
4241 if (UndefCount) {
4242 LinearBudget -= PerSlideCost;
4243 UndefCount = 0;
4244 }
4245 LinearBudget -= PerSlideCost;
4246 }
4247 if (UndefCount) {
4248 LinearBudget -= PerSlideCost;
4249 }
4250
4251 if (LinearBudget < 0)
4252 return SDValue();
4253
4254 assert((!VT.isFloatingPoint() ||
4255 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4256 "Illegal type which will result in reserved encoding");
4257
4258 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4259
4260 SDValue Vec;
4261 UndefCount = 0;
4262 for (SDValue V : Op->ops()) {
4263 if (V.isUndef()) {
4264 UndefCount++;
4265 continue;
4266 }
4267
4268 // Start our sequence with a TA splat in the hopes that hardware is able to
4269 // recognize there's no dependency on the prior value of our temporary
4270 // register.
4271 if (!Vec) {
4272 Vec = DAG.getSplatVector(VT, DL, V);
4273 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4274 UndefCount = 0;
4275 continue;
4276 }
4277
4278 if (UndefCount) {
4279 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4280 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4281 Vec, Offset, Mask, VL, Policy);
4282 UndefCount = 0;
4283 }
4284 auto OpCode =
4285 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4286 if (!VT.isFloatingPoint())
4287 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4288 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4289 V, Mask, VL);
4290 }
4291 if (UndefCount) {
4292 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4293 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4294 Vec, Offset, Mask, VL, Policy);
4295 }
4296 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4297}
4298
4299static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4300 SDValue Lo, SDValue Hi, SDValue VL,
4301 SelectionDAG &DAG) {
4302 if (!Passthru)
4303 Passthru = DAG.getUNDEF(VT);
4304 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4305 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4306 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4307 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4308 // node in order to try and match RVV vector/scalar instructions.
4309 if ((LoC >> 31) == HiC)
4310 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4311
4312 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4313 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4314 // vlmax vsetvli or vsetivli to change the VL.
4315 // FIXME: Support larger constants?
4316 // FIXME: Support non-constant VLs by saturating?
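// Illustrative sketch (not from the original source): splatting the i64
// value 0x0000000500000005 with VL == 2 can instead be done as a vmv.v.x of
// the i32 value 5 with VL == 4.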
4317 if (LoC == HiC) {
4318 SDValue NewVL;
4319 if (isAllOnesConstant(VL) ||
4320 (isa<RegisterSDNode>(VL) &&
4321 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4322 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4323 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4324 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4325
4326 if (NewVL) {
4327 MVT InterVT =
4328 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4329 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4330 DAG.getUNDEF(InterVT), Lo, NewVL);
4331 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4332 }
4333 }
4334 }
4335
4336 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4337 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4338 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4339 Hi.getConstantOperandVal(1) == 31)
4340 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4341
4342 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4343 // even if it might be sign extended.
4344 if (Hi.isUndef())
4345 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4346
4347 // Fall back to a stack store and stride x0 vector load.
4348 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4349 Hi, VL);
4350}
4351
4352// Called by type legalization to handle splat of i64 on RV32.
4353// FIXME: We can optimize this when the type has sign or zero bits in one
4354// of the halves.
4355static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4356 SDValue Scalar, SDValue VL,
4357 SelectionDAG &DAG) {
4358 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4359 SDValue Lo, Hi;
4360 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4361 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4362}
4363
4364// This function lowers a splat of a scalar operand Splat with the vector
4365// length VL. It ensures the final sequence is type legal, which is useful when
4366// lowering a splat after type legalization.
4367static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4368 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4369 const RISCVSubtarget &Subtarget) {
4370 bool HasPassthru = Passthru && !Passthru.isUndef();
4371 if (!HasPassthru && !Passthru)
4372 Passthru = DAG.getUNDEF(VT);
4373
4374 MVT EltVT = VT.getVectorElementType();
4375 MVT XLenVT = Subtarget.getXLenVT();
4376
4377 if (VT.isFloatingPoint()) {
4378 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4379 EltVT == MVT::bf16) {
4380 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4381 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4382 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4383 else
4384 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4385 MVT IVT = VT.changeVectorElementType(MVT::i16);
4386 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4387 SDValue Splat =
4388 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4389 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4390 }
4391 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4392 }
4393
4394 // Simplest case is that the operand needs to be promoted to XLenVT.
4395 if (Scalar.getValueType().bitsLE(XLenVT)) {
4396 // If the operand is a constant, sign extend to increase our chances
4397 // of being able to use a .vi instruction. ANY_EXTEND would become a
4398 // zero extend and the simm5 check in isel would fail.
4399 // FIXME: Should we ignore the upper bits in isel instead?
4400 unsigned ExtOpc =
4401 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4402 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4403 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4404 }
4405
4406 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4407 "Unexpected scalar for splat lowering!");
4408
4409 if (isOneConstant(VL) && isNullConstant(Scalar))
4410 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4411 DAG.getConstant(0, DL, XLenVT), VL);
4412
4413 // Otherwise use the more complicated splatting algorithm.
4414 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4415}
4416
4417// This function lowers an insert of a scalar operand Scalar into lane
4418// 0 of the vector regardless of the value of VL. The contents of the
4419// remaining lanes of the result vector are unspecified. VL is assumed
4420// to be non-zero.
4421 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4422 const SDLoc &DL, SelectionDAG &DAG,
4423 const RISCVSubtarget &Subtarget) {
4424 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4425
4426 const MVT XLenVT = Subtarget.getXLenVT();
4427 SDValue Passthru = DAG.getUNDEF(VT);
4428
4429 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4430 isNullConstant(Scalar.getOperand(1))) {
4431 SDValue ExtractedVal = Scalar.getOperand(0);
4432 // The element types must be the same.
4433 if (ExtractedVal.getValueType().getVectorElementType() ==
4434 VT.getVectorElementType()) {
4435 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4436 MVT ExtractedContainerVT = ExtractedVT;
4437 if (ExtractedContainerVT.isFixedLengthVector()) {
4438 ExtractedContainerVT = getContainerForFixedLengthVector(
4439 DAG, ExtractedContainerVT, Subtarget);
4440 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4441 ExtractedVal, DAG, Subtarget);
4442 }
4443 if (ExtractedContainerVT.bitsLE(VT))
4444 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4445 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4446 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4447 DAG.getVectorIdxConstant(0, DL));
4448 }
4449 }
4450
4451
4452 if (VT.isFloatingPoint())
4453 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4454 DAG.getUNDEF(VT), Scalar, VL);
4455
4456 // Avoid the tricky legalization cases by falling back to using the
4457 // splat code which already handles it gracefully.
4458 if (!Scalar.getValueType().bitsLE(XLenVT))
4459 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4460 DAG.getConstant(1, DL, XLenVT),
4461 VT, DL, DAG, Subtarget);
4462
4463 // If the operand is a constant, sign extend to increase our chances
4464 // of being able to use a .vi instruction. ANY_EXTEND would become a
4465 // zero extend and the simm5 check in isel would fail.
4466 // FIXME: Should we ignore the upper bits in isel instead?
4467 unsigned ExtOpc =
4468 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4469 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4470 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4471 VL);
4472}
4473
4474// Can this shuffle be performed on exactly one (possibly larger) input?
4475static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4476 SDValue V2) {
4477
4478 if (V2.isUndef() &&
4479 RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4480 return V1;
4481
4482 // Both inputs must be extracts.
4483 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4484 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4485 return SDValue();
4486
4487 // Extracting from the same source.
4488 SDValue Src = V1.getOperand(0);
4489 if (Src != V2.getOperand(0))
4490 return SDValue();
4491
4492 // Src needs to have twice the number of elements.
4493 unsigned NumElts = VT.getVectorNumElements();
4494 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4495 return SDValue();
4496
4497 // The extracts must extract the two halves of the source.
4498 if (V1.getConstantOperandVal(1) != 0 ||
4499 V2.getConstantOperandVal(1) != NumElts)
4500 return SDValue();
4501
4502 return Src;
4503}
4504
4505/// Is this shuffle interleaving contiguous elements from one vector into the
4506/// even elements and contiguous elements from another vector into the odd
4507/// elements. \p EvenSrc will contain the element that should be in the first
4508/// even element. \p OddSrc will contain the element that should be in the first
4509/// odd element. These can be the first element in a source or the element half
4510/// way through the source.
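/// For example (an illustrative sketch, not from the original source), for
/// v8i8 the mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low halves of the
/// two sources and yields EvenSrc == 0 and OddSrc == 8.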
4511static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4512 int &OddSrc, const RISCVSubtarget &Subtarget) {
4513 // We need to be able to widen elements to the next larger integer type.
4514 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4515 return false;
4516
4517 int Size = Mask.size();
4518 int NumElts = VT.getVectorNumElements();
4519 assert(Size == (int)NumElts && "Unexpected mask size");
4520
4521 SmallVector<unsigned, 2> StartIndexes;
4522 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4523 return false;
4524
4525 EvenSrc = StartIndexes[0];
4526 OddSrc = StartIndexes[1];
4527
4528 // One source should be low half of first vector.
4529 if (EvenSrc != 0 && OddSrc != 0)
4530 return false;
4531
4532 // Subvectors will be extracted either at the start of the two input
4533 // vectors, or at the start and middle of the first vector if it's a unary
4534 // interleave.
4535 // In both cases, HalfNumElts will be extracted.
4536 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4537 // we'll create an illegal extract_subvector.
4538 // FIXME: We could support other values using a slidedown first.
4539 int HalfNumElts = NumElts / 2;
4540 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4541}
4542
4543/// Match shuffles that concatenate two vectors, rotate the concatenation,
4544/// and then extract the original number of elements from the rotated result.
4545/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4546/// returned rotation amount is for a rotate right, where elements move from
4547/// higher elements to lower elements. \p LoSrc indicates the first source
4548/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4549/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4550/// 0 or 1 if a rotation is found.
4551///
4552/// NOTE: We talk about rotate to the right which matches how bit shift and
4553/// rotate instructions are described where LSBs are on the right, but LLVM IR
4554/// and the table below write vectors with the lowest elements on the left.
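/// For example (worked from the logic below, not part of the original
/// comment), the first mask in the table below yields a rotation of 3, with
/// LoSrc == 0 and HiSrc == 1.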
4555static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4556 int Size = Mask.size();
4557
4558 // We need to detect various ways of spelling a rotation:
4559 // [11, 12, 13, 14, 15, 0, 1, 2]
4560 // [-1, 12, 13, 14, -1, -1, 1, -1]
4561 // [-1, -1, -1, -1, -1, -1, 1, 2]
4562 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4563 // [-1, 4, 5, 6, -1, -1, 9, -1]
4564 // [-1, 4, 5, 6, -1, -1, -1, -1]
4565 int Rotation = 0;
4566 LoSrc = -1;
4567 HiSrc = -1;
4568 for (int i = 0; i != Size; ++i) {
4569 int M = Mask[i];
4570 if (M < 0)
4571 continue;
4572
4573 // Determine where a rotate vector would have started.
4574 int StartIdx = i - (M % Size);
4575 // The identity rotation isn't interesting, stop.
4576 if (StartIdx == 0)
4577 return -1;
4578
4579 // If we found the tail of a vector the rotation must be the missing
4580 // front. If we found the head of a vector, it must be how much of the
4581 // head.
4582 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4583
4584 if (Rotation == 0)
4585 Rotation = CandidateRotation;
4586 else if (Rotation != CandidateRotation)
4587 // The rotations don't match, so we can't match this mask.
4588 return -1;
4589
4590 // Compute which value this mask is pointing at.
4591 int MaskSrc = M < Size ? 0 : 1;
4592
4593 // Compute which of the two target values this index should be assigned to.
4594 // This reflects whether the high elements are remaining or the low elements
4595 // are remaining.
4596 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4597
4598 // Either set up this value if we've not encountered it before, or check
4599 // that it remains consistent.
4600 if (TargetSrc < 0)
4601 TargetSrc = MaskSrc;
4602 else if (TargetSrc != MaskSrc)
4603 // This may be a rotation, but it pulls from the inputs in some
4604 // unsupported interleaving.
4605 return -1;
4606 }
4607
4608 // Check that we successfully analyzed the mask, and normalize the results.
4609 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4610 assert((LoSrc >= 0 || HiSrc >= 0) &&
4611 "Failed to find a rotated input vector!");
4612
4613 return Rotation;
4614}
4615
4616// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4617// 2, 4, 8 and the integer type Factor-times larger than VT's
4618// element type must be a legal element type.
4619// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4620// -> [p, q, r, s] (Factor=2, Index=1)
4621 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4622 SDValue Src, unsigned Factor,
4623 unsigned Index, SelectionDAG &DAG) {
4624 unsigned EltBits = VT.getScalarSizeInBits();
4625 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4626 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4627 SrcEC.divideCoefficientBy(Factor));
4628 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4629 SrcEC.divideCoefficientBy(Factor));
4630 Src = DAG.getBitcast(WideSrcVT, Src);
4631
4632 unsigned Shift = Index * EltBits;
4633 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4634 DAG.getConstant(Shift, DL, WideSrcVT));
4635 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4636 MVT IntVT = VT.changeVectorElementTypeToInteger();
4637 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4638 DAG.getVectorIdxConstant(0, DL));
4639 return DAG.getBitcast(VT, Res);
4640}
4641
4642// Lower the following shuffle to vslidedown.
4643// a)
4644// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4645// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4646// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4647// b)
4648// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4649// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4650// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4651// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4652// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4653// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4654 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4655 SDValue V1, SDValue V2,
4656 ArrayRef<int> Mask,
4657 const RISCVSubtarget &Subtarget,
4658 SelectionDAG &DAG) {
4659 auto findNonEXTRACT_SUBVECTORParent =
4660 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4661 uint64_t Offset = 0;
4662 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4663 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4664 // a scalable vector. But we don't want to match that case.
4665 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4666 Offset += Parent.getConstantOperandVal(1);
4667 Parent = Parent.getOperand(0);
4668 }
4669 return std::make_pair(Parent, Offset);
4670 };
4671
4672 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4673 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4674
4675 // Extracting from the same source.
4676 SDValue Src = V1Src;
4677 if (Src != V2Src)
4678 return SDValue();
4679
4680 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4681 SmallVector<int, 16> NewMask(Mask);
4682 for (size_t i = 0; i != NewMask.size(); ++i) {
4683 if (NewMask[i] == -1)
4684 continue;
4685
4686 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4687 NewMask[i] = NewMask[i] + V1IndexOffset;
4688 } else {
4689 // Minus NewMask.size() is needed. Otherwise, the b case would be
4690 // <5,6,7,12> instead of <5,6,7,8>.
4691 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4692 }
4693 }
4694
4695 // First index must be known and non-zero. It will be used as the slidedown
4696 // amount.
4697 if (NewMask[0] <= 0)
4698 return SDValue();
4699
4700 // NewMask is also continuous.
4701 for (unsigned i = 1; i != NewMask.size(); ++i)
4702 if (NewMask[i - 1] + 1 != NewMask[i])
4703 return SDValue();
4704
4705 MVT XLenVT = Subtarget.getXLenVT();
4706 MVT SrcVT = Src.getSimpleValueType();
4707 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4708 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4709 SDValue Slidedown =
4710 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4711 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4712 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4713 return DAG.getNode(
4715 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4716 DAG.getConstant(0, DL, XLenVT));
4717}
4718
4719// Because vslideup leaves the destination elements at the start intact, we can
4720// use it to perform shuffles that insert subvectors:
4721//
4722// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4723// ->
4724// vsetvli zero, 8, e8, mf2, ta, ma
4725// vslideup.vi v8, v9, 4
4726//
4727// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4728// ->
4729// vsetvli zero, 5, e8, mf2, tu, ma
4730 // vslideup.vi v8, v9, 2
4731 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4732 SDValue V1, SDValue V2,
4733 ArrayRef<int> Mask,
4734 const RISCVSubtarget &Subtarget,
4735 SelectionDAG &DAG) {
4736 unsigned NumElts = VT.getVectorNumElements();
4737 int NumSubElts, Index;
4738 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4739 Index))
4740 return SDValue();
4741
4742 bool OpsSwapped = Mask[Index] < (int)NumElts;
4743 SDValue InPlace = OpsSwapped ? V2 : V1;
4744 SDValue ToInsert = OpsSwapped ? V1 : V2;
4745
4746 MVT XLenVT = Subtarget.getXLenVT();
4747 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4748 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4749 // We slide up by the index that the subvector is being inserted at, and set
4750 // VL to the index + the number of elements being inserted.
4751 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4752 // If we're adding a suffix to the in place vector, i.e. inserting right
4753 // up to the very end of it, then we don't actually care about the tail.
4754 if (NumSubElts + Index >= (int)NumElts)
4755 Policy |= RISCVII::TAIL_AGNOSTIC;
4756
4757 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4758 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4759 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4760
4761 SDValue Res;
4762 // If we're inserting into the lowest elements, use a tail undisturbed
4763 // vmv.v.v.
4764 if (Index == 0)
4765 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4766 VL);
4767 else
4768 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4769 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4770 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4771}
4772
4773/// Match v(f)slide1up/down idioms. These operations involve sliding
4774/// N-1 elements to make room for an inserted scalar at one end.
4775 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4776 SDValue V1, SDValue V2,
4777 ArrayRef<int> Mask,
4778 const RISCVSubtarget &Subtarget,
4779 SelectionDAG &DAG) {
4780 bool OpsSwapped = false;
4781 if (!isa<BuildVectorSDNode>(V1)) {
4782 if (!isa<BuildVectorSDNode>(V2))
4783 return SDValue();
4784 std::swap(V1, V2);
4785 OpsSwapped = true;
4786 }
4787 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4788 if (!Splat)
4789 return SDValue();
4790
4791 // Return true if the mask could describe a slide of Mask.size() - 1
4792 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4793 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4794 const unsigned S = (Offset > 0) ? 0 : -Offset;
4795 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4796 for (unsigned i = S; i != E; ++i)
4797 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4798 return false;
4799 return true;
4800 };
4801
4802 const unsigned NumElts = VT.getVectorNumElements();
4803 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4804 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4805 return SDValue();
4806
4807 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4808 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4809 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4810 return SDValue();
4811
4812 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4813 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4814
4815 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4816 // vslide1{down,up}.vx instead.
4817 if (VT.getVectorElementType() == MVT::bf16 ||
4818 (VT.getVectorElementType() == MVT::f16 &&
4819 !Subtarget.hasVInstructionsF16())) {
4820 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4821 Splat =
4822 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4823 V2 = DAG.getBitcast(
4824 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4825 SDValue Vec = DAG.getNode(
4826 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4827 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4828 Vec = DAG.getBitcast(ContainerVT, Vec);
4829 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4830 }
4831
4832 auto OpCode = IsVSlidedown ?
4833 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4834 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4835 if (!VT.isFloatingPoint())
4836 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4837 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4838 DAG.getUNDEF(ContainerVT),
4839 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4840 Splat, TrueMask, VL);
4841 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4842}
4843
4844// Match a mask which "spreads" the leading elements of a vector evenly
4845// across the result. Factor is the spread amount, and Index is the
4846// offset applied. (on success, Index < Factor) This is the inverse
4847// of a deinterleave with the same Factor and Index. This is analogous
4848// to an interleave, except that all but one lane is undef.
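// Illustrative examples (not from the original source):
//   <-1, 0, -1, 1, -1, 2, -1, 3> is a spread with Factor == 2 and Index == 1;
//   <0, -1, 1, -1, 2, -1, 3, -1> is the same spread with Index == 0.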
4849static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4850 SmallVector<bool> LaneIsUndef(Factor, true);
4851 for (unsigned i = 0; i < Mask.size(); i++)
4852 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4853
4854 bool Found = false;
4855 for (unsigned i = 0; i < Factor; i++) {
4856 if (LaneIsUndef[i])
4857 continue;
4858 if (Found)
4859 return false;
4860 Index = i;
4861 Found = true;
4862 }
4863 if (!Found)
4864 return false;
4865
4866 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4867 unsigned j = i * Factor + Index;
4868 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4869 return false;
4870 }
4871 return true;
4872}
4873
4874// Given a vector a, b, c, d return a vector Factor times longer
4875// with Factor-1 undef's between elements. Ex:
4876// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4877// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4878static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4879 const SDLoc &DL, SelectionDAG &DAG) {
4880
4881 MVT VT = V.getSimpleValueType();
4882 unsigned EltBits = VT.getScalarSizeInBits();
4884 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4885
4886 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4887
4888 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4889 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4890 // allow the SHL to fold away if Index is 0.
4891 if (Index != 0)
4892 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4893 DAG.getConstant(EltBits * Index, DL, WideVT));
4894 // Make sure to use original element type
4895 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4896 EC.multiplyCoefficientBy(Factor));
4897 return DAG.getBitcast(ResultVT, Result);
4898}
4899
4900// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4901// to create an interleaved vector of <[vscale x] n*2 x ty>.
4902// This requires that the size of ty is less than the subtarget's maximum ELEN.
4904 const SDLoc &DL, SelectionDAG &DAG,
4905 const RISCVSubtarget &Subtarget) {
4906
4907 // FIXME: Not only does this optimize the code, it fixes some correctness
4908 // issues because MIR does not have freeze.
4909 if (EvenV.isUndef())
4910 return getWideningSpread(OddV, 2, 1, DL, DAG);
4911 if (OddV.isUndef())
4912 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4913
4914 MVT VecVT = EvenV.getSimpleValueType();
4915 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4916 // Convert fixed vectors to scalable if needed
4917 if (VecContainerVT.isFixedLengthVector()) {
4918 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4919 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4920 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4921 }
4922
4923 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4924
4925 // We're working with a vector of the same size as the resulting
4926 // interleaved vector, but with half the number of elements and
4927 // twice the SEW (Hence the restriction on not using the maximum
4928 // ELEN)
4929 MVT WideVT =
4930 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4931 VecVT.getVectorElementCount());
4932 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4933 if (WideContainerVT.isFixedLengthVector())
4934 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4935
4936 // Bitcast the input vectors to integers in case they are FP
4937 VecContainerVT = VecContainerVT.changeTypeToInteger();
4938 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4939 OddV = DAG.getBitcast(VecContainerVT, OddV);
4940
4941 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4942 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4943
4944 SDValue Interleaved;
4945 if (Subtarget.hasStdExtZvbb()) {
4946 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4947 SDValue OffsetVec =
4948 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4949 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4950 OffsetVec, Passthru, Mask, VL);
4951 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4952 Interleaved, EvenV, Passthru, Mask, VL);
4953 } else {
4954 // FIXME: We should freeze the odd vector here. We already handled the case
4955 // of provably undef/poison above.
4956
4957 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4958 // vwaddu.vv
4959 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4960 OddV, Passthru, Mask, VL);
4961
4962 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones.
4963 SDValue AllOnesVec = DAG.getSplatVector(
4964 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4965 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4966 OddV, AllOnesVec, Passthru, Mask, VL);
4967
4968 // Add the two together so we get
4969 // (OddV * 0xff...ff) + (OddV + EvenV)
4970 // = (OddV * 0x100...00) + EvenV
4971 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4972 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
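// Worked example (illustrative, not from the original source): with 8-bit
// elements, an EvenV lane of 0x34 and an OddV lane of 0x12 give
//   0x12 * 0xff = 0x11ee, 0x12 + 0x34 = 0x0046, 0x11ee + 0x0046 = 0x1234,
// i.e. the expected interleaved 16-bit lane.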
4973 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4974 Interleaved, OddsMul, Passthru, Mask, VL);
4975 }
4976
4977 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4978 MVT ResultContainerVT = MVT::getVectorVT(
4979 VecVT.getVectorElementType(), // Make sure to use original type
4980 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4981 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4982
4983 // Convert back to a fixed vector if needed
4984 MVT ResultVT =
4985 MVT::getVectorVT(VecVT.getVectorElementType(),
4986 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4987 if (ResultVT.isFixedLengthVector())
4988 Interleaved =
4989 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4990
4991 return Interleaved;
4992}
4993
4994// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4995// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4996 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4997 SelectionDAG &DAG,
4998 const RISCVSubtarget &Subtarget) {
4999 SDLoc DL(SVN);
5000 MVT VT = SVN->getSimpleValueType(0);
5001 SDValue V = SVN->getOperand(0);
5002 unsigned NumElts = VT.getVectorNumElements();
5003
5004 assert(VT.getVectorElementType() == MVT::i1);
5005
5007 SVN->getMask().size()) ||
5008 !SVN->getOperand(1).isUndef())
5009 return SDValue();
5010
5011 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5012 EVT ViaVT = EVT::getVectorVT(
5013 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5014 EVT ViaBitVT =
5015 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5016
5017 // If we don't have zvbb or the larger element type > ELEN, the operation will
5018 // be illegal.
5019 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5020 ViaVT) ||
5021 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5022 return SDValue();
5023
5024 // If the bit vector doesn't fit exactly into the larger element type, we need
5025 // to insert it into the larger vector and then shift up the reversed bits
5026 // afterwards to get rid of the gap introduced.
5027 if (ViaEltSize > NumElts)
5028 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5029 V, DAG.getVectorIdxConstant(0, DL));
5030
5031 SDValue Res =
5032 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5033
5034 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5035 // element type.
5036 if (ViaEltSize > NumElts)
5037 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5038 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5039
5040 Res = DAG.getBitcast(ViaBitVT, Res);
5041
5042 if (ViaEltSize > NumElts)
5043 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5044 DAG.getVectorIdxConstant(0, DL));
5045 return Res;
5046}
5047
5048 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5049 SelectionDAG &DAG,
5050 const RISCVSubtarget &Subtarget,
5051 MVT &RotateVT, unsigned &RotateAmt) {
5052 SDLoc DL(SVN);
5053
5054 EVT VT = SVN->getValueType(0);
5055 unsigned NumElts = VT.getVectorNumElements();
5056 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5057 unsigned NumSubElts;
5058 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5059 NumElts, NumSubElts, RotateAmt))
5060 return false;
5061 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5062 NumElts / NumSubElts);
5063
5064 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5065 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5066}
5067
5068// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5069// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5070// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5071 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5072 SelectionDAG &DAG,
5073 const RISCVSubtarget &Subtarget) {
5074 SDLoc DL(SVN);
5075
5076 EVT VT = SVN->getValueType(0);
5077 unsigned RotateAmt;
5078 MVT RotateVT;
5079 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5080 return SDValue();
5081
5082 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5083
5084 SDValue Rotate;
5085 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5086 // so canonicalize to vrev8.
5087 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5088 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5089 else
5090 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5091 DAG.getConstant(RotateAmt, DL, RotateVT));
5092
5093 return DAG.getBitcast(VT, Rotate);
5094}
5095
5096// If compiling with an exactly known VLEN, see if we can split a
5097// shuffle on m2 or larger into a small number of m1 sized shuffles
5098 // which writes each destination register exactly once.
5099 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5100 SelectionDAG &DAG,
5101 const RISCVSubtarget &Subtarget) {
5102 SDLoc DL(SVN);
5103 MVT VT = SVN->getSimpleValueType(0);
5104 SDValue V1 = SVN->getOperand(0);
5105 SDValue V2 = SVN->getOperand(1);
5106 ArrayRef<int> Mask = SVN->getMask();
5107
5108 // If we don't know exact data layout, not much we can do. If this
5109 // is already m1 or smaller, no point in splitting further.
5110 const auto VLen = Subtarget.getRealVLen();
5111 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5112 return SDValue();
5113
5114 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5115 // expansion for.
5116 unsigned RotateAmt;
5117 MVT RotateVT;
5118 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5119 return SDValue();
5120
5121 MVT ElemVT = VT.getVectorElementType();
5122 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5123
5124 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5125 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5126 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5127 assert(M1VT == getLMUL1VT(M1VT));
5128 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5129 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5130 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5131 unsigned NumOfDestRegs = NumElts / NumOpElts;
5132 // The following semantically builds up a fixed length concat_vector
5133 // of the component shuffle_vectors. We eagerly lower to scalable here
5134 // to avoid DAG combining it back to a large shuffle_vector again.
5135 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5136 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5137 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5138 Operands;
5139 processShuffleMasks(
5140 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5141 [&]() { Operands.emplace_back(); },
5142 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5143 Operands.emplace_back().emplace_back(
5144 SrcVecIdx, UINT_MAX,
5145 SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5146 },
5147 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5148 if (NewReg)
5149 Operands.emplace_back();
5150 Operands.back().emplace_back(
5151 Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5152 });
5153 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5154 // Note: check that we do not emit too many shuffles here to prevent code
5155 // size explosion.
5156 // TODO: investigate, if it can be improved by extra analysis of the masks to
5157 // check if the code is more profitable.
5158 unsigned NumShuffles = std::accumulate(
5159 Operands.begin(), Operands.end(), 0u,
5160 [&](unsigned N,
5161 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5162 if (Data.empty())
5163 return N;
5164 N += Data.size();
5165 for (const auto &P : Data) {
5166 unsigned Idx2 = std::get<1>(P);
5167 ArrayRef<int> Mask = std::get<2>(P);
5168 if (Idx2 != UINT_MAX)
5169 ++N;
5170 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5171 --N;
5172 }
5173 return N;
5174 });
5175 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5176 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5177 return SDValue();
5178 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5179 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5180 DAG.getVectorIdxConstant(ExtractIdx, DL));
5181 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5182 return SubVec;
5183 };
5184 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5185 ArrayRef<int> Mask) {
5186 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5187 return SubVec;
5188 };
5189 SDValue Vec = DAG.getUNDEF(ContainerVT);
5190 for (auto [I, Data] : enumerate(Operands)) {
5191 if (Data.empty())
5192 continue;
5193 SmallDenseMap<unsigned, SDValue, 4> Values;
5194 for (unsigned I : seq<unsigned>(Data.size())) {
5195 const auto &[Idx1, Idx2, _] = Data[I];
5196 if (Values.contains(Idx1)) {
5197 assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5198 "Expected both indices to be extracted already.");
5199 break;
5200 }
5201 SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5202 (Idx1 % NumOfSrcRegs) * NumOpElts);
5203 Values[Idx1] = V;
5204 if (Idx2 != UINT_MAX)
5205 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5206 (Idx2 % NumOfSrcRegs) * NumOpElts);
5207 }
5208 SDValue V;
5209 for (const auto &[Idx1, Idx2, Mask] : Data) {
5210 SDValue V1 = Values.at(Idx1);
5211 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5212 V = PerformShuffle(V1, V2, Mask);
5213 Values[Idx1] = V;
5214 }
5215
5216 unsigned InsertIdx = I * NumOpElts;
5217 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5218 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5219 DAG.getVectorIdxConstant(InsertIdx, DL));
5220 }
5221 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5222}
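// A worked instance of the bookkeeping above, assuming a hypothetical exact
// VLEN of 128: a fixed v8i32 shuffle occupies LMUL=2, so it is processed as
// two m1-sized v4i32 destination registers.
static void vregSplittingArithmeticExample() {
  const unsigned VLen = 128;                       // Assumed exact VLEN in bits.
  const unsigned EltBits = 32;                     // i32 elements.
  const unsigned NumElts = 8;                      // v8i32.
  unsigned ElemsPerVReg = VLen / EltBits;          // 4 elements per register.
  unsigned NumOfDestRegs = NumElts / ElemsPerVReg; // 2 m1 destinations.
  assert(ElemsPerVReg == 4 && NumOfDestRegs == 2);
  (void)ElemsPerVReg;
  (void)NumOfDestRegs;
}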
5223
5224// Matches a subset of compress masks with a contiguous prefix of output
5225// elements. This could be extended to allow gaps by deciding which
5226// source elements to spuriously demand.
5227static bool isCompressMask(ArrayRef<int> Mask) {
5228 int Last = -1;
5229 bool SawUndef = false;
5230 for (unsigned i = 0; i < Mask.size(); i++) {
5231 if (Mask[i] == -1) {
5232 SawUndef = true;
5233 continue;
5234 }
5235 if (SawUndef)
5236 return false;
5237 if (i > (unsigned)Mask[i])
5238 return false;
5239 if (Mask[i] <= Last)
5240 return false;
5241 Last = Mask[i];
5242 }
5243 return true;
5244}
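// Two concrete masks run through the predicate above (the wrapper name and
// values are illustrative only).
static void compressMaskExamples() {
  // Keeps source elements 0, 2 and 5 packed at the front; the trailing lane
  // is undef, so this is a valid compress prefix.
  const int Packed[] = {0, 2, 5, -1};
  // Position 1 pulls from source element 0, i.e. an element would move to
  // the right, so this is not a compress.
  const int Swapped[] = {1, 0, 2, 3};
  assert(isCompressMask(Packed) && !isCompressMask(Swapped));
  (void)Packed;
  (void)Swapped;
}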
5245
5246/// Given a shuffle where the indices are disjoint between the two sources,
5247/// e.g.:
5248///
5249/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5250///
5251/// Merge the two sources into one and do a single source shuffle:
5252///
5253/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 1, 0>
5254/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5255///
5256/// A vselect will either be merged into a masked instruction or be lowered as a
5257/// vmerge.vvm, which is cheaper than a vrgather.vv.
5258static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5259 SelectionDAG &DAG,
5260 const RISCVSubtarget &Subtarget) {
5261 MVT VT = SVN->getSimpleValueType(0);
5262 MVT XLenVT = Subtarget.getXLenVT();
5263 SDLoc DL(SVN);
5264
5265 const ArrayRef<int> Mask = SVN->getMask();
5266
5267 // Work out which source each lane will come from.
5268 SmallVector<int, 16> Srcs(Mask.size(), -1);
5269
5270 for (int Idx : Mask) {
5271 if (Idx == -1)
5272 continue;
5273 unsigned SrcIdx = Idx % Mask.size();
5274 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5275 if (Srcs[SrcIdx] == -1)
5276 // Mark this source as using this lane.
5277 Srcs[SrcIdx] = Src;
5278 else if (Srcs[SrcIdx] != Src)
5279 // The other source is using this lane: not disjoint.
5280 return SDValue();
5281 }
5282
5283 SmallVector<SDValue> SelectMaskVals;
5284 for (int Lane : Srcs) {
5285 if (Lane == -1)
5286 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5287 else
5288 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5289 }
5290 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5291 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5292 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5293 SVN->getOperand(0), SVN->getOperand(1));
5294
5295 // Move all indices relative to the first source.
5296 SmallVector<int> NewMask(Mask.size());
5297 for (unsigned I = 0; I < Mask.size(); I++) {
5298 if (Mask[I] == -1)
5299 NewMask[I] = -1;
5300 else
5301 NewMask[I] = Mask[I] % Mask.size();
5302 }
5303
5304 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5305}
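// Re-deriving the doc-comment example by hand (hypothetical helper, plain
// arrays only): for the mask <2, 7, 1, 4> over two v4i8 sources, the lane
// ownership and the rewritten single-source mask come out as follows.
static void disjointIndicesShuffleExample() {
  const int Mask[] = {2, 7, 1, 4};
  const unsigned Size = 4;
  // Which source claims each merged lane (lane Idx % Size is claimed by
  // source Idx / Size): lanes 1 and 2 come from t0, lanes 0 and 3 from t1,
  // so the vselect condition (1 selects t0) is <0, 1, 1, 0>.
  int Srcs[Size] = {-1, -1, -1, -1};
  for (int Idx : Mask)
    Srcs[Idx % Size] = Idx < (int)Size ? 0 : 1;
  // All indices taken modulo Size give the single-source mask <2, 3, 1, 0>.
  int NewMask[Size];
  for (unsigned I = 0; I < Size; ++I)
    NewMask[I] = Mask[I] % Size;
  assert(Srcs[0] == 1 && Srcs[1] == 0 && Srcs[2] == 0 && Srcs[3] == 1);
  assert(NewMask[0] == 2 && NewMask[1] == 3 && NewMask[2] == 1 &&
         NewMask[3] == 0);
  (void)Srcs;
  (void)NewMask;
}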
5306
5307/// Try to widen element type to get a new mask value for a better permutation
5308/// sequence. This doesn't try to inspect the widened mask for profitability;
5309/// we speculate the widened form is equal or better. This has the effect of
5310/// reducing mask constant sizes - allowing cheaper materialization sequences
5311/// - and index sequence sizes - reducing register pressure and materialization
5312/// cost, at the cost of (possibly) an extra VTYPE toggle.
5313static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5314 SDLoc DL(Op);
5315 MVT VT = Op.getSimpleValueType();
5316 MVT ScalarVT = VT.getVectorElementType();
5317 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5318 SDValue V0 = Op.getOperand(0);
5319 SDValue V1 = Op.getOperand(1);
5320 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5321
5322 // Avoid wasted work leading to isTypeLegal check failing below
5323 if (ElementSize > 32)
5324 return SDValue();
5325
5326 SmallVector<int, 8> NewMask;
5327 if (!widenShuffleMaskElts(Mask, NewMask))
5328 return SDValue();
5329
5330 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5331 : MVT::getIntegerVT(ElementSize * 2);
5332 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5333 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5334 return SDValue();
5335 V0 = DAG.getBitcast(NewVT, V0);
5336 V1 = DAG.getBitcast(NewVT, V1);
5337 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5338}
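// A concrete widening, under the assumption that every adjacent pair of mask
// indices addresses an aligned even/odd pair of source elements: the v8i8
// mask below is equivalent to the v4i16 mask after bitcasting both sources.
static void widenShuffleMaskExample() {
  const int Narrow[] = {2, 3, 6, 7, 0, 1, 4, 5}; // v8i8 mask.
  const int Wide[] = {1, 3, 0, 2};               // Equivalent v4i16 mask.
  for (unsigned I = 0; I < 4; ++I)
    assert(Narrow[2 * I] == 2 * Wide[I] &&
           Narrow[2 * I + 1] == 2 * Wide[I] + 1);
  (void)Narrow;
  (void)Wide;
}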
5339
5340static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5341 const RISCVSubtarget &Subtarget) {
5342 SDValue V1 = Op.getOperand(0);
5343 SDValue V2 = Op.getOperand(1);
5344 SDLoc DL(Op);
5345 MVT XLenVT = Subtarget.getXLenVT();
5346 MVT VT = Op.getSimpleValueType();
5347 unsigned NumElts = VT.getVectorNumElements();
5348 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5349
5350 if (VT.getVectorElementType() == MVT::i1) {
5351 // Lower to a vror.vi of a larger element type if possible before we promote
5352 // i1s to i8s.
5353 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5354 return V;
5355 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5356 return V;
5357
5358 // Promote i1 shuffle to i8 shuffle.
5359 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5360 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5361 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5362 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5363 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5364 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5365 ISD::SETNE);
5366 }
5367
5368 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5369
5370 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5371
5372 if (SVN->isSplat()) {
5373 const int Lane = SVN->getSplatIndex();
5374 if (Lane >= 0) {
5375 MVT SVT = VT.getVectorElementType();
5376
5377 // Turn splatted vector load into a strided load with an X0 stride.
5378 SDValue V = V1;
5379 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5380 // with undef.
5381 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5382 int Offset = Lane;
5383 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5384 int OpElements =
5385 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5386 V = V.getOperand(Offset / OpElements);
5387 Offset %= OpElements;
5388 }
5389
5390 // We need to ensure the load isn't atomic or volatile.
5391 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5392 auto *Ld = cast<LoadSDNode>(V);
5393 Offset *= SVT.getStoreSize();
5394 SDValue NewAddr = DAG.getMemBasePlusOffset(
5395 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5396
5397 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5398 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5399 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5400 SDValue IntID =
5401 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5402 SDValue Ops[] = {Ld->getChain(),
5403 IntID,
5404 DAG.getUNDEF(ContainerVT),
5405 NewAddr,
5406 DAG.getRegister(RISCV::X0, XLenVT),
5407 VL};
5408 SDValue NewLoad = DAG.getMemIntrinsicNode(
5409 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5410 DAG.getMachineFunction().getMachineMemOperand(
5411 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5412 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5413 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5414 }
5415
5416 MVT SplatVT = ContainerVT;
5417
5418 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5419 if (SVT == MVT::bf16 ||
5420 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5421 SVT = MVT::i16;
5422 SplatVT = ContainerVT.changeVectorElementType(SVT);
5423 }
5424
5425 // Otherwise use a scalar load and splat. This will give the best
5426 // opportunity to fold a splat into the operation. ISel can turn it into
5427 // the x0 strided load if we aren't able to fold away the select.
5428 if (SVT.isFloatingPoint())
5429 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5430 Ld->getPointerInfo().getWithOffset(Offset),
5431 Ld->getOriginalAlign(),
5432 Ld->getMemOperand()->getFlags());
5433 else
5434 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5435 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5436 Ld->getOriginalAlign(),
5437 Ld->getMemOperand()->getFlags());
5438 DAG.makeEquivalentMemoryOrdering(Ld, V);
5439
5440 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5441 : RISCVISD::VMV_V_X_VL;
5442 SDValue Splat =
5443 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5444 Splat = DAG.getBitcast(ContainerVT, Splat);
5445 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5446 }
5447
5448 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5449 assert(Lane < (int)NumElts && "Unexpected lane!");
5450 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5451 V1, DAG.getConstant(Lane, DL, XLenVT),
5452 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5453 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5454 }
5455 }
5456
5457 // For exact VLEN m2 or greater, try to split to m1 operations if we
5458 // can split cleanly.
5459 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5460 return V;
5461
5462 ArrayRef<int> Mask = SVN->getMask();
5463
5464 if (SDValue V =
5465 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5466 return V;
5467
5468 if (SDValue V =
5469 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5470 return V;
5471
5472 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5473 // available.
5474 if (Subtarget.hasStdExtZvkb())
5475 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5476 return V;
5477
5478 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5479 // be undef which can be handled with a single SLIDEDOWN/UP.
5480 int LoSrc, HiSrc;
5481 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5482 if (Rotation > 0) {
5483 SDValue LoV, HiV;
5484 if (LoSrc >= 0) {
5485 LoV = LoSrc == 0 ? V1 : V2;
5486 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5487 }
5488 if (HiSrc >= 0) {
5489 HiV = HiSrc == 0 ? V1 : V2;
5490 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5491 }
5492
5493 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5494 // to slide LoV up by (NumElts - Rotation).
5495 unsigned InvRotate = NumElts - Rotation;
5496
5497 SDValue Res = DAG.getUNDEF(ContainerVT);
5498 if (HiV) {
5499 // Even though we could use a smaller VL, don't do so, to avoid a
5500 // vsetivli toggle.
5501 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5502 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5503 }
5504 if (LoV)
5505 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5506 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5508
5509 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5510 }
5511
5512 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5513 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5514
5515 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5516 // use shift and truncate to perform the shuffle.
5517 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5518 // shift-and-trunc reducing total cost for everything except an mf8 result.
5519 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5520 // to do the entire operation.
5521 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5522 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5523 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5524 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5525 unsigned Index = 0;
5526 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5527 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5528 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5529 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5530 }
5531 }
5532 }
5533
5534 if (SDValue V =
5535 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5536 return V;
5537
5538 // Detect an interleave shuffle and lower to
5539 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
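 // Worked through with 8-bit elements: the widening add produces
 // zext(Even) + zext(Odd) in 16 bits, and the widening multiply-accumulate by
 // the constant (2^8 - 1) then adds 255 * zext(Odd), giving
 // zext(Even) + 256 * zext(Odd). That is the 16-bit lane whose low byte is
 // the even element and whose high byte is the odd element, i.e. the two
 // streams interleaved.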
5540 int EvenSrc, OddSrc;
5541 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5542 // Extract the halves of the vectors.
5543 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5544
5545 // Recognize if one half is actually undef; the matching above will
5546 // otherwise reuse the even stream for the undef one. This improves
5547 // spread(2) shuffles.
5548 bool LaneIsUndef[2] = { true, true};
5549 for (unsigned i = 0; i < Mask.size(); i++)
5550 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5551
5552 int Size = Mask.size();
5553 SDValue EvenV, OddV;
5554 if (LaneIsUndef[0]) {
5555 EvenV = DAG.getUNDEF(HalfVT);
5556 } else {
5557 assert(EvenSrc >= 0 && "Undef source?");
5558 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5559 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5560 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5561 }
5562
5563 if (LaneIsUndef[1]) {
5564 OddV = DAG.getUNDEF(HalfVT);
5565 } else {
5566 assert(OddSrc >= 0 && "Undef source?");
5567 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5568 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5569 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5570 }
5571
5572 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5573 }
5574
5575
5576 // Handle any remaining single source shuffles
5577 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5578 if (V2.isUndef()) {
5579 // We might be able to express the shuffle as a bitrotate. But even if we
5580 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5581 // shifts and a vor will have a higher throughput than a vrgather.
5582 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5583 return V;
5584
5585 // Before hitting generic lowering fallbacks, try to widen the mask
5586 // to a wider SEW.
5587 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5588 return V;
5589
5590 // Can we generate a vcompress instead of a vrgather? These scale better
5591 // at high LMUL, at the cost of not being able to fold a following select
5592 // into them. The mask constants are also smaller than the index vector
5593 // constants, and thus easier to materialize.
5594 if (isCompressMask(Mask)) {
5595 SmallVector<SDValue> MaskVals(NumElts,
5596 DAG.getConstant(false, DL, XLenVT));
5597 for (auto Idx : Mask) {
5598 if (Idx == -1)
5599 break;
5600 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5601 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5602 }
5603 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5604 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5605 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5606 DAG.getUNDEF(VT));
5607 }
5608
5609 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5610 // is fully covered in interleave(2) above, so it is ignored here.
5611 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5612 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5613 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5614 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5615 unsigned Index;
5616 if (isSpreadMask(Mask, Factor, Index)) {
5617 MVT NarrowVT =
5618 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5619 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5620 DAG.getVectorIdxConstant(0, DL));
5621 return getWideningSpread(Src, Factor, Index, DL, DAG);
5622 }
5623 }
5624 }
5625
5626 if (VT.getScalarSizeInBits() == 8 &&
5627 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5628 // On such a vector we're unable to use i8 as the index type.
5629 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5630 // may involve vector splitting if we're already at LMUL=8, or our
5631 // user-supplied maximum fixed-length LMUL.
5632 return SDValue();
5633 }
5634
5635 // Base case for the two operand recursion below - handle the worst case
5636 // single source shuffle.
5637 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5638 MVT IndexVT = VT.changeTypeToInteger();
5639 // Since we can't introduce illegal index types at this stage, use i16 and
5640 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5641 // than XLenVT.
5642 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5643 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5644 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5645 }
5646
5647 // If the mask allows, we can do all the index computation in 16 bits. This
5648 // requires less work and less register pressure at high LMUL, and creates
5649 // smaller constants which may be cheaper to materialize.
5650 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5651 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5652 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5653 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5654 }
5655
5656 MVT IndexContainerVT =
5657 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5658
5659 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5660 SmallVector<SDValue> GatherIndicesLHS;
5661 for (int MaskIndex : Mask) {
5662 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5663 GatherIndicesLHS.push_back(IsLHSIndex
5664 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5665 : DAG.getUNDEF(XLenVT));
5666 }
5667 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5668 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5669 Subtarget);
5670 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5671 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5672 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5673 }
5674
5675 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5676 // merged with a second vrgather.
5677 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5678
5679 // Now construct the mask that will be used by the blended vrgather operation.
5680 // Construct the appropriate indices into each vector.
5681 for (int MaskIndex : Mask) {
5682 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5683 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5684 ? MaskIndex : -1);
5685 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5686 }
5687
5688 // If the mask indices are disjoint between the two sources, we can lower it
5689 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5690 // operands may end up being lowered to something cheaper than a vrgather.vv.
5691 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5692 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5693 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5694 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5695 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5696 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5697 return V;
5698
5699 // Before hitting generic lowering fallbacks, try to widen the mask
5700 // to a wider SEW.
5701 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5702 return V;
5703
5704 // Try to pick a profitable operand order.
5705 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5706 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5707
5708 // Recursively invoke lowering for each operand if we had two
5709 // independent single source shuffles, and then combine the result via a
5710 // vselect. Note that the vselect will likely be folded back into the
5711 // second permute (vrgather, or other) by the post-isel combine.
5712 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5713 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5714
5715 SmallVector<SDValue> MaskVals;
5716 for (int MaskIndex : Mask) {
5717 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5718 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5719 }
5720
5721 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5722 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5723 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5724
5725 if (SwapOps)
5726 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5727 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5728}
5729
5730bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5731 // Support splats for any type. These should type legalize well.
5732 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5733 return true;
5734
5735 // Only support legal VTs for other shuffles for now.
5736 if (!isTypeLegal(VT))
5737 return false;
5738
5739 MVT SVT = VT.getSimpleVT();
5740
5741 // Not for i1 vectors.
5742 if (SVT.getScalarType() == MVT::i1)
5743 return false;
5744
5745 int Dummy1, Dummy2;
5746 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5747 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5748}
5749
5750// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5751// the exponent.
5752SDValue
5753RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5754 SelectionDAG &DAG) const {
5755 MVT VT = Op.getSimpleValueType();
5756 unsigned EltSize = VT.getScalarSizeInBits();
5757 SDValue Src = Op.getOperand(0);
5758 SDLoc DL(Op);
5759 MVT ContainerVT = VT;
5760
5761 SDValue Mask, VL;
5762 if (Op->isVPOpcode()) {
5763 Mask = Op.getOperand(1);
5764 if (VT.isFixedLengthVector())
5765 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5766 Subtarget);
5767 VL = Op.getOperand(2);
5768 }
5769
5770 // We choose an FP type that can represent the value exactly if possible.
5771 // Otherwise, use a round-toward-zero conversion to keep the exponent correct.
5772 // TODO: Use f16 for i8 when possible?
5773 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5774 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5775 FloatEltVT = MVT::f32;
5776 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5777
5778 // Legal types should have been checked in the RISCVTargetLowering
5779 // constructor.
5780 // TODO: Splitting may make sense in some cases.
5781 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5782 "Expected legal float type!");
5783
5784 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5785 // The trailing zero count is equal to log2 of this single bit value.
5786 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5787 SDValue Neg = DAG.getNegative(Src, DL, VT);
5788 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5789 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5790 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5791 Src, Mask, VL);
5792 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5793 }
5794
5795 // We have a legal FP type, convert to it.
5796 SDValue FloatVal;
5797 if (FloatVT.bitsGT(VT)) {
5798 if (Op->isVPOpcode())
5799 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5800 else
5801 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5802 } else {
5803 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5804 if (VT.isFixedLengthVector()) {
5805 ContainerVT = getContainerForFixedLengthVector(VT);
5806 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5807 }
5808 if (!Op->isVPOpcode())
5809 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5810 SDValue RTZRM =
5811 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5812 MVT ContainerFloatVT =
5813 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5814 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5815 Src, Mask, RTZRM, VL);
5816 if (VT.isFixedLengthVector())
5817 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5818 }
5819 // Bitcast to integer and shift the exponent to the LSB.
5820 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5821 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5822 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5823
5824 SDValue Exp;
5825 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5826 if (Op->isVPOpcode()) {
5827 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5828 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5829 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5830 } else {
5831 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5832 DAG.getConstant(ShiftAmt, DL, IntVT));
5833 if (IntVT.bitsLT(VT))
5834 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5835 else if (IntVT.bitsGT(VT))
5836 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5837 }
5838
5839 // The exponent contains log2 of the value in biased form.
5840 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5841 // For trailing zeros, we just need to subtract the bias.
5842 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5843 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5844 DAG.getConstant(ExponentBias, DL, VT));
5845 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5846 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5847 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5848
5849 // For leading zeros, we need to remove the bias and convert from log2 to
5850 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5851 unsigned Adjust = ExponentBias + (EltSize - 1);
5852 SDValue Res;
5853 if (Op->isVPOpcode())
5854 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5855 Mask, VL);
5856 else
5857 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5858
5859 // With a zero input the above result equals Adjust, which is greater than
5860 // EltSize. Hence, we can compute min(Res, EltSize) for CTLZ.
5861 if (Op.getOpcode() == ISD::CTLZ)
5862 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5863 else if (Op.getOpcode() == ISD::VP_CTLZ)
5864 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5865 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5866 return Res;
5867}
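// A scalar sketch of the same trick for one 32-bit lane, using f64 so the
// conversion is exact (helper names are illustrative; assumes the usual
// <cstdint>/<cstring> declarations are available).
static unsigned ctlz32ViaExponentExample(uint32_t X) {
  assert(X != 0 && "zero input is the ZERO_UNDEF case");
  double D = static_cast<double>(X); // Exact for any 32-bit value.
  uint64_t Bits;
  memcpy(&Bits, &D, sizeof(Bits));
  unsigned Exp = (Bits >> 52) & 0x7FF; // Biased exponent = 1023 + floor(log2 X).
  return (1023 + 31) - Exp;            // Remove the bias, turn log2 into clz.
}
static unsigned cttz32ViaExponentExample(uint32_t X) {
  assert(X != 0 && "zero input is the ZERO_UNDEF case");
  uint32_t LowBit = X & (0u - X);      // Isolate the lowest set bit, X & -X.
  double D = static_cast<double>(LowBit);
  uint64_t Bits;
  memcpy(&Bits, &D, sizeof(Bits));
  unsigned Exp = (Bits >> 52) & 0x7FF;
  return Exp - 1023;                   // log2 of a single-bit value.
}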
5868
5869SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5870 SelectionDAG &DAG) const {
5871 SDLoc DL(Op);
5872 MVT XLenVT = Subtarget.getXLenVT();
5873 SDValue Source = Op->getOperand(0);
5874 MVT SrcVT = Source.getSimpleValueType();
5875 SDValue Mask = Op->getOperand(1);
5876 SDValue EVL = Op->getOperand(2);
5877
5878 if (SrcVT.isFixedLengthVector()) {
5879 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5880 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5881 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5882 Subtarget);
5883 SrcVT = ContainerVT;
5884 }
5885
5886 // Convert to boolean vector.
5887 if (SrcVT.getScalarType() != MVT::i1) {
5888 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5889 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5890 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5891 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5892 DAG.getUNDEF(SrcVT), Mask, EVL});
5893 }
5894
5895 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5896 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5897 // In this case, we can interpret poison as -1, so there is nothing further to do.
5898 return Res;
5899
5900 // Convert -1 to VL.
5901 SDValue SetCC =
5902 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5903 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5904 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5905}
5906
5907// While RVV has alignment restrictions, we should always be able to load as a
5908// legal equivalently-sized byte-typed vector instead. This method is
5909 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5910// the load is already correctly-aligned, it returns SDValue().
5911SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5912 SelectionDAG &DAG) const {
5913 auto *Load = cast<LoadSDNode>(Op);
5914 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5915
5916 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5917 Load->getMemoryVT(),
5918 *Load->getMemOperand()))
5919 return SDValue();
5920
5921 SDLoc DL(Op);
5922 MVT VT = Op.getSimpleValueType();
5923 unsigned EltSizeBits = VT.getScalarSizeInBits();
5924 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5925 "Unexpected unaligned RVV load type");
5926 MVT NewVT =
5927 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5928 assert(NewVT.isValid() &&
5929 "Expecting equally-sized RVV vector types to be legal");
5930 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5931 Load->getPointerInfo(), Load->getOriginalAlign(),
5932 Load->getMemOperand()->getFlags());
5933 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5934}
5935
5936// While RVV has alignment restrictions, we should always be able to store as a
5937// legal equivalently-sized byte-typed vector instead. This method is
5938 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5939// returns SDValue() if the store is already correctly aligned.
5940SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5941 SelectionDAG &DAG) const {
5942 auto *Store = cast<StoreSDNode>(Op);
5943 assert(Store && Store->getValue().getValueType().isVector() &&
5944 "Expected vector store");
5945
5946 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5947 Store->getMemoryVT(),
5948 *Store->getMemOperand()))
5949 return SDValue();
5950
5951 SDLoc DL(Op);
5952 SDValue StoredVal = Store->getValue();
5953 MVT VT = StoredVal.getSimpleValueType();
5954 unsigned EltSizeBits = VT.getScalarSizeInBits();
5955 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5956 "Unexpected unaligned RVV store type");
5957 MVT NewVT =
5958 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5959 assert(NewVT.isValid() &&
5960 "Expecting equally-sized RVV vector types to be legal");
5961 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5962 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5963 Store->getPointerInfo(), Store->getOriginalAlign(),
5964 Store->getMemOperand()->getFlags());
5965}
5966
5967static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5968 const RISCVSubtarget &Subtarget) {
5969 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5970
5971 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5972
5973 // All simm32 constants should be handled by isel.
5974 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2,
5975 // making this check redundant, but small immediates are common, so checking
5976 // here first should give better compile time.
5977 if (isInt<32>(Imm))
5978 return Op;
5979
5980 // We only need to cost the immediate, if constant pool lowering is enabled.
5981 if (!Subtarget.useConstantPoolForLargeInts())
5982 return Op;
5983
5984 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5985 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5986 return Op;
5987
5988 // Optimizations below are disabled for opt size. If we're optimizing for
5989 // size, use a constant pool.
5990 if (DAG.shouldOptForSize())
5991 return SDValue();
5992
5993 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5994 // do that if it will avoid a constant pool.
5995 // It will require an extra temporary register though.
5996 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5997 // low and high 32 bits are the same and bit 31 and 63 are set.
5998 unsigned ShiftAmt, AddOpc;
5999 RISCVMatInt::InstSeq SeqLo =
6000 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6001 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6002 return Op;
6003
6004 return SDValue();
6005}
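// A quick arithmetic check of the Zba special case mentioned above, using an
// arbitrary example constant: when the low and high 32-bit halves of Imm are
// identical and bits 31 and 63 are set, (ADD_UW X, (SLLI X, 32)) rebuilds Imm
// from the sign-extended low half X.
static void twoRegisterImmediateExample() {
  const uint64_t Imm = 0xABCD1234ABCD1234ULL;
  const uint64_t X = 0xFFFFFFFFABCD1234ULL;          // As LUI/ADDIW would produce.
  const uint64_t Slli = X << 32;                     // SLLI X, 32.
  const uint64_t AddUW = (X & 0xFFFFFFFFULL) + Slli; // ADD_UW zero-extends X.
  assert(AddUW == Imm);
  (void)AddUW;
}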
6006
6007SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6008 SelectionDAG &DAG) const {
6009 MVT VT = Op.getSimpleValueType();
6010 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6011
6012 // Can this constant be selected by a Zfa FLI instruction?
6013 bool Negate = false;
6014 int Index = getLegalZfaFPImm(Imm, VT);
6015
6016 // If the constant is negative, try negating.
6017 if (Index < 0 && Imm.isNegative()) {
6018 Index = getLegalZfaFPImm(-Imm, VT);
6019 Negate = true;
6020 }
6021
6022 // If we couldn't find a FLI lowering, fall back to generic code.
6023 if (Index < 0)
6024 return SDValue();
6025
6026 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6027 SDLoc DL(Op);
6028 SDValue Const =
6029 DAG.getNode(RISCVISD::FLI, DL, VT,
6030 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6031 if (!Negate)
6032 return Const;
6033
6034 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6035}
6036
6037static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6038 const RISCVSubtarget &Subtarget) {
6039 SDLoc dl(Op);
6040 AtomicOrdering FenceOrdering =
6041 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6042 SyncScope::ID FenceSSID =
6043 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6044
6045 if (Subtarget.hasStdExtZtso()) {
6046 // The only fence that needs an instruction is a sequentially-consistent
6047 // cross-thread fence.
6048 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6049 FenceSSID == SyncScope::System)
6050 return Op;
6051
6052 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6053 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6054 }
6055
6056 // singlethread fences only synchronize with signal handlers on the same
6057 // thread and thus only need to preserve instruction order, not actually
6058 // enforce memory ordering.
6059 if (FenceSSID == SyncScope::SingleThread)
6060 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6061 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6062
6063 return Op;
6064}
6065
6066SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6067 SelectionDAG &DAG) const {
6068 SDLoc DL(Op);
6069 MVT VT = Op.getSimpleValueType();
6070 MVT XLenVT = Subtarget.getXLenVT();
6071 unsigned Check = Op.getConstantOperandVal(1);
6072 unsigned TDCMask = 0;
6073 if (Check & fcSNan)
6074 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6075 if (Check & fcQNan)
6076 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6077 if (Check & fcPosInf)
6078 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6079 if (Check & fcNegInf)
6080 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6081 if (Check & fcPosNormal)
6082 TDCMask |= RISCV::FPMASK_Positive_Normal;
6083 if (Check & fcNegNormal)
6084 TDCMask |= RISCV::FPMASK_Negative_Normal;
6085 if (Check & fcPosSubnormal)
6086 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6087 if (Check & fcNegSubnormal)
6088 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6089 if (Check & fcPosZero)
6090 TDCMask |= RISCV::FPMASK_Positive_Zero;
6091 if (Check & fcNegZero)
6092 TDCMask |= RISCV::FPMASK_Negative_Zero;
6093
6094 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6095
6096 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6097
6098 if (VT.isVector()) {
6099 SDValue Op0 = Op.getOperand(0);
6100 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6101
6102 if (VT.isScalableVector()) {
6103 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6104 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6105 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6106 Mask = Op.getOperand(2);
6107 VL = Op.getOperand(3);
6108 }
6109 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6110 VL, Op->getFlags());
6111 if (IsOneBitMask)
6112 return DAG.getSetCC(DL, VT, FPCLASS,
6113 DAG.getConstant(TDCMask, DL, DstVT),
6114 ISD::SETEQ);
6115 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6116 DAG.getConstant(TDCMask, DL, DstVT));
6117 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6118 ISD::SETNE);
6119 }
6120
6121 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6122 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6123 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6124 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6125 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6126 Mask = Op.getOperand(2);
6127 MVT MaskContainerVT =
6128 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6129 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6130 VL = Op.getOperand(3);
6131 }
6132 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6133
6134 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6135 Mask, VL, Op->getFlags());
6136
6137 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6138 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6139 if (IsOneBitMask) {
6140 SDValue VMSEQ =
6141 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6142 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6143 DAG.getUNDEF(ContainerVT), Mask, VL});
6144 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6145 }
6146 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6147 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6148
6149 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6150 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6151 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6152
6153 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6154 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6155 DAG.getUNDEF(ContainerVT), Mask, VL});
6156 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6157 }
6158
6159 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6160 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6161 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6162 ISD::SETNE);
6163 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6164}
6165
6166// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6167// operations propagate nans.
6168static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6169 const RISCVSubtarget &Subtarget) {
6170 SDLoc DL(Op);
6171 MVT VT = Op.getSimpleValueType();
6172
6173 SDValue X = Op.getOperand(0);
6174 SDValue Y = Op.getOperand(1);
6175
6176 if (!VT.isVector()) {
6177 MVT XLenVT = Subtarget.getXLenVT();
6178
6179 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6180 // ensures that when one input is a nan, the other will also be a nan
6181 // allowing the nan to propagate. If both inputs are nan, this will swap the
6182 // inputs which is harmless.
6183
6184 SDValue NewY = Y;
6185 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6186 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6187 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6188 }
6189
6190 SDValue NewX = X;
6191 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6192 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6193 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6194 }
6195
6196 unsigned Opc =
6197 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6198 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6199 }
6200
6201 // Check for NaNs before converting the fixed-length vectors to scalable form.
6202 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6203 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6204
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector()) {
6207 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6208 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6209 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6210 }
6211
6212 SDValue Mask, VL;
6213 if (Op->isVPOpcode()) {
6214 Mask = Op.getOperand(2);
6215 if (VT.isFixedLengthVector())
6216 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6217 Subtarget);
6218 VL = Op.getOperand(3);
6219 } else {
6220 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6221 }
6222
6223 SDValue NewY = Y;
6224 if (!XIsNeverNan) {
6225 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6226 {X, X, DAG.getCondCode(ISD::SETOEQ),
6227 DAG.getUNDEF(ContainerVT), Mask, VL});
6228 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6229 DAG.getUNDEF(ContainerVT), VL);
6230 }
6231
6232 SDValue NewX = X;
6233 if (!YIsNeverNan) {
6234 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6235 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6236 DAG.getUNDEF(ContainerVT), Mask, VL});
6237 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6238 DAG.getUNDEF(ContainerVT), VL);
6239 }
6240
6241 unsigned Opc =
6242 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6243 ? RISCVISD::VFMAX_VL
6244 : RISCVISD::VFMIN_VL;
6245 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6246 DAG.getUNDEF(ContainerVT), Mask, VL);
6247 if (VT.isFixedLengthVector())
6248 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6249 return Res;
6250}
6251
6252static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6253 const RISCVSubtarget &Subtarget) {
6254 bool IsFABS = Op.getOpcode() == ISD::FABS;
6255 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6256 "Wrong opcode for lowering FABS or FNEG.");
6257
6258 MVT XLenVT = Subtarget.getXLenVT();
6259 MVT VT = Op.getSimpleValueType();
6260 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6261
6262 SDLoc DL(Op);
6263 SDValue Fmv =
6264 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6265
6266 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6267 Mask = Mask.sext(Subtarget.getXLen());
6268
6269 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6270 SDValue Logic =
6271 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6272 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6273}
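// The same sign-bit arithmetic on a plain 16-bit payload (f16 and bf16 both
// keep their sign in bit 15); the helpers are illustrative only.
static uint16_t fabs16BitsExample(uint16_t Bits) {
  return (uint16_t)(Bits & 0x7FFF); // FABS: clear the sign bit.
}
static uint16_t fneg16BitsExample(uint16_t Bits) {
  return (uint16_t)(Bits ^ 0x8000); // FNEG: flip the sign bit.
}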
6274
6276 const RISCVSubtarget &Subtarget) {
6277 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6278
6279 MVT XLenVT = Subtarget.getXLenVT();
6280 MVT VT = Op.getSimpleValueType();
6281 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6282
6283 SDValue Mag = Op.getOperand(0);
6284 SDValue Sign = Op.getOperand(1);
6285
6286 SDLoc DL(Op);
6287
6288 // Get sign bit into an integer value.
6289 SDValue SignAsInt;
6290 unsigned SignSize = Sign.getValueSizeInBits();
6291 if (SignSize == Subtarget.getXLen()) {
6292 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6293 } else if (SignSize == 16) {
6294 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6295 } else if (SignSize == 32) {
6296 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6297 } else if (SignSize == 64) {
6298 assert(XLenVT == MVT::i32 && "Unexpected type");
6299 // Copy the upper word to integer.
6300 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6301 .getValue(1);
6302 SignSize = 32;
6303 } else
6304 llvm_unreachable("Unexpected sign size");
6305
6306 // Get the signbit at the right position for MagAsInt.
6307 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6308 if (ShiftAmount > 0) {
6309 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6310 DAG.getConstant(ShiftAmount, DL, XLenVT));
6311 } else if (ShiftAmount < 0) {
6312 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6313 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6314 }
6315
6316 // Mask the sign bit and any bits above it. The extra bits will be dropped
6317 // when we convert back to FP.
6318 SDValue SignMask = DAG.getConstant(
6319 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6320 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6321
6322 // Transform Mag value to integer, and clear the sign bit.
6323 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6324 SDValue ClearSignMask = DAG.getConstant(
6325 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6326 SDValue ClearedSign =
6327 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6328
6329 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6331
6332 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6333}
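// The corresponding scalar picture for the 16-bit copysign above: magnitude
// bits from Mag, sign bit from Sign, once the sign bit has been shifted down
// to bit 15 (illustrative helper only).
static uint16_t copysign16BitsExample(uint16_t MagBits, uint16_t SignBits) {
  return (uint16_t)((MagBits & 0x7FFF) | (SignBits & 0x8000));
}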
6334
6335/// Get the RISC-V target-specific VL op for a given SDNode.
6336static unsigned getRISCVVLOp(SDValue Op) {
6337#define OP_CASE(NODE) \
6338 case ISD::NODE: \
6339 return RISCVISD::NODE##_VL;
6340#define VP_CASE(NODE) \
6341 case ISD::VP_##NODE: \
6342 return RISCVISD::NODE##_VL;
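// For example, OP_CASE(ADD) below expands to
//   case ISD::ADD:
//     return RISCVISD::ADD_VL;
// and VP_CASE(ADD) provides the same mapping for ISD::VP_ADD.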
6343 // clang-format off
6344 switch (Op.getOpcode()) {
6345 default:
6346 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6347 OP_CASE(ADD)
6348 OP_CASE(SUB)
6349 OP_CASE(MUL)
6350 OP_CASE(MULHS)
6351 OP_CASE(MULHU)
6352 OP_CASE(SDIV)
6353 OP_CASE(SREM)
6354 OP_CASE(UDIV)
6355 OP_CASE(UREM)
6356 OP_CASE(SHL)
6357 OP_CASE(SRA)
6358 OP_CASE(SRL)
6359 OP_CASE(ROTL)
6360 OP_CASE(ROTR)
6361 OP_CASE(BSWAP)
6362 OP_CASE(CTTZ)
6363 OP_CASE(CTLZ)
6364 OP_CASE(CTPOP)
6365 OP_CASE(BITREVERSE)
6366 OP_CASE(SADDSAT)
6367 OP_CASE(UADDSAT)
6368 OP_CASE(SSUBSAT)
6369 OP_CASE(USUBSAT)
6370 OP_CASE(AVGFLOORS)
6371 OP_CASE(AVGFLOORU)
6372 OP_CASE(AVGCEILS)
6373 OP_CASE(AVGCEILU)
6374 OP_CASE(FADD)
6375 OP_CASE(FSUB)
6376 OP_CASE(FMUL)
6377 OP_CASE(FDIV)
6378 OP_CASE(FNEG)
6379 OP_CASE(FABS)
6380 OP_CASE(FSQRT)
6381 OP_CASE(SMIN)
6382 OP_CASE(SMAX)
6383 OP_CASE(UMIN)
6384 OP_CASE(UMAX)
6385 OP_CASE(STRICT_FADD)
6386 OP_CASE(STRICT_FSUB)
6387 OP_CASE(STRICT_FMUL)
6388 OP_CASE(STRICT_FDIV)
6389 OP_CASE(STRICT_FSQRT)
6390 VP_CASE(ADD) // VP_ADD
6391 VP_CASE(SUB) // VP_SUB
6392 VP_CASE(MUL) // VP_MUL
6393 VP_CASE(SDIV) // VP_SDIV
6394 VP_CASE(SREM) // VP_SREM
6395 VP_CASE(UDIV) // VP_UDIV
6396 VP_CASE(UREM) // VP_UREM
6397 VP_CASE(SHL) // VP_SHL
6398 VP_CASE(FADD) // VP_FADD
6399 VP_CASE(FSUB) // VP_FSUB
6400 VP_CASE(FMUL) // VP_FMUL
6401 VP_CASE(FDIV) // VP_FDIV
6402 VP_CASE(FNEG) // VP_FNEG
6403 VP_CASE(FABS) // VP_FABS
6404 VP_CASE(SMIN) // VP_SMIN
6405 VP_CASE(SMAX) // VP_SMAX
6406 VP_CASE(UMIN) // VP_UMIN
6407 VP_CASE(UMAX) // VP_UMAX
6408 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6409 VP_CASE(SETCC) // VP_SETCC
6410 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6411 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6412 VP_CASE(BITREVERSE) // VP_BITREVERSE
6413 VP_CASE(SADDSAT) // VP_SADDSAT
6414 VP_CASE(UADDSAT) // VP_UADDSAT
6415 VP_CASE(SSUBSAT) // VP_SSUBSAT
6416 VP_CASE(USUBSAT) // VP_USUBSAT
6417 VP_CASE(BSWAP) // VP_BSWAP
6418 VP_CASE(CTLZ) // VP_CTLZ
6419 VP_CASE(CTTZ) // VP_CTTZ
6420 VP_CASE(CTPOP) // VP_CTPOP
6421 case ISD::CTLZ_ZERO_UNDEF:
6422 case ISD::VP_CTLZ_ZERO_UNDEF:
6423 return RISCVISD::CTLZ_VL;
6424 case ISD::CTTZ_ZERO_UNDEF:
6425 case ISD::VP_CTTZ_ZERO_UNDEF:
6426 return RISCVISD::CTTZ_VL;
6427 case ISD::FMA:
6428 case ISD::VP_FMA:
6429 return RISCVISD::VFMADD_VL;
6430 case ISD::STRICT_FMA:
6431 return RISCVISD::STRICT_VFMADD_VL;
6432 case ISD::AND:
6433 case ISD::VP_AND:
6434 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6435 return RISCVISD::VMAND_VL;
6436 return RISCVISD::AND_VL;
6437 case ISD::OR:
6438 case ISD::VP_OR:
6439 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6440 return RISCVISD::VMOR_VL;
6441 return RISCVISD::OR_VL;
6442 case ISD::XOR:
6443 case ISD::VP_XOR:
6444 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6445 return RISCVISD::VMXOR_VL;
6446 return RISCVISD::XOR_VL;
6447 case ISD::VP_SELECT:
6448 case ISD::VP_MERGE:
6449 return RISCVISD::VMERGE_VL;
6450 case ISD::VP_SRA:
6451 return RISCVISD::SRA_VL;
6452 case ISD::VP_SRL:
6453 return RISCVISD::SRL_VL;
6454 case ISD::VP_SQRT:
6455 return RISCVISD::FSQRT_VL;
6456 case ISD::VP_SIGN_EXTEND:
6457 return RISCVISD::VSEXT_VL;
6458 case ISD::VP_ZERO_EXTEND:
6459 return RISCVISD::VZEXT_VL;
6460 case ISD::VP_FP_TO_SINT:
6461 return RISCVISD::VFCVT_RTZ_X_F_VL;
6462 case ISD::VP_FP_TO_UINT:
6463 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6464 case ISD::FMINNUM:
6465 case ISD::VP_FMINNUM:
6466 return RISCVISD::VFMIN_VL;
6467 case ISD::FMAXNUM:
6468 case ISD::VP_FMAXNUM:
6469 return RISCVISD::VFMAX_VL;
6470 case ISD::LRINT:
6471 case ISD::VP_LRINT:
6472 case ISD::LLRINT:
6473 case ISD::VP_LLRINT:
6475 }
6476 // clang-format on
6477#undef OP_CASE
6478#undef VP_CASE
6479}
6480
6481/// Return true if a RISC-V target-specific op has a passthru operand.
6482static bool hasPassthruOp(unsigned Opcode) {
6483 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6485 "not a RISC-V target specific op");
6486 static_assert(
6489 "adding target specific op should update this function");
6490 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6491 return true;
6492 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6493 return true;
6494 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6495 return true;
6496 if (Opcode == RISCVISD::SETCC_VL)
6497 return true;
6498 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6499 return true;
6500 if (Opcode == RISCVISD::VMERGE_VL)
6501 return true;
6502 return false;
6503}
6504
6505/// Return true if a RISC-V target-specific op has a mask operand.
6506static bool hasMaskOp(unsigned Opcode) {
6507 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6509 "not a RISC-V target specific op");
6510 static_assert(
6513 "adding target specific op should update this function");
6514 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6515 return true;
6516 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6517 return true;
6518 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6520 return true;
6521 return false;
6522}
6523
6525 const RISCVSubtarget &Subtarget) {
6526 if (Op.getValueType() == MVT::nxv32f16 &&
6527 (Subtarget.hasVInstructionsF16Minimal() &&
6528 !Subtarget.hasVInstructionsF16()))
6529 return true;
6530 if (Op.getValueType() == MVT::nxv32bf16)
6531 return true;
6532 return false;
6533}
6534
6535static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6536 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6537 SDLoc DL(Op);
6538
6539 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6540 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6541
6542 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6543 if (!Op.getOperand(j).getValueType().isVector()) {
6544 LoOperands[j] = Op.getOperand(j);
6545 HiOperands[j] = Op.getOperand(j);
6546 continue;
6547 }
6548 std::tie(LoOperands[j], HiOperands[j]) =
6549 DAG.SplitVector(Op.getOperand(j), DL);
6550 }
6551
6552 SDValue LoRes =
6553 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6554 SDValue HiRes =
6555 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6556
6557 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6558}
6559
6560static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6561 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6562 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6563 SDLoc DL(Op);
6564
6565 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6566 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6567
6568 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6569 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6570 std::tie(LoOperands[j], HiOperands[j]) =
6571 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6572 continue;
6573 }
6574 if (!Op.getOperand(j).getValueType().isVector()) {
6575 LoOperands[j] = Op.getOperand(j);
6576 HiOperands[j] = Op.getOperand(j);
6577 continue;
6578 }
6579 std::tie(LoOperands[j], HiOperands[j]) =
6580 DAG.SplitVector(Op.getOperand(j), DL);
6581 }
6582
6583 SDValue LoRes =
6584 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6585 SDValue HiRes =
6586 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6587
6588 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6589}
6590
6591static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6592 SDLoc DL(Op);
6593
6594 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6595 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6596 auto [EVLLo, EVLHi] =
6597 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6598
6599 SDValue ResLo =
6600 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6601 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6602 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6603 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6604}
6605
6606static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6607
6608 assert(Op->isStrictFPOpcode());
6609
6610 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6611
6612 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6613 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6614
6615 SDLoc DL(Op);
6616
6617 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6618 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6619
6620 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6621 if (!Op.getOperand(j).getValueType().isVector()) {
6622 LoOperands[j] = Op.getOperand(j);
6623 HiOperands[j] = Op.getOperand(j);
6624 continue;
6625 }
6626 std::tie(LoOperands[j], HiOperands[j]) =
6627 DAG.SplitVector(Op.getOperand(j), DL);
6628 }
6629
6630 SDValue LoRes =
6631 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6632 HiOperands[0] = LoRes.getValue(1);
6633 SDValue HiRes =
6634 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6635
6636 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6637 LoRes.getValue(0), HiRes.getValue(0));
6638 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6639}
6640
6641SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6642 SelectionDAG &DAG) const {
6643 switch (Op.getOpcode()) {
6644 default:
6645 report_fatal_error("unimplemented operand");
6646 case ISD::ATOMIC_FENCE:
6647 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6648 case ISD::GlobalAddress:
6649 return lowerGlobalAddress(Op, DAG);
6650 case ISD::BlockAddress:
6651 return lowerBlockAddress(Op, DAG);
6652 case ISD::ConstantPool:
6653 return lowerConstantPool(Op, DAG);
6654 case ISD::JumpTable:
6655 return lowerJumpTable(Op, DAG);
6656 case ISD::GlobalTLSAddress:
6657 return lowerGlobalTLSAddress(Op, DAG);
6658 case ISD::Constant:
6659 return lowerConstant(Op, DAG, Subtarget);
6660 case ISD::ConstantFP:
6661 return lowerConstantFP(Op, DAG);
6662 case ISD::SELECT:
6663 return lowerSELECT(Op, DAG);
6664 case ISD::BRCOND:
6665 return lowerBRCOND(Op, DAG);
6666 case ISD::VASTART:
6667 return lowerVASTART(Op, DAG);
6668 case ISD::FRAMEADDR:
6669 return lowerFRAMEADDR(Op, DAG);
6670 case ISD::RETURNADDR:
6671 return lowerRETURNADDR(Op, DAG);
6672 case ISD::SHL_PARTS:
6673 return lowerShiftLeftParts(Op, DAG);
6674 case ISD::SRA_PARTS:
6675 return lowerShiftRightParts(Op, DAG, true);
6676 case ISD::SRL_PARTS:
6677 return lowerShiftRightParts(Op, DAG, false);
6678 case ISD::ROTL:
6679 case ISD::ROTR:
6680 if (Op.getValueType().isFixedLengthVector()) {
6681 assert(Subtarget.hasStdExtZvkb());
6682 return lowerToScalableOp(Op, DAG);
6683 }
6684 assert(Subtarget.hasVendorXTHeadBb() &&
6685 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6686 "Unexpected custom legalization");
6687 // XTHeadBb only supports rotate by constant.
6688 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6689 return SDValue();
6690 return Op;
6691 case ISD::BITCAST: {
6692 SDLoc DL(Op);
6693 EVT VT = Op.getValueType();
6694 SDValue Op0 = Op.getOperand(0);
6695 EVT Op0VT = Op0.getValueType();
6696 MVT XLenVT = Subtarget.getXLenVT();
6697 if (Op0VT == MVT::i16 &&
6698 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6699 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6700 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6701 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6702 }
6703 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6704 Subtarget.hasStdExtFOrZfinx()) {
6705 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6706 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6707 }
6708 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6709 Subtarget.hasStdExtDOrZdinx()) {
6710 SDValue Lo, Hi;
6711 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6712 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6713 }
6714
6715 // Consider other scalar<->scalar casts as legal if the types are legal.
6716 // Otherwise expand them.
6717 if (!VT.isVector() && !Op0VT.isVector()) {
6718 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6719 return Op;
6720 return SDValue();
6721 }
6722
6723 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6724 "Unexpected types");
6725
6726 if (VT.isFixedLengthVector()) {
6727 // We can handle fixed length vector bitcasts with a simple replacement
6728 // in isel.
6729 if (Op0VT.isFixedLengthVector())
6730 return Op;
6731 // When bitcasting from scalar to fixed-length vector, insert the scalar
6732 // into a one-element vector of the result type, and perform a vector
6733 // bitcast.
6734 if (!Op0VT.isVector()) {
6735 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6736 if (!isTypeLegal(BVT))
6737 return SDValue();
6738 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6739 DAG.getUNDEF(BVT), Op0,
6740 DAG.getVectorIdxConstant(0, DL)));
6741 }
6742 return SDValue();
6743 }
6744 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6745 // thus: bitcast the vector to a one-element vector type whose element type
6746 // is the same as the result type, and extract the first element.
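// For example, (i64 (bitcast v4i16 V)) becomes
// (i64 (extract_vector_elt (v1i64 (bitcast V)), 0)).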
6747 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6748 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6749 if (!isTypeLegal(BVT))
6750 return SDValue();
6751 SDValue BVec = DAG.getBitcast(BVT, Op0);
6752 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6753 DAG.getVectorIdxConstant(0, DL));
6754 }
6755 return SDValue();
6756 }
6757 case ISD::INTRINSIC_WO_CHAIN:
6758 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6759 case ISD::INTRINSIC_W_CHAIN:
6760 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6761 case ISD::INTRINSIC_VOID:
6762 return LowerINTRINSIC_VOID(Op, DAG);
6763 case ISD::IS_FPCLASS:
6764 return LowerIS_FPCLASS(Op, DAG);
6765 case ISD::BITREVERSE: {
6766 MVT VT = Op.getSimpleValueType();
6767 if (VT.isFixedLengthVector()) {
6768 assert(Subtarget.hasStdExtZvbb());
6769 return lowerToScalableOp(Op, DAG);
6770 }
6771 SDLoc DL(Op);
6772 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6773 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6774 // Expand bitreverse to a bswap(rev8) followed by brev8.
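// For example, for a 32-bit value, rev8 reverses the four bytes and brev8
// then reverses the bits within each byte, which together reverse all 32 bits.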
6775 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6776 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6777 }
6778 case ISD::TRUNCATE:
6781 // Only custom-lower vector truncates
6782 if (!Op.getSimpleValueType().isVector())
6783 return Op;
6784 return lowerVectorTruncLike(Op, DAG);
6785 case ISD::ANY_EXTEND:
6786 case ISD::ZERO_EXTEND:
6787 if (Op.getOperand(0).getValueType().isVector() &&
6788 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6789 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6790 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6791 case ISD::SIGN_EXTEND:
6792 if (Op.getOperand(0).getValueType().isVector() &&
6793 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6794 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6795 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6796 case ISD::SPLAT_VECTOR_PARTS:
6797 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6798 case ISD::INSERT_VECTOR_ELT:
6799 return lowerINSERT_VECTOR_ELT(Op, DAG);
6800 case ISD::EXTRACT_VECTOR_ELT:
6801 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6802 case ISD::SCALAR_TO_VECTOR: {
6803 MVT VT = Op.getSimpleValueType();
6804 SDLoc DL(Op);
6805 SDValue Scalar = Op.getOperand(0);
6806 if (VT.getVectorElementType() == MVT::i1) {
6807 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6808 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6809 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6810 }
6811 MVT ContainerVT = VT;
6812 if (VT.isFixedLengthVector())
6813 ContainerVT = getContainerForFixedLengthVector(VT);
6814 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6815
6816 SDValue V;
6817 if (VT.isFloatingPoint()) {
6818 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6819 DAG.getUNDEF(ContainerVT), Scalar, VL);
6820 } else {
6821 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6822 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6823 DAG.getUNDEF(ContainerVT), Scalar, VL);
6824 }
6825 if (VT.isFixedLengthVector())
6826 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6827 return V;
6828 }
6829 case ISD::VSCALE: {
6830 MVT XLenVT = Subtarget.getXLenVT();
6831 MVT VT = Op.getSimpleValueType();
6832 SDLoc DL(Op);
6833 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6834 // We define our scalable vector types for lmul=1 to use a 64 bit known
6835 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6836 // vscale as VLENB / 8.
6837 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6838 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6839 report_fatal_error("Support for VLEN==32 is incomplete.");
6840 // We assume VLENB is a multiple of 8. We manually choose the best shift
6841 // here because SimplifyDemandedBits isn't always able to simplify it.
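// For example, (vscale x 2) becomes (srl VLENB, 2), (vscale x 16) becomes
// (shl VLENB, 1), (vscale x 24) becomes (mul VLENB, 3), and (vscale x 6)
// becomes (mul (srl VLENB, 3), 6).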
6842 uint64_t Val = Op.getConstantOperandVal(0);
6843 if (isPowerOf2_64(Val)) {
6844 uint64_t Log2 = Log2_64(Val);
6845 if (Log2 < 3)
6846 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6847 DAG.getConstant(3 - Log2, DL, VT));
6848 else if (Log2 > 3)
6849 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6850 DAG.getConstant(Log2 - 3, DL, XLenVT));
6851 } else if ((Val % 8) == 0) {
6852 // If the multiplier is a multiple of 8, scale it down to avoid needing
6853 // to shift the VLENB value.
6854 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6855 DAG.getConstant(Val / 8, DL, XLenVT));
6856 } else {
6857 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6858 DAG.getConstant(3, DL, XLenVT));
6859 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6860 DAG.getConstant(Val, DL, XLenVT));
6861 }
6862 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6863 }
6864 case ISD::FPOWI: {
6865 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6866 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6867 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6868 Op.getOperand(1).getValueType() == MVT::i32) {
6869 SDLoc DL(Op);
6870 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6871 SDValue Powi =
6872 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6873 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6874 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6875 }
6876 return SDValue();
6877 }
6878 case ISD::FMAXIMUM:
6879 case ISD::FMINIMUM:
6880 if (isPromotedOpNeedingSplit(Op, Subtarget))
6881 return SplitVectorOp(Op, DAG);
6882 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6883 case ISD::FP_EXTEND:
6884 case ISD::FP_ROUND:
6885 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6886 case ISD::STRICT_FP_EXTEND:
6887 case ISD::STRICT_FP_ROUND:
6888 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6889 case ISD::SINT_TO_FP:
6890 case ISD::UINT_TO_FP:
6891 if (Op.getValueType().isVector() &&
6892 ((Op.getValueType().getScalarType() == MVT::f16 &&
6893 (Subtarget.hasVInstructionsF16Minimal() &&
6894 !Subtarget.hasVInstructionsF16())) ||
6895 Op.getValueType().getScalarType() == MVT::bf16)) {
6896 if (isPromotedOpNeedingSplit(Op, Subtarget))
6897 return SplitVectorOp(Op, DAG);
6898 // int -> f32
6899 SDLoc DL(Op);
6900 MVT NVT =
6901 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6902 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6903 // f32 -> [b]f16
6904 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6905 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6906 }
6907 [[fallthrough]];
6908 case ISD::FP_TO_SINT:
6909 case ISD::FP_TO_UINT:
6910 if (SDValue Op1 = Op.getOperand(0);
6911 Op1.getValueType().isVector() &&
6912 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6913 (Subtarget.hasVInstructionsF16Minimal() &&
6914 !Subtarget.hasVInstructionsF16())) ||
6915 Op1.getValueType().getScalarType() == MVT::bf16)) {
6916 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6917 return SplitVectorOp(Op, DAG);
6918 // [b]f16 -> f32
6919 SDLoc DL(Op);
6920 MVT NVT = MVT::getVectorVT(MVT::f32,
6921 Op1.getValueType().getVectorElementCount());
6922 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6923 // f32 -> int
6924 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6925 }
6926 [[fallthrough]];
6927 case ISD::STRICT_FP_TO_SINT:
6928 case ISD::STRICT_FP_TO_UINT:
6929 case ISD::STRICT_SINT_TO_FP:
6930 case ISD::STRICT_UINT_TO_FP: {
6931 // RVV can only do fp<->int conversions to types half/double the size as
6932 // the source. We custom-lower any conversions that do two hops into
6933 // sequences.
6934 MVT VT = Op.getSimpleValueType();
6935 if (VT.isScalarInteger())
6936 return lowerFP_TO_INT(Op, DAG, Subtarget);
6937 bool IsStrict = Op->isStrictFPOpcode();
6938 SDValue Src = Op.getOperand(0 + IsStrict);
6939 MVT SrcVT = Src.getSimpleValueType();
6940 if (SrcVT.isScalarInteger())
6941 return lowerINT_TO_FP(Op, DAG, Subtarget);
6942 if (!VT.isVector())
6943 return Op;
6944 SDLoc DL(Op);
6945 MVT EltVT = VT.getVectorElementType();
6946 MVT SrcEltVT = SrcVT.getVectorElementType();
6947 unsigned EltSize = EltVT.getSizeInBits();
6948 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6949 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6950 "Unexpected vector element types");
6951
6952 bool IsInt2FP = SrcEltVT.isInteger();
6953 // Widening conversions
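// For example, i8 -> f32 is lowered as i8 -> i16 (sign/zero extend) followed
// by i16 -> f32, and f16 -> i64 as f16 -> f32 followed by f32 -> i64.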
6954 if (EltSize > (2 * SrcEltSize)) {
6955 if (IsInt2FP) {
6956 // Do a regular integer sign/zero extension then convert to float.
6957 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6958 VT.getVectorElementCount());
6959 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6960 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6961 ? ISD::ZERO_EXTEND
6962 : ISD::SIGN_EXTEND;
6963 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6964 if (IsStrict)
6965 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6966 Op.getOperand(0), Ext);
6967 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6968 }
6969 // FP2Int
6970 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6971 // Do one doubling fp_extend then complete the operation by converting
6972 // to int.
6973 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6974 if (IsStrict) {
6975 auto [FExt, Chain] =
6976 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6977 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6978 }
6979 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6980 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6981 }
6982
6983 // Narrowing conversions
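// For example, i64 -> f16 is lowered as i64 -> f32 followed by an fp_round
// to f16, and f64 -> i8 as f64 -> i32 followed by a truncate to i8.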
6984 if (SrcEltSize > (2 * EltSize)) {
6985 if (IsInt2FP) {
6986 // One narrowing int_to_fp, then an fp_round.
6987 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6988 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6989 if (IsStrict) {
6990 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6991 DAG.getVTList(InterimFVT, MVT::Other),
6992 Op.getOperand(0), Src);
6993 SDValue Chain = Int2FP.getValue(1);
6994 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6995 }
6996 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6997 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6998 }
6999 // FP2Int
7000 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7001 // representable by the integer, the result is poison.
7002 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7003 VT.getVectorElementCount());
7004 if (IsStrict) {
7005 SDValue FP2Int =
7006 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7007 Op.getOperand(0), Src);
7008 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7009 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7010 }
7011 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7012 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7013 }
7014
7015 // Scalable vectors can exit here. Patterns will handle equally-sized
7016 // conversions halving/doubling ones.
7017 if (!VT.isFixedLengthVector())
7018 return Op;
7019
7020 // For fixed-length vectors we lower to a custom "VL" node.
7021 unsigned RVVOpc = 0;
7022 switch (Op.getOpcode()) {
7023 default:
7024 llvm_unreachable("Impossible opcode");
7025 case ISD::FP_TO_SINT:
7026 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7027 break;
7028 case ISD::FP_TO_UINT:
7029 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7030 break;
7031 case ISD::SINT_TO_FP:
7032 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7033 break;
7034 case ISD::UINT_TO_FP:
7035 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7036 break;
7037 case ISD::STRICT_FP_TO_SINT:
7038 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7039 break;
7040 case ISD::STRICT_FP_TO_UINT:
7041 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7042 break;
7043 case ISD::STRICT_SINT_TO_FP:
7044 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7045 break;
7046 case ISD::STRICT_UINT_TO_FP:
7047 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7048 break;
7049 }
7050
7051 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7052 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7053 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7054 "Expected same element count");
7055
7056 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7057
7058 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7059 if (IsStrict) {
7060 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7061 Op.getOperand(0), Src, Mask, VL);
7062 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7063 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7064 }
7065 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7066 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7067 }
7068 case ISD::FP_TO_SINT_SAT:
7069 case ISD::FP_TO_UINT_SAT:
7070 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7071 case ISD::FP_TO_BF16: {
7072 // Custom lower to ensure the libcall return is passed in an FPR on hard
7073 // float ABIs.
7074 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7075 SDLoc DL(Op);
7076 MakeLibCallOptions CallOptions;
7077 RTLIB::Libcall LC =
7078 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7079 SDValue Res =
7080 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7081 if (Subtarget.is64Bit())
7082 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7083 return DAG.getBitcast(MVT::i32, Res);
7084 }
7085 case ISD::BF16_TO_FP: {
7086 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7087 MVT VT = Op.getSimpleValueType();
7088 SDLoc DL(Op);
7089 Op = DAG.getNode(
7090 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7091 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7092 SDValue Res = Subtarget.is64Bit()
7093 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7094 : DAG.getBitcast(MVT::f32, Op);
7095 // fp_extend if the target VT is bigger than f32.
7096 if (VT != MVT::f32)
7097 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7098 return Res;
7099 }
7100 case ISD::STRICT_FP_TO_FP16:
7101 case ISD::FP_TO_FP16: {
7102 // Custom lower to ensure the libcall return is passed in an FPR on hard
7103 // float ABIs.
7104 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7105 SDLoc DL(Op);
7106 MakeLibCallOptions CallOptions;
7107 bool IsStrict = Op->isStrictFPOpcode();
7108 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7109 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7110 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7111 SDValue Res;
7112 std::tie(Res, Chain) =
7113 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7114 if (Subtarget.is64Bit())
7115 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7116 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7117 if (IsStrict)
7118 return DAG.getMergeValues({Result, Chain}, DL);
7119 return Result;
7120 }
7121 case ISD::STRICT_FP16_TO_FP:
7122 case ISD::FP16_TO_FP: {
7123 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7124 // float ABIs.
7125 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7126 SDLoc DL(Op);
7127 MakeLibCallOptions CallOptions;
7128 bool IsStrict = Op->isStrictFPOpcode();
7129 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7130 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7131 SDValue Arg = Subtarget.is64Bit()
7132 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7133 : DAG.getBitcast(MVT::f32, Op0);
7134 SDValue Res;
7135 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7136 CallOptions, DL, Chain);
7137 if (IsStrict)
7138 return DAG.getMergeValues({Res, Chain}, DL);
7139 return Res;
7140 }
7141 case ISD::FTRUNC:
7142 case ISD::FCEIL:
7143 case ISD::FFLOOR:
7144 case ISD::FNEARBYINT:
7145 case ISD::FRINT:
7146 case ISD::FROUND:
7147 case ISD::FROUNDEVEN:
7148 if (isPromotedOpNeedingSplit(Op, Subtarget))
7149 return SplitVectorOp(Op, DAG);
7150 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7151 case ISD::LRINT:
7152 case ISD::LLRINT:
7153 if (Op.getValueType().isVector())
7154 return lowerVectorXRINT(Op, DAG, Subtarget);
7155 [[fallthrough]];
7156 case ISD::LROUND:
7157 case ISD::LLROUND: {
7158 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7159 "Unexpected custom legalisation");
7160 SDLoc DL(Op);
7161 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7162 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7163 }
7164 case ISD::STRICT_LRINT:
7165 case ISD::STRICT_LLRINT:
7166 case ISD::STRICT_LROUND:
7167 case ISD::STRICT_LLROUND: {
7168 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7169 "Unexpected custom legalisation");
7170 SDLoc DL(Op);
7171 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7172 {Op.getOperand(0), Op.getOperand(1)});
7173 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7174 {Ext.getValue(1), Ext.getValue(0)});
7175 }
7176 case ISD::VECREDUCE_ADD:
7177 case ISD::VECREDUCE_UMAX:
7178 case ISD::VECREDUCE_SMAX:
7179 case ISD::VECREDUCE_UMIN:
7180 case ISD::VECREDUCE_SMIN:
7181 return lowerVECREDUCE(Op, DAG);
7182 case ISD::VECREDUCE_AND:
7183 case ISD::VECREDUCE_OR:
7184 case ISD::VECREDUCE_XOR:
7185 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7186 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7187 return lowerVECREDUCE(Op, DAG);
7188 case ISD::VECREDUCE_FADD:
7189 case ISD::VECREDUCE_SEQ_FADD:
7190 case ISD::VECREDUCE_FMIN:
7191 case ISD::VECREDUCE_FMAX:
7192 case ISD::VECREDUCE_FMAXIMUM:
7193 case ISD::VECREDUCE_FMINIMUM:
7194 return lowerFPVECREDUCE(Op, DAG);
7195 case ISD::VP_REDUCE_ADD:
7196 case ISD::VP_REDUCE_UMAX:
7197 case ISD::VP_REDUCE_SMAX:
7198 case ISD::VP_REDUCE_UMIN:
7199 case ISD::VP_REDUCE_SMIN:
7200 case ISD::VP_REDUCE_FADD:
7201 case ISD::VP_REDUCE_SEQ_FADD:
7202 case ISD::VP_REDUCE_FMIN:
7203 case ISD::VP_REDUCE_FMAX:
7204 case ISD::VP_REDUCE_FMINIMUM:
7205 case ISD::VP_REDUCE_FMAXIMUM:
7206 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7207 return SplitVectorReductionOp(Op, DAG);
7208 return lowerVPREDUCE(Op, DAG);
7209 case ISD::VP_REDUCE_AND:
7210 case ISD::VP_REDUCE_OR:
7211 case ISD::VP_REDUCE_XOR:
7212 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7213 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7214 return lowerVPREDUCE(Op, DAG);
7215 case ISD::VP_CTTZ_ELTS:
7216 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7217 return lowerVPCttzElements(Op, DAG);
7218 case ISD::UNDEF: {
7219 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7220 return convertFromScalableVector(Op.getSimpleValueType(),
7221 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7222 }
7223 case ISD::INSERT_SUBVECTOR:
7224 return lowerINSERT_SUBVECTOR(Op, DAG);
7225 case ISD::EXTRACT_SUBVECTOR:
7226 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7227 case ISD::VECTOR_DEINTERLEAVE:
7228 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7229 case ISD::VECTOR_INTERLEAVE:
7230 return lowerVECTOR_INTERLEAVE(Op, DAG);
7231 case ISD::STEP_VECTOR:
7232 return lowerSTEP_VECTOR(Op, DAG);
7233 case ISD::VECTOR_REVERSE:
7234 return lowerVECTOR_REVERSE(Op, DAG);
7235 case ISD::VECTOR_SPLICE:
7236 return lowerVECTOR_SPLICE(Op, DAG);
7237 case ISD::BUILD_VECTOR:
7238 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7239 case ISD::SPLAT_VECTOR: {
7240 MVT VT = Op.getSimpleValueType();
7241 MVT EltVT = VT.getVectorElementType();
7242 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7243 EltVT == MVT::bf16) {
7244 SDLoc DL(Op);
7245 SDValue Elt;
7246 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7247 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7248 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7249 Op.getOperand(0));
7250 else
7251 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7252 MVT IVT = VT.changeVectorElementType(MVT::i16);
7253 return DAG.getNode(ISD::BITCAST, DL, VT,
7254 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7255 }
7256
7257 if (EltVT == MVT::i1)
7258 return lowerVectorMaskSplat(Op, DAG);
7259 return SDValue();
7260 }
7261 case ISD::VECTOR_SHUFFLE:
7262 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7263 case ISD::CONCAT_VECTORS: {
7264 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7265 // better than going through the stack, as the default expansion does.
7266 SDLoc DL(Op);
7267 MVT VT = Op.getSimpleValueType();
7268 MVT ContainerVT = VT;
7269 if (VT.isFixedLengthVector())
7270 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7271
7272 // Recursively split concat_vectors with more than 2 operands:
7273 //
7274 // concat_vector op1, op2, op3, op4
7275 // ->
7276 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7277 //
7278 // This reduces the length of the chain of vslideups and allows us to
7279 // perform the vslideups at a smaller LMUL, limited to MF2.
7280 if (Op.getNumOperands() > 2 &&
7281 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7282 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7284 size_t HalfNumOps = Op.getNumOperands() / 2;
7285 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7286 Op->ops().take_front(HalfNumOps));
7287 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7288 Op->ops().drop_front(HalfNumOps));
7289 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7290 }
7291
7292 unsigned NumOpElts =
7293 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7294 SDValue Vec = DAG.getUNDEF(VT);
7295 for (const auto &OpIdx : enumerate(Op->ops())) {
7296 SDValue SubVec = OpIdx.value();
7297 // Don't insert undef subvectors.
7298 if (SubVec.isUndef())
7299 continue;
7300 Vec =
7301 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7302 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7303 }
7304 return Vec;
7305 }
7306 case ISD::LOAD: {
7307 auto *Load = cast<LoadSDNode>(Op);
7308 EVT VecTy = Load->getMemoryVT();
7309 // Handle normal vector tuple load.
7310 if (VecTy.isRISCVVectorTuple()) {
7311 SDLoc DL(Op);
7312 MVT XLenVT = Subtarget.getXLenVT();
7313 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7314 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7315 unsigned NumElts = Sz / (NF * 8);
7316 int Log2LMUL = Log2_64(NumElts) - 3;
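// For example, a tuple of NF=4 fields of type nxv8i8 has a known minimum
// size of 4 * 64 bits, so NumElts = 8 and Log2LMUL = 0, and consecutive
// fields are one vector register (VLENB bytes) apart.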
7317
7318 auto Flag = SDNodeFlags();
7319 Flag.setNoUnsignedWrap(true);
7320 SDValue Ret = DAG.getUNDEF(VecTy);
7321 SDValue BasePtr = Load->getBasePtr();
7322 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7323 VROffset =
7324 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7325 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7326 SmallVector<SDValue, 8> OutChains;
7327
7328 // Load NF vector registers and combine them to a vector tuple.
7329 for (unsigned i = 0; i < NF; ++i) {
7330 SDValue LoadVal = DAG.getLoad(
7331 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7332 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7333 OutChains.push_back(LoadVal.getValue(1));
7334 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7335 DAG.getVectorIdxConstant(i, DL));
7336 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7337 }
7338 return DAG.getMergeValues(
7339 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7340 }
7341
7342 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7343 return V;
7344 if (Op.getValueType().isFixedLengthVector())
7345 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7346 return Op;
7347 }
7348 case ISD::STORE: {
7349 auto *Store = cast<StoreSDNode>(Op);
7350 SDValue StoredVal = Store->getValue();
7351 EVT VecTy = StoredVal.getValueType();
7352 // Handle normal vector tuple store.
7353 if (VecTy.isRISCVVectorTuple()) {
7354 SDLoc DL(Op);
7355 MVT XLenVT = Subtarget.getXLenVT();
7356 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7357 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7358 unsigned NumElts = Sz / (NF * 8);
7359 int Log2LMUL = Log2_64(NumElts) - 3;
7360
7361 auto Flag = SDNodeFlags();
7362 Flag.setNoUnsignedWrap(true);
7363 SDValue Ret;
7364 SDValue Chain = Store->getChain();
7365 SDValue BasePtr = Store->getBasePtr();
7366 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7367 VROffset =
7368 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7369 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7370
7371 // Extract subregisters in a vector tuple and store them individually.
7372 for (unsigned i = 0; i < NF; ++i) {
7373 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7374 MVT::getScalableVectorVT(MVT::i8, NumElts),
7375 StoredVal, DAG.getVectorIdxConstant(i, DL));
7376 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7377 MachinePointerInfo(Store->getAddressSpace()),
7378 Store->getOriginalAlign(),
7379 Store->getMemOperand()->getFlags());
7380 Chain = Ret.getValue(0);
7381 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7382 }
7383 return Ret;
7384 }
7385
7386 if (auto V = expandUnalignedRVVStore(Op, DAG))
7387 return V;
7388 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7389 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7390 return Op;
7391 }
7392 case ISD::MLOAD:
7393 case ISD::VP_LOAD:
7394 return lowerMaskedLoad(Op, DAG);
7395 case ISD::MSTORE:
7396 case ISD::VP_STORE:
7397 return lowerMaskedStore(Op, DAG);
7398 case ISD::VECTOR_COMPRESS:
7399 return lowerVectorCompress(Op, DAG);
7400 case ISD::SELECT_CC: {
7401 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7402 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7403 // into separate SETCC+SELECT just like LegalizeDAG.
7404 SDValue Tmp1 = Op.getOperand(0);
7405 SDValue Tmp2 = Op.getOperand(1);
7406 SDValue True = Op.getOperand(2);
7407 SDValue False = Op.getOperand(3);
7408 EVT VT = Op.getValueType();
7409 SDValue CC = Op.getOperand(4);
7410 EVT CmpVT = Tmp1.getValueType();
7411 EVT CCVT =
7412 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7413 SDLoc DL(Op);
7414 SDValue Cond =
7415 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7416 return DAG.getSelect(DL, VT, Cond, True, False);
7417 }
7418 case ISD::SETCC: {
7419 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7420 if (OpVT.isScalarInteger()) {
7421 MVT VT = Op.getSimpleValueType();
7422 SDValue LHS = Op.getOperand(0);
7423 SDValue RHS = Op.getOperand(1);
7424 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7425 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7426 "Unexpected CondCode");
7427
7428 SDLoc DL(Op);
7429
7430 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7431 // convert this to the equivalent of (set(u)ge X, C+1) by using
7432 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7433 // in a register.
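// For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and
// (setugt X, 7) becomes (xori (sltiu X, 8), 1).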
7434 if (isa<ConstantSDNode>(RHS)) {
7435 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7436 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7437 // If this is an unsigned compare and the constant is -1, incrementing
7438 // the constant would change behavior. The result should be false.
7439 if (CCVal == ISD::SETUGT && Imm == -1)
7440 return DAG.getConstant(0, DL, VT);
7441 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7442 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7443 SDValue SetCC = DAG.getSetCC(
7444 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7445 return DAG.getLogicalNOT(DL, SetCC, VT);
7446 }
7447 }
7448
7449 // Not a constant we could handle, swap the operands and condition code to
7450 // SETLT/SETULT.
7451 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7452 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7453 }
7454
7455 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7456 return SplitVectorOp(Op, DAG);
7457
7458 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7459 }
7460 case ISD::ADD:
7461 case ISD::SUB:
7462 case ISD::MUL:
7463 case ISD::MULHS:
7464 case ISD::MULHU:
7465 case ISD::AND:
7466 case ISD::OR:
7467 case ISD::XOR:
7468 case ISD::SDIV:
7469 case ISD::SREM:
7470 case ISD::UDIV:
7471 case ISD::UREM:
7472 case ISD::BSWAP:
7473 case ISD::CTPOP:
7474 return lowerToScalableOp(Op, DAG);
7475 case ISD::SHL:
7476 case ISD::SRA:
7477 case ISD::SRL:
7478 if (Op.getSimpleValueType().isFixedLengthVector())
7479 return lowerToScalableOp(Op, DAG);
7480 // This can be called for an i32 shift amount that needs to be promoted.
7481 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7482 "Unexpected custom legalisation");
7483 return SDValue();
7484 case ISD::FABS:
7485 case ISD::FNEG:
7486 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7487 return lowerFABSorFNEG(Op, DAG, Subtarget);
7488 [[fallthrough]];
7489 case ISD::FADD:
7490 case ISD::FSUB:
7491 case ISD::FMUL:
7492 case ISD::FDIV:
7493 case ISD::FSQRT:
7494 case ISD::FMA:
7495 case ISD::FMINNUM:
7496 case ISD::FMAXNUM:
7497 if (isPromotedOpNeedingSplit(Op, Subtarget))
7498 return SplitVectorOp(Op, DAG);
7499 [[fallthrough]];
7500 case ISD::AVGFLOORS:
7501 case ISD::AVGFLOORU:
7502 case ISD::AVGCEILS:
7503 case ISD::AVGCEILU:
7504 case ISD::SMIN:
7505 case ISD::SMAX:
7506 case ISD::UMIN:
7507 case ISD::UMAX:
7508 case ISD::UADDSAT:
7509 case ISD::USUBSAT:
7510 case ISD::SADDSAT:
7511 case ISD::SSUBSAT:
7512 return lowerToScalableOp(Op, DAG);
7513 case ISD::ABDS:
7514 case ISD::ABDU: {
7515 SDLoc dl(Op);
7516 EVT VT = Op->getValueType(0);
7517 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7518 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7519 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7520
7521 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7522 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
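// For example, abds(-2, 5) = smax(-2, 5) - smin(-2, 5) = 5 - (-2) = 7.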
7523 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7524 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7525 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7526 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7527 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7528 }
7529 case ISD::ABS:
7530 case ISD::VP_ABS:
7531 return lowerABS(Op, DAG);
7532 case ISD::CTLZ:
7533 case ISD::CTLZ_ZERO_UNDEF:
7534 case ISD::CTTZ:
7535 case ISD::CTTZ_ZERO_UNDEF:
7536 if (Subtarget.hasStdExtZvbb())
7537 return lowerToScalableOp(Op, DAG);
7538 assert(Op.getOpcode() != ISD::CTTZ);
7539 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7540 case ISD::VSELECT:
7541 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7542 case ISD::FCOPYSIGN:
7543 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7544 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7545 if (isPromotedOpNeedingSplit(Op, Subtarget))
7546 return SplitVectorOp(Op, DAG);
7547 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7548 case ISD::STRICT_FADD:
7549 case ISD::STRICT_FSUB:
7550 case ISD::STRICT_FMUL:
7551 case ISD::STRICT_FDIV:
7552 case ISD::STRICT_FSQRT:
7553 case ISD::STRICT_FMA:
7554 if (isPromotedOpNeedingSplit(Op, Subtarget))
7555 return SplitStrictFPVectorOp(Op, DAG);
7556 return lowerToScalableOp(Op, DAG);
7557 case ISD::STRICT_FSETCC:
7558 case ISD::STRICT_FSETCCS:
7559 return lowerVectorStrictFSetcc(Op, DAG);
7560 case ISD::STRICT_FCEIL:
7561 case ISD::STRICT_FRINT:
7562 case ISD::STRICT_FFLOOR:
7563 case ISD::STRICT_FTRUNC:
7564 case ISD::STRICT_FNEARBYINT:
7565 case ISD::STRICT_FROUND:
7566 case ISD::STRICT_FROUNDEVEN:
7567 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7568 case ISD::MGATHER:
7569 case ISD::VP_GATHER:
7570 return lowerMaskedGather(Op, DAG);
7571 case ISD::MSCATTER:
7572 case ISD::VP_SCATTER:
7573 return lowerMaskedScatter(Op, DAG);
7574 case ISD::GET_ROUNDING:
7575 return lowerGET_ROUNDING(Op, DAG);
7576 case ISD::SET_ROUNDING:
7577 return lowerSET_ROUNDING(Op, DAG);
7578 case ISD::EH_DWARF_CFA:
7579 return lowerEH_DWARF_CFA(Op, DAG);
7580 case ISD::VP_MERGE:
7581 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7582 return lowerVPMergeMask(Op, DAG);
7583 [[fallthrough]];
7584 case ISD::VP_SELECT:
7585 case ISD::VP_ADD:
7586 case ISD::VP_SUB:
7587 case ISD::VP_MUL:
7588 case ISD::VP_SDIV:
7589 case ISD::VP_UDIV:
7590 case ISD::VP_SREM:
7591 case ISD::VP_UREM:
7592 case ISD::VP_UADDSAT:
7593 case ISD::VP_USUBSAT:
7594 case ISD::VP_SADDSAT:
7595 case ISD::VP_SSUBSAT:
7596 case ISD::VP_LRINT:
7597 case ISD::VP_LLRINT:
7598 return lowerVPOp(Op, DAG);
7599 case ISD::VP_AND:
7600 case ISD::VP_OR:
7601 case ISD::VP_XOR:
7602 return lowerLogicVPOp(Op, DAG);
7603 case ISD::VP_FADD:
7604 case ISD::VP_FSUB:
7605 case ISD::VP_FMUL:
7606 case ISD::VP_FDIV:
7607 case ISD::VP_FNEG:
7608 case ISD::VP_FABS:
7609 case ISD::VP_SQRT:
7610 case ISD::VP_FMA:
7611 case ISD::VP_FMINNUM:
7612 case ISD::VP_FMAXNUM:
7613 case ISD::VP_FCOPYSIGN:
7614 if (isPromotedOpNeedingSplit(Op, Subtarget))
7615 return SplitVPOp(Op, DAG);
7616 [[fallthrough]];
7617 case ISD::VP_SRA:
7618 case ISD::VP_SRL:
7619 case ISD::VP_SHL:
7620 return lowerVPOp(Op, DAG);
7621 case ISD::VP_IS_FPCLASS:
7622 return LowerIS_FPCLASS(Op, DAG);
7623 case ISD::VP_SIGN_EXTEND:
7624 case ISD::VP_ZERO_EXTEND:
7625 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7626 return lowerVPExtMaskOp(Op, DAG);
7627 return lowerVPOp(Op, DAG);
7628 case ISD::VP_TRUNCATE:
7629 return lowerVectorTruncLike(Op, DAG);
7630 case ISD::VP_FP_EXTEND:
7631 case ISD::VP_FP_ROUND:
7632 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7633 case ISD::VP_SINT_TO_FP:
7634 case ISD::VP_UINT_TO_FP:
7635 if (Op.getValueType().isVector() &&
7636 ((Op.getValueType().getScalarType() == MVT::f16 &&
7637 (Subtarget.hasVInstructionsF16Minimal() &&
7638 !Subtarget.hasVInstructionsF16())) ||
7639 Op.getValueType().getScalarType() == MVT::bf16)) {
7640 if (isPromotedOpNeedingSplit(Op, Subtarget))
7641 return SplitVectorOp(Op, DAG);
7642 // int -> f32
7643 SDLoc DL(Op);
7644 MVT NVT =
7645 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7646 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7647 // f32 -> [b]f16
7648 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7649 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7650 }
7651 [[fallthrough]];
7652 case ISD::VP_FP_TO_SINT:
7653 case ISD::VP_FP_TO_UINT:
7654 if (SDValue Op1 = Op.getOperand(0);
7655 Op1.getValueType().isVector() &&
7656 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7657 (Subtarget.hasVInstructionsF16Minimal() &&
7658 !Subtarget.hasVInstructionsF16())) ||
7659 Op1.getValueType().getScalarType() == MVT::bf16)) {
7660 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7661 return SplitVectorOp(Op, DAG);
7662 // [b]f16 -> f32
7663 SDLoc DL(Op);
7664 MVT NVT = MVT::getVectorVT(MVT::f32,
7665 Op1.getValueType().getVectorElementCount());
7666 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7667 // f32 -> int
7668 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7669 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7670 }
7671 return lowerVPFPIntConvOp(Op, DAG);
7672 case ISD::VP_SETCC:
7673 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7674 return SplitVPOp(Op, DAG);
7675 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7676 return lowerVPSetCCMaskOp(Op, DAG);
7677 [[fallthrough]];
7678 case ISD::VP_SMIN:
7679 case ISD::VP_SMAX:
7680 case ISD::VP_UMIN:
7681 case ISD::VP_UMAX:
7682 case ISD::VP_BITREVERSE:
7683 case ISD::VP_BSWAP:
7684 return lowerVPOp(Op, DAG);
7685 case ISD::VP_CTLZ:
7686 case ISD::VP_CTLZ_ZERO_UNDEF:
7687 if (Subtarget.hasStdExtZvbb())
7688 return lowerVPOp(Op, DAG);
7689 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7690 case ISD::VP_CTTZ:
7691 case ISD::VP_CTTZ_ZERO_UNDEF:
7692 if (Subtarget.hasStdExtZvbb())
7693 return lowerVPOp(Op, DAG);
7694 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7695 case ISD::VP_CTPOP:
7696 return lowerVPOp(Op, DAG);
7697 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7698 return lowerVPStridedLoad(Op, DAG);
7699 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7700 return lowerVPStridedStore(Op, DAG);
7701 case ISD::VP_FCEIL:
7702 case ISD::VP_FFLOOR:
7703 case ISD::VP_FRINT:
7704 case ISD::VP_FNEARBYINT:
7705 case ISD::VP_FROUND:
7706 case ISD::VP_FROUNDEVEN:
7707 case ISD::VP_FROUNDTOZERO:
7708 if (isPromotedOpNeedingSplit(Op, Subtarget))
7709 return SplitVPOp(Op, DAG);
7710 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7711 case ISD::VP_FMAXIMUM:
7712 case ISD::VP_FMINIMUM:
7713 if (isPromotedOpNeedingSplit(Op, Subtarget))
7714 return SplitVPOp(Op, DAG);
7715 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7716 case ISD::EXPERIMENTAL_VP_SPLICE:
7717 return lowerVPSpliceExperimental(Op, DAG);
7718 case ISD::EXPERIMENTAL_VP_REVERSE:
7719 return lowerVPReverseExperimental(Op, DAG);
7720 case ISD::EXPERIMENTAL_VP_SPLAT:
7721 return lowerVPSplatExperimental(Op, DAG);
7722 case ISD::CLEAR_CACHE: {
7723 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7724 "llvm.clear_cache only needs custom lower on Linux targets");
7725 SDLoc DL(Op);
7726 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7727 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7728 Op.getOperand(2), Flags, DL);
7729 }
7730 case ISD::INIT_TRAMPOLINE:
7731 return lowerINIT_TRAMPOLINE(Op, DAG);
7732 case ISD::ADJUST_TRAMPOLINE:
7733 return lowerADJUST_TRAMPOLINE(Op, DAG);
7734 }
7735}
7736
7737SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7738 SDValue Start, SDValue End,
7739 SDValue Flags, SDLoc DL) const {
7740 MakeLibCallOptions CallOptions;
7741 std::pair<SDValue, SDValue> CallResult =
7742 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7743 {Start, End, Flags}, CallOptions, DL, InChain);
7744
7745 // This function returns void so only the out chain matters.
7746 return CallResult.second;
7747}
7748
7749SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7750 SelectionDAG &DAG) const {
7751 if (!Subtarget.is64Bit())
7752 llvm::report_fatal_error("Trampolines only implemented for RV64");
7753
7754 // Create an MCCodeEmitter to encode instructions.
7755 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7756 assert(TLO);
7757 MCContext &MCCtx = TLO->getContext();
7758
7759 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7760 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7761
7762 SDValue Root = Op.getOperand(0);
7763 SDValue Trmp = Op.getOperand(1); // trampoline
7764 SDLoc dl(Op);
7765
7766 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7767
7768 // We store in the trampoline buffer the following instructions and data.
7769 // Offset:
7770 // 0: auipc t2, 0
7771 // 4: ld t0, 24(t2)
7772 // 8: ld t2, 16(t2)
7773 // 12: jalr t0
7774 // 16: <StaticChainOffset>
7775 // 24: <FunctionAddressOffset>
7776 // 32:
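// The auipc materializes the trampoline's own address in t2, so the two
// loads fetch the function address (offset 24) into t0 and the static chain
// (offset 16) into t2 before jumping through t0.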
7777
7778 constexpr unsigned StaticChainOffset = 16;
7779 constexpr unsigned FunctionAddressOffset = 24;
7780
7781 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7782 assert(STI);
7783 auto GetEncoding = [&](const MCInst &MC) {
7784 SmallVector<char, 32> CB;
7785 SmallVector<MCFixup> Fixups;
7786 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7787 uint32_t Encoding = support::endian::read32le(CB.data());
7788 return Encoding;
7789 };
7790
7791 SDValue OutChains[6];
7792
7793 uint32_t Encodings[] = {
7794 // auipc t2, 0
7795 // Loads the current PC into t2.
7796 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7797 // ld t0, 24(t2)
7798 // Loads the function address into t0. Note that we are using offsets
7799 // pc-relative to the first instruction of the trampoline.
7800 GetEncoding(
7801 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7802 FunctionAddressOffset)),
7803 // ld t2, 16(t2)
7804 // Load the value of the static chain.
7805 GetEncoding(
7806 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7807 StaticChainOffset)),
7808 // jalr t0
7809 // Jump to the function.
7810 GetEncoding(MCInstBuilder(RISCV::JALR)
7811 .addReg(RISCV::X0)
7812 .addReg(RISCV::X5)
7813 .addImm(0))};
7814
7815 // Store encoded instructions.
7816 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7817 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7818 DAG.getConstant(Idx * 4, dl, MVT::i64))
7819 : Trmp;
7820 OutChains[Idx] = DAG.getTruncStore(
7821 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7822 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7823 }
7824
7825 // Now store the variable part of the trampoline.
7826 SDValue FunctionAddress = Op.getOperand(2);
7827 SDValue StaticChain = Op.getOperand(3);
7828
7829 // Store the given static chain and function pointer in the trampoline buffer.
7830 struct OffsetValuePair {
7831 const unsigned Offset;
7832 const SDValue Value;
7833 SDValue Addr = SDValue(); // Used to cache the address.
7834 } OffsetValues[] = {
7835 {StaticChainOffset, StaticChain},
7836 {FunctionAddressOffset, FunctionAddress},
7837 };
7838 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7839 SDValue Addr =
7840 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7841 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7842 OffsetValue.Addr = Addr;
7843 OutChains[Idx + 4] =
7844 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7845 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7846 }
7847
7848 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7849
7850 // The end of the trampoline's instructions coincides with the static chain
7851 // slot address that we computed earlier.
7852 SDValue EndOfTrmp = OffsetValues[0].Addr;
7853
7854 // Call clear cache on the trampoline instructions.
7855 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7856 Trmp, EndOfTrmp);
7857
7858 return Chain;
7859}
7860
7861SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7862 SelectionDAG &DAG) const {
7863 if (!Subtarget.is64Bit())
7864 llvm::report_fatal_error("Trampolines only implemented for RV64");
7865
7866 return Op.getOperand(0);
7867}
7868
7869 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7870 SelectionDAG &DAG, unsigned Flags) {
7871 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7872}
7873
7874 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7875 SelectionDAG &DAG, unsigned Flags) {
7876 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7877 Flags);
7878}
7879
7880 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7881 SelectionDAG &DAG, unsigned Flags) {
7882 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7883 N->getOffset(), Flags);
7884}
7885
7886 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7887 SelectionDAG &DAG, unsigned Flags) {
7888 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7889}
7890
7891 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
7892 EVT Ty, SelectionDAG &DAG) {
7893 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7894 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7895 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7896 return DAG.getLoad(
7897 Ty, DL, DAG.getEntryNode(), LC,
7898 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7899 }
7900
7901 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
7902 EVT Ty, SelectionDAG &DAG) {
7903 RISCVConstantPoolValue *CPV =
7904 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7905 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7906 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7907 return DAG.getLoad(
7908 Ty, DL, DAG.getEntryNode(), LC,
7909 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7910 }
7911
7912template <class NodeTy>
7913SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7914 bool IsLocal, bool IsExternWeak) const {
7915 SDLoc DL(N);
7916 EVT Ty = getPointerTy(DAG.getDataLayout());
7917
7918 // When HWASAN is used and tagging of global variables is enabled
7919 // they should be accessed via the GOT, since the tagged address of a global
7920 // is incompatible with existing code models. This also applies to non-pic
7921 // mode.
7922 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7923 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7924 if (IsLocal && !Subtarget.allowTaggedGlobals())
7925 // Use PC-relative addressing to access the symbol. This generates the
7926 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7927 // %pcrel_lo(auipc)).
7928 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7929
7930 // Use PC-relative addressing to access the GOT for this symbol, then load
7931 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7932 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7933 SDValue Load =
7934 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7935 MachineFunction &MF = DAG.getMachineFunction();
7936 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7937 MachinePointerInfo::getGOT(MF),
7938 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7939 MachineMemOperand::MOInvariant,
7940 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7941 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7942 return Load;
7943 }
7944
7945 switch (getTargetMachine().getCodeModel()) {
7946 default:
7947 report_fatal_error("Unsupported code model for lowering");
7948 case CodeModel::Small: {
7949 // Generate a sequence for accessing addresses within the first 2 GiB of
7950 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7951 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7952 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7953 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7954 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7955 }
7956 case CodeModel::Medium: {
7957 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7958 if (IsExternWeak) {
7959 // An extern weak symbol may be undefined, i.e. have value 0, which may
7960 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7961 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7962 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7963 SDValue Load =
7964 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7965 MachineFunction &MF = DAG.getMachineFunction();
7966 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7967 MachinePointerInfo::getGOT(MF),
7968 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7969 MachineMemOperand::MOInvariant,
7970 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7971 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7972 return Load;
7973 }
7974
7975 // Generate a sequence for accessing addresses within any 2GiB range within
7976 // the address space. This generates the pattern (PseudoLLA sym), which
7977 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7978 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7979 }
7980 case CodeModel::Large: {
7981 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
7982 return getLargeGlobalAddress(G, DL, Ty, DAG);
7983
7984 // Use PC-relative addressing for other node types.
7985 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7986 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7987 }
7988 }
7989}
7990
7991SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7994 assert(N->getOffset() == 0 && "unexpected offset in global node");
7995 const GlobalValue *GV = N->getGlobal();
7996 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7997}
7998
7999SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8000 SelectionDAG &DAG) const {
8001 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8002
8003 return getAddr(N, DAG);
8004}
8005
8006SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8007 SelectionDAG &DAG) const {
8008 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8009
8010 return getAddr(N, DAG);
8011}
8012
8013SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8014 SelectionDAG &DAG) const {
8015 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8016
8017 return getAddr(N, DAG);
8018}
8019
8020SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8021 SelectionDAG &DAG,
8022 bool UseGOT) const {
8023 SDLoc DL(N);
8024 EVT Ty = getPointerTy(DAG.getDataLayout());
8025 const GlobalValue *GV = N->getGlobal();
8026 MVT XLenVT = Subtarget.getXLenVT();
8027
8028 if (UseGOT) {
8029 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8030 // load the address from the GOT and add the thread pointer. This generates
8031 // the pattern (PseudoLA_TLS_IE sym), which expands to
8032 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8033 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8034 SDValue Load =
8035 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8036 MachineFunction &MF = DAG.getMachineFunction();
8037 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8038 MachinePointerInfo::getGOT(MF),
8039 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8040 MachineMemOperand::MOInvariant,
8041 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8042 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8043
8044 // Add the thread pointer.
8045 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8046 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8047 }
8048
8049 // Generate a sequence for accessing the address relative to the thread
8050 // pointer, with the appropriate adjustment for the thread pointer offset.
8051 // This generates the pattern
8052 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8053 SDValue AddrHi =
8054 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8055 SDValue AddrAdd =
8056 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8057 SDValue AddrLo =
8058 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8059
8060 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8061 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8062 SDValue MNAdd =
8063 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8064 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8065}
8066
8067SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8068 SelectionDAG &DAG) const {
8069 SDLoc DL(N);
8070 EVT Ty = getPointerTy(DAG.getDataLayout());
8071 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8072 const GlobalValue *GV = N->getGlobal();
8073
8074 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8075 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8076 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8077 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8078 SDValue Load =
8079 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8080
8081 // Prepare argument list to generate call.
8082 ArgListTy Args;
8083 ArgListEntry Entry;
8084 Entry.Node = Load;
8085 Entry.Ty = CallTy;
8086 Args.push_back(Entry);
8087
8088 // Setup call to __tls_get_addr.
8089 TargetLowering::CallLoweringInfo CLI(DAG);
8090 CLI.setDebugLoc(DL)
8091 .setChain(DAG.getEntryNode())
8092 .setLibCallee(CallingConv::C, CallTy,
8093 DAG.getExternalSymbol("__tls_get_addr", Ty),
8094 std::move(Args));
8095
8096 return LowerCallTo(CLI).first;
8097}
8098
8099SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8100 SelectionDAG &DAG) const {
8101 SDLoc DL(N);
8102 EVT Ty = getPointerTy(DAG.getDataLayout());
8103 const GlobalValue *GV = N->getGlobal();
8104
8105 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8106 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8107 //
8108 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8109 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8110 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8111 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8112 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8113 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8114}
8115
8116SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8117 SelectionDAG &DAG) const {
8118 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8119 assert(N->getOffset() == 0 && "unexpected offset in global node");
8120
8121 if (DAG.getTarget().useEmulatedTLS())
8122 return LowerToTLSEmulatedModel(N, DAG);
8123
8124 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8125
8126 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8127 CallingConv::GHC)
8128 report_fatal_error("In GHC calling convention TLS is not supported");
8129
8130 SDValue Addr;
8131 switch (Model) {
8132 case TLSModel::LocalExec:
8133 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8134 break;
8135 case TLSModel::InitialExec:
8136 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8137 break;
8138 case TLSModel::LocalDynamic:
8139 case TLSModel::GeneralDynamic:
8140 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8141 : getDynamicTLSAddr(N, DAG);
8142 break;
8143 }
8144
8145 return Addr;
8146}
8147
8148// Return true if Val is equal to (setcc LHS, RHS, CC).
8149// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8150// Otherwise, return std::nullopt.
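// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false,
// while matchSetCC(a, b, SETLT, (setcc b, a, SETGT)) returns true.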
8151static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8152 ISD::CondCode CC, SDValue Val) {
8153 assert(Val->getOpcode() == ISD::SETCC);
8154 SDValue LHS2 = Val.getOperand(0);
8155 SDValue RHS2 = Val.getOperand(1);
8156 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8157
8158 if (LHS == LHS2 && RHS == RHS2) {
8159 if (CC == CC2)
8160 return true;
8161 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8162 return false;
8163 } else if (LHS == RHS2 && RHS == LHS2) {
8164 CC2 = ISD::getSetCCSwappedOperands(CC2);
8165 if (CC == CC2)
8166 return true;
8167 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8168 return false;
8169 }
8170
8171 return std::nullopt;
8172}
8173
8174 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8175 const RISCVSubtarget &Subtarget) {
8176 SDValue CondV = N->getOperand(0);
8177 SDValue TrueV = N->getOperand(1);
8178 SDValue FalseV = N->getOperand(2);
8179 MVT VT = N->getSimpleValueType(0);
8180 SDLoc DL(N);
8181
8182 if (!Subtarget.hasConditionalMoveFusion()) {
8183 // (select c, -1, y) -> -c | y
8184 if (isAllOnesConstant(TrueV)) {
8185 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8186 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8187 }
8188 // (select c, y, -1) -> (c-1) | y
8189 if (isAllOnesConstant(FalseV)) {
8190 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8191 DAG.getAllOnesConstant(DL, VT));
8192 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8193 }
8194
8195 // (select c, 0, y) -> (c-1) & y
8196 if (isNullConstant(TrueV)) {
8197 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8198 DAG.getAllOnesConstant(DL, VT));
8199 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8200 }
8201 // (select c, y, 0) -> -c & y
8202 if (isNullConstant(FalseV)) {
8203 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8204 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8205 }
8206 }
8207
8208 // select c, ~x, x --> xor -c, x
8209 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8210 const APInt &TrueVal = TrueV->getAsAPIntVal();
8211 const APInt &FalseVal = FalseV->getAsAPIntVal();
8212 if (~TrueVal == FalseVal) {
8213 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8214 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8215 }
8216 }
8217
8218 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8219 // when both truev and falsev are also setcc.
8220 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8221 FalseV.getOpcode() == ISD::SETCC) {
8222 SDValue LHS = CondV.getOperand(0);
8223 SDValue RHS = CondV.getOperand(1);
8224 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8225
8226 // (select x, x, y) -> x | y
8227 // (select !x, x, y) -> x & y
8228 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8229 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8230 DAG.getFreeze(FalseV));
8231 }
8232 // (select x, y, x) -> x & y
8233 // (select !x, y, x) -> x | y
8234 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8235 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8236 DAG.getFreeze(TrueV), FalseV);
8237 }
8238 }
8239
8240 return SDValue();
8241}
8242
8243// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8244// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8245// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8246// being `0` or `-1`. In such cases we can replace `select` with `and`.
8247// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8248// than `c0`?
8249static SDValue
8250 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8251 const RISCVSubtarget &Subtarget) {
8252 if (Subtarget.hasShortForwardBranchOpt())
8253 return SDValue();
8254
8255 unsigned SelOpNo = 0;
8256 SDValue Sel = BO->getOperand(0);
8257 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8258 SelOpNo = 1;
8259 Sel = BO->getOperand(1);
8260 }
8261
8262 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8263 return SDValue();
8264
8265 unsigned ConstSelOpNo = 1;
8266 unsigned OtherSelOpNo = 2;
8267 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8268 ConstSelOpNo = 2;
8269 OtherSelOpNo = 1;
8270 }
8271 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8272 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8273 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8274 return SDValue();
8275
8276 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8277 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8278 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8279 return SDValue();
8280
8281 SDLoc DL(Sel);
8282 EVT VT = BO->getValueType(0);
8283
8284 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8285 if (SelOpNo == 1)
8286 std::swap(NewConstOps[0], NewConstOps[1]);
8287
8288 SDValue NewConstOp =
8289 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8290 if (!NewConstOp)
8291 return SDValue();
8292
8293 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8294 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8295 return SDValue();
8296
8297 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8298 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8299 if (SelOpNo == 1)
8300 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8301 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8302
8303 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8304 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8305 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8306}
8307
8308SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8309 SDValue CondV = Op.getOperand(0);
8310 SDValue TrueV = Op.getOperand(1);
8311 SDValue FalseV = Op.getOperand(2);
8312 SDLoc DL(Op);
8313 MVT VT = Op.getSimpleValueType();
8314 MVT XLenVT = Subtarget.getXLenVT();
8315
8316 // Lower vector SELECTs to VSELECTs by splatting the condition.
8317 if (VT.isVector()) {
8318 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8319 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8320 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8321 }
8322
8323 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8324 // nodes to implement the SELECT. Performing the lowering here allows for
8325 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8326 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8327 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8328 VT.isScalarInteger()) {
8329 // (select c, t, 0) -> (czero_eqz t, c)
8330 if (isNullConstant(FalseV))
8331 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8332 // (select c, 0, f) -> (czero_nez f, c)
8333 if (isNullConstant(TrueV))
8334 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8335
8336 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8337 if (TrueV.getOpcode() == ISD::AND &&
8338 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8339 return DAG.getNode(
8340 ISD::OR, DL, VT, TrueV,
8341 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8342 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8343 if (FalseV.getOpcode() == ISD::AND &&
8344 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8345 return DAG.getNode(
8346 ISD::OR, DL, VT, FalseV,
8347 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8348
8349 // Try some other optimizations before falling back to generic lowering.
8350 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8351 return V;
8352
8353 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8354 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
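// For illustration, (select c, 5, 3) with equal materialization costs picks
// IsCZERO_NEZ, giving (add (czero_nez -2, c), 5): c != 0 yields 0 + 5 = 5,
// and c == 0 yields -2 + 5 = 3.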
8355 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8356 const APInt &TrueVal = TrueV->getAsAPIntVal();
8357 const APInt &FalseVal = FalseV->getAsAPIntVal();
8358 const int TrueValCost = RISCVMatInt::getIntMatCost(
8359 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8360 const int FalseValCost = RISCVMatInt::getIntMatCost(
8361 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8362 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8363 SDValue LHSVal = DAG.getConstant(
8364 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8365 SDValue RHSVal =
8366 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8367 SDValue CMOV =
8368 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8369 DL, VT, LHSVal, CondV);
8370 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8371 }
8372
8373 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8374 // Unless we have the short forward branch optimization.
8375 if (!Subtarget.hasConditionalMoveFusion())
8376 return DAG.getNode(
8377 ISD::OR, DL, VT,
8378 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8379 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8380 }
8381
8382 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8383 return V;
8384
8385 if (Op.hasOneUse()) {
8386 unsigned UseOpc = Op->user_begin()->getOpcode();
8387 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8388 SDNode *BinOp = *Op->user_begin();
8389 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8390 DAG, Subtarget)) {
8391 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8392 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8393 // may return a constant node and cause a crash in lowerSELECT.
8394 if (NewSel.getOpcode() == ISD::SELECT)
8395 return lowerSELECT(NewSel, DAG);
8396 return NewSel;
8397 }
8398 }
8399 }
8400
8401 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8402 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8403 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8404 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8405 if (FPTV && FPFV) {
8406 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8407 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8408 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8409 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8410 DAG.getConstant(1, DL, XLenVT));
8411 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8412 }
8413 }
8414
8415 // If the condition is not an integer SETCC which operates on XLenVT, we need
8416 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8417 // (select condv, truev, falsev)
8418 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8419 if (CondV.getOpcode() != ISD::SETCC ||
8420 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8421 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8422 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8423
8424 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8425
8426 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8427 }
8428
8429 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8430 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8431 // advantage of the integer compare+branch instructions. i.e.:
8432 // (select (setcc lhs, rhs, cc), truev, falsev)
8433 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8434 SDValue LHS = CondV.getOperand(0);
8435 SDValue RHS = CondV.getOperand(1);
8436 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8437
8438 // Special case for a select of 2 constants that have a difference of 1.
8439 // Normally this is done by DAGCombine, but if the select is introduced by
8440 // type legalization or op legalization, we miss it. Restricting to SETLT
8441 // case for now because that is what signed saturating add/sub need.
8442 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8443 // but we would probably want to swap the true/false values if the condition
8444 // is SETGE/SETLE to avoid an XORI.
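// For illustration: (select (setlt x, y), 5, 4) becomes (add (setlt x, y), 4),
// since the setcc yields 0 or 1 and TrueVal - 1 == FalseVal.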
8445 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8446 CCVal == ISD::SETLT) {
8447 const APInt &TrueVal = TrueV->getAsAPIntVal();
8448 const APInt &FalseVal = FalseV->getAsAPIntVal();
8449 if (TrueVal - 1 == FalseVal)
8450 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8451 if (TrueVal + 1 == FalseVal)
8452 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8453 }
8454
8455 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8456 // 1 < x ? x : 1 -> 0 < x ? x : 1
8457 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8458 RHS == TrueV && LHS == FalseV) {
8459 LHS = DAG.getConstant(0, DL, VT);
8460 // 0 <u x is the same as x != 0.
8461 if (CCVal == ISD::SETULT) {
8462 std::swap(LHS, RHS);
8463 CCVal = ISD::SETNE;
8464 }
8465 }
8466
8467 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8468 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8469 RHS == FalseV) {
8470 RHS = DAG.getConstant(0, DL, VT);
8471 }
8472
8473 SDValue TargetCC = DAG.getCondCode(CCVal);
8474
8475 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8476 // (select (setcc lhs, rhs, CC), constant, falsev)
8477 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8478 std::swap(TrueV, FalseV);
8479 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8480 }
8481
8482 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8483 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8484}
8485
8486SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8487 SDValue CondV = Op.getOperand(1);
8488 SDLoc DL(Op);
8489 MVT XLenVT = Subtarget.getXLenVT();
8490
8491 if (CondV.getOpcode() == ISD::SETCC &&
8492 CondV.getOperand(0).getValueType() == XLenVT) {
8493 SDValue LHS = CondV.getOperand(0);
8494 SDValue RHS = CondV.getOperand(1);
8495 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8496
8497 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8498
8499 SDValue TargetCC = DAG.getCondCode(CCVal);
8500 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8501 LHS, RHS, TargetCC, Op.getOperand(2));
8502 }
8503
8504 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8505 CondV, DAG.getConstant(0, DL, XLenVT),
8506 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8507}
8508
8509SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8510 MachineFunction &MF = DAG.getMachineFunction();
8511 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8512
8513 SDLoc DL(Op);
8514 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8515 getPointerTy(MF.getDataLayout()));
8516
8517 // vastart just stores the address of the VarArgsFrameIndex slot into the
8518 // memory location argument.
8519 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8520 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8521 MachinePointerInfo(SV));
8522}
8523
8524SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8525 SelectionDAG &DAG) const {
8526 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8527 MachineFunction &MF = DAG.getMachineFunction();
8528 MachineFrameInfo &MFI = MF.getFrameInfo();
8529 MFI.setFrameAddressIsTaken(true);
8530 Register FrameReg = RI.getFrameRegister(MF);
8531 int XLenInBytes = Subtarget.getXLen() / 8;
8532
8533 EVT VT = Op.getValueType();
8534 SDLoc DL(Op);
8535 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8536 unsigned Depth = Op.getConstantOperandVal(0);
8537 while (Depth--) {
8538 int Offset = -(XLenInBytes * 2);
8539 SDValue Ptr = DAG.getNode(
8540 ISD::ADD, DL, VT, FrameAddr,
8541 DAG.getSignedConstant(Offset, DL, VT));
8542 FrameAddr =
8543 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8544 }
8545 return FrameAddr;
8546}
8547
8548SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8549 SelectionDAG &DAG) const {
8550 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8551 MachineFunction &MF = DAG.getMachineFunction();
8552 MachineFrameInfo &MFI = MF.getFrameInfo();
8553 MFI.setReturnAddressIsTaken(true);
8554 MVT XLenVT = Subtarget.getXLenVT();
8555 int XLenInBytes = Subtarget.getXLen() / 8;
8556
8557 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8558 return SDValue();
8559
8560 EVT VT = Op.getValueType();
8561 SDLoc DL(Op);
8562 unsigned Depth = Op.getConstantOperandVal(0);
8563 if (Depth) {
8564 int Off = -XLenInBytes;
8565 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8566 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8567 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8568 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8569 MachinePointerInfo());
8570 }
8571
8572 // Return the value of the return address register, marking it an implicit
8573 // live-in.
8574 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8575 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8576}
8577
8578SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8579 SelectionDAG &DAG) const {
8580 SDLoc DL(Op);
8581 SDValue Lo = Op.getOperand(0);
8582 SDValue Hi = Op.getOperand(1);
8583 SDValue Shamt = Op.getOperand(2);
8584 EVT VT = Lo.getValueType();
8585
8586 // if Shamt-XLEN < 0: // Shamt < XLEN
8587 // Lo = Lo << Shamt
8588 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8589 // else:
8590 // Lo = 0
8591 // Hi = Lo << (Shamt-XLEN)
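//
// For illustration, with XLEN=32 and a constant Shamt of 40 this reduces to
// Lo = 0 and Hi = Lo << 8 (the original low word shifted left by 8 bits).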
8592
8593 SDValue Zero = DAG.getConstant(0, DL, VT);
8594 SDValue One = DAG.getConstant(1, DL, VT);
8595 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8596 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8597 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8598 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8599
8600 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8601 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8602 SDValue ShiftRightLo =
8603 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8604 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8605 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8606 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8607
8608 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8609
8610 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8611 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8612
8613 SDValue Parts[2] = {Lo, Hi};
8614 return DAG.getMergeValues(Parts, DL);
8615}
8616
8617SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8618 bool IsSRA) const {
8619 SDLoc DL(Op);
8620 SDValue Lo = Op.getOperand(0);
8621 SDValue Hi = Op.getOperand(1);
8622 SDValue Shamt = Op.getOperand(2);
8623 EVT VT = Lo.getValueType();
8624
8625 // SRA expansion:
8626 // if Shamt-XLEN < 0: // Shamt < XLEN
8627 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8628 // Hi = Hi >>s Shamt
8629 // else:
8630 // Lo = Hi >>s (Shamt-XLEN);
8631 // Hi = Hi >>s (XLEN-1)
8632 //
8633 // SRL expansion:
8634 // if Shamt-XLEN < 0: // Shamt < XLEN
8635 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8636 // Hi = Hi >>u Shamt
8637 // else:
8638 // Lo = Hi >>u (Shamt-XLEN);
8639 // Hi = 0;
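//
// For illustration (SRL, XLEN=32, Shamt=8):
//   Lo = (Lo >>u 8) | ((Hi << 1) << 23), i.e. the low 8 bits of Hi move into
//   the top of Lo, and Hi = Hi >>u 8. The (Hi << 1) << (XLEN-1 - Shamt) form
//   avoids an out-of-range shift by XLEN when Shamt is 0.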
8640
8641 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8642
8643 SDValue Zero = DAG.getConstant(0, DL, VT);
8644 SDValue One = DAG.getConstant(1, DL, VT);
8645 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8646 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8647 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8648 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8649
8650 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8651 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8652 SDValue ShiftLeftHi =
8653 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8654 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8655 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8656 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8657 SDValue HiFalse =
8658 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8659
8660 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8661
8662 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8663 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8664
8665 SDValue Parts[2] = {Lo, Hi};
8666 return DAG.getMergeValues(Parts, DL);
8667}
8668
8669// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8670// legal equivalently-sized i8 type, so we can use that as a go-between.
8671SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8672 SelectionDAG &DAG) const {
8673 SDLoc DL(Op);
8674 MVT VT = Op.getSimpleValueType();
8675 SDValue SplatVal = Op.getOperand(0);
8676 // All-zeros or all-ones splats are handled specially.
8677 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8678 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8679 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8680 }
8681 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8682 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8683 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8684 }
8685 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8686 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8687 DAG.getConstant(1, DL, SplatVal.getValueType()));
8688 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8689 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8690 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8691}
8692
8693// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8694// illegal (currently only vXi64 RV32).
8695// FIXME: We could also catch non-constant sign-extended i32 values and lower
8696// them to VMV_V_X_VL.
8697SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8698 SelectionDAG &DAG) const {
8699 SDLoc DL(Op);
8700 MVT VecVT = Op.getSimpleValueType();
8701 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8702 "Unexpected SPLAT_VECTOR_PARTS lowering");
8703
8704 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8705 SDValue Lo = Op.getOperand(0);
8706 SDValue Hi = Op.getOperand(1);
8707
8708 MVT ContainerVT = VecVT;
8709 if (VecVT.isFixedLengthVector())
8710 ContainerVT = getContainerForFixedLengthVector(VecVT);
8711
8712 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8713
8714 SDValue Res =
8715 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8716
8717 if (VecVT.isFixedLengthVector())
8718 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8719
8720 return Res;
8721}
8722
8723// Custom-lower extensions from mask vectors by using a vselect either with 1
8724// for zero/any-extension or -1 for sign-extension:
8725// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8726// Note that any-extension is lowered identically to zero-extension.
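// For illustration:
//   (nxv4i32 = sext nxv4i1 %m) -> (vselect %m, splat(-1), splat(0))
//   (nxv4i32 = zext nxv4i1 %m) -> (vselect %m, splat(1), splat(0))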
8727SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8728 int64_t ExtTrueVal) const {
8729 SDLoc DL(Op);
8730 MVT VecVT = Op.getSimpleValueType();
8731 SDValue Src = Op.getOperand(0);
8732 // Only custom-lower extensions from mask types
8733 assert(Src.getValueType().isVector() &&
8734 Src.getValueType().getVectorElementType() == MVT::i1);
8735
8736 if (VecVT.isScalableVector()) {
8737 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8738 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8739 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8740 }
8741
8742 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8743 MVT I1ContainerVT =
8744 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8745
8746 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8747
8748 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8749
8750 MVT XLenVT = Subtarget.getXLenVT();
8751 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8752 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8753
8754 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8755 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8756 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8757 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8758 SDValue Select =
8759 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8760 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8761
8762 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8763}
8764
8765SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8766 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8767 MVT ExtVT = Op.getSimpleValueType();
8768 // Only custom-lower extensions from fixed-length vector types.
8769 if (!ExtVT.isFixedLengthVector())
8770 return Op;
8771 MVT VT = Op.getOperand(0).getSimpleValueType();
8772 // Grab the canonical container type for the extended type. Infer the smaller
8773 // type from that to ensure the same number of vector elements, as we know
8774 // the LMUL will be sufficient to hold the smaller type.
8775 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8776 // Get the extended container type manually to ensure the same number of
8777 // vector elements between source and dest.
8778 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8779 ContainerExtVT.getVectorElementCount());
8780
8781 SDValue Op1 =
8782 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8783
8784 SDLoc DL(Op);
8785 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8786
8787 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8788
8789 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8790}
8791
8792// Custom-lower truncations from vectors to mask vectors by using a mask and a
8793// setcc operation:
8794// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
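// For illustration, truncating <i8 3, i8 4> to vXi1 gives <1, 0> after the
// AND with 1 and <true, false> after the setcc, i.e. only the low bit of
// each lane survives.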
8795SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8796 SelectionDAG &DAG) const {
8797 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8798 SDLoc DL(Op);
8799 EVT MaskVT = Op.getValueType();
8800 // Only expect to custom-lower truncations to mask types
8801 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8802 "Unexpected type for vector mask lowering");
8803 SDValue Src = Op.getOperand(0);
8804 MVT VecVT = Src.getSimpleValueType();
8805 SDValue Mask, VL;
8806 if (IsVPTrunc) {
8807 Mask = Op.getOperand(1);
8808 VL = Op.getOperand(2);
8809 }
8810 // If this is a fixed vector, we need to convert it to a scalable vector.
8811 MVT ContainerVT = VecVT;
8812
8813 if (VecVT.isFixedLengthVector()) {
8814 ContainerVT = getContainerForFixedLengthVector(VecVT);
8815 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8816 if (IsVPTrunc) {
8817 MVT MaskContainerVT =
8818 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8819 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8820 }
8821 }
8822
8823 if (!IsVPTrunc) {
8824 std::tie(Mask, VL) =
8825 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8826 }
8827
8828 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8829 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8830
8831 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8832 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8833 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8834 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8835
8836 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8837 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8838 DAG.getUNDEF(ContainerVT), Mask, VL);
8839 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8840 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8841 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8842 if (MaskVT.isFixedLengthVector())
8843 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8844 return Trunc;
8845}
8846
8847SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8848 SelectionDAG &DAG) const {
8849 unsigned Opc = Op.getOpcode();
8850 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8851 SDLoc DL(Op);
8852
8853 MVT VT = Op.getSimpleValueType();
8854 // Only custom-lower vector truncates
8855 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8856
8857 // Truncates to mask types are handled differently
8858 if (VT.getVectorElementType() == MVT::i1)
8859 return lowerVectorMaskTruncLike(Op, DAG);
8860
8861 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8862 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8863 // truncate by one power of two at a time.
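// For illustration, an i64 -> i8 truncate is emitted as three narrowing
// steps: i64 -> i32 -> i16 -> i8.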
8864 MVT DstEltVT = VT.getVectorElementType();
8865
8866 SDValue Src = Op.getOperand(0);
8867 MVT SrcVT = Src.getSimpleValueType();
8868 MVT SrcEltVT = SrcVT.getVectorElementType();
8869
8870 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8871 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8872 "Unexpected vector truncate lowering");
8873
8874 MVT ContainerVT = SrcVT;
8875 SDValue Mask, VL;
8876 if (IsVPTrunc) {
8877 Mask = Op.getOperand(1);
8878 VL = Op.getOperand(2);
8879 }
8880 if (SrcVT.isFixedLengthVector()) {
8881 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8882 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8883 if (IsVPTrunc) {
8884 MVT MaskVT = getMaskTypeFor(ContainerVT);
8885 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8886 }
8887 }
8888
8889 SDValue Result = Src;
8890 if (!IsVPTrunc) {
8891 std::tie(Mask, VL) =
8892 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8893 }
8894
8895 unsigned NewOpc;
8896 if (Opc == ISD::TRUNCATE_SSAT_S)
8897 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8898 else if (Opc == ISD::TRUNCATE_USAT_U)
8899 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8900 else
8901 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8902
8903 do {
8904 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8905 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8906 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8907 } while (SrcEltVT != DstEltVT);
8908
8909 if (SrcVT.isFixedLengthVector())
8910 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8911
8912 return Result;
8913}
8914
8915SDValue
8916RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8917 SelectionDAG &DAG) const {
8918 SDLoc DL(Op);
8919 SDValue Chain = Op.getOperand(0);
8920 SDValue Src = Op.getOperand(1);
8921 MVT VT = Op.getSimpleValueType();
8922 MVT SrcVT = Src.getSimpleValueType();
8923 MVT ContainerVT = VT;
8924 if (VT.isFixedLengthVector()) {
8925 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8926 ContainerVT =
8927 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8928 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8929 }
8930
8931 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8932
8933 // RVV can only widen/truncate fp to types double/half the size of the source.
8934 if ((VT.getVectorElementType() == MVT::f64 &&
8935 (SrcVT.getVectorElementType() == MVT::f16 ||
8936 SrcVT.getVectorElementType() == MVT::bf16)) ||
8937 ((VT.getVectorElementType() == MVT::f16 ||
8938 VT.getVectorElementType() == MVT::bf16) &&
8939 SrcVT.getVectorElementType() == MVT::f64)) {
8940 // For double rounding, the intermediate rounding should be round-to-odd.
8941 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8942 ? RISCVISD::STRICT_FP_EXTEND_VL
8943 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8944 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8945 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8946 Chain, Src, Mask, VL);
8947 Chain = Src.getValue(1);
8948 }
8949
8950 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8951 ? RISCVISD::STRICT_FP_EXTEND_VL
8952 : RISCVISD::STRICT_FP_ROUND_VL;
8953 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8954 Chain, Src, Mask, VL);
8955 if (VT.isFixedLengthVector()) {
8956 // StrictFP operations have two result values. Their lowered result should
8957 // have the same result count.
8958 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8959 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8960 }
8961 return Res;
8962}
8963
8964SDValue
8965RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8966 SelectionDAG &DAG) const {
8967 bool IsVP =
8968 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8969 bool IsExtend =
8970 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8971 // RVV can only truncate fp to types half the size of the source. We
8972 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8973 // conversion instruction.
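// For illustration, an f64 -> f16 round is emitted as
// f64 -(round-to-odd)-> f32 -> f16; rounding to odd in the first step keeps
// the final result identical to a single direct rounding.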
8974 SDLoc DL(Op);
8975 MVT VT = Op.getSimpleValueType();
8976
8977 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8978
8979 SDValue Src = Op.getOperand(0);
8980 MVT SrcVT = Src.getSimpleValueType();
8981
8982 bool IsDirectExtend =
8983 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8984 (SrcVT.getVectorElementType() != MVT::f16 &&
8985 SrcVT.getVectorElementType() != MVT::bf16));
8986 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8987 VT.getVectorElementType() != MVT::bf16) ||
8988 SrcVT.getVectorElementType() != MVT::f64);
8989
8990 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8991
8992 // Prepare any fixed-length vector operands.
8993 MVT ContainerVT = VT;
8994 SDValue Mask, VL;
8995 if (IsVP) {
8996 Mask = Op.getOperand(1);
8997 VL = Op.getOperand(2);
8998 }
8999 if (VT.isFixedLengthVector()) {
9000 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9001 ContainerVT =
9002 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9003 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9004 if (IsVP) {
9005 MVT MaskVT = getMaskTypeFor(ContainerVT);
9006 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9007 }
9008 }
9009
9010 if (!IsVP)
9011 std::tie(Mask, VL) =
9012 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9013
9014 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9015
9016 if (IsDirectConv) {
9017 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9018 if (VT.isFixedLengthVector())
9019 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9020 return Src;
9021 }
9022
9023 unsigned InterConvOpc =
9024 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9025
9026 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9027 SDValue IntermediateConv =
9028 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9029 SDValue Result =
9030 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9031 if (VT.isFixedLengthVector())
9032 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9033 return Result;
9034}
9035
9036// Given a scalable vector type and an index into it, returns the type for the
9037// smallest subvector that the index fits in. This can be used to reduce LMUL
9038// for operations like vslidedown.
9039//
9040// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
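// (nxv2i32 is the LMUL=1 type for i32 elements; with VLEN >= 128 it holds at
// least 4 elements, so indices 0..3 are guaranteed to be in range.)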
9041static std::optional<MVT>
9042getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9043 const RISCVSubtarget &Subtarget) {
9044 assert(VecVT.isScalableVector());
9045 const unsigned EltSize = VecVT.getScalarSizeInBits();
9046 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9047 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9048 MVT SmallerVT;
9049 if (MaxIdx < MinVLMAX)
9050 SmallerVT = getLMUL1VT(VecVT);
9051 else if (MaxIdx < MinVLMAX * 2)
9052 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9053 else if (MaxIdx < MinVLMAX * 4)
9054 SmallerVT = getLMUL1VT(VecVT)
9055 .getDoubleNumVectorElementsVT()
9056 .getDoubleNumVectorElementsVT();
9057 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9058 return std::nullopt;
9059 return SmallerVT;
9060}
9061
9062// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9063// first position of a vector, and that vector is slid up to the insert index.
9064// By limiting the active vector length to index+1 and merging with the
9065// original vector (with an undisturbed tail policy for elements >= VL), we
9066// achieve the desired result of leaving all elements untouched except the one
9067// at VL-1, which is replaced with the desired value.
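// For illustration, inserting a value at index 2 of a v8i32: the scalar is
// moved into element 0 of a temporary, VL is limited to 3 (index + 1), and a
// vslideup by 2 merges it over the original vector, so elements 3..7 are
// preserved by the undisturbed tail policy.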
9068SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9069 SelectionDAG &DAG) const {
9070 SDLoc DL(Op);
9071 MVT VecVT = Op.getSimpleValueType();
9072 MVT XLenVT = Subtarget.getXLenVT();
9073 SDValue Vec = Op.getOperand(0);
9074 SDValue Val = Op.getOperand(1);
9075 MVT ValVT = Val.getSimpleValueType();
9076 SDValue Idx = Op.getOperand(2);
9077
9078 if (VecVT.getVectorElementType() == MVT::i1) {
9079 // FIXME: For now we just promote to an i8 vector and insert into that,
9080 // but this is probably not optimal.
9081 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9082 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9083 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9084 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9085 }
9086
9087 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9088 ValVT == MVT::bf16) {
9089 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9090 MVT IntVT = VecVT.changeTypeToInteger();
9091 SDValue IntInsert = DAG.getNode(
9092 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9093 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9094 return DAG.getBitcast(VecVT, IntInsert);
9095 }
9096
9097 MVT ContainerVT = VecVT;
9098 // If the operand is a fixed-length vector, convert to a scalable one.
9099 if (VecVT.isFixedLengthVector()) {
9100 ContainerVT = getContainerForFixedLengthVector(VecVT);
9101 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9102 }
9103
9104 // If we know the index we're going to insert at, we can shrink Vec so that
9105 // we're performing the scalar inserts and slideup on a smaller LMUL.
9106 MVT OrigContainerVT = ContainerVT;
9107 SDValue OrigVec = Vec;
9108 SDValue AlignedIdx;
9109 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9110 const unsigned OrigIdx = IdxC->getZExtValue();
9111 // Do we know an upper bound on LMUL?
9112 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9113 DL, DAG, Subtarget)) {
9114 ContainerVT = *ShrunkVT;
9115 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9116 }
9117
9118 // If we're compiling for an exact VLEN value, we can always perform
9119 // the insert in m1 as we can determine the register corresponding to
9120 // the index in the register group.
9121 const MVT M1VT = getLMUL1VT(ContainerVT);
9122 if (auto VLEN = Subtarget.getRealVLen();
9123 VLEN && ContainerVT.bitsGT(M1VT)) {
9124 EVT ElemVT = VecVT.getVectorElementType();
9125 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9126 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9127 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9128 unsigned ExtractIdx =
9129 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9130 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9131 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9132 ContainerVT = M1VT;
9133 }
9134
9135 if (AlignedIdx)
9136 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9137 AlignedIdx);
9138 }
9139
9140 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9141 // Even i64-element vectors on RV32 can be lowered without scalar
9142 // legalization if the most-significant 32 bits of the value are not affected
9143 // by the sign-extension of the lower 32 bits.
9144 // TODO: We could also catch sign extensions of a 32-bit value.
9145 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9146 const auto *CVal = cast<ConstantSDNode>(Val);
9147 if (isInt<32>(CVal->getSExtValue())) {
9148 IsLegalInsert = true;
9149 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9150 }
9151 }
9152
9153 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9154
9155 SDValue ValInVec;
9156
9157 if (IsLegalInsert) {
9158 unsigned Opc =
9159 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9160 if (isNullConstant(Idx)) {
9161 if (!VecVT.isFloatingPoint())
9162 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9163 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9164
9165 if (AlignedIdx)
9166 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9167 Vec, AlignedIdx);
9168 if (!VecVT.isFixedLengthVector())
9169 return Vec;
9170 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9171 }
9172 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9173 } else {
9174 // On RV32, i64-element vectors must be specially handled to place the
9175 // value at element 0, by using two vslide1down instructions in sequence on
9176 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9177 // this.
9178 SDValue ValLo, ValHi;
9179 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9180 MVT I32ContainerVT =
9181 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9182 SDValue I32Mask =
9183 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9184 // Limit the active VL to two.
9185 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9186 // If the Idx is 0 we can insert directly into the vector.
9187 if (isNullConstant(Idx)) {
9188 // First slide in the lo value, then the hi in above it. We use slide1down
9189 // to avoid the register group overlap constraint of vslide1up.
9190 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9191 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9192 // If the source vector is undef don't pass along the tail elements from
9193 // the previous slide1down.
9194 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9195 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9196 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9197 // Bitcast back to the right container type.
9198 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9199
9200 if (AlignedIdx)
9201 ValInVec =
9202 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9203 ValInVec, AlignedIdx);
9204 if (!VecVT.isFixedLengthVector())
9205 return ValInVec;
9206 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9207 }
9208
9209 // First slide in the lo value, then the hi in above it. We use slide1down
9210 // to avoid the register group overlap constraint of vslide1up.
9211 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9212 DAG.getUNDEF(I32ContainerVT),
9213 DAG.getUNDEF(I32ContainerVT), ValLo,
9214 I32Mask, InsertI64VL);
9215 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9216 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9217 I32Mask, InsertI64VL);
9218 // Bitcast back to the right container type.
9219 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9220 }
9221
9222 // Now that the value is in a vector, slide it into position.
9223 SDValue InsertVL =
9224 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9225
9226 // Use tail agnostic policy if Idx is the last index of Vec.
9227 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9228 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9229 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9230 Policy = RISCVII::TAIL_AGNOSTIC;
9231 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9232 Idx, Mask, InsertVL, Policy);
9233
9234 if (AlignedIdx)
9235 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9236 Slideup, AlignedIdx);
9237 if (!VecVT.isFixedLengthVector())
9238 return Slideup;
9239 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9240}
9241
9242// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9243// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9244// types this is done using VMV_X_S to allow us to glean information about the
9245// sign bits of the result.
9246SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9247 SelectionDAG &DAG) const {
9248 SDLoc DL(Op);
9249 SDValue Idx = Op.getOperand(1);
9250 SDValue Vec = Op.getOperand(0);
9251 EVT EltVT = Op.getValueType();
9252 MVT VecVT = Vec.getSimpleValueType();
9253 MVT XLenVT = Subtarget.getXLenVT();
9254
9255 if (VecVT.getVectorElementType() == MVT::i1) {
9256 // Use vfirst.m to extract the first bit.
9257 if (isNullConstant(Idx)) {
9258 MVT ContainerVT = VecVT;
9259 if (VecVT.isFixedLengthVector()) {
9260 ContainerVT = getContainerForFixedLengthVector(VecVT);
9261 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9262 }
9263 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9264 SDValue Vfirst =
9265 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9266 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9267 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9268 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9269 }
9270 if (VecVT.isFixedLengthVector()) {
9271 unsigned NumElts = VecVT.getVectorNumElements();
9272 if (NumElts >= 8) {
9273 MVT WideEltVT;
9274 unsigned WidenVecLen;
9275 SDValue ExtractElementIdx;
9276 SDValue ExtractBitIdx;
9277 unsigned MaxEEW = Subtarget.getELen();
9278 MVT LargestEltVT = MVT::getIntegerVT(
9279 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9280 if (NumElts <= LargestEltVT.getSizeInBits()) {
9281 assert(isPowerOf2_32(NumElts) &&
9282 "the number of elements should be power of 2");
9283 WideEltVT = MVT::getIntegerVT(NumElts);
9284 WidenVecLen = 1;
9285 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9286 ExtractBitIdx = Idx;
9287 } else {
9288 WideEltVT = LargestEltVT;
9289 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9290 // extract element index = index / element width
9291 ExtractElementIdx = DAG.getNode(
9292 ISD::SRL, DL, XLenVT, Idx,
9293 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9294 // mask bit index = index % element width
9295 ExtractBitIdx = DAG.getNode(
9296 ISD::AND, DL, XLenVT, Idx,
9297 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9298 }
9299 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9300 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9301 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9302 Vec, ExtractElementIdx);
9303 // Extract the bit from GPR.
9304 SDValue ShiftRight =
9305 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9306 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9307 DAG.getConstant(1, DL, XLenVT));
9308 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9309 }
9310 }
9311 // Otherwise, promote to an i8 vector and extract from that.
9312 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9313 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9314 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9315 }
9316
9317 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9318 EltVT == MVT::bf16) {
9319 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9320 MVT IntVT = VecVT.changeTypeToInteger();
9321 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9322 SDValue IntExtract =
9323 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9324 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9325 }
9326
9327 // If this is a fixed vector, we need to convert it to a scalable vector.
9328 MVT ContainerVT = VecVT;
9329 if (VecVT.isFixedLengthVector()) {
9330 ContainerVT = getContainerForFixedLengthVector(VecVT);
9331 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9332 }
9333
9334 // If we're compiling for an exact VLEN value and we have a known
9335 // constant index, we can always perform the extract in m1 (or
9336 // smaller) as we can determine the register corresponding to
9337 // the index in the register group.
9338 const auto VLen = Subtarget.getRealVLen();
9339 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9340 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9341 MVT M1VT = getLMUL1VT(ContainerVT);
9342 unsigned OrigIdx = IdxC->getZExtValue();
9343 EVT ElemVT = VecVT.getVectorElementType();
9344 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9345 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9346 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9347 unsigned ExtractIdx =
9348 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9349 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9350 DAG.getVectorIdxConstant(ExtractIdx, DL));
9351 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9352 ContainerVT = M1VT;
9353 }
9354
9355 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9356 // contains our index.
9357 std::optional<uint64_t> MaxIdx;
9358 if (VecVT.isFixedLengthVector())
9359 MaxIdx = VecVT.getVectorNumElements() - 1;
9360 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9361 MaxIdx = IdxC->getZExtValue();
9362 if (MaxIdx) {
9363 if (auto SmallerVT =
9364 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9365 ContainerVT = *SmallerVT;
9366 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9367 DAG.getConstant(0, DL, XLenVT));
9368 }
9369 }
9370
9371 // If after narrowing, the required slide is still greater than LMUL2,
9372 // fallback to generic expansion and go through the stack. This is done
9373 // for a subtle reason: extracting *all* elements out of a vector is
9374 // widely expected to be linear in vector size, but because vslidedown
9375 // is linear in LMUL, performing N extracts using vslidedown becomes
9376 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9377 // seems to have the same problem (the store is linear in LMUL), but the
9378 // generic expansion *memoizes* the store, and thus for many extracts of
9379 // the same vector we end up with one store and a bunch of loads.
9380 // TODO: We don't have the same code for insert_vector_elt because we
9381 // have BUILD_VECTOR and handle the degenerate case there. Should we
9382 // consider adding an inverse BUILD_VECTOR node?
9383 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9384 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9385 return SDValue();
9386
9387 // If the index is 0, the vector is already in the right position.
9388 if (!isNullConstant(Idx)) {
9389 // Use a VL of 1 to avoid processing more elements than we need.
9390 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9391 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9392 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9393 }
9394
9395 if (!EltVT.isInteger()) {
9396 // Floating-point extracts are handled in TableGen.
9397 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9398 DAG.getVectorIdxConstant(0, DL));
9399 }
9400
9401 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9402 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9403}
9404
9405// Some RVV intrinsics may claim that they want an integer operand to be
9406// promoted or expanded.
9407 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9408 const RISCVSubtarget &Subtarget) {
9409 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9410 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9411 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9412 "Unexpected opcode");
9413
9414 if (!Subtarget.hasVInstructions())
9415 return SDValue();
9416
9417 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9418 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9419 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9420
9421 SDLoc DL(Op);
9422
9423 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9424 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9425 if (!II || !II->hasScalarOperand())
9426 return SDValue();
9427
9428 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9429 assert(SplatOp < Op.getNumOperands());
9430
9431 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
9432 SDValue &ScalarOp = Operands[SplatOp];
9433 MVT OpVT = ScalarOp.getSimpleValueType();
9434 MVT XLenVT = Subtarget.getXLenVT();
9435
9436 // If this isn't a scalar, or its type is XLenVT we're done.
9437 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9438 return SDValue();
9439
9440 // Simplest case is that the operand needs to be promoted to XLenVT.
9441 if (OpVT.bitsLT(XLenVT)) {
9442 // If the operand is a constant, sign extend to increase our chances
9443 // of being able to use a .vi instruction. ANY_EXTEND would become a
9444 // zero extend and the simm5 check in isel would fail.
9445 // FIXME: Should we ignore the upper bits in isel instead?
9446 unsigned ExtOpc =
9447 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9448 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9449 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9450 }
9451
9452 // Use the previous operand to get the vXi64 VT. The result might be a mask
9453 // VT for compares. Using the previous operand assumes that the previous
9454 // operand will never have a smaller element size than a scalar operand and
9455 // that a widening operation never uses SEW=64.
9456 // NOTE: If this fails the below assert, we can probably just find the
9457 // element count from any operand or result and use it to construct the VT.
9458 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9459 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9460
9461 // The more complex case is when the scalar is larger than XLenVT.
9462 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9463 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9464
9465 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9466 // instruction to sign-extend since SEW>XLEN.
9467 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9468 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9469 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9470 }
9471
9472 switch (IntNo) {
9473 case Intrinsic::riscv_vslide1up:
9474 case Intrinsic::riscv_vslide1down:
9475 case Intrinsic::riscv_vslide1up_mask:
9476 case Intrinsic::riscv_vslide1down_mask: {
9477 // We need to special case these when the scalar is larger than XLen.
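// For illustration, on RV32 a vslide1down with an i64 scalar on nxv1i64 is
// performed as two SEW=32 vslide1down steps on nxv2i32, one for the low and
// one for the high half of the scalar, with VL doubled.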
9478 unsigned NumOps = Op.getNumOperands();
9479 bool IsMasked = NumOps == 7;
9480
9481 // Convert the vector source to the equivalent nxvXi32 vector.
9482 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9483 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9484 SDValue ScalarLo, ScalarHi;
9485 std::tie(ScalarLo, ScalarHi) =
9486 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9487
9488 // Double the VL since we halved SEW.
9489 SDValue AVL = getVLOperand(Op);
9490 SDValue I32VL;
9491
9492 // Optimize for constant AVL
9493 if (isa<ConstantSDNode>(AVL)) {
9494 const auto [MinVLMAX, MaxVLMAX] =
9495 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9496
9497 uint64_t AVLInt = AVL->getAsZExtVal();
9498 if (AVLInt <= MinVLMAX) {
9499 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9500 } else if (AVLInt >= 2 * MaxVLMAX) {
9501 // Just set vl to VLMAX in this situation
9502 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9503 } else {
9504 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9505 // is related to the hardware implementation.
9506 // So let the following code handle it.
9507 }
9508 }
9509 if (!I32VL) {
9510 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9511 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9512 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9513 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9514 SDValue SETVL =
9515 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9516 // Use a vsetvli instruction to get the actually-used vector length, which
9517 // is related to the hardware implementation.
9518 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9519 SEW, LMUL);
9520 I32VL =
9521 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9522 }
9523
9524 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9525
9526 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9527 // instructions.
9528 SDValue Passthru;
9529 if (IsMasked)
9530 Passthru = DAG.getUNDEF(I32VT);
9531 else
9532 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9533
9534 if (IntNo == Intrinsic::riscv_vslide1up ||
9535 IntNo == Intrinsic::riscv_vslide1up_mask) {
9536 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9537 ScalarHi, I32Mask, I32VL);
9538 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9539 ScalarLo, I32Mask, I32VL);
9540 } else {
9541 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9542 ScalarLo, I32Mask, I32VL);
9543 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9544 ScalarHi, I32Mask, I32VL);
9545 }
9546
9547 // Convert back to nxvXi64.
9548 Vec = DAG.getBitcast(VT, Vec);
9549
9550 if (!IsMasked)
9551 return Vec;
9552 // Apply mask after the operation.
9553 SDValue Mask = Operands[NumOps - 3];
9554 SDValue MaskedOff = Operands[1];
9555 // Assume Policy operand is the last operand.
9556 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9557 // We don't need to select maskedoff if it's undef.
9558 if (MaskedOff.isUndef())
9559 return Vec;
9560 // TAMU
9561 if (Policy == RISCVII::TAIL_AGNOSTIC)
9562 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9563 DAG.getUNDEF(VT), AVL);
9564 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9565 // It's fine because vmerge does not care about the mask policy.
9566 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9567 MaskedOff, AVL);
9568 }
9569 }
9570
9571 // We need to convert the scalar to a splat vector.
9572 SDValue VL = getVLOperand(Op);
9573 assert(VL.getValueType() == XLenVT);
9574 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9575 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9576}
9577
9578// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9579// scalable vector llvm.get.vector.length for now.
9580//
9581// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9582// (vscale * VF). The vscale and VF are independent of element width. We use
9583// SEW=8 for the vsetvli because it is the only element width that supports all
9584 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9585 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9586// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9587// SEW and LMUL are better for the surrounding vector instructions.
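// For illustration, VF=2 gives LMul1VF = 64/8 = 8, so the request is
// fractional with LMulVal = 4, i.e. a vsetvli with SEW=8 and LMUL=mf4, whose
// VLMax is VLEN/32 = vscale * 2.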
9588 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9589 const RISCVSubtarget &Subtarget) {
9590 MVT XLenVT = Subtarget.getXLenVT();
9591
9592 // The smallest LMUL is only valid for the smallest element width.
9593 const unsigned ElementWidth = 8;
9594
9595 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9596 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9597 // We don't support VF==1 with ELEN==32.
9598 [[maybe_unused]] unsigned MinVF =
9599 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9600
9601 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9602 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9603 "Unexpected VF");
9604
9605 bool Fractional = VF < LMul1VF;
9606 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9607 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9608 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9609
9610 SDLoc DL(N);
9611
9612 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9613 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9614
9615 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9616
9617 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9618 SDValue Res =
9619 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9620 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9621}
9622
9623 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9624 const RISCVSubtarget &Subtarget) {
9625 SDValue Op0 = N->getOperand(1);
9626 MVT OpVT = Op0.getSimpleValueType();
9627 MVT ContainerVT = OpVT;
9628 if (OpVT.isFixedLengthVector()) {
9629 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9630 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9631 }
9632 MVT XLenVT = Subtarget.getXLenVT();
9633 SDLoc DL(N);
9634 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9635 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9636 if (isOneConstant(N->getOperand(2)))
9637 return Res;
9638
9639 // Convert -1 to VL.
9640 SDValue Setcc =
9641 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9642 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9643 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9644}
9645
9646static inline void promoteVCIXScalar(const SDValue &Op,
9647 SmallVectorImpl<SDValue> &Operands,
9648 SelectionDAG &DAG) {
9649 const RISCVSubtarget &Subtarget =
9650 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9651
9652 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9653 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9654 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9655 SDLoc DL(Op);
9656
9658 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9659 if (!II || !II->hasScalarOperand())
9660 return;
9661
9662 unsigned SplatOp = II->ScalarOperand + 1;
9663 assert(SplatOp < Op.getNumOperands());
9664
9665 SDValue &ScalarOp = Operands[SplatOp];
9666 MVT OpVT = ScalarOp.getSimpleValueType();
9667 MVT XLenVT = Subtarget.getXLenVT();
9668
9669 // The code below is partially copied from lowerVectorIntrinsicScalars.
9670 // If this isn't a scalar, or its type is XLenVT we're done.
9671 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9672 return;
9673
9674 // Manually emit promote operation for scalar operation.
9675 if (OpVT.bitsLT(XLenVT)) {
9676 unsigned ExtOpc =
9677 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9678 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9679 }
9680}
9681
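// Prepares the operand list of a VCIX intrinsic for lowering: promotes a
// narrow scalar operand (see promoteVCIXScalar above), bitcasts
// floating-point vector operands to integer vectors of the same element
// width, and converts fixed-length vector operands to their scalable
// containers.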
9682static void processVCIXOperands(SDValue &OrigOp,
9683                                 SmallVectorImpl<SDValue> &Operands,
9684                                 SelectionDAG &DAG) {
9685 promoteVCIXScalar(OrigOp, Operands, DAG);
9686 const RISCVSubtarget &Subtarget =
9687       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9688   for (SDValue &V : Operands) {
9689 EVT ValType = V.getValueType();
9690 if (ValType.isVector() && ValType.isFloatingPoint()) {
9691 MVT InterimIVT =
9692 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9693 ValType.getVectorElementCount());
9694 V = DAG.getBitcast(InterimIVT, V);
9695 }
9696 if (ValType.isFixedLengthVector()) {
9697 MVT OpContainerVT = getContainerForFixedLengthVector(
9698 DAG, V.getSimpleValueType(), Subtarget);
9699 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9700 }
9701 }
9702}
9703
9704// LMUL * VLEN should be greater than or equal to EGS * SEW
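// Illustrative example (assuming VLEN = 128): an LMUL=2 type such as nxv4i32
// gives (128 * 128) / 64 = 256 >= 4 * 32, so EGS = 4 is valid, whereas an
// LMUL=1/2 type such as nxv1i32 yields only 64 bits and fails the check.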
9705static inline bool isValidEGW(int EGS, EVT VT,
9706 const RISCVSubtarget &Subtarget) {
9707 return (Subtarget.getRealMinVLen() *
9708               VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9709          EGS * VT.getScalarSizeInBits();
9710}
9711
9712SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9713 SelectionDAG &DAG) const {
9714 unsigned IntNo = Op.getConstantOperandVal(0);
9715 SDLoc DL(Op);
9716 MVT XLenVT = Subtarget.getXLenVT();
9717
9718 switch (IntNo) {
9719 default:
9720 break; // Don't custom lower most intrinsics.
9721 case Intrinsic::riscv_tuple_insert: {
9722 SDValue Vec = Op.getOperand(1);
9723 SDValue SubVec = Op.getOperand(2);
9724 SDValue Index = Op.getOperand(3);
9725
9726 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9727 SubVec, Index);
9728 }
9729 case Intrinsic::riscv_tuple_extract: {
9730 SDValue Vec = Op.getOperand(1);
9731 SDValue Index = Op.getOperand(2);
9732
9733 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9734 Index);
9735 }
9736 case Intrinsic::thread_pointer: {
9737 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9738 return DAG.getRegister(RISCV::X4, PtrVT);
9739 }
9740 case Intrinsic::riscv_orc_b:
9741 case Intrinsic::riscv_brev8:
9742 case Intrinsic::riscv_sha256sig0:
9743 case Intrinsic::riscv_sha256sig1:
9744 case Intrinsic::riscv_sha256sum0:
9745 case Intrinsic::riscv_sha256sum1:
9746 case Intrinsic::riscv_sm3p0:
9747 case Intrinsic::riscv_sm3p1: {
9748 unsigned Opc;
9749 switch (IntNo) {
9750 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9751 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9752 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9753 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9754 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9755 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9756 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9757 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9758 }
9759
9760 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9761 }
9762 case Intrinsic::riscv_sm4ks:
9763 case Intrinsic::riscv_sm4ed: {
9764 unsigned Opc =
9765 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9766
9767 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9768 Op.getOperand(3));
9769 }
9770 case Intrinsic::riscv_zip:
9771 case Intrinsic::riscv_unzip: {
9772 unsigned Opc =
9773 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9774 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9775 }
9776 case Intrinsic::riscv_mopr:
9777 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9778 Op.getOperand(2));
9779
9780 case Intrinsic::riscv_moprr: {
9781 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9782 Op.getOperand(2), Op.getOperand(3));
9783 }
9784 case Intrinsic::riscv_clmul:
9785 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9786 Op.getOperand(2));
9787 case Intrinsic::riscv_clmulh:
9788 case Intrinsic::riscv_clmulr: {
9789 unsigned Opc =
9790 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9791 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9792 }
9793 case Intrinsic::experimental_get_vector_length:
9794 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9795 case Intrinsic::experimental_cttz_elts:
9796 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9797 case Intrinsic::riscv_vmv_x_s: {
9798 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9799 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9800 }
9801 case Intrinsic::riscv_vfmv_f_s:
9802 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9803 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9804 case Intrinsic::riscv_vmv_v_x:
9805 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9806 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9807 Subtarget);
9808 case Intrinsic::riscv_vfmv_v_f:
9809 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9810 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9811 case Intrinsic::riscv_vmv_s_x: {
9812 SDValue Scalar = Op.getOperand(2);
9813
9814 if (Scalar.getValueType().bitsLE(XLenVT)) {
9815 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9816 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9817 Op.getOperand(1), Scalar, Op.getOperand(3));
9818 }
9819
9820 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9821
9822     // insert this in a convoluted way. First we build a vXi64 splat containing
9823 // insert this in a convoluted way. First we build vXi64 splat containing
9824 // the two values that we assemble using some bit math. Next we'll use
9825 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9826 // to merge element 0 from our splat into the source vector.
9827 // FIXME: This is probably not the best way to do this, but it is
9828 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9829 // point.
9830 // sw lo, (a0)
9831 // sw hi, 4(a0)
9832 // vlse vX, (a0)
9833 //
9834 // vid.v vVid
9835 // vmseq.vx mMask, vVid, 0
9836 // vmerge.vvm vDest, vSrc, vVal, mMask
9837 MVT VT = Op.getSimpleValueType();
9838 SDValue Vec = Op.getOperand(1);
9839 SDValue VL = getVLOperand(Op);
9840
9841 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9842 if (Op.getOperand(1).isUndef())
9843 return SplattedVal;
9844 SDValue SplattedIdx =
9845 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9846 DAG.getConstant(0, DL, MVT::i32), VL);
9847
9848 MVT MaskVT = getMaskTypeFor(VT);
9849 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9850 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9851 SDValue SelectCond =
9852 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9853 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9854 DAG.getUNDEF(MaskVT), Mask, VL});
9855 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9856 Vec, DAG.getUNDEF(VT), VL);
9857 }
9858 case Intrinsic::riscv_vfmv_s_f:
9859 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9860 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9861 // EGS * EEW >= 128 bits
9862 case Intrinsic::riscv_vaesdf_vv:
9863 case Intrinsic::riscv_vaesdf_vs:
9864 case Intrinsic::riscv_vaesdm_vv:
9865 case Intrinsic::riscv_vaesdm_vs:
9866 case Intrinsic::riscv_vaesef_vv:
9867 case Intrinsic::riscv_vaesef_vs:
9868 case Intrinsic::riscv_vaesem_vv:
9869 case Intrinsic::riscv_vaesem_vs:
9870 case Intrinsic::riscv_vaeskf1:
9871 case Intrinsic::riscv_vaeskf2:
9872 case Intrinsic::riscv_vaesz_vs:
9873 case Intrinsic::riscv_vsm4k:
9874 case Intrinsic::riscv_vsm4r_vv:
9875 case Intrinsic::riscv_vsm4r_vs: {
9876 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9877 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9878 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9879 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9880 return Op;
9881 }
9882 // EGS * EEW >= 256 bits
9883 case Intrinsic::riscv_vsm3c:
9884 case Intrinsic::riscv_vsm3me: {
9885 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9886 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9887 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9888 return Op;
9889 }
9890 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9891 case Intrinsic::riscv_vsha2ch:
9892 case Intrinsic::riscv_vsha2cl:
9893 case Intrinsic::riscv_vsha2ms: {
9894 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9895 !Subtarget.hasStdExtZvknhb())
9896 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9897 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9898 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9899 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9900 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9901 return Op;
9902 }
9903 case Intrinsic::riscv_sf_vc_v_x:
9904 case Intrinsic::riscv_sf_vc_v_i:
9905 case Intrinsic::riscv_sf_vc_v_xv:
9906 case Intrinsic::riscv_sf_vc_v_iv:
9907 case Intrinsic::riscv_sf_vc_v_vv:
9908 case Intrinsic::riscv_sf_vc_v_fv:
9909 case Intrinsic::riscv_sf_vc_v_xvv:
9910 case Intrinsic::riscv_sf_vc_v_ivv:
9911 case Intrinsic::riscv_sf_vc_v_vvv:
9912 case Intrinsic::riscv_sf_vc_v_fvv:
9913 case Intrinsic::riscv_sf_vc_v_xvw:
9914 case Intrinsic::riscv_sf_vc_v_ivw:
9915 case Intrinsic::riscv_sf_vc_v_vvw:
9916 case Intrinsic::riscv_sf_vc_v_fvw: {
9917 MVT VT = Op.getSimpleValueType();
9918
9919 SmallVector<SDValue> Operands{Op->op_values()};
9920     processVCIXOperands(Op, Operands, DAG);
9921
9922 MVT RetVT = VT;
9923     if (VT.isFixedLengthVector())
9924       RetVT = getContainerForFixedLengthVector(VT);
9925     else if (VT.isFloatingPoint())
9926       RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9927                                VT.getVectorElementCount());
9928
9929 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9930
9931 if (VT.isFixedLengthVector())
9932 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9933 else if (VT.isFloatingPoint())
9934 NewNode = DAG.getBitcast(VT, NewNode);
9935
9936 if (Op == NewNode)
9937 break;
9938
9939 return NewNode;
9940 }
9941 }
9942
9943 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9944}
9945
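// Helper for the chained sf_vc_v_*_se intrinsics: drops the intrinsic-ID
// operand, legalizes the remaining operands with processVCIXOperands, emits
// the VCIX node with a legal (integer, scalable) result type, and converts
// the result back to the requested type while preserving the chain.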
9946 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9947                                     unsigned Type) {
9948 SDLoc DL(Op);
9949 SmallVector<SDValue> Operands{Op->op_values()};
9950 Operands.erase(Operands.begin() + 1);
9951
9952 const RISCVSubtarget &Subtarget =
9953       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9954   MVT VT = Op.getSimpleValueType();
9955 MVT RetVT = VT;
9956 MVT FloatVT = VT;
9957
9958 if (VT.isFloatingPoint()) {
9959 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9960                              VT.getVectorElementCount());
9961     FloatVT = RetVT;
9962 }
9963 if (VT.isFixedLengthVector())
9964     RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9965                                              Subtarget);
9966
9967   processVCIXOperands(Op, Operands, DAG);
9968
9969 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9970 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9971 SDValue Chain = NewNode.getValue(1);
9972
9973 if (VT.isFixedLengthVector())
9974 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9975 if (VT.isFloatingPoint())
9976 NewNode = DAG.getBitcast(VT, NewNode);
9977
9978 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9979
9980 return NewNode;
9981}
9982
9983 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9984                                   unsigned Type) {
9985 SmallVector<SDValue> Operands{Op->op_values()};
9986 Operands.erase(Operands.begin() + 1);
9987   processVCIXOperands(Op, Operands, DAG);
9988
9989 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9990}
9991
9992SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9993 SelectionDAG &DAG) const {
9994 unsigned IntNo = Op.getConstantOperandVal(1);
9995 switch (IntNo) {
9996 default:
9997 break;
9998 case Intrinsic::riscv_seg2_load:
9999 case Intrinsic::riscv_seg3_load:
10000 case Intrinsic::riscv_seg4_load:
10001 case Intrinsic::riscv_seg5_load:
10002 case Intrinsic::riscv_seg6_load:
10003 case Intrinsic::riscv_seg7_load:
10004 case Intrinsic::riscv_seg8_load: {
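    // A fixed-length segN load is lowered to the corresponding vlsegN
    // intrinsic on a scalable vector tuple; each field of the tuple is then
    // extracted with TUPLE_EXTRACT and converted back to the fixed-length
    // result type.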
10005 SDLoc DL(Op);
10006 static const Intrinsic::ID VlsegInts[7] = {
10007 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10008 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10009 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10010 Intrinsic::riscv_vlseg8};
10011 unsigned NF = Op->getNumValues() - 1;
10012 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10013 MVT XLenVT = Subtarget.getXLenVT();
10014 MVT VT = Op->getSimpleValueType(0);
10015 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10016 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10017 ContainerVT.getScalarSizeInBits();
10018 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10019
10020 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10021 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10022 auto *Load = cast<MemIntrinsicSDNode>(Op);
10023
10024 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10025 SDValue Ops[] = {
10026 Load->getChain(),
10027 IntID,
10028 DAG.getUNDEF(VecTupTy),
10029 Op.getOperand(2),
10030 VL,
10031 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10032 SDValue Result =
10033        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10034                                Load->getMemoryVT(), Load->getMemOperand());
10035    SmallVector<SDValue, 9> Results;
10036    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10037 SDValue SubVec =
10038 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10039 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10040 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10041 }
10042 Results.push_back(Result.getValue(1));
10043 return DAG.getMergeValues(Results, DL);
10044 }
10045  case Intrinsic::riscv_sf_vc_v_x_se:
10046    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
10047  case Intrinsic::riscv_sf_vc_v_i_se:
10048    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
10049  case Intrinsic::riscv_sf_vc_v_xv_se:
10050    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
10051  case Intrinsic::riscv_sf_vc_v_iv_se:
10052    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
10053  case Intrinsic::riscv_sf_vc_v_vv_se:
10054    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
10055  case Intrinsic::riscv_sf_vc_v_fv_se:
10056    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
10057  case Intrinsic::riscv_sf_vc_v_xvv_se:
10058    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
10059  case Intrinsic::riscv_sf_vc_v_ivv_se:
10060    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
10061  case Intrinsic::riscv_sf_vc_v_vvv_se:
10062    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
10063  case Intrinsic::riscv_sf_vc_v_fvv_se:
10064    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
10065  case Intrinsic::riscv_sf_vc_v_xvw_se:
10066    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
10067  case Intrinsic::riscv_sf_vc_v_ivw_se:
10068    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
10069  case Intrinsic::riscv_sf_vc_v_vvw_se:
10070    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
10071  case Intrinsic::riscv_sf_vc_v_fvw_se:
10072    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
10073  }
10074
10075 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10076}
10077
10078SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10079 SelectionDAG &DAG) const {
10080 unsigned IntNo = Op.getConstantOperandVal(1);
10081 switch (IntNo) {
10082 default:
10083 break;
10084 case Intrinsic::riscv_seg2_store:
10085 case Intrinsic::riscv_seg3_store:
10086 case Intrinsic::riscv_seg4_store:
10087 case Intrinsic::riscv_seg5_store:
10088 case Intrinsic::riscv_seg6_store:
10089 case Intrinsic::riscv_seg7_store:
10090 case Intrinsic::riscv_seg8_store: {
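    // This mirrors the segN load lowering above: each fixed-length operand
    // is converted to a scalable vector, packed into a vector tuple with
    // TUPLE_INSERT, and stored through a single vssegN memory intrinsic.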
10091 SDLoc DL(Op);
10092 static const Intrinsic::ID VssegInts[] = {
10093 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10094 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10095 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10096 Intrinsic::riscv_vsseg8};
10097 // Operands are (chain, int_id, vec*, ptr, vl)
10098 unsigned NF = Op->getNumOperands() - 4;
10099 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10100 MVT XLenVT = Subtarget.getXLenVT();
10101 MVT VT = Op->getOperand(2).getSimpleValueType();
10102 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10103 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10104 ContainerVT.getScalarSizeInBits();
10105 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10106
10107 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10108 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10109 SDValue Ptr = Op->getOperand(NF + 2);
10110
10111 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10112
10113 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10114 for (unsigned i = 0; i < NF; i++)
10115 StoredVal = DAG.getNode(
10116 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10117          convertToScalableVector(
10118              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10119 DAG.getVectorIdxConstant(i, DL));
10120
10121 SDValue Ops[] = {
10122 FixedIntrinsic->getChain(),
10123 IntID,
10124 StoredVal,
10125 Ptr,
10126 VL,
10127 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10128
10129 return DAG.getMemIntrinsicNode(
10130 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10131 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10132 }
10133  case Intrinsic::riscv_sf_vc_xv_se:
10134    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10135  case Intrinsic::riscv_sf_vc_iv_se:
10136    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10137  case Intrinsic::riscv_sf_vc_vv_se:
10138    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10139  case Intrinsic::riscv_sf_vc_fv_se:
10140    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10141  case Intrinsic::riscv_sf_vc_xvv_se:
10142    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10143  case Intrinsic::riscv_sf_vc_ivv_se:
10144    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10145  case Intrinsic::riscv_sf_vc_vvv_se:
10146    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10147  case Intrinsic::riscv_sf_vc_fvv_se:
10148    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10149  case Intrinsic::riscv_sf_vc_xvw_se:
10150    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10151  case Intrinsic::riscv_sf_vc_ivw_se:
10152    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10153  case Intrinsic::riscv_sf_vc_vvw_se:
10154    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10155  case Intrinsic::riscv_sf_vc_fvw_se:
10156    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10157  }
10158
10159 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10160}
10161
10162static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10163 switch (ISDOpcode) {
10164 default:
10165 llvm_unreachable("Unhandled reduction");
10166  case ISD::VP_REDUCE_ADD:
10167  case ISD::VECREDUCE_ADD:
10168    return RISCVISD::VECREDUCE_ADD_VL;
10169  case ISD::VP_REDUCE_UMAX:
10170  case ISD::VECREDUCE_UMAX:
10171    return RISCVISD::VECREDUCE_UMAX_VL;
10172  case ISD::VP_REDUCE_SMAX:
10173  case ISD::VECREDUCE_SMAX:
10174    return RISCVISD::VECREDUCE_SMAX_VL;
10175  case ISD::VP_REDUCE_UMIN:
10176  case ISD::VECREDUCE_UMIN:
10177    return RISCVISD::VECREDUCE_UMIN_VL;
10178  case ISD::VP_REDUCE_SMIN:
10179  case ISD::VECREDUCE_SMIN:
10180    return RISCVISD::VECREDUCE_SMIN_VL;
10181  case ISD::VP_REDUCE_AND:
10182  case ISD::VECREDUCE_AND:
10183    return RISCVISD::VECREDUCE_AND_VL;
10184  case ISD::VP_REDUCE_OR:
10185  case ISD::VECREDUCE_OR:
10186    return RISCVISD::VECREDUCE_OR_VL;
10187  case ISD::VP_REDUCE_XOR:
10188  case ISD::VECREDUCE_XOR:
10189    return RISCVISD::VECREDUCE_XOR_VL;
10190  case ISD::VP_REDUCE_FADD:
10191    return RISCVISD::VECREDUCE_FADD_VL;
10192  case ISD::VP_REDUCE_SEQ_FADD:
10193    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10194  case ISD::VP_REDUCE_FMAX:
10195  case ISD::VP_REDUCE_FMAXIMUM:
10196    return RISCVISD::VECREDUCE_FMAX_VL;
10197  case ISD::VP_REDUCE_FMIN:
10198  case ISD::VP_REDUCE_FMINIMUM:
10199    return RISCVISD::VECREDUCE_FMIN_VL;
10200  }
10201
10202}
10203
10204SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10205 SelectionDAG &DAG,
10206 bool IsVP) const {
10207 SDLoc DL(Op);
10208 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10209 MVT VecVT = Vec.getSimpleValueType();
10210 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10211 Op.getOpcode() == ISD::VECREDUCE_OR ||
10212 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10213 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10214 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10215 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10216 "Unexpected reduction lowering");
10217
10218 MVT XLenVT = Subtarget.getXLenVT();
10219
10220 MVT ContainerVT = VecVT;
10221 if (VecVT.isFixedLengthVector()) {
10222 ContainerVT = getContainerForFixedLengthVector(VecVT);
10223 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10224 }
10225
10226 SDValue Mask, VL;
10227 if (IsVP) {
10228 Mask = Op.getOperand(2);
10229 VL = Op.getOperand(3);
10230 } else {
10231 std::tie(Mask, VL) =
10232 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10233 }
10234
10235  ISD::CondCode CC;
10236  switch (Op.getOpcode()) {
10237 default:
10238 llvm_unreachable("Unhandled reduction");
10239 case ISD::VECREDUCE_AND:
10240 case ISD::VP_REDUCE_AND: {
10241 // vcpop ~x == 0
10242 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10243 if (IsVP || VecVT.isFixedLengthVector())
10244 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10245 else
10246 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10247 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10248 CC = ISD::SETEQ;
10249 break;
10250 }
10251 case ISD::VECREDUCE_OR:
10252 case ISD::VP_REDUCE_OR:
10253 // vcpop x != 0
10254 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10255 CC = ISD::SETNE;
10256 break;
10257 case ISD::VECREDUCE_XOR:
10258 case ISD::VP_REDUCE_XOR: {
10259 // ((vcpop x) & 1) != 0
10260 SDValue One = DAG.getConstant(1, DL, XLenVT);
10261 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10262 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10263 CC = ISD::SETNE;
10264 break;
10265 }
10266 }
10267
10268 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10269 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10270 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10271
10272 if (!IsVP)
10273 return SetCC;
10274
10275 // Now include the start value in the operation.
10276 // Note that we must return the start value when no elements are operated
10277 // upon. The vcpop instructions we've emitted in each case above will return
10278 // 0 for an inactive vector, and so we've already received the neutral value:
10279 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10280 // can simply include the start value.
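  // For example, a VP_REDUCE_AND with VL = 0 (or an all-false mask) yields
  // vcpop = 0, the SETEQ above then produces 1, and AND-ing 1 with the start
  // value returns the start value unchanged.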
10281 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10282 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10283}
10284
10285static bool isNonZeroAVL(SDValue AVL) {
10286 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10287 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10288 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10289 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10290}
10291
10292/// Helper to lower a reduction sequence of the form:
10293/// scalar = reduce_op vec, scalar_start
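/// For an integer vecreduce.add this typically becomes a vmv.s.x of the start
/// value into an LMUL=1 register, a vredsum.vs over the source vector, and a
/// vmv.x.s of element 0 back into a scalar register (illustrative; the exact
/// sequence depends on types and policies).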
10294static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10295 SDValue StartValue, SDValue Vec, SDValue Mask,
10296 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10297 const RISCVSubtarget &Subtarget) {
10298 const MVT VecVT = Vec.getSimpleValueType();
10299 const MVT M1VT = getLMUL1VT(VecVT);
10300 const MVT XLenVT = Subtarget.getXLenVT();
10301 const bool NonZeroAVL = isNonZeroAVL(VL);
10302
10303 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10304 // or the original VT if fractional.
10305 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10306 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10307 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10308 // be the result of the reduction operation.
10309 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10310 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10311 DAG, Subtarget);
10312 if (M1VT != InnerVT)
10313 InitialValue =
10314 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10315 InitialValue, DAG.getVectorIdxConstant(0, DL));
10316 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10317 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10318 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10319 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10320 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10321 DAG.getVectorIdxConstant(0, DL));
10322}
10323
10324SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10325 SelectionDAG &DAG) const {
10326 SDLoc DL(Op);
10327 SDValue Vec = Op.getOperand(0);
10328 EVT VecEVT = Vec.getValueType();
10329
10330 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10331
10332 // Due to ordering in legalize types we may have a vector type that needs to
10333 // be split. Do that manually so we can get down to a legal type.
10334 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10335         TargetLowering::TypeSplitVector) {
10336    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10337 VecEVT = Lo.getValueType();
10338 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10339 }
10340
10341 // TODO: The type may need to be widened rather than split. Or widened before
10342 // it can be split.
10343 if (!isTypeLegal(VecEVT))
10344 return SDValue();
10345
10346 MVT VecVT = VecEVT.getSimpleVT();
10347 MVT VecEltVT = VecVT.getVectorElementType();
10348 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10349
10350 MVT ContainerVT = VecVT;
10351 if (VecVT.isFixedLengthVector()) {
10352 ContainerVT = getContainerForFixedLengthVector(VecVT);
10353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10354 }
10355
10356 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10357
10358 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10359 switch (BaseOpc) {
10360 case ISD::AND:
10361 case ISD::OR:
10362 case ISD::UMAX:
10363 case ISD::UMIN:
10364 case ISD::SMAX:
10365 case ISD::SMIN:
10366 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10367 DAG.getVectorIdxConstant(0, DL));
10368 }
10369 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10370 Mask, VL, DL, DAG, Subtarget);
10371}
10372
10373// Given a reduction op, this function returns the matching reduction opcode,
10374// the vector SDValue and the scalar SDValue required to lower this to a
10375// RISCVISD node.
10376static std::tuple<unsigned, SDValue, SDValue>
10378 const RISCVSubtarget &Subtarget) {
10379 SDLoc DL(Op);
10380 auto Flags = Op->getFlags();
10381 unsigned Opcode = Op.getOpcode();
10382 switch (Opcode) {
10383 default:
10384 llvm_unreachable("Unhandled reduction");
10385 case ISD::VECREDUCE_FADD: {
10386 // Use positive zero if we can. It is cheaper to materialize.
10387 SDValue Zero =
10388 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10389 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10390 }
10391  case ISD::VECREDUCE_SEQ_FADD:
10392    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10393                           Op.getOperand(0));
10394  case ISD::VECREDUCE_FMINIMUM:
10395  case ISD::VECREDUCE_FMAXIMUM:
10396  case ISD::VECREDUCE_FMIN:
10397  case ISD::VECREDUCE_FMAX: {
10398 SDValue Front =
10399 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10400 DAG.getVectorIdxConstant(0, DL));
10401 unsigned RVVOpc =
10402 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10403            ? RISCVISD::VECREDUCE_FMIN_VL
10404            : RISCVISD::VECREDUCE_FMAX_VL;
10405    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10406 }
10407 }
10408}
10409
10410SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10411 SelectionDAG &DAG) const {
10412 SDLoc DL(Op);
10413 MVT VecEltVT = Op.getSimpleValueType();
10414
10415 unsigned RVVOpcode;
10416 SDValue VectorVal, ScalarVal;
10417 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10418 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10419 MVT VecVT = VectorVal.getSimpleValueType();
10420
10421 MVT ContainerVT = VecVT;
10422 if (VecVT.isFixedLengthVector()) {
10423 ContainerVT = getContainerForFixedLengthVector(VecVT);
10424 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10425 }
10426
10427 MVT ResVT = Op.getSimpleValueType();
10428 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10429 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10430 VL, DL, DAG, Subtarget);
10431 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10432 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10433 return Res;
10434
10435 if (Op->getFlags().hasNoNaNs())
10436 return Res;
10437
10438  // Force the output to NaN if any element is NaN.
10439 SDValue IsNan =
10440 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10441 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10442 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10443 MVT XLenVT = Subtarget.getXLenVT();
10444 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10445 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10446 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10447 return DAG.getSelect(
10448 DL, ResVT, NoNaNs, Res,
10449 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10450}
10451
10452SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10453 SelectionDAG &DAG) const {
10454 SDLoc DL(Op);
10455 unsigned Opc = Op.getOpcode();
10456 SDValue Start = Op.getOperand(0);
10457 SDValue Vec = Op.getOperand(1);
10458 EVT VecEVT = Vec.getValueType();
10459 MVT XLenVT = Subtarget.getXLenVT();
10460
10461 // TODO: The type may need to be widened rather than split. Or widened before
10462 // it can be split.
10463 if (!isTypeLegal(VecEVT))
10464 return SDValue();
10465
10466 MVT VecVT = VecEVT.getSimpleVT();
10467 unsigned RVVOpcode = getRVVReductionOp(Opc);
10468
10469 if (VecVT.isFixedLengthVector()) {
10470 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10471 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10472 }
10473
10474 SDValue VL = Op.getOperand(3);
10475 SDValue Mask = Op.getOperand(2);
10476 SDValue Res =
10477 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10478 Vec, Mask, VL, DL, DAG, Subtarget);
10479 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10480 Op->getFlags().hasNoNaNs())
10481 return Res;
10482
10483 // Propagate NaNs.
10484 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10485 // Check if any of the elements in Vec is NaN.
10486 SDValue IsNaN = DAG.getNode(
10487 RISCVISD::SETCC_VL, DL, PredVT,
10488 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10489 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10490 // Check if the start value is NaN.
10491 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10492 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10493 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10494 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10495 MVT ResVT = Res.getSimpleValueType();
10496 return DAG.getSelect(
10497 DL, ResVT, NoNaNs, Res,
10498 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10499}
10500
10501SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10502 SelectionDAG &DAG) const {
10503 SDValue Vec = Op.getOperand(0);
10504 SDValue SubVec = Op.getOperand(1);
10505 MVT VecVT = Vec.getSimpleValueType();
10506 MVT SubVecVT = SubVec.getSimpleValueType();
10507
10508 SDLoc DL(Op);
10509 MVT XLenVT = Subtarget.getXLenVT();
10510 unsigned OrigIdx = Op.getConstantOperandVal(2);
10511 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10512
10513 if (OrigIdx == 0 && Vec.isUndef())
10514 return Op;
10515
10516 // We don't have the ability to slide mask vectors up indexed by their i1
10517 // elements; the smallest we can do is i8. Often we are able to bitcast to
10518 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10519 // into a scalable one, we might not necessarily have enough scalable
10520 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
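  // Illustrative example: inserting a v8i1 subvector at index 8 of a v32i1
  // vector is re-expressed as inserting a v1i8 into a v4i8 at index 1 via
  // bitcasts, so the ordinary integer-vector path below can be used.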
10521 if (SubVecVT.getVectorElementType() == MVT::i1) {
10522 if (VecVT.getVectorMinNumElements() >= 8 &&
10523 SubVecVT.getVectorMinNumElements() >= 8) {
10524 assert(OrigIdx % 8 == 0 && "Invalid index");
10525 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10526 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10527 "Unexpected mask vector lowering");
10528 OrigIdx /= 8;
10529 SubVecVT =
10530 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10531 SubVecVT.isScalableVector());
10532 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10533 VecVT.isScalableVector());
10534 Vec = DAG.getBitcast(VecVT, Vec);
10535 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10536 } else {
10537 // We can't slide this mask vector up indexed by its i1 elements.
10538 // This poses a problem when we wish to insert a scalable vector which
10539 // can't be re-expressed as a larger type. Just choose the slow path and
10540 // extend to a larger type, then truncate back down.
10541 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10542 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10543 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10544 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10545 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10546 Op.getOperand(2));
10547 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10548 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10549 }
10550 }
10551
10552  // If the subvector is a fixed-length type and we don't know VLEN
10553 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10554 // don't know which register of a LMUL group contains the specific subvector
10555 // as we only know the minimum register size. Therefore we must slide the
10556 // vector group up the full amount.
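  // Illustrative example: inserting a v2i32 subvector at index 2 of a v8i32
  // when VLEN is unknown places the subvector at element 0 of a container and
  // then emits a vslideup by 2 with VL = 4, so only the affected elements of
  // the destination are written.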
10557 const auto VLen = Subtarget.getRealVLen();
10558 if (SubVecVT.isFixedLengthVector() && !VLen) {
10559 MVT ContainerVT = VecVT;
10560 if (VecVT.isFixedLengthVector()) {
10561 ContainerVT = getContainerForFixedLengthVector(VecVT);
10562 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10563 }
10564
10565 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10566 DAG.getUNDEF(ContainerVT), SubVec,
10567 DAG.getVectorIdxConstant(0, DL));
10568
10569 SDValue Mask =
10570 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10571 // Set the vector length to only the number of elements we care about. Note
10572 // that for slideup this includes the offset.
10573 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10574 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10575
10576 // Use tail agnostic policy if we're inserting over Vec's tail.
10577    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_AGNOSTIC;
10578    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10579 Policy = RISCVII::TAIL_AGNOSTIC;
10580
10581 // If we're inserting into the lowest elements, use a tail undisturbed
10582 // vmv.v.v.
10583 if (OrigIdx == 0) {
10584 SubVec =
10585 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10586 } else {
10587 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10588 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10589 SlideupAmt, Mask, VL, Policy);
10590 }
10591
10592 if (VecVT.isFixedLengthVector())
10593 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10594 return DAG.getBitcast(Op.getValueType(), SubVec);
10595 }
10596
10597 MVT ContainerVecVT = VecVT;
10598 if (VecVT.isFixedLengthVector()) {
10599 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10600 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10601 }
10602
10603 MVT ContainerSubVecVT = SubVecVT;
10604 if (SubVecVT.isFixedLengthVector()) {
10605 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10606 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10607 }
10608
10609 unsigned SubRegIdx;
10610 ElementCount RemIdx;
10611 // insert_subvector scales the index by vscale if the subvector is scalable,
10612 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10613 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10614 if (SubVecVT.isFixedLengthVector()) {
10615 assert(VLen);
10616 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10617 auto Decompose =
10618        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10619            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10620 SubRegIdx = Decompose.first;
10621 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10622 (OrigIdx % Vscale));
10623 } else {
10624 auto Decompose =
10625        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10626            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10627 SubRegIdx = Decompose.first;
10628 RemIdx = ElementCount::getScalable(Decompose.second);
10629 }
10630
10631  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10632  assert(isPowerOf2_64(
10633      Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10634 bool ExactlyVecRegSized =
10635 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10636 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10637
10638 // 1. If the Idx has been completely eliminated and this subvector's size is
10639 // a vector register or a multiple thereof, or the surrounding elements are
10640 // undef, then this is a subvector insert which naturally aligns to a vector
10641 // register. These can easily be handled using subregister manipulation.
10642 // 2. If the subvector isn't an exact multiple of a valid register group size,
10643 // then the insertion must preserve the undisturbed elements of the register.
10644 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10645 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10646 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10647 // of that LMUL=1 type back into the larger vector (resolving to another
10648 // subregister operation). See below for how our VSLIDEUP works. We go via a
10649 // LMUL=1 type to avoid allocating a large register group to hold our
10650 // subvector.
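  // Illustrative example of case 2 (assuming RVVBitsPerBlock = 64): inserting
  // an nxv1i32 into an nxv4i32 at index 1 leaves a non-zero remainder after
  // the subregister decomposition, so the lowering extracts the LMUL=1
  // register holding the insertion point, slides the subvector up within it,
  // and inserts that register back into the LMUL=2 vector.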
10651 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10652 if (SubVecVT.isFixedLengthVector()) {
10653 // We may get NoSubRegister if inserting at index 0 and the subvec
10654 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10655 if (SubRegIdx == RISCV::NoSubRegister) {
10656 assert(OrigIdx == 0);
10657 return Op;
10658 }
10659
10660      // Use an insert_subvector that will resolve to an insert subreg.
10661 assert(VLen);
10662 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10663 SDValue Insert =
10664 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10665 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10666 if (VecVT.isFixedLengthVector())
10667 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10668 return Insert;
10669 }
10670 return Op;
10671 }
10672
10673  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10674 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10675 // (in our case undisturbed). This means we can set up a subvector insertion
10676 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10677 // size of the subvector.
10678 MVT InterSubVT = ContainerVecVT;
10679 SDValue AlignedExtract = Vec;
10680 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10681 if (SubVecVT.isFixedLengthVector()) {
10682 assert(VLen);
10683 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10684 }
10685 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10686 InterSubVT = getLMUL1VT(ContainerVecVT);
10687 // Extract a subvector equal to the nearest full vector register type. This
10688 // should resolve to a EXTRACT_SUBREG instruction.
10689 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10690 DAG.getVectorIdxConstant(AlignedIdx, DL));
10691 }
10692
10693 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10694 DAG.getUNDEF(InterSubVT), SubVec,
10695 DAG.getVectorIdxConstant(0, DL));
10696
10697 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10698
10699 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10700 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10701
10702 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10703  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_AGNOSTIC;
10704  if (Subtarget.expandVScale(EndIndex) ==
10705 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10706 Policy = RISCVII::TAIL_AGNOSTIC;
10707
10708 // If we're inserting into the lowest elements, use a tail undisturbed
10709 // vmv.v.v.
10710 if (RemIdx.isZero()) {
10711 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10712 SubVec, VL);
10713 } else {
10714 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10715
10716 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10717 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10718
10719 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10720 SlideupAmt, Mask, VL, Policy);
10721 }
10722
10723 // If required, insert this subvector back into the correct vector register.
10724 // This should resolve to an INSERT_SUBREG instruction.
10725 if (ContainerVecVT.bitsGT(InterSubVT))
10726 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10727 DAG.getVectorIdxConstant(AlignedIdx, DL));
10728
10729 if (VecVT.isFixedLengthVector())
10730 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10731
10732 // We might have bitcast from a mask type: cast back to the original type if
10733 // required.
10734 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10735}
10736
10737SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10738 SelectionDAG &DAG) const {
10739 SDValue Vec = Op.getOperand(0);
10740 MVT SubVecVT = Op.getSimpleValueType();
10741 MVT VecVT = Vec.getSimpleValueType();
10742
10743 SDLoc DL(Op);
10744 MVT XLenVT = Subtarget.getXLenVT();
10745 unsigned OrigIdx = Op.getConstantOperandVal(1);
10746 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10747
10748 // With an index of 0 this is a cast-like subvector, which can be performed
10749 // with subregister operations.
10750 if (OrigIdx == 0)
10751 return Op;
10752
10753 // We don't have the ability to slide mask vectors down indexed by their i1
10754 // elements; the smallest we can do is i8. Often we are able to bitcast to
10755 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10756 // from a scalable one, we might not necessarily have enough scalable
10757 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10758 if (SubVecVT.getVectorElementType() == MVT::i1) {
10759 if (VecVT.getVectorMinNumElements() >= 8 &&
10760 SubVecVT.getVectorMinNumElements() >= 8) {
10761 assert(OrigIdx % 8 == 0 && "Invalid index");
10762 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10763 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10764 "Unexpected mask vector lowering");
10765 OrigIdx /= 8;
10766 SubVecVT =
10767 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10768 SubVecVT.isScalableVector());
10769 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10770 VecVT.isScalableVector());
10771 Vec = DAG.getBitcast(VecVT, Vec);
10772 } else {
10773 // We can't slide this mask vector down, indexed by its i1 elements.
10774 // This poses a problem when we wish to extract a scalable vector which
10775 // can't be re-expressed as a larger type. Just choose the slow path and
10776 // extend to a larger type, then truncate back down.
10777 // TODO: We could probably improve this when extracting certain fixed
10778 // from fixed, where we can extract as i8 and shift the correct element
10779 // right to reach the desired subvector?
10780 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10781 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10782 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10783 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10784 Op.getOperand(1));
10785 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10786 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10787 }
10788 }
10789
10790 const auto VLen = Subtarget.getRealVLen();
10791
10792  // If the subvector is a fixed-length type and we don't know VLEN
10793 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10794 // don't know which register of a LMUL group contains the specific subvector
10795 // as we only know the minimum register size. Therefore we must slide the
10796 // vector group down the full amount.
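  // Illustrative example: extracting a v2i32 subvector at index 6 from a
  // v8i32 vector with unknown VLEN may first shrink the source to a smaller
  // container, then slides it down by 6 with VL = 2 and takes the low two
  // elements of the result.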
10797 if (SubVecVT.isFixedLengthVector() && !VLen) {
10798 MVT ContainerVT = VecVT;
10799 if (VecVT.isFixedLengthVector()) {
10800 ContainerVT = getContainerForFixedLengthVector(VecVT);
10801 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10802 }
10803
10804 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10805 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10806 if (auto ShrunkVT =
10807 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10808 ContainerVT = *ShrunkVT;
10809 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10810 DAG.getVectorIdxConstant(0, DL));
10811 }
10812
10813 SDValue Mask =
10814 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10815 // Set the vector length to only the number of elements we care about. This
10816 // avoids sliding down elements we're going to discard straight away.
10817 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10818 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10819 SDValue Slidedown =
10820 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10821 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10822 // Now we can use a cast-like subvector extract to get the result.
10823 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10824 DAG.getVectorIdxConstant(0, DL));
10825 return DAG.getBitcast(Op.getValueType(), Slidedown);
10826 }
10827
10828 if (VecVT.isFixedLengthVector()) {
10829 VecVT = getContainerForFixedLengthVector(VecVT);
10830 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10831 }
10832
10833 MVT ContainerSubVecVT = SubVecVT;
10834 if (SubVecVT.isFixedLengthVector())
10835 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10836
10837 unsigned SubRegIdx;
10838 ElementCount RemIdx;
10839 // extract_subvector scales the index by vscale if the subvector is scalable,
10840 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10841 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10842 if (SubVecVT.isFixedLengthVector()) {
10843 assert(VLen);
10844 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10845 auto Decompose =
10846        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10847            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10848 SubRegIdx = Decompose.first;
10849 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10850 (OrigIdx % Vscale));
10851 } else {
10852 auto Decompose =
10853        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10854            VecVT, ContainerSubVecVT, OrigIdx, TRI);
10855 SubRegIdx = Decompose.first;
10856 RemIdx = ElementCount::getScalable(Decompose.second);
10857 }
10858
10859 // If the Idx has been completely eliminated then this is a subvector extract
10860 // which naturally aligns to a vector register. These can easily be handled
10861 // using subregister manipulation. We use an extract_subvector that will
10862 // resolve to an extract subreg.
10863 if (RemIdx.isZero()) {
10864 if (SubVecVT.isFixedLengthVector()) {
10865 assert(VLen);
10866 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10867 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10868 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10869 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10870 }
10871 return Op;
10872 }
10873
10874 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10875 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10876 // divide exactly.
10877 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10878 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10879
10880 // If the vector type is an LMUL-group type, extract a subvector equal to the
10881 // nearest full vector register type.
10882 MVT InterSubVT = VecVT;
10883 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10884 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10885 // we should have successfully decomposed the extract into a subregister.
10886 // We use an extract_subvector that will resolve to a subreg extract.
10887 assert(SubRegIdx != RISCV::NoSubRegister);
10888 (void)SubRegIdx;
10889 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10890 if (SubVecVT.isFixedLengthVector()) {
10891 assert(VLen);
10892 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10893 }
10894 InterSubVT = getLMUL1VT(VecVT);
10895 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10896 DAG.getConstant(Idx, DL, XLenVT));
10897 }
10898
10899 // Slide this vector register down by the desired number of elements in order
10900 // to place the desired subvector starting at element 0.
10901 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10902 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10903 if (SubVecVT.isFixedLengthVector())
10904 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10905 SDValue Slidedown =
10906 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10907 Vec, SlidedownAmt, Mask, VL);
10908
10909 // Now the vector is in the right position, extract our final subvector. This
10910 // should resolve to a COPY.
10911 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10912 DAG.getVectorIdxConstant(0, DL));
10913
10914 // We might have bitcast from a mask type: cast back to the original type if
10915 // required.
10916 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10917}
10918
10919// Widen a vector's operands to i8, then truncate its results back to the
10920// original type, typically i1. All operand and result types must be the same.
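// For example, a VECTOR_DEINTERLEAVE of two nxv16i1 operands is zero-extended
// to nxv16i8, rebuilt on the i8 type, and each i8 result is mapped back to
// nxv16i1 with a setcc against zero.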
10921 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10922                                   SelectionDAG &DAG) {
10923 MVT VT = N.getSimpleValueType();
10924 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10925  SmallVector<SDValue, 4> WideOps;
10926  for (SDValue Op : N->ops()) {
10927 assert(Op.getSimpleValueType() == VT &&
10928 "Operands and result must be same type");
10929 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10930 }
10931
10932 unsigned NumVals = N->getNumValues();
10933
10934  SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10935      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10936 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10937 SmallVector<SDValue, 4> TruncVals;
10938 for (unsigned I = 0; I < NumVals; I++) {
10939 TruncVals.push_back(
10940 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10941 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10942 }
10943
10944 if (TruncVals.size() > 1)
10945 return DAG.getMergeValues(TruncVals, DL);
10946 return TruncVals.front();
10947}
10948
10949SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10950 SelectionDAG &DAG) const {
10951 SDLoc DL(Op);
10952 MVT VecVT = Op.getSimpleValueType();
10953
10954 assert(VecVT.isScalableVector() &&
10955         "vector_deinterleave on non-scalable vector!");
10956
10957 // 1 bit element vectors need to be widened to e8
10958 if (VecVT.getVectorElementType() == MVT::i1)
10959 return widenVectorOpsToi8(Op, DL, DAG);
10960
10961 // If the VT is LMUL=8, we need to split and reassemble.
10962 if (VecVT.getSizeInBits().getKnownMinValue() ==
10963 (8 * RISCV::RVVBitsPerBlock)) {
10964 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10965 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10966 EVT SplitVT = Op0Lo.getValueType();
10967
10968    SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10969                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10970    SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10971                                DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10972
10973 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10974 ResLo.getValue(0), ResHi.getValue(0));
10975 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10976 ResHi.getValue(1));
10977 return DAG.getMergeValues({Even, Odd}, DL);
10978 }
10979
10980 // Concatenate the two vectors as one vector to deinterleave
10981 MVT ConcatVT =
10982      MVT::getVectorVT(VecVT.getVectorElementType(),
10983                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10984  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10985 Op.getOperand(0), Op.getOperand(1));
10986
10987 // We can deinterleave through vnsrl.wi if the element type is smaller than
10988 // ELEN
10989 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10990 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
10991 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
10992 return DAG.getMergeValues({Even, Odd}, DL);
10993 }
10994
10995 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
10996  // possible mask vector, then extract the required subvector. Doing this
10997 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
10998 // creation to be rematerialized during register allocation to reduce
10999 // register pressure if needed.
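  // Splatting 0b01010101 across i8 elements and bitcasting to the i1 mask
  // type produces the 1,0,1,0,... pattern selecting the even-indexed elements
  // of Concat; 0b10101010 likewise selects the odd-indexed elements.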
11000
11001 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11002
11003 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11004 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11005 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11006 DAG.getVectorIdxConstant(0, DL));
11007
11008 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11009 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11010 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11011 DAG.getVectorIdxConstant(0, DL));
11012
11013 // vcompress the even and odd elements into two separate vectors
11014 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11015 EvenMask, DAG.getUNDEF(ConcatVT));
11016 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11017 OddMask, DAG.getUNDEF(ConcatVT));
11018
11019 // Extract the result half of the gather for even and odd
11020 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11021 DAG.getVectorIdxConstant(0, DL));
11022 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11023 DAG.getVectorIdxConstant(0, DL));
11024
11025 return DAG.getMergeValues({Even, Odd}, DL);
11026}
11027
11028SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11029 SelectionDAG &DAG) const {
11030 SDLoc DL(Op);
11031 MVT VecVT = Op.getSimpleValueType();
11032
11033 assert(VecVT.isScalableVector() &&
11034 "vector_interleave on non-scalable vector!");
11035
11036 // i1 vectors need to be widened to i8
11037 if (VecVT.getVectorElementType() == MVT::i1)
11038 return widenVectorOpsToi8(Op, DL, DAG);
11039
11040 MVT XLenVT = Subtarget.getXLenVT();
11041 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11042
11043 // If the VT is LMUL=8, we need to split and reassemble.
11044 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11045 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11046 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11047 EVT SplitVT = Op0Lo.getValueType();
11048
11049    SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11050                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11051    SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11052                                DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11053
11054 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11055 ResLo.getValue(0), ResLo.getValue(1));
11056 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11057 ResHi.getValue(0), ResHi.getValue(1));
11058 return DAG.getMergeValues({Lo, Hi}, DL);
11059 }
11060
11061 SDValue Interleaved;
11062
11063 // If the element type is smaller than ELEN, then we can interleave with
11064 // vwaddu.vv and vwmaccu.vx
11065 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11066 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11067 DAG, Subtarget);
11068 } else {
11069    // Otherwise, fall back to using vrgatherei16.vv
11070 MVT ConcatVT =
11071        MVT::getVectorVT(VecVT.getVectorElementType(),
11072                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11073    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11074 Op.getOperand(0), Op.getOperand(1));
11075
11076 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11077
11078 // 0 1 2 3 4 5 6 7 ...
11079 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11080
11081 // 1 1 1 1 1 1 1 1 ...
11082 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11083
11084 // 1 0 1 0 1 0 1 0 ...
11085 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11086 OddMask = DAG.getSetCC(
11087 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11088 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11089        ISD::CondCode::SETNE);
11090
11091 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11092
11093 // Build up the index vector for interleaving the concatenated vector
11094 // 0 0 1 1 2 2 3 3 ...
11095 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11096 // 0 n 1 n+1 2 n+2 3 n+3 ...
11097 Idx =
11098 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11099
11100 // Then perform the interleave
11101 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11102 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11103 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11104 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11105 }
11106
11107 // Extract the two halves from the interleaved result
11108 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11109 DAG.getVectorIdxConstant(0, DL));
11110 SDValue Hi = DAG.getNode(
11111 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11112      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11113
11114 return DAG.getMergeValues({Lo, Hi}, DL);
11115}
11116
11117// Lower step_vector to the vid instruction. Any non-identity step value must
11118// be accounted for by manual expansion.
11119SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11120 SelectionDAG &DAG) const {
11121 SDLoc DL(Op);
11122 MVT VT = Op.getSimpleValueType();
11123 assert(VT.isScalableVector() && "Expected scalable vector");
11124 MVT XLenVT = Subtarget.getXLenVT();
11125 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11126 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11127 uint64_t StepValImm = Op.getConstantOperandVal(0);
11128 if (StepValImm != 1) {
11129 if (isPowerOf2_64(StepValImm)) {
11130 SDValue StepVal =
11131 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11132 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11133 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11134 } else {
11135 SDValue StepVal = lowerScalarSplat(
11136 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11137 VL, VT, DL, DAG, Subtarget);
11138 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11139 }
11140 }
11141 return StepVec;
11142}
11143
11144// Implement vector_reverse using vrgather.vv with indices determined by
11145// subtracting the id of each element from (VLMAX-1). This will convert
11146// the indices like so:
11147// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11148// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
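// For example, with VLMAX=8 the gather indices are
// (7,7,7,7,7,7,7,7) - (0,1,2,3,4,5,6,7) = (7,6,5,4,3,2,1,0).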
11149SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11150 SelectionDAG &DAG) const {
11151 SDLoc DL(Op);
11152 MVT VecVT = Op.getSimpleValueType();
11153 if (VecVT.getVectorElementType() == MVT::i1) {
11154 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11155 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11156 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11157 return DAG.getSetCC(DL, VecVT, Op2,
11158 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11159 }
11160
11161 MVT ContainerVT = VecVT;
11162 SDValue Vec = Op.getOperand(0);
11163 if (VecVT.isFixedLengthVector()) {
11164 ContainerVT = getContainerForFixedLengthVector(VecVT);
11165 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11166 }
11167
11168 MVT XLenVT = Subtarget.getXLenVT();
11169 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11170
11171 // On some uarchs vrgather.vv will read from every input register for each
11172 // output register, regardless of the indices. However, to reverse a vector,
11173 // each output register only needs to read from one register, so decompose it
11174 // into LMUL * M1 vrgather.vvs to get O(LMUL) performance instead of
11175 // O(LMUL^2).
11176 //
11177 // vsetvli a1, zero, e64, m4, ta, ma
11178 // vrgatherei16.vv v12, v8, v16
11179 // ->
11180 // vsetvli a1, zero, e64, m1, ta, ma
11181 // vrgather.vv v15, v8, v16
11182 // vrgather.vv v14, v9, v16
11183 // vrgather.vv v13, v10, v16
11184 // vrgather.vv v12, v11, v16
11185 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11186 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11187 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11188 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11189 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11190 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11191
11192 // Fixed length vectors might not fit exactly into their container, and so
11193 // leave a gap in the front of the vector after being reversed. Slide this
11194 // away.
11195 //
11196 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11197 // 0 1 2 3 x x x x <- reverse
11198 // x x x x 0 1 2 3 <- vslidedown.vx
11199 if (VecVT.isFixedLengthVector()) {
11200 SDValue Offset = DAG.getNode(
11201 ISD::SUB, DL, XLenVT,
11202 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11203 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11204 Concat =
11205 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11206 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11207 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11208 }
11209 return Concat;
11210 }
11211
11212 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11213 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11214 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11215 unsigned MaxVLMAX =
11216 VecVT.isFixedLengthVector()
11217 ? VecVT.getVectorNumElements()
11218 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11219
11220 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11221 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11222
11223 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11224 // to use vrgatherei16.vv.
11225 if (MaxVLMAX > 256 && EltSize == 8) {
11226 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11227 // Reverse each half, then reassemble them in reverse order.
11228 // NOTE: It's also possible that, after splitting, VLMAX no longer
11229 // requires vrgatherei16.vv.
11230 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11231 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11232 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11233 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11234 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11235 // Reassemble the low and high pieces reversed.
11236 // FIXME: This is a CONCAT_VECTORS.
11237 SDValue Res =
11238 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11239 DAG.getVectorIdxConstant(0, DL));
11240 return DAG.getNode(
11241 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11242 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11243 }
11244
11245 // Just promote the int type to i16 which will double the LMUL.
11246 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11247 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11248 }
11249
11250 // At LMUL > 1, do the index computation in 16 bits to reduce register
11251 // pressure.
11252 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11253 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11254 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11255 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11256 IntVT = IntVT.changeVectorElementType(MVT::i16);
11257 }
11258
11259 // Calculate VLMAX-1 for the desired SEW.
11260 SDValue VLMinus1 = DAG.getNode(
11261 ISD::SUB, DL, XLenVT,
11262 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11263 DAG.getConstant(1, DL, XLenVT));
11264
11265 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11266 bool IsRV32E64 =
11267 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11268 SDValue SplatVL;
11269 if (!IsRV32E64)
11270 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11271 else
11272 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11273 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11274
11275 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11276 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11277 DAG.getUNDEF(IntVT), Mask, VL);
11278
11279 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11280 DAG.getUNDEF(ContainerVT), Mask, VL);
11281 if (VecVT.isFixedLengthVector())
11282 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11283 return Gather;
11284}
11285
11286SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11287 SelectionDAG &DAG) const {
11288 SDLoc DL(Op);
11289 SDValue V1 = Op.getOperand(0);
11290 SDValue V2 = Op.getOperand(1);
11291 MVT XLenVT = Subtarget.getXLenVT();
11292 MVT VecVT = Op.getSimpleValueType();
11293
11294 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11295
11296 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11297 SDValue DownOffset, UpOffset;
11298 if (ImmValue >= 0) {
11299 // The operand is a TargetConstant, we need to rebuild it as a regular
11300 // constant.
11301 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11302 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11303 } else {
11304 // The operand is a TargetConstant, we need to rebuild it as a regular
11305 // constant rather than negating the original operand.
11306 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11307 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11308 }
11309
11310 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11311
11312 SDValue SlideDown =
11313 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11314 DownOffset, TrueMask, UpOffset);
11315 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11316 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11317 RISCVII::TAIL_AGNOSTIC);
11318}
11319
11320SDValue
11321RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11322 SelectionDAG &DAG) const {
11323 SDLoc DL(Op);
11324 auto *Load = cast<LoadSDNode>(Op);
11325
11326 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11327 Load->getMemoryVT(),
11328 *Load->getMemOperand()) &&
11329 "Expecting a correctly-aligned load");
11330
11331 MVT VT = Op.getSimpleValueType();
11332 MVT XLenVT = Subtarget.getXLenVT();
11333 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11334
11335 // If we know the exact VLEN and our fixed length vector completely fills
11336 // the container, use a whole register load instead.
11337 const auto [MinVLMAX, MaxVLMAX] =
11338 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11339 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11340 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11341 MachineMemOperand *MMO = Load->getMemOperand();
11342 SDValue NewLoad =
11343 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11344 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11345 MMO->getAAInfo(), MMO->getRanges());
11346 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11347 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11348 }
11349
11350 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11351
11352 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11353 SDValue IntID = DAG.getTargetConstant(
11354 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11355 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11356 if (!IsMaskOp)
11357 Ops.push_back(DAG.getUNDEF(ContainerVT));
11358 Ops.push_back(Load->getBasePtr());
11359 Ops.push_back(VL);
11360 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11361 SDValue NewLoad =
11362 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11363 Load->getMemoryVT(), Load->getMemOperand());
11364
11365 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11366 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11367}
11368
11369SDValue
11370RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11371 SelectionDAG &DAG) const {
11372 SDLoc DL(Op);
11373 auto *Store = cast<StoreSDNode>(Op);
11374
11375 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11376 Store->getMemoryVT(),
11377 *Store->getMemOperand()) &&
11378 "Expecting a correctly-aligned store");
11379
11380 SDValue StoreVal = Store->getValue();
11381 MVT VT = StoreVal.getSimpleValueType();
11382 MVT XLenVT = Subtarget.getXLenVT();
11383
11384 // If the size is less than a byte, we need to pad with zeros to make a byte.
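// For example, a v4i1 store value is widened to v8i1 by inserting it into a
// zero vector, so the resulting vsm.v writes exactly one byte.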
11385 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11386 VT = MVT::v8i1;
11387 StoreVal =
11388 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11389 StoreVal, DAG.getVectorIdxConstant(0, DL));
11390 }
11391
11392 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11393
11394 SDValue NewValue =
11395 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11396
11397 // If we know the exact VLEN and our fixed length vector completely fills
11398 // the container, use a whole register store instead.
11399 const auto [MinVLMAX, MaxVLMAX] =
11400 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11401 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11402 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11403 MachineMemOperand *MMO = Store->getMemOperand();
11404 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11405 MMO->getPointerInfo(), MMO->getBaseAlign(),
11406 MMO->getFlags(), MMO->getAAInfo());
11407 }
11408
11409 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11410
11411 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11412 SDValue IntID = DAG.getTargetConstant(
11413 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11414 return DAG.getMemIntrinsicNode(
11415 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11416 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11417 Store->getMemoryVT(), Store->getMemOperand());
11418}
11419
11420SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11421 SelectionDAG &DAG) const {
11422 SDLoc DL(Op);
11423 MVT VT = Op.getSimpleValueType();
11424
11425 const auto *MemSD = cast<MemSDNode>(Op);
11426 EVT MemVT = MemSD->getMemoryVT();
11427 MachineMemOperand *MMO = MemSD->getMemOperand();
11428 SDValue Chain = MemSD->getChain();
11429 SDValue BasePtr = MemSD->getBasePtr();
11430
11431 SDValue Mask, PassThru, VL;
11432 bool IsExpandingLoad = false;
11433 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11434 Mask = VPLoad->getMask();
11435 PassThru = DAG.getUNDEF(VT);
11436 VL = VPLoad->getVectorLength();
11437 } else {
11438 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11439 Mask = MLoad->getMask();
11440 PassThru = MLoad->getPassThru();
11441 IsExpandingLoad = MLoad->isExpandingLoad();
11442 }
11443
11444 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11445
11446 MVT XLenVT = Subtarget.getXLenVT();
11447
11448 MVT ContainerVT = VT;
11449 if (VT.isFixedLengthVector()) {
11450 ContainerVT = getContainerForFixedLengthVector(VT);
11451 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11452 if (!IsUnmasked) {
11453 MVT MaskVT = getMaskTypeFor(ContainerVT);
11454 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11455 }
11456 }
11457
11458 if (!VL)
11459 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11460
11461 SDValue ExpandingVL;
11462 if (!IsUnmasked && IsExpandingLoad) {
11463 ExpandingVL = VL;
11464 VL =
11465 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11466 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11467 }
11468
11469 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11470 : Intrinsic::riscv_vle_mask;
11471 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11472 if (IntID == Intrinsic::riscv_vle)
11473 Ops.push_back(DAG.getUNDEF(ContainerVT));
11474 else
11475 Ops.push_back(PassThru);
11476 Ops.push_back(BasePtr);
11477 if (IntID == Intrinsic::riscv_vle_mask)
11478 Ops.push_back(Mask);
11479 Ops.push_back(VL);
11480 if (IntID == Intrinsic::riscv_vle_mask)
11481 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11482
11483 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11484
11485 SDValue Result =
11486 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11487 Chain = Result.getValue(1);
11488 if (ExpandingVL) {
11489 MVT IndexVT = ContainerVT;
11490 if (ContainerVT.isFloatingPoint())
11491 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11492
11493 MVT IndexEltVT = IndexVT.getVectorElementType();
11494 bool UseVRGATHEREI16 = false;
11495 // If the index vector is an i8 vector and the element count exceeds 256, we
11496 // should change the element type of the index vector to i16 to avoid
11497 // overflow.
11498 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11499 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11500 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11501 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11502 UseVRGATHEREI16 = true;
11503 }
11504
11505 SDValue Iota =
11506 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11507 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11508 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11509 Result =
11510 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11511 : RISCVISD::VRGATHER_VV_VL,
11512 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11513 }
11514
11515 if (VT.isFixedLengthVector())
11516 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11517
11518 return DAG.getMergeValues({Result, Chain}, DL);
11519}
11520
11521SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11522 SelectionDAG &DAG) const {
11523 SDLoc DL(Op);
11524
11525 const auto *MemSD = cast<MemSDNode>(Op);
11526 EVT MemVT = MemSD->getMemoryVT();
11527 MachineMemOperand *MMO = MemSD->getMemOperand();
11528 SDValue Chain = MemSD->getChain();
11529 SDValue BasePtr = MemSD->getBasePtr();
11530 SDValue Val, Mask, VL;
11531
11532 bool IsCompressingStore = false;
11533 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11534 Val = VPStore->getValue();
11535 Mask = VPStore->getMask();
11536 VL = VPStore->getVectorLength();
11537 } else {
11538 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11539 Val = MStore->getValue();
11540 Mask = MStore->getMask();
11541 IsCompressingStore = MStore->isCompressingStore();
11542 }
11543
11544 bool IsUnmasked =
11545 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11546
11547 MVT VT = Val.getSimpleValueType();
11548 MVT XLenVT = Subtarget.getXLenVT();
11549
11550 MVT ContainerVT = VT;
11551 if (VT.isFixedLengthVector()) {
11552 ContainerVT = getContainerForFixedLengthVector(VT);
11553
11554 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11555 if (!IsUnmasked || IsCompressingStore) {
11556 MVT MaskVT = getMaskTypeFor(ContainerVT);
11557 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11558 }
11559 }
11560
11561 if (!VL)
11562 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11563
11564 if (IsCompressingStore) {
11565 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11566 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11567 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11568 VL =
11569 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11570 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11571 }
11572
11573 unsigned IntID =
11574 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11575 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11576 Ops.push_back(Val);
11577 Ops.push_back(BasePtr);
11578 if (!IsUnmasked)
11579 Ops.push_back(Mask);
11580 Ops.push_back(VL);
11581
11582 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11583 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11584}
11585
11586SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11587 SelectionDAG &DAG) const {
11588 SDLoc DL(Op);
11589 SDValue Val = Op.getOperand(0);
11590 SDValue Mask = Op.getOperand(1);
11591 SDValue Passthru = Op.getOperand(2);
11592
11593 MVT VT = Val.getSimpleValueType();
11594 MVT XLenVT = Subtarget.getXLenVT();
11595 MVT ContainerVT = VT;
11596 if (VT.isFixedLengthVector()) {
11597 ContainerVT = getContainerForFixedLengthVector(VT);
11598 MVT MaskVT = getMaskTypeFor(ContainerVT);
11599 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11600 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11601 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11602 }
11603
11604 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11605 SDValue Res =
11606 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11607 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11608 Passthru, Val, Mask, VL);
11609
11610 if (VT.isFixedLengthVector())
11611 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11612
11613 return Res;
11614}
11615
11616SDValue
11617RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 MVT InVT = Op.getOperand(0).getSimpleValueType();
11620 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11621
11622 MVT VT = Op.getSimpleValueType();
11623
11624 SDValue Op1 =
11625 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11626 SDValue Op2 =
11627 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11628
11629 SDLoc DL(Op);
11630 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11631 DAG, Subtarget);
11632 MVT MaskVT = getMaskTypeFor(ContainerVT);
11633
11634 SDValue Cmp =
11635 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11636 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11637
11638 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11639}
11640
11641SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11642 SelectionDAG &DAG) const {
11643 unsigned Opc = Op.getOpcode();
11644 SDLoc DL(Op);
11645 SDValue Chain = Op.getOperand(0);
11646 SDValue Op1 = Op.getOperand(1);
11647 SDValue Op2 = Op.getOperand(2);
11648 SDValue CC = Op.getOperand(3);
11649 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11650 MVT VT = Op.getSimpleValueType();
11651 MVT InVT = Op1.getSimpleValueType();
11652
11653 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11654 // condition codes.
11655 if (Opc == ISD::STRICT_FSETCCS) {
11656 // Expand strict_fsetccs(x, y, oeq) to
11657 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11658 SDVTList VTList = Op->getVTList();
11659 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11660 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11661 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11662 Op2, OLECCVal);
11663 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11664 Op1, OLECCVal);
11665 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11666 Tmp1.getValue(1), Tmp2.getValue(1));
11667 // Tmp1 and Tmp2 might be the same node.
11668 if (Tmp1 != Tmp2)
11669 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11670 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11671 }
11672
11673 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11674 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11675 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11676 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11677 Op2, OEQCCVal);
11678 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11679 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11680 }
11681 }
11682
11683 MVT ContainerInVT = InVT;
11684 if (InVT.isFixedLengthVector()) {
11685 ContainerInVT = getContainerForFixedLengthVector(InVT);
11686 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11687 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11688 }
11689 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11690
11691 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11692
11693 SDValue Res;
11694 if (Opc == ISD::STRICT_FSETCC &&
11695 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11696 CCVal == ISD::SETOLE)) {
11697 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11698 // is only active when both input elements are ordered.
11699 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11700 SDValue OrderMask1 = DAG.getNode(
11701 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11702 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11703 True, VL});
11704 SDValue OrderMask2 = DAG.getNode(
11705 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11706 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11707 True, VL});
11708 Mask =
11709 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11710 // Use Mask as the passthru operand to let the result be 0 if either of the
11711 // inputs is unordered.
11712 Res = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
11713 DAG.getVTList(MaskVT, MVT::Other),
11714 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11715 } else {
11716 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11717 : RISCVISD::STRICT_FSETCCS_VL;
11718 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11719 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11720 }
11721
11722 if (VT.isFixedLengthVector()) {
11723 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11724 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11725 }
11726 return Res;
11727}
11728
11729// Lower vector ABS to smax(X, sub(0, X)).
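// For example, for X = (-3, 4): sub(0, X) = (3, -4) and
// smax(X, sub(0, X)) = (3, 4) = |X|.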
11730SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11731 SDLoc DL(Op);
11732 MVT VT = Op.getSimpleValueType();
11733 SDValue X = Op.getOperand(0);
11734
11735 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11736 "Unexpected type for ISD::ABS");
11737
11738 MVT ContainerVT = VT;
11739 if (VT.isFixedLengthVector()) {
11740 ContainerVT = getContainerForFixedLengthVector(VT);
11741 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11742 }
11743
11744 SDValue Mask, VL;
11745 if (Op->getOpcode() == ISD::VP_ABS) {
11746 Mask = Op->getOperand(1);
11747 if (VT.isFixedLengthVector())
11748 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11749 Subtarget);
11750 VL = Op->getOperand(2);
11751 } else
11752 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11753
11754 SDValue SplatZero = DAG.getNode(
11755 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11756 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11757 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11758 DAG.getUNDEF(ContainerVT), Mask, VL);
11759 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11760 DAG.getUNDEF(ContainerVT), Mask, VL);
11761
11762 if (VT.isFixedLengthVector())
11763 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11764 return Max;
11765}
11766
11767SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11768 SDValue Op, SelectionDAG &DAG) const {
11769 SDLoc DL(Op);
11770 MVT VT = Op.getSimpleValueType();
11771 SDValue Mag = Op.getOperand(0);
11772 SDValue Sign = Op.getOperand(1);
11773 assert(Mag.getValueType() == Sign.getValueType() &&
11774 "Can only handle COPYSIGN with matching types.");
11775
11776 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11777 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11778 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11779
11780 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11781
11782 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11783 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11784
11785 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11786}
11787
11788SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11789 SDValue Op, SelectionDAG &DAG) const {
11790 MVT VT = Op.getSimpleValueType();
11791 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11792
11793 MVT I1ContainerVT =
11794 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11795
11796 SDValue CC =
11797 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11798 SDValue Op1 =
11799 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11800 SDValue Op2 =
11801 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11802
11803 SDLoc DL(Op);
11804 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11805
11806 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11807 Op2, DAG.getUNDEF(ContainerVT), VL);
11808
11809 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11810}
11811
11812SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11813 SelectionDAG &DAG) const {
11814 unsigned NewOpc = getRISCVVLOp(Op);
11815 bool HasPassthruOp = hasPassthruOp(NewOpc);
11816 bool HasMask = hasMaskOp(NewOpc);
11817
11818 MVT VT = Op.getSimpleValueType();
11819 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11820
11821 // Create list of operands by converting existing ones to scalable types.
11822 SmallVector<SDValue, 6> Ops;
11823 for (const SDValue &V : Op->op_values()) {
11824 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11825
11826 // Pass through non-vector operands.
11827 if (!V.getValueType().isVector()) {
11828 Ops.push_back(V);
11829 continue;
11830 }
11831
11832 // "cast" fixed length vector to a scalable vector.
11833 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11834 "Only fixed length vectors are supported!");
11835 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11836 }
11837
11838 SDLoc DL(Op);
11839 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11840 if (HasPassthruOp)
11841 Ops.push_back(DAG.getUNDEF(ContainerVT));
11842 if (HasMask)
11843 Ops.push_back(Mask);
11844 Ops.push_back(VL);
11845
11846 // StrictFP operations have two result values. Their lowered result should
11847 // have the same result count.
11848 if (Op->isStrictFPOpcode()) {
11849 SDValue ScalableRes =
11850 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11851 Op->getFlags());
11852 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11853 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11854 }
11855
11856 SDValue ScalableRes =
11857 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11858 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11859}
11860
11861// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11862// * Operands of each node are assumed to be in the same order.
11863// * The EVL operand is promoted from i32 to i64 on RV64.
11864// * Fixed-length vectors are converted to their scalable-vector container
11865// types.
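// For example, a fixed-length (vp_add x, y, mask, evl) becomes roughly
// (RISCVISD::ADD_VL x', y', undef passthru, mask', evl) on the scalable
// container type, where x', y' and mask' are the converted operands.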
11866SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11867 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11868 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11869
11870 SDLoc DL(Op);
11871 MVT VT = Op.getSimpleValueType();
11872 SmallVector<SDValue, 16> Ops;
11873
11874 MVT ContainerVT = VT;
11875 if (VT.isFixedLengthVector())
11876 ContainerVT = getContainerForFixedLengthVector(VT);
11877
11878 for (const auto &OpIdx : enumerate(Op->ops())) {
11879 SDValue V = OpIdx.value();
11880 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11881 // Add dummy passthru value before the mask. Or if there isn't a mask,
11882 // before EVL.
11883 if (HasPassthruOp) {
11884 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11885 if (MaskIdx) {
11886 if (*MaskIdx == OpIdx.index())
11887 Ops.push_back(DAG.getUNDEF(ContainerVT));
11888 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11889 OpIdx.index()) {
11890 if (Op.getOpcode() == ISD::VP_MERGE) {
11891 // For VP_MERGE, copy the false operand instead of an undef value.
11892 Ops.push_back(Ops.back());
11893 } else {
11894 assert(Op.getOpcode() == ISD::VP_SELECT);
11895 // For VP_SELECT, add an undef value.
11896 Ops.push_back(DAG.getUNDEF(ContainerVT));
11897 }
11898 }
11899 }
11900 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11901 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11902 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11903 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11904 Subtarget.getXLenVT()));
11905 // Pass through operands which aren't fixed-length vectors.
11906 if (!V.getValueType().isFixedLengthVector()) {
11907 Ops.push_back(V);
11908 continue;
11909 }
11910 // "cast" fixed length vector to a scalable vector.
11911 MVT OpVT = V.getSimpleValueType();
11912 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11913 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11914 "Only fixed length vectors are supported!");
11915 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11916 }
11917
11918 if (!VT.isFixedLengthVector())
11919 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11920
11921 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11922
11923 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11924}
11925
11926SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11927 SelectionDAG &DAG) const {
11928 SDLoc DL(Op);
11929 MVT VT = Op.getSimpleValueType();
11930
11931 SDValue Src = Op.getOperand(0);
11932 // NOTE: Mask is dropped.
11933 SDValue VL = Op.getOperand(2);
11934
11935 MVT ContainerVT = VT;
11936 if (VT.isFixedLengthVector()) {
11937 ContainerVT = getContainerForFixedLengthVector(VT);
11938 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11939 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11940 }
11941
11942 MVT XLenVT = Subtarget.getXLenVT();
11943 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11944 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11945 DAG.getUNDEF(ContainerVT), Zero, VL);
11946
11947 SDValue SplatValue = DAG.getSignedConstant(
11948 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11949 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11950 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11951
11952 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11953 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11954 if (!VT.isFixedLengthVector())
11955 return Result;
11956 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11957}
11958
11959SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11960 SelectionDAG &DAG) const {
11961 SDLoc DL(Op);
11962 MVT VT = Op.getSimpleValueType();
11963
11964 SDValue Op1 = Op.getOperand(0);
11965 SDValue Op2 = Op.getOperand(1);
11966 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11967 // NOTE: Mask is dropped.
11968 SDValue VL = Op.getOperand(4);
11969
11970 MVT ContainerVT = VT;
11971 if (VT.isFixedLengthVector()) {
11972 ContainerVT = getContainerForFixedLengthVector(VT);
11973 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11974 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11975 }
11976
11977 SDValue Result;
11978 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11979
11980 switch (Condition) {
11981 default:
11982 break;
11983 // X != Y --> (X^Y)
11984 case ISD::SETNE:
11985 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11986 break;
11987 // X == Y --> ~(X^Y)
11988 case ISD::SETEQ: {
11989 SDValue Temp =
11990 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11991 Result =
11992 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11993 break;
11994 }
11995 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11996 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11997 case ISD::SETGT:
11998 case ISD::SETULT: {
11999 SDValue Temp =
12000 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12001 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12002 break;
12003 }
12004 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12005 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12006 case ISD::SETLT:
12007 case ISD::SETUGT: {
12008 SDValue Temp =
12009 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12010 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12011 break;
12012 }
12013 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12014 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12015 case ISD::SETGE:
12016 case ISD::SETULE: {
12017 SDValue Temp =
12018 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12019 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12020 break;
12021 }
12022 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12023 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12024 case ISD::SETLE:
12025 case ISD::SETUGE: {
12026 SDValue Temp =
12027 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12028 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12029 break;
12030 }
12031 }
12032
12033 if (!VT.isFixedLengthVector())
12034 return Result;
12035 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12036}
12037
12038// Lower Floating-Point/Integer Type-Convert VP SDNodes
12039SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12040 SelectionDAG &DAG) const {
12041 SDLoc DL(Op);
12042
12043 SDValue Src = Op.getOperand(0);
12044 SDValue Mask = Op.getOperand(1);
12045 SDValue VL = Op.getOperand(2);
12046 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12047
12048 MVT DstVT = Op.getSimpleValueType();
12049 MVT SrcVT = Src.getSimpleValueType();
12050 if (DstVT.isFixedLengthVector()) {
12051 DstVT = getContainerForFixedLengthVector(DstVT);
12052 SrcVT = getContainerForFixedLengthVector(SrcVT);
12053 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12054 MVT MaskVT = getMaskTypeFor(DstVT);
12055 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12056 }
12057
12058 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12059 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12060
12061 SDValue Result;
12062 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12063 if (SrcVT.isInteger()) {
12064 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12065
12066 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12067 ? RISCVISD::VSEXT_VL
12068 : RISCVISD::VZEXT_VL;
12069
12070 // Do we need to do any pre-widening before converting?
12071 if (SrcEltSize == 1) {
12072 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12073 MVT XLenVT = Subtarget.getXLenVT();
12074 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12075 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12076 DAG.getUNDEF(IntVT), Zero, VL);
12077 SDValue One = DAG.getSignedConstant(
12078 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12079 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12080 DAG.getUNDEF(IntVT), One, VL);
12081 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12082 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12083 } else if (DstEltSize > (2 * SrcEltSize)) {
12084 // Widen before converting.
12085 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12086 DstVT.getVectorElementCount());
12087 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12088 }
12089
12090 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12091 } else {
12092 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12093 "Wrong input/output vector types");
12094
12095 // Convert f16 to f32 then convert f32 to i64.
12096 if (DstEltSize > (2 * SrcEltSize)) {
12097 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12098 MVT InterimFVT =
12099 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12100 Src =
12101 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12102 }
12103
12104 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12105 }
12106 } else { // Narrowing + Conversion
12107 if (SrcVT.isInteger()) {
12108 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12109 // First do a narrowing conversion to an FP type half the size, then round
12110 // the result to a smaller FP type if needed.
12111
12112 MVT InterimFVT = DstVT;
12113 if (SrcEltSize > (2 * DstEltSize)) {
12114 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12115 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12116 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12117 }
12118
12119 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12120
12121 if (InterimFVT != DstVT) {
12122 Src = Result;
12123 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12124 }
12125 } else {
12126 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12127 "Wrong input/output vector types");
12128 // First do a narrowing conversion to an integer half the size, then
12129 // truncate if needed.
12130
12131 if (DstEltSize == 1) {
12132 // First convert to the same size integer, then convert to mask using
12133 // setcc.
12134 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12135 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12136 DstVT.getVectorElementCount());
12137 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12138
12139 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12140 // otherwise the conversion was undefined.
12141 MVT XLenVT = Subtarget.getXLenVT();
12142 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12143 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12144 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12145 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12146 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12147 DAG.getUNDEF(DstVT), Mask, VL});
12148 } else {
12149 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12150 DstVT.getVectorElementCount());
12151
12152 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12153
12154 while (InterimIVT != DstVT) {
12155 SrcEltSize /= 2;
12156 Src = Result;
12157 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12158 DstVT.getVectorElementCount());
12159 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12160 Src, Mask, VL);
12161 }
12162 }
12163 }
12164 }
12165
12166 MVT VT = Op.getSimpleValueType();
12167 if (!VT.isFixedLengthVector())
12168 return Result;
12169 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12170}
12171
12172SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12173 SelectionDAG &DAG) const {
12174 SDLoc DL(Op);
12175 MVT VT = Op.getSimpleValueType();
12176 MVT XLenVT = Subtarget.getXLenVT();
12177
12178 SDValue Mask = Op.getOperand(0);
12179 SDValue TrueVal = Op.getOperand(1);
12180 SDValue FalseVal = Op.getOperand(2);
12181 SDValue VL = Op.getOperand(3);
12182
12183 // Use default legalization if a vector of EVL type would be legal.
12184 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12185 VT.getVectorElementCount());
12186 if (isTypeLegal(EVLVecVT))
12187 return SDValue();
12188
12189 MVT ContainerVT = VT;
12190 if (VT.isFixedLengthVector()) {
12191 ContainerVT = getContainerForFixedLengthVector(VT);
12192 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12193 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12194 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12195 }
12196
12197 // Promote to a vector of i8.
12198 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12199
12200 // Promote TrueVal and FalseVal using VLMax.
12201 // FIXME: Is there a better way to do this?
12202 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12203 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12204 DAG.getUNDEF(PromotedVT),
12205 DAG.getConstant(1, DL, XLenVT), VLMax);
12206 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12207 DAG.getUNDEF(PromotedVT),
12208 DAG.getConstant(0, DL, XLenVT), VLMax);
12209 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12210 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12211 // Any element past VL uses FalseVal, so use VLMax
12212 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12213 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12214
12215 // VP_MERGE the two promoted values.
12216 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12217 TrueVal, FalseVal, FalseVal, VL);
12218
12219 // Convert back to mask.
12220 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12221 SDValue Result = DAG.getNode(
12222 RISCVISD::SETCC_VL, DL, ContainerVT,
12223 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12224 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12225
12226 if (VT.isFixedLengthVector())
12227 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12228 return Result;
12229}
12230
12231SDValue
12232RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12233 SelectionDAG &DAG) const {
12234 SDLoc DL(Op);
12235
12236 SDValue Op1 = Op.getOperand(0);
12237 SDValue Op2 = Op.getOperand(1);
12238 SDValue Offset = Op.getOperand(2);
12239 SDValue Mask = Op.getOperand(3);
12240 SDValue EVL1 = Op.getOperand(4);
12241 SDValue EVL2 = Op.getOperand(5);
12242
12243 const MVT XLenVT = Subtarget.getXLenVT();
12244 MVT VT = Op.getSimpleValueType();
12245 MVT ContainerVT = VT;
12246 if (VT.isFixedLengthVector()) {
12247 ContainerVT = getContainerForFixedLengthVector(VT);
12248 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12249 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12250 MVT MaskVT = getMaskTypeFor(ContainerVT);
12251 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12252 }
12253
12254 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12255 if (IsMaskVector) {
12256 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12257
12258 // Expand input operands
12259 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12260 DAG.getUNDEF(ContainerVT),
12261 DAG.getConstant(1, DL, XLenVT), EVL1);
12262 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12263 DAG.getUNDEF(ContainerVT),
12264 DAG.getConstant(0, DL, XLenVT), EVL1);
12265 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12266 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12267
12268 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12269 DAG.getUNDEF(ContainerVT),
12270 DAG.getConstant(1, DL, XLenVT), EVL2);
12271 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12272 DAG.getUNDEF(ContainerVT),
12273 DAG.getConstant(0, DL, XLenVT), EVL2);
12274 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12275 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12276 }
12277
12278 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12279 SDValue DownOffset, UpOffset;
12280 if (ImmValue >= 0) {
12281 // The operand is a TargetConstant, we need to rebuild it as a regular
12282 // constant.
12283 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12284 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12285 } else {
12286 // The operand is a TargetConstant, we need to rebuild it as a regular
12287 // constant rather than negating the original operand.
12288 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12289 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12290 }
12291
12292 SDValue SlideDown =
12293 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12294 Op1, DownOffset, Mask, UpOffset);
12295 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12296 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12297
12298 if (IsMaskVector) {
12299 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12300 Result = DAG.getNode(
12301 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12302 {Result, DAG.getConstant(0, DL, ContainerVT),
12303 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12304 Mask, EVL2});
12305 }
12306
12307 if (!VT.isFixedLengthVector())
12308 return Result;
12309 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12310}
12311
12312SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12313 SelectionDAG &DAG) const {
12314 SDLoc DL(Op);
12315 SDValue Val = Op.getOperand(0);
12316 SDValue Mask = Op.getOperand(1);
12317 SDValue VL = Op.getOperand(2);
12318 MVT VT = Op.getSimpleValueType();
12319
12320 MVT ContainerVT = VT;
12321 if (VT.isFixedLengthVector()) {
12322 ContainerVT = getContainerForFixedLengthVector(VT);
12323 MVT MaskVT = getMaskTypeFor(ContainerVT);
12324 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12325 }
12326
12327 SDValue Result =
12328 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12329
12330 if (!VT.isFixedLengthVector())
12331 return Result;
12332 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12333}
12334
12335SDValue
12336RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12337 SelectionDAG &DAG) const {
12338 SDLoc DL(Op);
12339 MVT VT = Op.getSimpleValueType();
12340 MVT XLenVT = Subtarget.getXLenVT();
12341
12342 SDValue Op1 = Op.getOperand(0);
12343 SDValue Mask = Op.getOperand(1);
12344 SDValue EVL = Op.getOperand(2);
12345
12346 MVT ContainerVT = VT;
12347 if (VT.isFixedLengthVector()) {
12348 ContainerVT = getContainerForFixedLengthVector(VT);
12349 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12350 MVT MaskVT = getMaskTypeFor(ContainerVT);
12351 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12352 }
12353
12354 MVT GatherVT = ContainerVT;
12355 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12356 // Check if we are working with mask vectors
12357 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12358 if (IsMaskVector) {
12359 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12360
12361 // Expand input operand
12362 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12363 DAG.getUNDEF(IndicesVT),
12364 DAG.getConstant(1, DL, XLenVT), EVL);
12365 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12366 DAG.getUNDEF(IndicesVT),
12367 DAG.getConstant(0, DL, XLenVT), EVL);
12368 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12369 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12370 }
12371
12372 unsigned EltSize = GatherVT.getScalarSizeInBits();
12373 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12374 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12375 unsigned MaxVLMAX =
12376 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12377
12378 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12379 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12380 // to use vrgatherei16.vv.
12381 // TODO: It's also possible to use vrgatherei16.vv for other types to
12382 // decrease register width for the index calculation.
12383 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12384 if (MaxVLMAX > 256 && EltSize == 8) {
12385 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12386 // Split the vector in half and reverse each half using a full register
12387 // reverse.
12388 // Swap the halves and concatenate them.
12389 // Slide the concatenated result by (VLMax - VL).
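// For example, with 8 elements and EVL=5, (a b c d e x x x) splits into
// (a b c d) and (e x x x); reversing each half gives (d c b a) and (x x x e);
// concatenating the swapped halves gives (x x x e d c b a); sliding down by
// VLMAX - EVL = 3 yields (e d c b a . . .).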
12390 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12391 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12392 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12393
12394 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12395 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12396
12397 // Reassemble the low and high pieces reversed.
12398 // NOTE: this Result is unmasked (because we do not need masks for
12399 // shuffles). If in the future this has to change, we can use a SELECT_VL
12400 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12401 SDValue Result =
12402 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12403
12404 // Slide off any elements from past EVL that were reversed into the low
12405 // elements.
12406 unsigned MinElts = GatherVT.getVectorMinNumElements();
12407 SDValue VLMax =
12408 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12409 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12410
12411 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12412 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12413
12414 if (IsMaskVector) {
12415 // Truncate Result back to a mask vector
12416 Result =
12417 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12418 {Result, DAG.getConstant(0, DL, GatherVT),
12419 DAG.getCondCode(ISD::SETNE),
12420 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12421 }
12422
12423 if (!VT.isFixedLengthVector())
12424 return Result;
12425 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12426 }
12427
12428 // Just promote the int type to i16 which will double the LMUL.
12429 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12430 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12431 }
12432
12433 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12434 SDValue VecLen =
12435 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12436 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12437 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12438 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12439 DAG.getUNDEF(IndicesVT), Mask, EVL);
12440 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12441 DAG.getUNDEF(GatherVT), Mask, EVL);
12442
12443 if (IsMaskVector) {
12444 // Truncate Result back to a mask vector
12445 Result = DAG.getNode(
12446 RISCVISD::SETCC_VL, DL, ContainerVT,
12447 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12448 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12449 }
12450
12451 if (!VT.isFixedLengthVector())
12452 return Result;
12453 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12454}
12455
12456SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12457 SelectionDAG &DAG) const {
12458 MVT VT = Op.getSimpleValueType();
12459 if (VT.getVectorElementType() != MVT::i1)
12460 return lowerVPOp(Op, DAG);
12461
12462 // It is safe to drop mask parameter as masked-off elements are undef.
12463 SDValue Op1 = Op->getOperand(0);
12464 SDValue Op2 = Op->getOperand(1);
12465 SDValue VL = Op->getOperand(3);
12466
12467 MVT ContainerVT = VT;
12468 const bool IsFixed = VT.isFixedLengthVector();
12469 if (IsFixed) {
12470 ContainerVT = getContainerForFixedLengthVector(VT);
12471 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12472 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12473 }
12474
12475 SDLoc DL(Op);
12476 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12477 if (!IsFixed)
12478 return Val;
12479 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12480}
12481
12482SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12483 SelectionDAG &DAG) const {
12484 SDLoc DL(Op);
12485 MVT XLenVT = Subtarget.getXLenVT();
12486 MVT VT = Op.getSimpleValueType();
12487 MVT ContainerVT = VT;
12488 if (VT.isFixedLengthVector())
12489 ContainerVT = getContainerForFixedLengthVector(VT);
12490
12491 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12492
12493 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12494 // Check if the mask is known to be all ones
12495 SDValue Mask = VPNode->getMask();
12496 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12497
12498 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12499 : Intrinsic::riscv_vlse_mask,
12500 DL, XLenVT);
12501 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12502 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12503 VPNode->getStride()};
12504 if (!IsUnmasked) {
12505 if (VT.isFixedLengthVector()) {
12506 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12507 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12508 }
12509 Ops.push_back(Mask);
12510 }
12511 Ops.push_back(VPNode->getVectorLength());
12512 if (!IsUnmasked) {
12513 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12514 Ops.push_back(Policy);
12515 }
12516
12517 SDValue Result =
12518 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12519 VPNode->getMemoryVT(), VPNode->getMemOperand());
12520 SDValue Chain = Result.getValue(1);
12521
12522 if (VT.isFixedLengthVector())
12523 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12524
12525 return DAG.getMergeValues({Result, Chain}, DL);
12526}
12527
12528SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12529 SelectionDAG &DAG) const {
12530 SDLoc DL(Op);
12531 MVT XLenVT = Subtarget.getXLenVT();
12532
12533 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12534 SDValue StoreVal = VPNode->getValue();
12535 MVT VT = StoreVal.getSimpleValueType();
12536 MVT ContainerVT = VT;
12537 if (VT.isFixedLengthVector()) {
12538 ContainerVT = getContainerForFixedLengthVector(VT);
12539 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12540 }
12541
12542 // Check if the mask is known to be all ones
12543 SDValue Mask = VPNode->getMask();
12544 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12545
12546 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12547 : Intrinsic::riscv_vsse_mask,
12548 DL, XLenVT);
12549 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12550 VPNode->getBasePtr(), VPNode->getStride()};
12551 if (!IsUnmasked) {
12552 if (VT.isFixedLengthVector()) {
12553 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12554 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12555 }
12556 Ops.push_back(Mask);
12557 }
12558 Ops.push_back(VPNode->getVectorLength());
12559
12560 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12561 Ops, VPNode->getMemoryVT(),
12562 VPNode->getMemOperand());
12563}
12564
12565// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12566// matched to a RVV indexed load. The RVV indexed load instructions only
12567// support the "unsigned unscaled" addressing mode; indices are implicitly
12568// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12569// signed or scaled indexing is extended to the XLEN value type and scaled
12570// accordingly.
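// For example, with XLEN=64 and i32 elements at byte offsets 0, 4, 8 and 12,
// the index vector consumed by vluxei below must already hold {0, 4, 8, 12}
// as byte offsets (zero-extended or truncated to i64); any scaling by the
// element size happens before this point, as described above.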
12571SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12572 SelectionDAG &DAG) const {
12573 SDLoc DL(Op);
12574 MVT VT = Op.getSimpleValueType();
12575
12576 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12577 EVT MemVT = MemSD->getMemoryVT();
12578 MachineMemOperand *MMO = MemSD->getMemOperand();
12579 SDValue Chain = MemSD->getChain();
12580 SDValue BasePtr = MemSD->getBasePtr();
12581
12582 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12583 SDValue Index, Mask, PassThru, VL;
12584
12585 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12586 Index = VPGN->getIndex();
12587 Mask = VPGN->getMask();
12588 PassThru = DAG.getUNDEF(VT);
12589 VL = VPGN->getVectorLength();
12590 // VP doesn't support extending loads.
12591 LoadExtType = ISD::NON_EXTLOAD;
12592 } else {
12593 // Else it must be a MGATHER.
12594 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12595 Index = MGN->getIndex();
12596 Mask = MGN->getMask();
12597 PassThru = MGN->getPassThru();
12598 LoadExtType = MGN->getExtensionType();
12599 }
12600
12601 MVT IndexVT = Index.getSimpleValueType();
12602 MVT XLenVT = Subtarget.getXLenVT();
12603
12605 "Unexpected VTs!");
12606 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12607 // Targets have to explicitly opt-in for extending vector loads.
12608 assert(LoadExtType == ISD::NON_EXTLOAD &&
12609 "Unexpected extending MGATHER/VP_GATHER");
12610
12611 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12612 // the selection of the masked intrinsics doesn't do this for us.
12613 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12614
12615 MVT ContainerVT = VT;
12616 if (VT.isFixedLengthVector()) {
12617 ContainerVT = getContainerForFixedLengthVector(VT);
12618 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12619 ContainerVT.getVectorElementCount());
12620
12621 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12622
12623 if (!IsUnmasked) {
12624 MVT MaskVT = getMaskTypeFor(ContainerVT);
12625 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12626 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12627 }
12628 }
12629
12630 if (!VL)
12631 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12632
12633 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12634 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12635 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12636 }
12637
12638 unsigned IntID =
12639 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12640 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12641 if (IsUnmasked)
12642 Ops.push_back(DAG.getUNDEF(ContainerVT));
12643 else
12644 Ops.push_back(PassThru);
12645 Ops.push_back(BasePtr);
12646 Ops.push_back(Index);
12647 if (!IsUnmasked)
12648 Ops.push_back(Mask);
12649 Ops.push_back(VL);
12650 if (!IsUnmasked)
12651 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12652
12653 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12654 SDValue Result =
12655 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12656 Chain = Result.getValue(1);
12657
12658 if (VT.isFixedLengthVector())
12659 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12660
12661 return DAG.getMergeValues({Result, Chain}, DL);
12662}
12663
12664// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12665// matched to a RVV indexed store. The RVV indexed store instructions only
12666// support the "unsigned unscaled" addressing mode; indices are implicitly
12667// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12668// signed or scaled indexing is extended to the XLEN value type and scaled
12669// accordingly.
12670SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12671 SelectionDAG &DAG) const {
12672 SDLoc DL(Op);
12673 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12674 EVT MemVT = MemSD->getMemoryVT();
12675 MachineMemOperand *MMO = MemSD->getMemOperand();
12676 SDValue Chain = MemSD->getChain();
12677 SDValue BasePtr = MemSD->getBasePtr();
12678
12679 [[maybe_unused]] bool IsTruncatingStore = false;
12680 SDValue Index, Mask, Val, VL;
12681
12682 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12683 Index = VPSN->getIndex();
12684 Mask = VPSN->getMask();
12685 Val = VPSN->getValue();
12686 VL = VPSN->getVectorLength();
12687 // VP doesn't support truncating stores.
12688 IsTruncatingStore = false;
12689 } else {
12690 // Else it must be a MSCATTER.
12691 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12692 Index = MSN->getIndex();
12693 Mask = MSN->getMask();
12694 Val = MSN->getValue();
12695 IsTruncatingStore = MSN->isTruncatingStore();
12696 }
12697
12698 MVT VT = Val.getSimpleValueType();
12699 MVT IndexVT = Index.getSimpleValueType();
12700 MVT XLenVT = Subtarget.getXLenVT();
12701
12703 "Unexpected VTs!");
12704 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12705 // Targets have to explicitly opt-in for extending vector loads and
12706 // truncating vector stores.
12707 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12708
12709 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12710 // the selection of the masked intrinsics doesn't do this for us.
12711 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12712
12713 MVT ContainerVT = VT;
12714 if (VT.isFixedLengthVector()) {
12715 ContainerVT = getContainerForFixedLengthVector(VT);
12716 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12717 ContainerVT.getVectorElementCount());
12718
12719 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12720 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12721
12722 if (!IsUnmasked) {
12723 MVT MaskVT = getMaskTypeFor(ContainerVT);
12724 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12725 }
12726 }
12727
12728 if (!VL)
12729 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12730
12731 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12732 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12733 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12734 }
12735
12736 unsigned IntID =
12737 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12738 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12739 Ops.push_back(Val);
12740 Ops.push_back(BasePtr);
12741 Ops.push_back(Index);
12742 if (!IsUnmasked)
12743 Ops.push_back(Mask);
12744 Ops.push_back(VL);
12745
12746 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12747 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12748}
12749
12750SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12751 SelectionDAG &DAG) const {
12752 const MVT XLenVT = Subtarget.getXLenVT();
12753 SDLoc DL(Op);
12754 SDValue Chain = Op->getOperand(0);
12755 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12756 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12757 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12758
12759 // Encoding used for rounding mode in RISC-V differs from that used in
12760 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
12761 // table, which consists of a sequence of 4-bit fields, each representing
12762 // corresponding FLT_ROUNDS mode.
12763 static const int Table =
12764 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12765 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12766 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12767 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12768 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12769
12770 SDValue Shift =
12771 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12772 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12773 DAG.getConstant(Table, DL, XLenVT), Shift);
12774 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12775 DAG.getConstant(7, DL, XLenVT));
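// Worked example: if frm currently holds RTZ (1), Shift is 1 * 4 = 4, so
// (Table >> 4) & 7 extracts the field holding int(RoundingMode::TowardZero),
// i.e. the value FLT_ROUNDS uses for round-toward-zero.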
12776
12777 return DAG.getMergeValues({Masked, Chain}, DL);
12778}
12779
12780SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12781 SelectionDAG &DAG) const {
12782 const MVT XLenVT = Subtarget.getXLenVT();
12783 SDLoc DL(Op);
12784 SDValue Chain = Op->getOperand(0);
12785 SDValue RMValue = Op->getOperand(1);
12786 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12787
12788 // Encoding used for rounding mode in RISC-V differs from that used in
12789 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
12790 // a table, which consists of a sequence of 4-bit fields, each representing
12791 // corresponding RISC-V mode.
12792 static const unsigned Table =
12793 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12794 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12795 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12796 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12797 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12798
12799 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12800
12801 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12802 DAG.getConstant(2, DL, XLenVT));
12803 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12804 DAG.getConstant(Table, DL, XLenVT), Shift);
12805 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12806 DAG.getConstant(0x7, DL, XLenVT));
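// Worked example: an incoming FLT_ROUNDS value of 1 (NearestTiesToEven)
// gives Shift = 4, and (Table >> 4) & 0x7 yields RISCVFPRndMode::RNE, the
// encoding subsequently written to the frm CSR.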
12807 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12808 RMValue);
12809}
12810
12811SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12812 SelectionDAG &DAG) const {
12813 MachineFunction &MF = DAG.getMachineFunction();
12814
12815 bool isRISCV64 = Subtarget.is64Bit();
12816 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12817
12818 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12819 return DAG.getFrameIndex(FI, PtrVT);
12820}
12821
12822// Returns the opcode of the target-specific SDNode that implements the 32-bit
12823// form of the given Opcode.
12824static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12825 switch (Opcode) {
12826 default:
12827 llvm_unreachable("Unexpected opcode");
12828 case ISD::SHL:
12829 return RISCVISD::SLLW;
12830 case ISD::SRA:
12831 return RISCVISD::SRAW;
12832 case ISD::SRL:
12833 return RISCVISD::SRLW;
12834 case ISD::SDIV:
12835 return RISCVISD::DIVW;
12836 case ISD::UDIV:
12837 return RISCVISD::DIVUW;
12838 case ISD::UREM:
12839 return RISCVISD::REMUW;
12840 case ISD::ROTL:
12841 return RISCVISD::ROLW;
12842 case ISD::ROTR:
12843 return RISCVISD::RORW;
12844 }
12845}
12846
12847// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12848// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12849// otherwise be promoted to i64, making it difficult to select the
12850 // SLLW/DIVUW/.../*W later on, because the fact that the operation was originally of
12851// type i8/i16/i32 is lost.
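// For example, an i32 ISD::SRA on RV64 is rebuilt here as
//   (trunc i32 (RISCVISD::SRAW (any_extend i64 x), (any_extend i64 y)))
// so that instruction selection can still pick SRAW even though the operands
// have been promoted to i64.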
12852 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12853 unsigned ExtOpc = ISD::ANY_EXTEND) {
12854 SDLoc DL(N);
12855 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12856 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12857 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12858 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12859 // ReplaceNodeResults requires we maintain the same type for the return value.
12860 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12861}
12862
12863 // Converts the given 32-bit operation to an i64 operation with sign-extension
12864 // semantics to reduce the number of sign-extension instructions.
12865 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12866 SDLoc DL(N);
12867 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12868 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12869 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12870 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12871 DAG.getValueType(MVT::i32));
12872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12873}
12874
12875 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12876 SmallVectorImpl<SDValue> &Results,
12877 SelectionDAG &DAG) const {
12878 SDLoc DL(N);
12879 switch (N->getOpcode()) {
12880 default:
12881 llvm_unreachable("Don't know how to custom type legalize this operation!");
12882 case ISD::STRICT_FP_TO_SINT:
12883 case ISD::STRICT_FP_TO_UINT:
12884 case ISD::FP_TO_SINT:
12885 case ISD::FP_TO_UINT: {
12886 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12887 "Unexpected custom legalisation");
12888 bool IsStrict = N->isStrictFPOpcode();
12889 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12890 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12891 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12892 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12893 TargetLowering::TypeSoftenFloat) {
12894 if (!isTypeLegal(Op0.getValueType()))
12895 return;
12896 if (IsStrict) {
12897 SDValue Chain = N->getOperand(0);
12898 // In the absence of Zfh, promote f16 to f32, then convert.
12899 if (Op0.getValueType() == MVT::f16 &&
12900 !Subtarget.hasStdExtZfhOrZhinx()) {
12901 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12902 {Chain, Op0});
12903 Chain = Op0.getValue(1);
12904 }
12905 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12906 : RISCVISD::STRICT_FCVT_WU_RV64;
12907 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12908 SDValue Res = DAG.getNode(
12909 Opc, DL, VTs, Chain, Op0,
12910 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12911 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12912 Results.push_back(Res.getValue(1));
12913 return;
12914 }
12915 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12916 // convert.
12917 if ((Op0.getValueType() == MVT::f16 &&
12918 !Subtarget.hasStdExtZfhOrZhinx()) ||
12919 Op0.getValueType() == MVT::bf16)
12920 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12921
12922 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12923 SDValue Res =
12924 DAG.getNode(Opc, DL, MVT::i64, Op0,
12925 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12927 return;
12928 }
12929 // If the FP type needs to be softened, emit a library call using the 'si'
12930 // version. If we left it to default legalization we'd end up with 'di'. If
12931 // the FP type doesn't need to be softened just let generic type
12932 // legalization promote the result type.
12933 RTLIB::Libcall LC;
12934 if (IsSigned)
12935 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12936 else
12937 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12938 MakeLibCallOptions CallOptions;
12939 EVT OpVT = Op0.getValueType();
12940 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12941 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12942 SDValue Result;
12943 std::tie(Result, Chain) =
12944 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12945 Results.push_back(Result);
12946 if (IsStrict)
12947 Results.push_back(Chain);
12948 break;
12949 }
12950 case ISD::LROUND: {
12951 SDValue Op0 = N->getOperand(0);
12952 EVT Op0VT = Op0.getValueType();
12953 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12954 TargetLowering::TypeSoftenFloat) {
12955 if (!isTypeLegal(Op0VT))
12956 return;
12957
12958 // In the absence of Zfh, promote f16 to f32, then convert.
12959 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12960 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12961
12962 SDValue Res =
12963 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12964 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12965 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12966 return;
12967 }
12968 // If the FP type needs to be softened, emit a library call to lround. We'll
12969 // need to truncate the result. We assume any value that doesn't fit in i32
12970 // is allowed to return an unspecified value.
12971 RTLIB::Libcall LC =
12972 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12973 MakeLibCallOptions CallOptions;
12974 EVT OpVT = Op0.getValueType();
12975 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12976 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12977 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12978 Results.push_back(Result);
12979 break;
12980 }
12981 case ISD::READCYCLECOUNTER:
12982 case ISD::READSTEADYCOUNTER: {
12983 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12984 "has custom type legalization on riscv32");
12985
12986 SDValue LoCounter, HiCounter;
12987 MVT XLenVT = Subtarget.getXLenVT();
12988 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12989 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
12990 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
12991 } else {
12992 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
12993 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
12994 }
12995 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12996 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12997 N->getOperand(0), LoCounter, HiCounter);
12998
12999 Results.push_back(
13000 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13001 Results.push_back(RCW.getValue(2));
13002 break;
13003 }
13004 case ISD::LOAD: {
13005 if (!ISD::isNON_EXTLoad(N))
13006 return;
13007
13008 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13009 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13010 LoadSDNode *Ld = cast<LoadSDNode>(N);
13011
13012 SDLoc dl(N);
13013 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13014 Ld->getBasePtr(), Ld->getMemoryVT(),
13015 Ld->getMemOperand());
13016 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13017 Results.push_back(Res.getValue(1));
13018 return;
13019 }
13020 case ISD::MUL: {
13021 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13022 unsigned XLen = Subtarget.getXLen();
13023 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
13024 if (Size > XLen) {
13025 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13026 SDValue LHS = N->getOperand(0);
13027 SDValue RHS = N->getOperand(1);
13028 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13029
13030 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13031 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13032 // We need exactly one side to be unsigned.
13033 if (LHSIsU == RHSIsU)
13034 return;
13035
13036 auto MakeMULPair = [&](SDValue S, SDValue U) {
13037 MVT XLenVT = Subtarget.getXLenVT();
13038 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13039 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13040 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13041 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13042 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13043 };
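// Illustrative example (RV64, i128 multiply): if U has its upper 64 bits
// known zero and S is sign-extended from 64 bits, MakeMULPair(S, U) builds
// Lo = MUL and Hi = MULHSU of the truncated 64-bit halves and joins them
// with BUILD_PAIR, avoiding a full 128-bit multiply expansion.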
13044
13045 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13046 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13047
13048 // The other operand should be signed, but still prefer MULH when
13049 // possible.
13050 if (RHSIsU && LHSIsS && !RHSIsS)
13051 Results.push_back(MakeMULPair(LHS, RHS));
13052 else if (LHSIsU && RHSIsS && !LHSIsS)
13053 Results.push_back(MakeMULPair(RHS, LHS));
13054
13055 return;
13056 }
13057 [[fallthrough]];
13058 }
13059 case ISD::ADD:
13060 case ISD::SUB:
13061 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13062 "Unexpected custom legalisation");
13063 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13064 break;
13065 case ISD::SHL:
13066 case ISD::SRA:
13067 case ISD::SRL:
13068 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13069 "Unexpected custom legalisation");
13070 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13071 // If we can use a BSET instruction, allow default promotion to apply.
13072 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13073 isOneConstant(N->getOperand(0)))
13074 break;
13075 Results.push_back(customLegalizeToWOp(N, DAG));
13076 break;
13077 }
13078
13079 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13080 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13081 // shift amount.
13082 if (N->getOpcode() == ISD::SHL) {
13083 SDLoc DL(N);
13084 SDValue NewOp0 =
13085 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13086 SDValue NewOp1 =
13087 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13088 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13089 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13090 DAG.getValueType(MVT::i32));
13091 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13092 }
13093
13094 break;
13095 case ISD::ROTL:
13096 case ISD::ROTR:
13097 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13098 "Unexpected custom legalisation");
13099 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13100 Subtarget.hasVendorXTHeadBb()) &&
13101 "Unexpected custom legalization");
13102 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13103 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13104 return;
13105 Results.push_back(customLegalizeToWOp(N, DAG));
13106 break;
13107 case ISD::CTTZ:
13108 case ISD::CTTZ_ZERO_UNDEF:
13109 case ISD::CTLZ:
13110 case ISD::CTLZ_ZERO_UNDEF: {
13111 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13112 "Unexpected custom legalisation");
13113
13114 SDValue NewOp0 =
13115 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13116 bool IsCTZ =
13117 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13118 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13119 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13120 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13121 return;
13122 }
13123 case ISD::SDIV:
13124 case ISD::UDIV:
13125 case ISD::UREM: {
13126 MVT VT = N->getSimpleValueType(0);
13127 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13128 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13129 "Unexpected custom legalisation");
13130 // Don't promote division/remainder by constant since we should expand those
13131 // to a multiply by a magic constant.
13132 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13133 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13134 !isIntDivCheap(N->getValueType(0), Attr))
13135 return;
13136
13137 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13138 // the upper 32 bits. For other types we need to sign or zero extend
13139 // based on the opcode.
13140 unsigned ExtOpc = ISD::ANY_EXTEND;
13141 if (VT != MVT::i32)
13142 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13143 : ISD::ZERO_EXTEND;
13144
13145 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13146 break;
13147 }
13148 case ISD::SADDO: {
13149 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13150 "Unexpected custom legalisation");
13151
13152 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13153 // use the default legalization.
13154 if (!isa<ConstantSDNode>(N->getOperand(1)))
13155 return;
13156
13157 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13158 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13159 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13160 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13161 DAG.getValueType(MVT::i32));
13162
13163 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13164
13165 // For an addition, the result should be less than one of the operands (LHS)
13166 // if and only if the other operand (RHS) is negative, otherwise there will
13167 // be overflow.
13168 // For a subtraction, the result should be less than one of the operands
13169 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13170 // otherwise there will be overflow.
13171 EVT OType = N->getValueType(1);
13172 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13173 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13174
13175 SDValue Overflow =
13176 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13177 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13178 Results.push_back(Overflow);
13179 return;
13180 }
13181 case ISD::UADDO:
13182 case ISD::USUBO: {
13183 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13184 "Unexpected custom legalisation");
13185 bool IsAdd = N->getOpcode() == ISD::UADDO;
13186 // Create an ADDW or SUBW.
13187 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13188 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13189 SDValue Res =
13190 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13191 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13192 DAG.getValueType(MVT::i32));
13193
13194 SDValue Overflow;
13195 if (IsAdd && isOneConstant(RHS)) {
13196 // Special case: uaddo X, 1 overflows iff the addition result is 0.
13197 // The general case (X + C) < C is not necessarily beneficial. Although we
13198 // reduce the live range of X, we may introduce the materialization of
13199 // constant C, especially when the setcc result is used by a branch, since
13200 // RISC-V has no compare-with-constant branch instructions.
13201 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13202 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13203 } else if (IsAdd && isAllOnesConstant(RHS)) {
13204 // Special case: uaddo X, -1 overflows iff X != 0.
13205 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13206 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13207 } else {
13208 // Sign extend the LHS and perform an unsigned compare with the ADDW
13209 // result. Since the inputs are sign extended from i32, this is equivalent
13210 // to comparing the lower 32 bits.
13211 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13212 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13213 IsAdd ? ISD::SETULT : ISD::SETUGT);
13214 }
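// For instance, for uaddo(x, 1) the first special case above compares the
// sign-extended ADDW result against 0: the low 32 bits are zero exactly when
// x was 0xffffffff, i.e. when the 32-bit addition wrapped.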
13215
13216 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13217 Results.push_back(Overflow);
13218 return;
13219 }
13220 case ISD::UADDSAT:
13221 case ISD::USUBSAT: {
13222 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13223 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13224 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13225 // promotion for UADDO/USUBO.
13226 Results.push_back(expandAddSubSat(N, DAG));
13227 return;
13228 }
13229 case ISD::SADDSAT:
13230 case ISD::SSUBSAT: {
13231 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13232 "Unexpected custom legalisation");
13233 Results.push_back(expandAddSubSat(N, DAG));
13234 return;
13235 }
13236 case ISD::ABS: {
13237 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13238 "Unexpected custom legalisation");
13239
13240 if (Subtarget.hasStdExtZbb()) {
13241 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13242 // This allows us to remember that the result is sign extended. Expanding
13243 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13244 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13245 N->getOperand(0));
13246 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13247 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13248 return;
13249 }
13250
13251 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
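// Worked example for X = -5: Y = sraiw(X, 31) = -1 (all ones),
// xor(X, Y) = 4, and subw(4, -1) = 5; for a non-negative X, Y = 0 and the
// xor/sub pair leaves X unchanged.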
13252 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13253
13254 // Freeze the source so we can increase its use count.
13255 Src = DAG.getFreeze(Src);
13256
13257 // Copy sign bit to all bits using the sraiw pattern.
13258 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13259 DAG.getValueType(MVT::i32));
13260 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13261 DAG.getConstant(31, DL, MVT::i64));
13262
13263 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13264 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13265
13266 // NOTE: The result is only required to be anyextended, but sext is
13267 // consistent with type legalization of sub.
13268 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13269 DAG.getValueType(MVT::i32));
13270 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13271 return;
13272 }
13273 case ISD::BITCAST: {
13274 EVT VT = N->getValueType(0);
13275 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13276 SDValue Op0 = N->getOperand(0);
13277 EVT Op0VT = Op0.getValueType();
13278 MVT XLenVT = Subtarget.getXLenVT();
13279 if (VT == MVT::i16 &&
13280 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13281 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13282 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13283 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13284 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13285 Subtarget.hasStdExtFOrZfinx()) {
13286 SDValue FPConv =
13287 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13288 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13289 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13290 Subtarget.hasStdExtDOrZdinx()) {
13291 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13292 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13293 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13294 NewReg.getValue(0), NewReg.getValue(1));
13295 Results.push_back(RetReg);
13296 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13297 isTypeLegal(Op0VT)) {
13298 // Custom-legalize bitcasts from fixed-length vector types to illegal
13299 // scalar types in order to improve codegen. Bitcast the vector to a
13300 // one-element vector type whose element type is the same as the result
13301 // type, and extract the first element.
13302 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13303 if (isTypeLegal(BVT)) {
13304 SDValue BVec = DAG.getBitcast(BVT, Op0);
13305 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13306 DAG.getVectorIdxConstant(0, DL)));
13307 }
13308 }
13309 break;
13310 }
13311 case RISCVISD::BREV8:
13312 case RISCVISD::ORC_B: {
13313 MVT VT = N->getSimpleValueType(0);
13314 MVT XLenVT = Subtarget.getXLenVT();
13315 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13316 "Unexpected custom legalisation");
13317 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13318 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13319 "Unexpected extension");
13320 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13321 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13322 // ReplaceNodeResults requires we maintain the same type for the return
13323 // value.
13324 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13325 break;
13326 }
13327 case ISD::EXTRACT_VECTOR_ELT: {
13328 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13329 // type is illegal (currently only vXi64 RV32).
13330 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13331 // transferred to the destination register. We issue two of these from the
13332 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13333 // first element.
13334 SDValue Vec = N->getOperand(0);
13335 SDValue Idx = N->getOperand(1);
13336
13337 // The vector type hasn't been legalized yet so we can't issue target
13338 // specific nodes if it needs legalization.
13339 // FIXME: We would manually legalize if it's important.
13340 if (!isTypeLegal(Vec.getValueType()))
13341 return;
13342
13343 MVT VecVT = Vec.getSimpleValueType();
13344
13345 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13346 VecVT.getVectorElementType() == MVT::i64 &&
13347 "Unexpected EXTRACT_VECTOR_ELT legalization");
13348
13349 // If this is a fixed vector, we need to convert it to a scalable vector.
13350 MVT ContainerVT = VecVT;
13351 if (VecVT.isFixedLengthVector()) {
13352 ContainerVT = getContainerForFixedLengthVector(VecVT);
13353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13354 }
13355
13356 MVT XLenVT = Subtarget.getXLenVT();
13357
13358 // Use a VL of 1 to avoid processing more elements than we need.
13359 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13360
13361 // Unless the index is known to be 0, we must slide the vector down to get
13362 // the desired element into index 0.
13363 if (!isNullConstant(Idx)) {
13364 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13365 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13366 }
13367
13368 // Extract the lower XLEN bits of the correct vector element.
13369 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13370
13371 // To extract the upper XLEN bits of the vector element, shift the first
13372 // element right by 32 bits and re-extract the lower XLEN bits.
13373 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13374 DAG.getUNDEF(ContainerVT),
13375 DAG.getConstant(32, DL, XLenVT), VL);
13376 SDValue LShr32 =
13377 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13378 DAG.getUNDEF(ContainerVT), Mask, VL);
13379
13380 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13381
13382 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13383 break;
13384 }
13385 case ISD::INTRINSIC_WO_CHAIN: {
13386 unsigned IntNo = N->getConstantOperandVal(0);
13387 switch (IntNo) {
13388 default:
13390 "Don't know how to custom type legalize this intrinsic!");
13391 case Intrinsic::experimental_get_vector_length: {
13392 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13393 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13394 return;
13395 }
13396 case Intrinsic::experimental_cttz_elts: {
13397 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13398 Results.push_back(
13399 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13400 return;
13401 }
13402 case Intrinsic::riscv_orc_b:
13403 case Intrinsic::riscv_brev8:
13404 case Intrinsic::riscv_sha256sig0:
13405 case Intrinsic::riscv_sha256sig1:
13406 case Intrinsic::riscv_sha256sum0:
13407 case Intrinsic::riscv_sha256sum1:
13408 case Intrinsic::riscv_sm3p0:
13409 case Intrinsic::riscv_sm3p1: {
13410 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13411 return;
13412 unsigned Opc;
13413 switch (IntNo) {
13414 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13415 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13416 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13417 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13418 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13419 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13420 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13421 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13422 }
13423
13424 SDValue NewOp =
13425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13426 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13427 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13428 return;
13429 }
13430 case Intrinsic::riscv_sm4ks:
13431 case Intrinsic::riscv_sm4ed: {
13432 unsigned Opc =
13433 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13434 SDValue NewOp0 =
13435 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13436 SDValue NewOp1 =
13437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13438 SDValue Res =
13439 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13440 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13441 return;
13442 }
13443 case Intrinsic::riscv_mopr: {
13444 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13445 return;
13446 SDValue NewOp =
13447 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13448 SDValue Res = DAG.getNode(
13449 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13450 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13451 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13452 return;
13453 }
13454 case Intrinsic::riscv_moprr: {
13455 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13456 return;
13457 SDValue NewOp0 =
13458 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13459 SDValue NewOp1 =
13460 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13461 SDValue Res = DAG.getNode(
13462 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13463 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13464 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13465 return;
13466 }
13467 case Intrinsic::riscv_clmul: {
13468 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13469 return;
13470
13471 SDValue NewOp0 =
13472 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13473 SDValue NewOp1 =
13474 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13475 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13476 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13477 return;
13478 }
13479 case Intrinsic::riscv_clmulh:
13480 case Intrinsic::riscv_clmulr: {
13481 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13482 return;
13483
13484 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13485 // to the full 128-bit clmul result of multiplying two xlen values.
13486 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13487 // upper 32 bits.
13488 //
13489 // The alternative is to mask the inputs to 32 bits and use clmul, but
13490 // that requires two shifts to mask each input without zext.w.
13491 // FIXME: If the inputs are known zero extended or could be freely
13492 // zero extended, the mask form would be better.
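// Concretely: with A = a << 32 and B = b << 32, the 128-bit carry-less
// product of A and B equals clmul(a, b) << 64, so clmulh(A, B) recovers the
// full 64-bit clmul(a, b), and the SRL by 32 below then extracts its upper
// half, i.e. the 32-bit clmulh(a, b).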
13493 SDValue NewOp0 =
13494 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13495 SDValue NewOp1 =
13496 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13497 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13498 DAG.getConstant(32, DL, MVT::i64));
13499 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13500 DAG.getConstant(32, DL, MVT::i64));
13501 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13502 : RISCVISD::CLMULR;
13503 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13504 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13505 DAG.getConstant(32, DL, MVT::i64));
13506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13507 return;
13508 }
13509 case Intrinsic::riscv_vmv_x_s: {
13510 EVT VT = N->getValueType(0);
13511 MVT XLenVT = Subtarget.getXLenVT();
13512 if (VT.bitsLT(XLenVT)) {
13513 // Simple case just extract using vmv.x.s and truncate.
13514 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13515 Subtarget.getXLenVT(), N->getOperand(1));
13516 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13517 return;
13518 }
13519
13520 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13521 "Unexpected custom legalization");
13522
13523 // We need to do the move in two steps.
13524 SDValue Vec = N->getOperand(1);
13525 MVT VecVT = Vec.getSimpleValueType();
13526
13527 // First extract the lower XLEN bits of the element.
13528 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13529
13530 // To extract the upper XLEN bits of the vector element, shift the first
13531 // element right by 32 bits and re-extract the lower XLEN bits.
13532 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13533
13534 SDValue ThirtyTwoV =
13535 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13536 DAG.getConstant(32, DL, XLenVT), VL);
13537 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13538 DAG.getUNDEF(VecVT), Mask, VL);
13539 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13540
13541 Results.push_back(
13542 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13543 break;
13544 }
13545 }
13546 break;
13547 }
13548 case ISD::VECREDUCE_ADD:
13549 case ISD::VECREDUCE_AND:
13550 case ISD::VECREDUCE_OR:
13551 case ISD::VECREDUCE_XOR:
13552 case ISD::VECREDUCE_SMAX:
13553 case ISD::VECREDUCE_UMAX:
13554 case ISD::VECREDUCE_SMIN:
13555 case ISD::VECREDUCE_UMIN:
13556 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13557 Results.push_back(V);
13558 break;
13559 case ISD::VP_REDUCE_ADD:
13560 case ISD::VP_REDUCE_AND:
13561 case ISD::VP_REDUCE_OR:
13562 case ISD::VP_REDUCE_XOR:
13563 case ISD::VP_REDUCE_SMAX:
13564 case ISD::VP_REDUCE_UMAX:
13565 case ISD::VP_REDUCE_SMIN:
13566 case ISD::VP_REDUCE_UMIN:
13567 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13568 Results.push_back(V);
13569 break;
13570 case ISD::GET_ROUNDING: {
13571 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13572 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13573 Results.push_back(Res.getValue(0));
13574 Results.push_back(Res.getValue(1));
13575 break;
13576 }
13577 }
13578}
13579
13580/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13581/// which corresponds to it.
13582static unsigned getVecReduceOpcode(unsigned Opc) {
13583 switch (Opc) {
13584 default:
13585 llvm_unreachable("Unhandled binary to transfrom reduction");
13586 case ISD::ADD:
13587 return ISD::VECREDUCE_ADD;
13588 case ISD::UMAX:
13589 return ISD::VECREDUCE_UMAX;
13590 case ISD::SMAX:
13591 return ISD::VECREDUCE_SMAX;
13592 case ISD::UMIN:
13593 return ISD::VECREDUCE_UMIN;
13594 case ISD::SMIN:
13595 return ISD::VECREDUCE_SMIN;
13596 case ISD::AND:
13597 return ISD::VECREDUCE_AND;
13598 case ISD::OR:
13599 return ISD::VECREDUCE_OR;
13600 case ISD::XOR:
13601 return ISD::VECREDUCE_XOR;
13602 case ISD::FADD:
13603 // Note: This is the associative form of the generic reduction opcode.
13604 return ISD::VECREDUCE_FADD;
13605 }
13606}
13607
13608/// Perform two related transforms whose purpose is to incrementally recognize
13609/// an explode_vector followed by scalar reduction as a vector reduction node.
13610/// This exists to recover from a deficiency in SLP which can't handle
13611/// forests with multiple roots sharing common nodes. In some cases, one
13612/// of the trees will be vectorized, and the other will remain (unprofitably)
13613/// scalarized.
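/// For example, (add (add (extractelt v, 0), (extractelt v, 1)),
///                   (extractelt v, 2))
/// is first rewritten into an add of (vecreduce_add over the 2-element prefix
/// of v) and (extractelt v, 2), and the second transform then grows that into
/// a single vecreduce_add over the 3-element prefix of v.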
13614static SDValue
13615 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13616 const RISCVSubtarget &Subtarget) {
13617
13618 // This transform needs to run before all integer types have been legalized
13619 // to i64 (so that the vector element type matches the add type), and while
13620 // it's safe to introduce odd sized vector types.
13621 if (DAG.getTargetLoweringInfo().isTypeLegal(N->getValueType(0)))
13622 return SDValue();
13623
13624 // Without V, this transform isn't useful. We could form the (illegal)
13625 // operations and let them be scalarized again, but there's really no point.
13626 if (!Subtarget.hasVInstructions())
13627 return SDValue();
13628
13629 const SDLoc DL(N);
13630 const EVT VT = N->getValueType(0);
13631 const unsigned Opc = N->getOpcode();
13632
13633 // For FADD, we only handle the case with reassociation allowed. We
13634 // could handle strict reduction order, but at the moment, there's no
13635 // known reason to, and the complexity isn't worth it.
13636 // TODO: Handle fminnum and fmaxnum here
13637 if (!VT.isInteger() &&
13638 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13639 return SDValue();
13640
13641 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13642 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13643 "Inconsistent mappings");
13644 SDValue LHS = N->getOperand(0);
13645 SDValue RHS = N->getOperand(1);
13646
13647 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13648 return SDValue();
13649
13650 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13651 std::swap(LHS, RHS);
13652
13653 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13654 !isa<ConstantSDNode>(RHS.getOperand(1)))
13655 return SDValue();
13656
13657 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13658 SDValue SrcVec = RHS.getOperand(0);
13659 EVT SrcVecVT = SrcVec.getValueType();
13660 assert(SrcVecVT.getVectorElementType() == VT);
13661 if (SrcVecVT.isScalableVector())
13662 return SDValue();
13663
13664 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13665 return SDValue();
13666
13667 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13668 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13669 // root of our reduction tree. TODO: We could extend this to any two
13670 // adjacent aligned constant indices if desired.
13671 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13672 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13673 uint64_t LHSIdx =
13674 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13675 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13676 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13677 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13678 DAG.getVectorIdxConstant(0, DL));
13679 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13680 }
13681 }
13682
13683 // Match (binop (reduce (extract_subvector V, 0),
13684 // (extract_vector_elt V, sizeof(SubVec))))
13685 // into a reduction of one more element from the original vector V.
13686 if (LHS.getOpcode() != ReduceOpc)
13687 return SDValue();
13688
13689 SDValue ReduceVec = LHS.getOperand(0);
13690 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13691 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13692 isNullConstant(ReduceVec.getOperand(1)) &&
13693 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13694 // For illegal types (e.g. 3xi32), most will be combined again into a
13695 // wider (hopefully legal) type. If this is a terminal state, we are
13696 // relying on type legalization here to produce something reasonable
13697 // and this lowering quality could probably be improved. (TODO)
13698 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13699 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13700 DAG.getVectorIdxConstant(0, DL));
13701 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13702 ReduceVec->getFlags() & N->getFlags());
13703 }
13704
13705 return SDValue();
13706}
13707
13708
13709// Try to fold (<bop> x, (reduction.<bop> vec, start))
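// For example, when the reduction's start value is the neutral element (0 for
// an integer add), (add x, (vecreduce.add vec, 0)) can be rewritten to feed x
// in as the new start value, i.e. (vecreduce.add vec, x), folding the scalar
// add into the reduction.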
13710 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13711 const RISCVSubtarget &Subtarget) {
13712 auto BinOpToRVVReduce = [](unsigned Opc) {
13713 switch (Opc) {
13714 default:
13715 llvm_unreachable("Unhandled binary to transform reduction");
13716 case ISD::ADD:
13717 return RISCVISD::VECREDUCE_ADD_VL;
13718 case ISD::UMAX:
13719 return RISCVISD::VECREDUCE_UMAX_VL;
13720 case ISD::SMAX:
13721 return RISCVISD::VECREDUCE_SMAX_VL;
13722 case ISD::UMIN:
13723 return RISCVISD::VECREDUCE_UMIN_VL;
13724 case ISD::SMIN:
13725 return RISCVISD::VECREDUCE_SMIN_VL;
13726 case ISD::AND:
13727 return RISCVISD::VECREDUCE_AND_VL;
13728 case ISD::OR:
13729 return RISCVISD::VECREDUCE_OR_VL;
13730 case ISD::XOR:
13731 return RISCVISD::VECREDUCE_XOR_VL;
13732 case ISD::FADD:
13733 return RISCVISD::VECREDUCE_FADD_VL;
13734 case ISD::FMAXNUM:
13735 return RISCVISD::VECREDUCE_FMAX_VL;
13736 case ISD::FMINNUM:
13737 return RISCVISD::VECREDUCE_FMIN_VL;
13738 }
13739 };
13740
13741 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13742 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13743 isNullConstant(V.getOperand(1)) &&
13744 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13745 };
13746
13747 unsigned Opc = N->getOpcode();
13748 unsigned ReduceIdx;
13749 if (IsReduction(N->getOperand(0), Opc))
13750 ReduceIdx = 0;
13751 else if (IsReduction(N->getOperand(1), Opc))
13752 ReduceIdx = 1;
13753 else
13754 return SDValue();
13755
13756 // Skip if this is an FADD without the reassociation flag, which the combine requires.
13757 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13758 return SDValue();
13759
13760 SDValue Extract = N->getOperand(ReduceIdx);
13761 SDValue Reduce = Extract.getOperand(0);
13762 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13763 return SDValue();
13764
13765 SDValue ScalarV = Reduce.getOperand(2);
13766 EVT ScalarVT = ScalarV.getValueType();
13767 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13768 ScalarV.getOperand(0)->isUndef() &&
13769 isNullConstant(ScalarV.getOperand(2)))
13770 ScalarV = ScalarV.getOperand(1);
13771
13772 // Make sure that ScalarV is a splat with VL=1.
13773 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13774 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13775 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13776 return SDValue();
13777
13778 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13779 return SDValue();
13780
13781 // Check that the scalar operand of ScalarV is the neutral element.
13782 // TODO: Deal with values other than the neutral element.
13783 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13784 0))
13785 return SDValue();
13786
13787 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13788 // FIXME: We might be able to improve this if operand 0 is undef.
13789 if (!isNonZeroAVL(Reduce.getOperand(5)))
13790 return SDValue();
13791
13792 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13793
13794 SDLoc DL(N);
13795 SDValue NewScalarV =
13796 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13797 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13798
13799 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13800 if (ScalarVT != ScalarV.getValueType())
13801 NewScalarV =
13802 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13803 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13804
13805 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13806 NewScalarV, Reduce.getOperand(3),
13807 Reduce.getOperand(4), Reduce.getOperand(5)};
13808 SDValue NewReduce =
13809 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13810 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13811 Extract.getOperand(1));
13812}
13813
13814// Optimize (add (shl x, c0), (shl y, c1)) ->
13815 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
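// For example, with Zba: (add (shl x, 1), (shl y, 3))
//   -> (shl (sh2add y, x), 1)
// since (x << 1) + (y << 3) == ((y << 2) + x) << 1.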
13816 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13817 const RISCVSubtarget &Subtarget) {
13818 // Perform this optimization only in the zba extension.
13819 if (!Subtarget.hasStdExtZba())
13820 return SDValue();
13821
13822 // Skip for vector types and larger types.
13823 EVT VT = N->getValueType(0);
13824 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13825 return SDValue();
13826
13827 // The two operand nodes must be SHL and have no other use.
13828 SDValue N0 = N->getOperand(0);
13829 SDValue N1 = N->getOperand(1);
13830 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13831 !N0->hasOneUse() || !N1->hasOneUse())
13832 return SDValue();
13833
13834 // Check c0 and c1.
13835 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13836 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13837 if (!N0C || !N1C)
13838 return SDValue();
13839 int64_t C0 = N0C->getSExtValue();
13840 int64_t C1 = N1C->getSExtValue();
13841 if (C0 <= 0 || C1 <= 0)
13842 return SDValue();
13843
13844 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13845 int64_t Bits = std::min(C0, C1);
13846 int64_t Diff = std::abs(C0 - C1);
13847 if (Diff != 1 && Diff != 2 && Diff != 3)
13848 return SDValue();
13849
13850 // Build nodes.
13851 SDLoc DL(N);
13852 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13853 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13854 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13855 DAG.getConstant(Diff, DL, VT), NS);
13856 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13857}
13858
13859// Combine a constant select operand into its use:
13860//
13861// (and (select cond, -1, c), x)
13862// -> (select cond, x, (and x, c)) [AllOnes=1]
13863// (or (select cond, 0, c), x)
13864// -> (select cond, x, (or x, c)) [AllOnes=0]
13865// (xor (select cond, 0, c), x)
13866// -> (select cond, x, (xor x, c)) [AllOnes=0]
13867// (add (select cond, 0, c), x)
13868// -> (select cond, x, (add x, c)) [AllOnes=0]
13869// (sub x, (select cond, 0, c))
13870// -> (select cond, x, (sub x, c)) [AllOnes=0]
13871 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13872 SelectionDAG &DAG, bool AllOnes,
13873 const RISCVSubtarget &Subtarget) {
13874 EVT VT = N->getValueType(0);
13875
13876 // Skip vectors.
13877 if (VT.isVector())
13878 return SDValue();
13879
13880 if (!Subtarget.hasConditionalMoveFusion()) {
13881 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13882 if ((!Subtarget.hasStdExtZicond() &&
13883 !Subtarget.hasVendorXVentanaCondOps()) ||
13884 N->getOpcode() != ISD::AND)
13885 return SDValue();
13886
13887 // Maybe harmful when the condition code has multiple uses.
13888 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13889 return SDValue();
13890
13891 // Maybe harmful when VT is wider than XLen.
13892 if (VT.getSizeInBits() > Subtarget.getXLen())
13893 return SDValue();
13894 }
13895
13896 if ((Slct.getOpcode() != ISD::SELECT &&
13897 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13898 !Slct.hasOneUse())
13899 return SDValue();
13900
13901 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13902 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13903 };
13904
13905 bool SwapSelectOps;
13906 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13907 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13908 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13909 SDValue NonConstantVal;
13910 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13911 SwapSelectOps = false;
13912 NonConstantVal = FalseVal;
13913 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13914 SwapSelectOps = true;
13915 NonConstantVal = TrueVal;
13916 } else
13917 return SDValue();
13918
13919 // Slct is now known to be the desired identity constant when CC is true.
13920 TrueVal = OtherOp;
13921 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13922 // Unless SwapSelectOps says the condition should be false.
13923 if (SwapSelectOps)
13924 std::swap(TrueVal, FalseVal);
13925
13926 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13927 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13928 {Slct.getOperand(0), Slct.getOperand(1),
13929 Slct.getOperand(2), TrueVal, FalseVal});
13930
13931 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13932 {Slct.getOperand(0), TrueVal, FalseVal});
13933}
13934
13935// Attempt combineSelectAndUse on each operand of a commutative operator N.
13936 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13937 bool AllOnes,
13938 const RISCVSubtarget &Subtarget) {
13939 SDValue N0 = N->getOperand(0);
13940 SDValue N1 = N->getOperand(1);
13941 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13942 return Result;
13943 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13944 return Result;
13945 return SDValue();
13946}
13947
13948// Transform (add (mul x, c0), c1) ->
13949// (add (mul (add x, c1/c0), c0), c1%c0).
13950// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13951// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13952// to an infinite loop in DAGCombine if transformed.
13953// Or transform (add (mul x, c0), c1) ->
13954// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13955// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13956// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13957// lead to an infinite loop in DAGCombine if transformed.
13958// Or transform (add (mul x, c0), c1) ->
13959// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13960// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13961// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13962// lead to an infinite loop in DAGCombine if transformed.
13963// Or transform (add (mul x, c0), c1) ->
13964// (mul (add x, c1/c0), c0).
13965// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
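// For example, (add (mul x, 100), 4099): 4099 is not simm12, but 4099/100 = 40
// and 4099%100 = 99 both are, and 100*40 = 4000 is not simm12 either, so this
// becomes (add (mul (add x, 40), 100), 99).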
13966 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13967 const RISCVSubtarget &Subtarget) {
13968 // Skip for vector types and larger types.
13969 EVT VT = N->getValueType(0);
13970 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13971 return SDValue();
13972 // The first operand node must be a MUL and has no other use.
13973 SDValue N0 = N->getOperand(0);
13974 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13975 return SDValue();
13976 // Check if c0 and c1 match above conditions.
13977 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13978 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13979 if (!N0C || !N1C)
13980 return SDValue();
13981 // If N0C has multiple uses it's possible one of the cases in
13982 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13983 // in an infinite loop.
13984 if (!N0C->hasOneUse())
13985 return SDValue();
13986 int64_t C0 = N0C->getSExtValue();
13987 int64_t C1 = N1C->getSExtValue();
13988 int64_t CA, CB;
13989 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13990 return SDValue();
13991 // Search for proper CA (non-zero) and CB that both are simm12.
13992 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13993 !isInt<12>(C0 * (C1 / C0))) {
13994 CA = C1 / C0;
13995 CB = C1 % C0;
13996 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13997 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13998 CA = C1 / C0 + 1;
13999 CB = C1 % C0 - C0;
14000 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14001 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14002 CA = C1 / C0 - 1;
14003 CB = C1 % C0 + C0;
14004 } else
14005 return SDValue();
14006 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14007 SDLoc DL(N);
14008 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
14009 DAG.getSignedConstant(CA, DL, VT));
14010 SDValue New1 =
14011 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
14012 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
14013}
14014
14015// add (zext, zext) -> zext (add (zext, zext))
14016// sub (zext, zext) -> sext (sub (zext, zext))
14017// mul (zext, zext) -> zext (mul (zext, zext))
14018// sdiv (zext, zext) -> zext (sdiv (zext, zext))
14019// udiv (zext, zext) -> zext (udiv (zext, zext))
14020// srem (zext, zext) -> zext (srem (zext, zext))
14021// urem (zext, zext) -> zext (urem (zext, zext))
14022//
14023// where the sum of the extend widths match, and the range of the bin op
14024// fits inside the width of the narrower bin op. (For profitability on rvv, we
14025// use a power of two for both inner and outer extend.)
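// For example (illustrative types): nxv2i32 = add (zext nxv2i8 a), (zext
// nxv2i8 b) needs at most 9 bits, so it can instead be computed as
// zext (add (zext a to nxv2i16), (zext b to nxv2i16)) to nxv2i32, halving the
// width of the add.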
14026static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
14027
14028 EVT VT = N->getValueType(0);
14029 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14030 return SDValue();
14031
14032 SDValue N0 = N->getOperand(0);
14033 SDValue N1 = N->getOperand(1);
14034 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
14035 return SDValue();
14036 if (!N0.hasOneUse() || !N1.hasOneUse())
14037 return SDValue();
14038
14039 SDValue Src0 = N0.getOperand(0);
14040 SDValue Src1 = N1.getOperand(0);
14041 EVT SrcVT = Src0.getValueType();
14042 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14043 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14044 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14045 return SDValue();
14046
14047 LLVMContext &C = *DAG.getContext();
14048 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14049 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14050
14051 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14052 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14053
14054 // Src0 and Src1 are zero extended, so they're always positive if signed.
14055 //
14056 // sub can produce a negative from two positive operands, so it needs sign
14057 // extended. Other nodes produce a positive from two positive operands, so
14058 // zero extend instead.
14059 unsigned OuterExtend =
14060 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14061
14062 return DAG.getNode(
14063 OuterExtend, SDLoc(N), VT,
14064 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14065}
14066
14067// Try to turn (add (xor bool, 1) -1) into (neg bool).
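// This holds because for bool in {0, 1}: (xor bool, 1) - 1 == (1 - bool) - 1
// == -bool.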
14068static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14069 SDValue N0 = N->getOperand(0);
14070 SDValue N1 = N->getOperand(1);
14071 EVT VT = N->getValueType(0);
14072 SDLoc DL(N);
14073
14074 // RHS should be -1.
14075 if (!isAllOnesConstant(N1))
14076 return SDValue();
14077
14078 // Look for (xor X, 1).
14079 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14080 return SDValue();
14081
14082 // First xor input should be 0 or 1.
14083 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14084 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14085 return SDValue();
14086
14087 // Emit a negate of the setcc.
14088 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14089 N0.getOperand(0));
14090}
14091
14092static SDValue performADDCombine(SDNode *N,
14093 TargetLowering::DAGCombinerInfo &DCI,
14094 const RISCVSubtarget &Subtarget) {
14095 SelectionDAG &DAG = DCI.DAG;
14096 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14097 return V;
14098 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14099 return V;
14100 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14101 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14102 return V;
14103 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14104 return V;
14105 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14106 return V;
14107 if (SDValue V = combineBinOpOfZExt(N, DAG))
14108 return V;
14109
14110 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14111 // (select lhs, rhs, cc, x, (add x, y))
14112 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14113}
14114
14115// Try to turn a sub boolean RHS and constant LHS into an addi.
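// For example (illustrative): (sub 5, (setcc x, y, eq)) becomes
// (add (setcc x, y, ne), 4), since eq == 1 - ne and 5 - (1 - ne) == ne + 4.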
14116static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14117 SDValue N0 = N->getOperand(0);
14118 SDValue N1 = N->getOperand(1);
14119 EVT VT = N->getValueType(0);
14120 SDLoc DL(N);
14121
14122 // Require a constant LHS.
14123 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14124 if (!N0C)
14125 return SDValue();
14126
14127 // All our optimizations involve subtracting 1 from the immediate and forming
14128 // an ADDI. Make sure the new immediate is valid for an ADDI.
14129 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14130 if (!ImmValMinus1.isSignedIntN(12))
14131 return SDValue();
14132
14133 SDValue NewLHS;
14134 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14135 // (sub constant, (setcc x, y, eq/neq)) ->
14136 // (add (setcc x, y, neq/eq), constant - 1)
14137 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14138 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14139 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14140 return SDValue();
14141 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14142 NewLHS =
14143 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14144 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14145 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14146 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14147 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14148 NewLHS = N1.getOperand(0);
14149 } else
14150 return SDValue();
14151
14152 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14153 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14154}
14155
14156// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14157// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14158// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14159// valid with Y=3, while 0b0000_1000_0000_0100 is not.
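// Worked example (illustrative): for 32-bit X = 0x00080008 with Y = 3,
// (shl X, 5) = 0x01000100 and (srl X, 3) = 0x00010001, so the sub yields
// 0x00FF00FF, which is exactly (orc.b X): every non-zero byte becomes 0xff.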
14160static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14161 const RISCVSubtarget &Subtarget) {
14162 if (!Subtarget.hasStdExtZbb())
14163 return SDValue();
14164
14165 EVT VT = N->getValueType(0);
14166
14167 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14168 return SDValue();
14169
14170 SDValue N0 = N->getOperand(0);
14171 SDValue N1 = N->getOperand(1);
14172
14173 if (N0->getOpcode() != ISD::SHL)
14174 return SDValue();
14175
14176 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14177 if (!ShAmtCLeft)
14178 return SDValue();
14179 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14180
14181 if (ShiftedAmount >= 8)
14182 return SDValue();
14183
14184 SDValue LeftShiftOperand = N0->getOperand(0);
14185 SDValue RightShiftOperand = N1;
14186
14187 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14188 if (N1->getOpcode() != ISD::SRL)
14189 return SDValue();
14190 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14191 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14192 return SDValue();
14193 RightShiftOperand = N1.getOperand(0);
14194 }
14195
14196 // At least one shift should have a single use.
14197 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14198 return SDValue();
14199
14200 if (LeftShiftOperand != RightShiftOperand)
14201 return SDValue();
14202
14203 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14204 Mask <<= ShiftedAmount;
14205 // Check that X has indeed the right shape (only the Y-th bit can be set in
14206 // every byte).
14207 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14208 return SDValue();
14209
14210 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14211}
14212
14213static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14214 const RISCVSubtarget &Subtarget) {
14215 if (SDValue V = combineSubOfBoolean(N, DAG))
14216 return V;
14217
14218 EVT VT = N->getValueType(0);
14219 SDValue N0 = N->getOperand(0);
14220 SDValue N1 = N->getOperand(1);
14221 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14222 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14223 isNullConstant(N1.getOperand(1))) {
14224 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14225 if (CCVal == ISD::SETLT) {
14226 SDLoc DL(N);
14227 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14228 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14229 DAG.getConstant(ShAmt, DL, VT));
14230 }
14231 }
14232
14233 if (SDValue V = combineBinOpOfZExt(N, DAG))
14234 return V;
14235 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14236 return V;
14237
14238 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14239 // (select lhs, rhs, cc, x, (sub x, y))
14240 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14241}
14242
14243// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14244// Legalizing setcc can introduce xors like this. Doing this transform reduces
14245// the number of xors and may allow the xor to fold into a branch condition.
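// For X, Y in {0, 1}: (and (xor X, 1), (xor Y, 1)) == (xor (or X, Y), 1) and
// (or (xor X, 1), (xor Y, 1)) == (xor (and X, Y), 1), so only one xor remains.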
14246static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14247 SDValue N0 = N->getOperand(0);
14248 SDValue N1 = N->getOperand(1);
14249 bool IsAnd = N->getOpcode() == ISD::AND;
14250
14251 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14252 return SDValue();
14253
14254 if (!N0.hasOneUse() || !N1.hasOneUse())
14255 return SDValue();
14256
14257 SDValue N01 = N0.getOperand(1);
14258 SDValue N11 = N1.getOperand(1);
14259
14260 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14261 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14262 // operation is And, allow one of the Xors to use -1.
14263 if (isOneConstant(N01)) {
14264 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14265 return SDValue();
14266 } else if (isOneConstant(N11)) {
14267 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14268 if (!(IsAnd && isAllOnesConstant(N01)))
14269 return SDValue();
14270 } else
14271 return SDValue();
14272
14273 EVT VT = N->getValueType(0);
14274
14275 SDValue N00 = N0.getOperand(0);
14276 SDValue N10 = N1.getOperand(0);
14277
14278 // The LHS of the xors needs to be 0/1.
14279 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14280 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14281 return SDValue();
14282
14283 // Invert the opcode and insert a new xor.
14284 SDLoc DL(N);
14285 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14286 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14287 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14288}
14289
14290// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14291// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14292// value to an unsigned value. This will be lowered to vmax and a series of
14293// vnclipu instructions later. This can be extended to truncated types other
14294// than i8 by replacing 256 and 255 with the equivalent constants for the
14295// type.
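// Intuition (illustrative, for i8): the vselect computes "X if X u< 256, else
// all-ones if X > 0, else 0", which is the same as clamping the signed value X
// to [0, 255] before truncating; (smin (smax X, 0), 255) does exactly that.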
14296static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14297 EVT VT = N->getValueType(0);
14298 SDValue N0 = N->getOperand(0);
14299 EVT SrcVT = N0.getValueType();
14300
14301 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14302 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14303 return SDValue();
14304
14305 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14306 return SDValue();
14307
14308 SDValue Cond = N0.getOperand(0);
14309 SDValue True = N0.getOperand(1);
14310 SDValue False = N0.getOperand(2);
14311
14312 if (Cond.getOpcode() != ISD::SETCC)
14313 return SDValue();
14314
14315 // FIXME: Support the version of this pattern with the select operands
14316 // swapped.
14317 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14318 if (CCVal != ISD::SETULT)
14319 return SDValue();
14320
14321 SDValue CondLHS = Cond.getOperand(0);
14322 SDValue CondRHS = Cond.getOperand(1);
14323
14324 if (CondLHS != True)
14325 return SDValue();
14326
14327 unsigned ScalarBits = VT.getScalarSizeInBits();
14328
14329 // FIXME: Support other constants.
14330 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14331 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14332 return SDValue();
14333
14334 if (False.getOpcode() != ISD::SIGN_EXTEND)
14335 return SDValue();
14336
14337 False = False.getOperand(0);
14338
14339 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14340 return SDValue();
14341
14342 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14343 if (!FalseRHSC || !FalseRHSC->isZero())
14344 return SDValue();
14345
14346 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14347 if (CCVal2 != ISD::SETGT)
14348 return SDValue();
14349
14350 // Emit the signed to unsigned saturation pattern.
14351 SDLoc DL(N);
14352 SDValue Max =
14353 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14354 SDValue Min =
14355 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14356 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14357 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14358}
14359
14360static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14361 const RISCVSubtarget &Subtarget) {
14362 SDValue N0 = N->getOperand(0);
14363 EVT VT = N->getValueType(0);
14364
14365 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14366 // extending X. This is safe since we only need the LSB after the shift and
14367 // shift amounts larger than 31 would produce poison. If we wait until
14368 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14369 // to use a BEXT instruction.
14370 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14371 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14372 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14373 SDLoc DL(N0);
14374 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14375 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14376 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14377 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14378 }
14379
14380 return combineTruncSelectToSMaxUSat(N, DAG);
14381}
14382
14383// Combines two comparison operations and a logic operation into one selection
14384// operation (min, max) and a logic operation. Returns the newly constructed
14385// node if the conditions for the optimization are satisfied.
14386static SDValue performANDCombine(SDNode *N,
14387 TargetLowering::DAGCombinerInfo &DCI,
14388 const RISCVSubtarget &Subtarget) {
14389 SelectionDAG &DAG = DCI.DAG;
14390
14391 SDValue N0 = N->getOperand(0);
14392 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14393 // extending X. This is safe since we only need the LSB after the shift and
14394 // shift amounts larger than 31 would produce poison. If we wait until
14395 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14396 // to use a BEXT instruction.
14397 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14398 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14399 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14400 N0.hasOneUse()) {
14401 SDLoc DL(N);
14402 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14403 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14404 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14405 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14406 DAG.getConstant(1, DL, MVT::i64));
14407 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14408 }
14409
14410 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14411 return V;
14412 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14413 return V;
14414
14415 if (DCI.isAfterLegalizeDAG())
14416 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14417 return V;
14418
14419 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14420 // (select lhs, rhs, cc, x, (and x, y))
14421 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14422}
14423
14424// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14425// FIXME: Generalize to other binary operators with the same operand.
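// For example: (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c)) is
// rewritten to (xor (or (czero_eqz a, c), (czero_nez b, c)), 1), pulling the
// common "xor ..., 1" out of both arms of the select idiom.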
14426static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14427 SelectionDAG &DAG) {
14428 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14429
14430 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14431 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14432 !N0.hasOneUse() || !N1.hasOneUse())
14433 return SDValue();
14434
14435 // Should have the same condition.
14436 SDValue Cond = N0.getOperand(1);
14437 if (Cond != N1.getOperand(1))
14438 return SDValue();
14439
14440 SDValue TrueV = N0.getOperand(0);
14441 SDValue FalseV = N1.getOperand(0);
14442
14443 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14444 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14445 !isOneConstant(TrueV.getOperand(1)) ||
14446 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14447 return SDValue();
14448
14449 EVT VT = N->getValueType(0);
14450 SDLoc DL(N);
14451
14452 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14453 Cond);
14454 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14455 Cond);
14456 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14457 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14458}
14459
14460static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14461 const RISCVSubtarget &Subtarget) {
14462 SelectionDAG &DAG = DCI.DAG;
14463
14464 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14465 return V;
14466 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14467 return V;
14468
14469 if (DCI.isAfterLegalizeDAG())
14470 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14471 return V;
14472
14473 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
14474 // We may be able to pull a common operation out of the true and false value.
14475 SDValue N0 = N->getOperand(0);
14476 SDValue N1 = N->getOperand(1);
14477 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14478 return V;
14479 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14480 return V;
14481
14482 // fold (or (select cond, 0, y), x) ->
14483 // (select cond, x, (or x, y))
14484 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14485}
14486
14487static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14488 const RISCVSubtarget &Subtarget) {
14489 SDValue N0 = N->getOperand(0);
14490 SDValue N1 = N->getOperand(1);
14491
14492 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14493 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14494 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14495 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14496 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14497 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14498 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14499 SDLoc DL(N);
14500 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14501 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14502 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14503 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14504 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14505 }
14506
14507 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14508 // NOTE: Assumes ROL being legal means ROLW is legal.
14509 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14510 if (N0.getOpcode() == RISCVISD::SLLW &&
14511 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14512 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14513 SDLoc DL(N);
14514 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14515 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14516 }
14517
14518 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14519 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14520 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14521 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14522 if (ConstN00 && CC == ISD::SETLT) {
14523 EVT VT = N0.getValueType();
14524 SDLoc DL(N0);
14525 const APInt &Imm = ConstN00->getAPIntValue();
14526 if ((Imm + 1).isSignedIntN(12))
14527 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14528 DAG.getConstant(Imm + 1, DL, VT), CC);
14529 }
14530 }
14531
14532 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14533 return V;
14534 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14535 return V;
14536
14537 // fold (xor (select cond, 0, y), x) ->
14538 // (select cond, x, (xor x, y))
14539 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14540}
14541
14542// Try to expand a scalar multiply to a faster sequence.
14543static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14544 TargetLowering::DAGCombinerInfo &DCI,
14545 const RISCVSubtarget &Subtarget) {
14546
14547 EVT VT = N->getValueType(0);
14548
14549 // LI + MUL is usually smaller than the alternative sequence.
14550 if (DAG.getMachineFunction().getFunction().hasMinSize())
14551 return SDValue();
14552
14553 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14554 return SDValue();
14555
14556 if (VT != Subtarget.getXLenVT())
14557 return SDValue();
14558
14559 const bool HasShlAdd =
14560 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14561
14562 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14563 if (!CNode)
14564 return SDValue();
14565 uint64_t MulAmt = CNode->getZExtValue();
14566
14567 // WARNING: The code below is knowingly incorrect with regard to undef semantics.
14568 // We're adding additional uses of X here, and in principle, we should be freezing
14569 // X before doing so. However, adding freeze here causes real regressions, and no
14570 // other target properly freezes X in these cases either.
14571 SDValue X = N->getOperand(0);
14572
14573 if (HasShlAdd) {
14574 for (uint64_t Divisor : {3, 5, 9}) {
14575 if (MulAmt % Divisor != 0)
14576 continue;
14577 uint64_t MulAmt2 = MulAmt / Divisor;
14578 // 3/5/9 * 2^N -> shl (shXadd X, X), N
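// e.g. (illustrative) MulAmt = 40 = 5 * 8 becomes shl (sh2add X, X), 3,
// i.e. (5 * X) << 3.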
14579 if (isPowerOf2_64(MulAmt2)) {
14580 SDLoc DL(N);
14581 SDValue X = N->getOperand(0);
14582 // Put the shift first if we can fold a zext into the
14583 // shift forming a slli.uw.
14584 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14585 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14586 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14587 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14588 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14589 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14590 Shl);
14591 }
14592 // Otherwise, put the shl second so that it can fold with following
14593 // instructions (e.g. sext or add).
14594 SDValue Mul359 =
14595 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14596 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14597 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14598 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14599 }
14600
14601 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14602 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14603 SDLoc DL(N);
14604 SDValue Mul359 =
14605 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14606 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14607 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14608 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14609 Mul359);
14610 }
14611 }
14612
14613 // If this is a power of 2 plus 2/4/8, we can use a shift followed by a single
14614 // shXadd. First check if this is a sum of two powers of 2 because that's
14615 // easy. Then count how many trailing zeros there are up to the first set bit.
14616 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14617 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14618 if (ScaleShift >= 1 && ScaleShift < 4) {
14619 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14620 SDLoc DL(N);
14621 SDValue Shift1 =
14622 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14623 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14624 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14625 }
14626 }
14627
14628 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14629 // This is the two-instruction form; there are also three-instruction
14630 // variants we could implement, e.g.
14631 // (2^(1,2,3) * 3,5,9 + 1) << C2
14632 // 2^(C1>3) * 3,5,9 +/- 1
14633 for (uint64_t Divisor : {3, 5, 9}) {
14634 uint64_t C = MulAmt - 1;
14635 if (C <= Divisor)
14636 continue;
14637 unsigned TZ = llvm::countr_zero(C);
14638 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14639 SDLoc DL(N);
14640 SDValue Mul359 =
14641 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14642 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14643 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14644 DAG.getConstant(TZ, DL, VT), X);
14645 }
14646 }
14647
14648 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14649 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14650 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14651 if (ScaleShift >= 1 && ScaleShift < 4) {
14652 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14653 SDLoc DL(N);
14654 SDValue Shift1 =
14655 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14656 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14657 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14658 DAG.getConstant(ScaleShift, DL, VT), X));
14659 }
14660 }
14661
14662 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
14663 for (uint64_t Offset : {3, 5, 9}) {
14664 if (isPowerOf2_64(MulAmt + Offset)) {
14665 SDLoc DL(N);
14666 SDValue Shift1 =
14667 DAG.getNode(ISD::SHL, DL, VT, X,
14668 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14669 SDValue Mul359 =
14670 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14671 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14672 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14673 }
14674 }
14675 }
14676
14677 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
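// e.g. (illustrative) MulAmt = 14 = 16 - 2 becomes (sub (shl X, 4), (shl X, 1)).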
14678 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14679 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14680 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14681 SDLoc DL(N);
14682 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14683 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14684 SDValue Shift2 =
14685 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14686 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14687 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14688 }
14689
14690 if (HasShlAdd) {
14691 for (uint64_t Divisor : {3, 5, 9}) {
14692 if (MulAmt % Divisor != 0)
14693 continue;
14694 uint64_t MulAmt2 = MulAmt / Divisor;
14695 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14696 // of 25 which happen to be quite common.
14697 for (uint64_t Divisor2 : {3, 5, 9}) {
14698 if (MulAmt2 % Divisor2 != 0)
14699 continue;
14700 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14701 if (isPowerOf2_64(MulAmt3)) {
14702 SDLoc DL(N);
14703 SDValue Mul359A =
14704 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14705 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14706 SDValue Mul359B = DAG.getNode(
14707 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14708 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14709 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14710 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14711 }
14712 }
14713 }
14714 }
14715
14716 return SDValue();
14717}
14718
14719// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14720// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14721// Same for other equivalent types with other equivalent constants.
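// Rationale for the i32 case: (and (lshr X, 15), 0x10001) extracts the sign
// bit of each i16 half of the lane into bit 0 of that half, and multiplying by
// 0xffff turns each extracted bit into a full 16-bit mask, which is exactly an
// arithmetic shift right by 15 of each i16 half.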
14722static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14723 EVT VT = N->getValueType(0);
14724 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14725
14726 // Do this for legal vectors unless they are i1 or i8 vectors.
14727 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14728 return SDValue();
14729
14730 if (N->getOperand(0).getOpcode() != ISD::AND ||
14731 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14732 return SDValue();
14733
14734 SDValue And = N->getOperand(0);
14735 SDValue Srl = And.getOperand(0);
14736
14737 APInt V1, V2, V3;
14738 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14739 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14740 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14741 return SDValue();
14742
14743 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14744 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14745 V3 != (HalfSize - 1))
14746 return SDValue();
14747
14748 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14749 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14750 VT.getVectorElementCount() * 2);
14751 SDLoc DL(N);
14752 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14753 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14754 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14755 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14756}
14757
14758static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14759 TargetLowering::DAGCombinerInfo &DCI,
14760 const RISCVSubtarget &Subtarget) {
14761 EVT VT = N->getValueType(0);
14762 if (!VT.isVector())
14763 return expandMul(N, DAG, DCI, Subtarget);
14764
14765 SDLoc DL(N);
14766 SDValue N0 = N->getOperand(0);
14767 SDValue N1 = N->getOperand(1);
14768 SDValue MulOper;
14769 unsigned AddSubOpc;
14770
14771 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14772 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14773 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14774 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14775 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14776 AddSubOpc = V->getOpcode();
14777 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14778 SDValue Opnd = V->getOperand(1);
14779 MulOper = V->getOperand(0);
14780 if (AddSubOpc == ISD::SUB)
14781 std::swap(Opnd, MulOper);
14782 if (isOneOrOneSplat(Opnd))
14783 return true;
14784 }
14785 return false;
14786 };
14787
14788 if (IsAddSubWith1(N0)) {
14789 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14790 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14791 }
14792
14793 if (IsAddSubWith1(N1)) {
14794 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14795 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14796 }
14797
14798 if (SDValue V = combineBinOpOfZExt(N, DAG))
14799 return V;
14800
14801 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14802 return V;
14803
14804 return SDValue();
14805}
14806
14807/// According to the property that indexed load/store instructions zero-extend
14808/// their indices, try to narrow the type of the index operand.
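/// For example (illustrative): an index of the form
/// (shl (zext nxv2i8 X to nxv2i64), splat 2) needs at most 10 bits, so it can
/// be rebuilt as (shl (zext X to nxv2i16), splat 2), narrowing the index type.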
14809static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14810 if (isIndexTypeSigned(IndexType))
14811 return false;
14812
14813 if (!N->hasOneUse())
14814 return false;
14815
14816 EVT VT = N.getValueType();
14817 SDLoc DL(N);
14818
14819 // In general, what we're doing here is seeing if we can sink a truncate to
14820 // a smaller element type into the expression tree building our index.
14821 // TODO: We can generalize this and handle a bunch more cases if useful.
14822
14823 // Narrow a buildvector to the narrowest element type. This requires less
14824 // work and less register pressure at high LMUL, and creates smaller constants
14825 // which may be cheaper to materialize.
14826 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14827 KnownBits Known = DAG.computeKnownBits(N);
14828 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14829 LLVMContext &C = *DAG.getContext();
14830 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14831 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14832 N = DAG.getNode(ISD::TRUNCATE, DL,
14833 VT.changeVectorElementType(ResultVT), N);
14834 return true;
14835 }
14836 }
14837
14838 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14839 if (N.getOpcode() != ISD::SHL)
14840 return false;
14841
14842 SDValue N0 = N.getOperand(0);
14843 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14844 N0.getOpcode() != RISCVISD::VZEXT_VL)
14845 return false;
14846 if (!N0->hasOneUse())
14847 return false;
14848
14849 APInt ShAmt;
14850 SDValue N1 = N.getOperand(1);
14851 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14852 return false;
14853
14854 SDValue Src = N0.getOperand(0);
14855 EVT SrcVT = Src.getValueType();
14856 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14857 unsigned ShAmtV = ShAmt.getZExtValue();
14858 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14859 NewElen = std::max(NewElen, 8U);
14860
14861 // Skip if NewElen is not narrower than the original extended type.
14862 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14863 return false;
14864
14865 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14866 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14867
14868 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14869 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14870 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14871 return true;
14872}
14873
14874// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14875// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14876// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14877// can become a sext.w instead of a shift pair.
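// For example (illustrative): (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the sign-extended constant
// is typically cheaper to materialize on RV64.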
14878static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14879 const RISCVSubtarget &Subtarget) {
14880 SDValue N0 = N->getOperand(0);
14881 SDValue N1 = N->getOperand(1);
14882 EVT VT = N->getValueType(0);
14883 EVT OpVT = N0.getValueType();
14884
14885 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14886 return SDValue();
14887
14888 // RHS needs to be a constant.
14889 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14890 if (!N1C)
14891 return SDValue();
14892
14893 // LHS needs to be (and X, 0xffffffff).
14894 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14895 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14896 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14897 return SDValue();
14898
14899 // Looking for an equality compare.
14900 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14901 if (!isIntEqualitySetCC(Cond))
14902 return SDValue();
14903
14904 // Don't do this if the sign bit is provably zero; it will be turned back into
14905 // an AND.
14906 APInt SignMask = APInt::getOneBitSet(64, 31);
14907 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14908 return SDValue();
14909
14910 const APInt &C1 = N1C->getAPIntValue();
14911
14912 SDLoc dl(N);
14913 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14914 // to be equal.
14915 if (C1.getActiveBits() > 32)
14916 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14917
14918 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14919 N0.getOperand(0), DAG.getValueType(MVT::i32));
14920 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14921 dl, OpVT), Cond);
14922}
14923
14924static SDValue
14925performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14926 const RISCVSubtarget &Subtarget) {
14927 SDValue Src = N->getOperand(0);
14928 EVT VT = N->getValueType(0);
14929 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14930 unsigned Opc = Src.getOpcode();
14931
14932 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14933 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14934 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14935 Subtarget.hasStdExtZfhmin())
14936 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14937 Src.getOperand(0));
14938
14939 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14940 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14941 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14942 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14943 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14944 Src.getOperand(1));
14945
14946 return SDValue();
14947}
14948
14949namespace {
14950// Forward declaration of the structure holding the necessary information to
14951// apply a combine.
14952struct CombineResult;
14953
14954enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14955/// Helper class for folding sign/zero extensions.
14956/// In particular, this class is used for the following combines:
14957/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14958/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14959/// mul | mul_vl -> vwmul(u) | vwmul_su
14960/// shl | shl_vl -> vwsll
14961/// fadd -> vfwadd | vfwadd_w
14962/// fsub -> vfwsub | vfwsub_w
14963/// fmul -> vfwmul
14964/// An object of this class represents an operand of the operation we want to
14965/// combine.
14966/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14967/// NodeExtensionHelper for `a` and one for `b`.
14968///
14969/// This class abstracts away how the extension is materialized and
14970/// how its number of users affect the combines.
14971///
14972/// In particular:
14973/// - VWADD_W is conceptually == add(op0, sext(op1))
14974/// - VWADDU_W == add(op0, zext(op1))
14975/// - VWSUB_W == sub(op0, sext(op1))
14976/// - VWSUBU_W == sub(op0, zext(op1))
14977/// - VFWADD_W == fadd(op0, fpext(op1))
14978/// - VFWSUB_W == fsub(op0, fpext(op1))
14979/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14980/// zext|sext(smaller_value).
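///
/// For example (an illustrative case): for a scalable-vector `add` whose two
/// operands are sign-extended from a half-width type, both helpers report
/// SupportsSExt, and the combine can rebuild the node as a single widening
/// add (RISCVISD::VWADD_VL) on the narrow operands.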
14981struct NodeExtensionHelper {
14982 /// Records if this operand is like being zero extended.
14983 bool SupportsZExt;
14984 /// Records if this operand is like being sign extended.
14985 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14986 /// instance, a splat constant (e.g., 3), would support being both sign and
14987 /// zero extended.
14988 bool SupportsSExt;
14989 /// Records if this operand is like being floating-point extended.
14990 bool SupportsFPExt;
14991 /// This boolean captures whether we care if this operand would still be
14992 /// around after the folding happens.
14993 bool EnforceOneUse;
14994 /// Original value that this NodeExtensionHelper represents.
14995 SDValue OrigOperand;
14996
14997 /// Get the value feeding the extension or the value itself.
14998 /// E.g., for zext(a), this would return a.
14999 SDValue getSource() const {
15000 switch (OrigOperand.getOpcode()) {
15001 case ISD::ZERO_EXTEND:
15002 case ISD::SIGN_EXTEND:
15003 case RISCVISD::VSEXT_VL:
15004 case RISCVISD::VZEXT_VL:
15005 case RISCVISD::FP_EXTEND_VL:
15006 return OrigOperand.getOperand(0);
15007 default:
15008 return OrigOperand;
15009 }
15010 }
15011
15012 /// Check if this instance represents a splat.
15013 bool isSplat() const {
15014 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15015 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15016 }
15017
15018 /// Get the extended opcode.
15019 unsigned getExtOpc(ExtKind SupportsExt) const {
15020 switch (SupportsExt) {
15021 case ExtKind::SExt:
15022 return RISCVISD::VSEXT_VL;
15023 case ExtKind::ZExt:
15024 return RISCVISD::VZEXT_VL;
15025 case ExtKind::FPExt:
15027 }
15028 llvm_unreachable("Unknown ExtKind enum");
15029 }
15030
15031 /// Get or create a value that can feed \p Root with the given extension \p
15032 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of
15033 /// this operand. \see ::getSource().
15034 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15035 const RISCVSubtarget &Subtarget,
15036 std::optional<ExtKind> SupportsExt) const {
15037 if (!SupportsExt.has_value())
15038 return OrigOperand;
15039
15040 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15041
15042 SDValue Source = getSource();
15043 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15044 if (Source.getValueType() == NarrowVT)
15045 return Source;
15046
15047 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15048 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15049 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15050 Root->getOpcode() == RISCVISD::VFMADD_VL);
15051 return Source;
15052 }
15053
15054 unsigned ExtOpc = getExtOpc(*SupportsExt);
15055
15056 // If we need an extension, we should be changing the type.
15057 SDLoc DL(OrigOperand);
15058 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15059 switch (OrigOperand.getOpcode()) {
15060 case ISD::ZERO_EXTEND:
15061 case ISD::SIGN_EXTEND:
15062 case RISCVISD::VSEXT_VL:
15063 case RISCVISD::VZEXT_VL:
15064 case RISCVISD::FP_EXTEND_VL:
15065 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15066 case ISD::SPLAT_VECTOR:
15067 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15068 case RISCVISD::VMV_V_X_VL:
15069 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15070 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15071 case RISCVISD::VFMV_V_F_VL:
15072 Source = Source.getOperand(1);
15073 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15074 Source = Source.getOperand(0);
15075 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15076 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15077 DAG.getUNDEF(NarrowVT), Source, VL);
15078 default:
15079 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15080 // and that operand should already have the right NarrowVT so no
15081 // extension should be required at this point.
15082 llvm_unreachable("Unsupported opcode");
15083 }
15084 }
15085
15086 /// Helper function to get the narrow type for \p Root.
15087 /// The narrow type is the type of \p Root where we divided the size of each
15088 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15089 /// \pre Both the narrow type and the original type should be legal.
15090 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15091 MVT VT = Root->getSimpleValueType(0);
15092
15093 // Determine the narrow size.
15094 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15095
15096 MVT EltVT = SupportsExt == ExtKind::FPExt
15097 ? MVT::getFloatingPointVT(NarrowSize)
15098 : MVT::getIntegerVT(NarrowSize);
15099
15100 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15101 "Trying to extend something we can't represent");
15102 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15103 return NarrowVT;
15104 }
15105
15106 /// Get the opcode to materialize:
15107 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15108 static unsigned getSExtOpcode(unsigned Opcode) {
15109 switch (Opcode) {
15110 case ISD::ADD:
15111 case RISCVISD::ADD_VL:
15114 case ISD::OR:
15115 return RISCVISD::VWADD_VL;
15116 case ISD::SUB:
15117 case RISCVISD::SUB_VL:
15120 return RISCVISD::VWSUB_VL;
15121 case ISD::MUL:
15122 case RISCVISD::MUL_VL:
15123 return RISCVISD::VWMUL_VL;
15124 default:
15125 llvm_unreachable("Unexpected opcode");
15126 }
15127 }
15128
15129 /// Get the opcode to materialize:
15130 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15131 static unsigned getZExtOpcode(unsigned Opcode) {
15132 switch (Opcode) {
15133 case ISD::ADD:
15134 case RISCVISD::ADD_VL:
15137 case ISD::OR:
15138 return RISCVISD::VWADDU_VL;
15139 case ISD::SUB:
15140 case RISCVISD::SUB_VL:
15143 return RISCVISD::VWSUBU_VL;
15144 case ISD::MUL:
15145 case RISCVISD::MUL_VL:
15146 return RISCVISD::VWMULU_VL;
15147 case ISD::SHL:
15148 case RISCVISD::SHL_VL:
15149 return RISCVISD::VWSLL_VL;
15150 default:
15151 llvm_unreachable("Unexpected opcode");
15152 }
15153 }
15154
15155 /// Get the opcode to materialize:
15156 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15157 static unsigned getFPExtOpcode(unsigned Opcode) {
15158 switch (Opcode) {
15159 case RISCVISD::FADD_VL:
15161 return RISCVISD::VFWADD_VL;
15162 case RISCVISD::FSUB_VL:
15164 return RISCVISD::VFWSUB_VL;
15165 case RISCVISD::FMUL_VL:
15166 return RISCVISD::VFWMUL_VL;
15168 return RISCVISD::VFWMADD_VL;
15170 return RISCVISD::VFWMSUB_VL;
15172 return RISCVISD::VFWNMADD_VL;
15174 return RISCVISD::VFWNMSUB_VL;
15175 default:
15176 llvm_unreachable("Unexpected opcode");
15177 }
15178 }
15179
15180 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15181 /// newOpcode(a, b).
15182 static unsigned getSUOpcode(unsigned Opcode) {
15183 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15184 "SU is only supported for MUL");
15185 return RISCVISD::VWMULSU_VL;
15186 }
15187
15188 /// Get the opcode to materialize
15189 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15190 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15191 switch (Opcode) {
15192 case ISD::ADD:
15193 case RISCVISD::ADD_VL:
15194 case ISD::OR:
15195 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15196 : RISCVISD::VWADDU_W_VL;
15197 case ISD::SUB:
15198 case RISCVISD::SUB_VL:
15199 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15200 : RISCVISD::VWSUBU_W_VL;
15201 case RISCVISD::FADD_VL:
15202 return RISCVISD::VFWADD_W_VL;
15203 case RISCVISD::FSUB_VL:
15204 return RISCVISD::VFWSUB_W_VL;
15205 default:
15206 llvm_unreachable("Unexpected opcode");
15207 }
15208 }
15209
15210 using CombineToTry = std::function<std::optional<CombineResult>(
15211 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15212 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15213 const RISCVSubtarget &)>;
15214
15215 /// Check if this node needs to be fully folded or extended for all users.
15216 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15217
15218 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15219 const RISCVSubtarget &Subtarget) {
15220 unsigned Opc = OrigOperand.getOpcode();
15221 MVT VT = OrigOperand.getSimpleValueType();
15222
15223 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15224 "Unexpected Opcode");
15225
15226 // The passthru must be undef for tail agnostic.
15227 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15228 return;
15229
15230 // Get the scalar value.
15231 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15232 : OrigOperand.getOperand(1);
15233
15234 // See if we have enough sign bits or zero bits in the scalar to use a
15235 // widening opcode by splatting to smaller element size.
15236 unsigned EltBits = VT.getScalarSizeInBits();
15237 unsigned ScalarBits = Op.getValueSizeInBits();
15238 // If we're not getting all bits from the element, we need special handling.
15239 if (ScalarBits < EltBits) {
15240 // This should only occur on RV32.
15241 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15242 !Subtarget.is64Bit() && "Unexpected splat");
15243 // vmv.v.x sign extends narrow inputs.
15244 SupportsSExt = true;
15245
15246 // If the input is positive, then sign extend is also zero extend.
15247 if (DAG.SignBitIsZero(Op))
15248 SupportsZExt = true;
15249
15250 EnforceOneUse = false;
15251 return;
15252 }
15253
15254 unsigned NarrowSize = EltBits / 2;
15255 // If the narrow type cannot be expressed with a legal VMV,
15256 // this is not a valid candidate.
15257 if (NarrowSize < 8)
15258 return;
15259
15260 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15261 SupportsSExt = true;
15262
15263 if (DAG.MaskedValueIsZero(Op,
15264 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15265 SupportsZExt = true;
15266
15267 EnforceOneUse = false;
15268 }
15269
15270 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15271 const RISCVSubtarget &Subtarget) {
15272 // Any f16 extension will need zvfh
15273 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15274 return false;
15275 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15276 // zvfbfwma
15277 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15278 Root->getOpcode() != RISCVISD::VFMADD_VL))
15279 return false;
15280 return true;
15281 }
15282
15283 /// Helper method to set the various fields of this struct based on the
15284 /// type of \p Root.
15285 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15286 const RISCVSubtarget &Subtarget) {
15287 SupportsZExt = false;
15288 SupportsSExt = false;
15289 SupportsFPExt = false;
15290 EnforceOneUse = true;
15291 unsigned Opc = OrigOperand.getOpcode();
15292 // For the nodes we handle below, we end up using their inputs directly: see
15293 // getSource(). However since they either don't have a passthru or we check
15294 // that their passthru is undef, we can safely ignore their mask and VL.
15295 switch (Opc) {
15296 case ISD::ZERO_EXTEND:
15297 case ISD::SIGN_EXTEND: {
15298 MVT VT = OrigOperand.getSimpleValueType();
15299 if (!VT.isVector())
15300 break;
15301
15302 SDValue NarrowElt = OrigOperand.getOperand(0);
15303 MVT NarrowVT = NarrowElt.getSimpleValueType();
15304 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15305 if (NarrowVT.getVectorElementType() == MVT::i1)
15306 break;
15307
15308 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15309 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15310 break;
15311 }
15312 case RISCVISD::VZEXT_VL:
15313 SupportsZExt = true;
15314 break;
15315 case RISCVISD::VSEXT_VL:
15316 SupportsSExt = true;
15317 break;
15318 case RISCVISD::FP_EXTEND_VL: {
15319 MVT NarrowEltVT =
15320 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15321 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15322 break;
15323 SupportsFPExt = true;
15324 break;
15325 }
15326 case ISD::SPLAT_VECTOR:
15327 case RISCVISD::VMV_V_X_VL:
15328 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15329 break;
15330 case RISCVISD::VFMV_V_F_VL: {
15331 MVT VT = OrigOperand.getSimpleValueType();
15332
15333 if (!OrigOperand.getOperand(0).isUndef())
15334 break;
15335
15336 SDValue Op = OrigOperand.getOperand(1);
15337 if (Op.getOpcode() != ISD::FP_EXTEND)
15338 break;
15339
15340 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15341 Subtarget))
15342 break;
15343
15344 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15345 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15346 if (NarrowSize != ScalarBits)
15347 break;
15348
15349 SupportsFPExt = true;
15350 break;
15351 }
15352 default:
15353 break;
15354 }
15355 }
15356
15357 /// Check if \p Root supports any extension folding combines.
15358 static bool isSupportedRoot(const SDNode *Root,
15359 const RISCVSubtarget &Subtarget) {
15360 switch (Root->getOpcode()) {
15361 case ISD::ADD:
15362 case ISD::SUB:
15363 case ISD::MUL: {
15364 return Root->getValueType(0).isScalableVector();
15365 }
15366 case ISD::OR: {
15367 return Root->getValueType(0).isScalableVector() &&
15368 Root->getFlags().hasDisjoint();
15369 }
15370 // Vector Widening Integer Add/Sub/Mul Instructions
15371 case RISCVISD::ADD_VL:
15372 case RISCVISD::MUL_VL:
15375 case RISCVISD::SUB_VL:
15378 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15379 case RISCVISD::FADD_VL:
15380 case RISCVISD::FSUB_VL:
15381 case RISCVISD::FMUL_VL:
15384 return true;
15385 case ISD::SHL:
15386 return Root->getValueType(0).isScalableVector() &&
15387 Subtarget.hasStdExtZvbb();
15388 case RISCVISD::SHL_VL:
15389 return Subtarget.hasStdExtZvbb();
15394 return true;
15395 default:
15396 return false;
15397 }
15398 }
15399
15400 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15401 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15402 const RISCVSubtarget &Subtarget) {
15403 assert(isSupportedRoot(Root, Subtarget) &&
15404 "Trying to build an helper with an "
15405 "unsupported root");
15406 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15408 OrigOperand = Root->getOperand(OperandIdx);
15409
15410 unsigned Opc = Root->getOpcode();
15411 switch (Opc) {
15412 // We consider
15413 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15414 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15415 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15416 case RISCVISD::VWADD_W_VL:
15417 case RISCVISD::VWADDU_W_VL:
15418 case RISCVISD::VWSUB_W_VL:
15419 case RISCVISD::VWSUBU_W_VL:
15420 case RISCVISD::VFWADD_W_VL:
15421 case RISCVISD::VFWSUB_W_VL:
15422 if (OperandIdx == 1) {
15423 SupportsZExt =
15424 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
15425 SupportsSExt =
15426 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
15427 SupportsFPExt =
15428 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
15429 // There's no existing extension here, so we don't have to worry about
15430 // making sure it gets removed.
15431 EnforceOneUse = false;
15432 break;
15433 }
15434 [[fallthrough]];
15435 default:
15436 fillUpExtensionSupport(Root, DAG, Subtarget);
15437 break;
15438 }
15439 }
15440
15441 /// Helper function to get the Mask and VL from \p Root.
15442 static std::pair<SDValue, SDValue>
15443 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15444 const RISCVSubtarget &Subtarget) {
15445 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15446 switch (Root->getOpcode()) {
15447 case ISD::ADD:
15448 case ISD::SUB:
15449 case ISD::MUL:
15450 case ISD::OR:
15451 case ISD::SHL: {
15452 SDLoc DL(Root);
15453 MVT VT = Root->getSimpleValueType(0);
15454 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15455 }
15456 default:
15457 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15458 }
15459 }
15460
15461 /// Helper function to check if \p N is commutative with respect to the
15462 /// foldings that are supported by this class.
15463 static bool isCommutative(const SDNode *N) {
15464 switch (N->getOpcode()) {
15465 case ISD::ADD:
15466 case ISD::MUL:
15467 case ISD::OR:
15468 case RISCVISD::ADD_VL:
15469 case RISCVISD::MUL_VL:
15472 case RISCVISD::FADD_VL:
15473 case RISCVISD::FMUL_VL:
15479 return true;
15480 case ISD::SUB:
15481 case RISCVISD::SUB_VL:
15484 case RISCVISD::FSUB_VL:
15486 case ISD::SHL:
15487 case RISCVISD::SHL_VL:
15488 return false;
15489 default:
15490 llvm_unreachable("Unexpected opcode");
15491 }
15492 }
15493
15494 /// Get a list of combine to try for folding extensions in \p Root.
15495 /// Note that each returned CombineToTry function doesn't actually modify
15496 /// anything. Instead, they produce an optional CombineResult that, if not
15497 /// std::nullopt, needs to be materialized for the combine to be applied.
15498 /// \see CombineResult::materialize.
15499 /// If the related CombineToTry function returns std::nullopt, that means the
15500 /// combine didn't match.
15501 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15502};
15503
15504/// Helper structure that holds all the necessary information to materialize a
15505/// combine that does some extension folding.
15506struct CombineResult {
15507 /// Opcode to be generated when materializing the combine.
15508 unsigned TargetOpcode;
15509 // No value means no extension is needed.
15510 std::optional<ExtKind> LHSExt;
15511 std::optional<ExtKind> RHSExt;
15512 /// Root of the combine.
15513 SDNode *Root;
15514 /// LHS of the TargetOpcode.
15515 NodeExtensionHelper LHS;
15516 /// RHS of the TargetOpcode.
15517 NodeExtensionHelper RHS;
15518
15519 CombineResult(unsigned TargetOpcode, SDNode *Root,
15520 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15521 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15522 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15523 LHS(LHS), RHS(RHS) {}
15524
15525 /// Return a value that uses TargetOpcode and that can be used to replace
15526 /// Root.
15527 /// The actual replacement is *not* done in that method.
15528 SDValue materialize(SelectionDAG &DAG,
15529 const RISCVSubtarget &Subtarget) const {
15530 SDValue Mask, VL, Passthru;
15531 std::tie(Mask, VL) =
15532 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15533 switch (Root->getOpcode()) {
15534 default:
15535 Passthru = Root->getOperand(2);
15536 break;
15537 case ISD::ADD:
15538 case ISD::SUB:
15539 case ISD::MUL:
15540 case ISD::OR:
15541 case ISD::SHL:
15542 Passthru = DAG.getUNDEF(Root->getValueType(0));
15543 break;
15544 }
15545 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15546 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15547 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15548 Passthru, Mask, VL);
15549 }
15550};
15551
15552/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15553/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15554/// are zext) and LHS and RHS can be folded into Root.
15556 /// AllowExtMask defines which form `ext` can take in this pattern.
15556///
15557/// \note If the pattern can match with both zext and sext, the returned
15558/// CombineResult will feature the zext result.
15559///
15560/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15561/// can be used to apply the pattern.
15562static std::optional<CombineResult>
15563canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15564 const NodeExtensionHelper &RHS,
15565 uint8_t AllowExtMask, SelectionDAG &DAG,
15566 const RISCVSubtarget &Subtarget) {
15567 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15568 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15569 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15570 /*RHSExt=*/{ExtKind::ZExt});
15571 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15572 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15573 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15574 /*RHSExt=*/{ExtKind::SExt});
15575 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15576 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15577 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15578 /*RHSExt=*/{ExtKind::FPExt});
15579 return std::nullopt;
15580}
15581
15582/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15583/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15584/// are zext) and LHS and RHS can be folded into Root.
15585///
15586/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15587/// can be used to apply the pattern.
15588static std::optional<CombineResult>
15589canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15590 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15591 const RISCVSubtarget &Subtarget) {
15592 return canFoldToVWWithSameExtensionImpl(
15593 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15594 Subtarget);
15595}
15596
15597/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15598///
15599/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15600/// can be used to apply the pattern.
15601static std::optional<CombineResult>
15602canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15603 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15604 const RISCVSubtarget &Subtarget) {
15605 if (RHS.SupportsFPExt)
15606 return CombineResult(
15607 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15608 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15609
15610 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15611 // sext/zext?
15612 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15613 // purposes.
15614 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15615 return CombineResult(
15616 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15617 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15618 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15619 return CombineResult(
15620 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15621 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15622 return std::nullopt;
15623}
15624
15625/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15626///
15627/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15628/// can be used to apply the pattern.
15629static std::optional<CombineResult>
15630canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15631 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15632 const RISCVSubtarget &Subtarget) {
15633 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15634 Subtarget);
15635}
15636
15637/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15638///
15639/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15640/// can be used to apply the pattern.
15641static std::optional<CombineResult>
15642canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15643 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15644 const RISCVSubtarget &Subtarget) {
15645 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15646 Subtarget);
15647}
15648
15649/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15650///
15651/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15652/// can be used to apply the pattern.
15653static std::optional<CombineResult>
15654canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15655 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15656 const RISCVSubtarget &Subtarget) {
15657 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15658 Subtarget);
15659}
15660
15661/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15662///
15663/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15664/// can be used to apply the pattern.
15665static std::optional<CombineResult>
15666canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15667 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15668 const RISCVSubtarget &Subtarget) {
15669
15670 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15671 return std::nullopt;
15672 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15673 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15674 /*RHSExt=*/{ExtKind::ZExt});
15675}
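// [Editor's note: illustrative sketch, not part of the upstream file.] The
// sext/zext mix matched above corresponds to vwmulsu, which multiplies a
// sign-extended operand by a zero-extended one. Scalar analogue (assumes
// <cstdint> is available):
static constexpr int32_t widenedMulSU(int16_t A, uint16_t B) {
  // int32_t(B) is the zero-extended value of B; the product always fits in
  // 32 bits, matching a vwmulsu.vv lane.
  return int32_t(A) * int32_t(B);
}
static_assert(widenedMulSU(-1, 0xFFFF) == -65535,
              "sext * zext computed in the wide type matches vwmulsu");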
15676
15677SmallVector<NodeExtensionHelper::CombineToTry>
15678NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15679 SmallVector<CombineToTry> Strategies;
15680 switch (Root->getOpcode()) {
15681 case ISD::ADD:
15682 case ISD::SUB:
15683 case ISD::OR:
15684 case RISCVISD::ADD_VL:
15685 case RISCVISD::SUB_VL:
15686 case RISCVISD::FADD_VL:
15687 case RISCVISD::FSUB_VL:
15688    // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15689    Strategies.push_back(canFoldToVWWithSameExtension);
15690    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15691 Strategies.push_back(canFoldToVW_W);
15692 break;
15693 case RISCVISD::FMUL_VL:
15698 Strategies.push_back(canFoldToVWWithSameExtension);
15699 break;
15700 case ISD::MUL:
15701 case RISCVISD::MUL_VL:
15702 // mul -> vwmul(u)
15703 Strategies.push_back(canFoldToVWWithSameExtension);
15704 // mul -> vwmulsu
15705 Strategies.push_back(canFoldToVW_SU);
15706 break;
15707 case ISD::SHL:
15708 case RISCVISD::SHL_VL:
15709 // shl -> vwsll
15710 Strategies.push_back(canFoldToVWWithZEXT);
15711 break;
15712  case RISCVISD::VWADD_W_VL:
15713  case RISCVISD::VWSUB_W_VL:
15714    // vwadd_w|vwsub_w -> vwadd|vwsub
15715 Strategies.push_back(canFoldToVWWithSEXT);
15716 break;
15717  case RISCVISD::VWADDU_W_VL:
15718  case RISCVISD::VWSUBU_W_VL:
15719    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15720 Strategies.push_back(canFoldToVWWithZEXT);
15721 break;
15722  case RISCVISD::VFWADD_W_VL:
15723  case RISCVISD::VFWSUB_W_VL:
15724    // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15725 Strategies.push_back(canFoldToVWWithFPEXT);
15726 break;
15727 default:
15728 llvm_unreachable("Unexpected opcode");
15729 }
15730 return Strategies;
15731}
15732} // End anonymous namespace.
15733
15734/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15735/// The supported combines are:
15736/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15737/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15738/// mul | mul_vl -> vwmul(u) | vwmul_su
15739/// shl | shl_vl -> vwsll
15740/// fadd_vl -> vfwadd | vfwadd_w
15741/// fsub_vl -> vfwsub | vfwsub_w
15742/// fmul_vl -> vfwmul
15743/// vwadd_w(u) -> vwadd(u)
15744/// vwsub_w(u) -> vwsub(u)
15745/// vfwadd_w -> vfwadd
15746/// vfwsub_w -> vfwsub
15747static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15748                                     TargetLowering::DAGCombinerInfo &DCI,
15749                                     const RISCVSubtarget &Subtarget) {
15750 SelectionDAG &DAG = DCI.DAG;
15751 if (DCI.isBeforeLegalize())
15752 return SDValue();
15753
15754 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15755 return SDValue();
15756
15757 SmallVector<SDNode *> Worklist;
15758 SmallSet<SDNode *, 8> Inserted;
15759 Worklist.push_back(N);
15760 Inserted.insert(N);
15761 SmallVector<CombineResult> CombinesToApply;
15762
15763 while (!Worklist.empty()) {
15764 SDNode *Root = Worklist.pop_back_val();
15765
15766 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15767 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15768 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15769 &Inserted](const NodeExtensionHelper &Op) {
15770 if (Op.needToPromoteOtherUsers()) {
15771 for (SDUse &Use : Op.OrigOperand->uses()) {
15772 SDNode *TheUser = Use.getUser();
15773 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15774 return false;
15775 // We only support the first 2 operands of FMA.
15776 if (Use.getOperandNo() >= 2)
15777 return false;
15778 if (Inserted.insert(TheUser).second)
15779 Worklist.push_back(TheUser);
15780 }
15781 }
15782 return true;
15783 };
15784
15785    // Control the compile time by limiting the number of nodes we look at in
15786    // total.
15787 if (Inserted.size() > ExtensionMaxWebSize)
15788 return SDValue();
15789
15790    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15791        NodeExtensionHelper::getSupportedFoldings(Root);
15792
15793 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15794 bool Matched = false;
15795 for (int Attempt = 0;
15796 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15797 ++Attempt) {
15798
15799 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15800 FoldingStrategies) {
15801 std::optional<CombineResult> Res =
15802 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15803 if (Res) {
15804 Matched = true;
15805 CombinesToApply.push_back(*Res);
15806 // All the inputs that are extended need to be folded, otherwise
15807          // we would be leaving the old input (since it may still be used),
15808 // and the new one.
15809 if (Res->LHSExt.has_value())
15810 if (!AppendUsersIfNeeded(LHS))
15811 return SDValue();
15812 if (Res->RHSExt.has_value())
15813 if (!AppendUsersIfNeeded(RHS))
15814 return SDValue();
15815 break;
15816 }
15817 }
15818 std::swap(LHS, RHS);
15819 }
15820 // Right now we do an all or nothing approach.
15821 if (!Matched)
15822 return SDValue();
15823 }
15824 // Store the value for the replacement of the input node separately.
15825 SDValue InputRootReplacement;
15826 // We do the RAUW after we materialize all the combines, because some replaced
15827 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15828 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15829 // yet-to-be-visited CombinesToApply roots.
15830  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15831  ValuesToReplace.reserve(CombinesToApply.size());
15832 for (CombineResult Res : CombinesToApply) {
15833 SDValue NewValue = Res.materialize(DAG, Subtarget);
15834 if (!InputRootReplacement) {
15835 assert(Res.Root == N &&
15836 "First element is expected to be the current node");
15837 InputRootReplacement = NewValue;
15838 } else {
15839 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15840 }
15841 }
15842 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15843 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15844 DCI.AddToWorklist(OldNewValues.second.getNode());
15845 }
15846 return InputRootReplacement;
15847}
15848
15849// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15850// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15851// y will be the Passthru and cond will be the Mask.
15852static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15853  unsigned Opc = N->getOpcode();
15856
15857 SDValue Y = N->getOperand(0);
15858 SDValue MergeOp = N->getOperand(1);
15859 unsigned MergeOpc = MergeOp.getOpcode();
15860
15861 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15862 return SDValue();
15863
15864 SDValue X = MergeOp->getOperand(1);
15865
15866 if (!MergeOp.hasOneUse())
15867 return SDValue();
15868
15869 // Passthru should be undef
15870 SDValue Passthru = N->getOperand(2);
15871 if (!Passthru.isUndef())
15872 return SDValue();
15873
15874 // Mask should be all ones
15875 SDValue Mask = N->getOperand(3);
15876 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15877 return SDValue();
15878
15879 // False value of MergeOp should be all zeros
15880 SDValue Z = MergeOp->getOperand(2);
15881
15882 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15883 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15884 Z = Z.getOperand(1);
15885
15886 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15887 return SDValue();
15888
15889 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15890 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15891 N->getFlags());
15892}
15893
15896 const RISCVSubtarget &Subtarget) {
15897 [[maybe_unused]] unsigned Opc = N->getOpcode();
15900
15901 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15902 return V;
15903
15904 return combineVWADDSUBWSelect(N, DCI.DAG);
15905}
15906
15907// Helper function for performMemPairCombine.
15908// Try to combine the memory loads/stores LSNode1 and LSNode2
15909// into a single memory pair operation.
15910static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15911                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
15912 uint64_t Imm) {
15914 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15915
15916 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15917 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15918 return SDValue();
15919
15920  MachineFunction &MF = DAG.getMachineFunction();
15921  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15922
15923 // The new operation has twice the width.
15924 MVT XLenVT = Subtarget.getXLenVT();
15925 EVT MemVT = LSNode1->getMemoryVT();
15926 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15927 MachineMemOperand *MMO = LSNode1->getMemOperand();
15928  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15929      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15930
15931 if (LSNode1->getOpcode() == ISD::LOAD) {
15932 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15933 unsigned Opcode;
15934 if (MemVT == MVT::i32)
15935 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15936 else
15937 Opcode = RISCVISD::TH_LDD;
15938
15939 SDValue Res = DAG.getMemIntrinsicNode(
15940 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15941 {LSNode1->getChain(), BasePtr,
15942 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15943 NewMemVT, NewMMO);
15944
15945 SDValue Node1 =
15946 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15947 SDValue Node2 =
15948 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15949
15950 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15951 return Node1;
15952 } else {
15953 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15954
15955 SDValue Res = DAG.getMemIntrinsicNode(
15956 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15957 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15958 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15959 NewMemVT, NewMMO);
15960
15961 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15962 return Res;
15963 }
15964}
15965
15966// Try to combine two adjacent loads/stores to a single pair instruction from
15967// the XTHeadMemPair vendor extension.
15968static SDValue performMemPairCombine(SDNode *N,
15969                                     TargetLowering::DAGCombinerInfo &DCI) {
15970  SelectionDAG &DAG = DCI.DAG;
15971  MachineFunction &MF = DAG.getMachineFunction();
15972  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15973
15974 // Target does not support load/store pair.
15975 if (!Subtarget.hasVendorXTHeadMemPair())
15976 return SDValue();
15977
15978 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15979 EVT MemVT = LSNode1->getMemoryVT();
15980 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15981
15982 // No volatile, indexed or atomic loads/stores.
15983 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15984 return SDValue();
15985
15986 // Function to get a base + constant representation from a memory value.
15987 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15988 if (Ptr->getOpcode() == ISD::ADD)
15989 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15990 return {Ptr->getOperand(0), C1->getZExtValue()};
15991 return {Ptr, 0};
15992 };
15993
15994 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15995
15996 SDValue Chain = N->getOperand(0);
15997 for (SDUse &Use : Chain->uses()) {
15998 if (Use.getUser() != N && Use.getResNo() == 0 &&
15999 Use.getUser()->getOpcode() == N->getOpcode()) {
16000 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16001
16002 // No volatile, indexed or atomic loads/stores.
16003 if (!LSNode2->isSimple() || LSNode2->isIndexed())
16004 continue;
16005
16006 // Check if LSNode1 and LSNode2 have the same type and extension.
16007 if (LSNode1->getOpcode() == ISD::LOAD)
16008 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16009 cast<LoadSDNode>(LSNode1)->getExtensionType())
16010 continue;
16011
16012 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16013 continue;
16014
16015 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16016
16017      // Check if the base pointer is the same for both instructions.
16018 if (Base1 != Base2)
16019 continue;
16020
16021      // Check if the offsets match the XTHeadMemPair encoding constraints.
16022 bool Valid = false;
16023 if (MemVT == MVT::i32) {
16024 // Check for adjacent i32 values and a 2-bit index.
16025 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16026 Valid = true;
16027 } else if (MemVT == MVT::i64) {
16028 // Check for adjacent i64 values and a 2-bit index.
16029 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16030 Valid = true;
16031 }
16032
16033 if (!Valid)
16034 continue;
16035
16036 // Try to combine.
16037 if (SDValue Res =
16038 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16039 return Res;
16040 }
16041 }
16042
16043 return SDValue();
16044}
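// [Editor's note: illustrative sketch, not part of the upstream file.] A scalar
// model of the i32 offset constraint checked above: the first offset must be a
// 2-bit immediate scaled by 8 (isShiftedUInt<2, 3>), and the second access must
// sit exactly 4 bytes above it.
static constexpr bool isValidLWDPairOffsets(uint64_t Offset1, uint64_t Offset2) {
  bool IsScaledUInt2 = (Offset1 % 8 == 0) && (Offset1 / 8 < 4); // 0, 8, 16, 24
  return IsScaledUInt2 && (Offset1 + 4 == Offset2);
}
static_assert(isValidLWDPairOffsets(16, 20), "imm2 = 2 encodes offsets 16/20");
static_assert(!isValidLWDPairOffsets(32, 36), "imm2 would need more than 2 bits");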
16045
16046// Fold
16047// (fp_to_int (froundeven X)) -> fcvt X, rne
16048// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16049// (fp_to_int (ffloor X)) -> fcvt X, rdn
16050// (fp_to_int (fceil X)) -> fcvt X, rup
16051// (fp_to_int (fround X)) -> fcvt X, rmm
16052// (fp_to_int (frint X)) -> fcvt X
16055 const RISCVSubtarget &Subtarget) {
16056 SelectionDAG &DAG = DCI.DAG;
16057 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16058 MVT XLenVT = Subtarget.getXLenVT();
16059
16060 SDValue Src = N->getOperand(0);
16061
16062 // Don't do this for strict-fp Src.
16063 if (Src->isStrictFPOpcode())
16064 return SDValue();
16065
16066 // Ensure the FP type is legal.
16067 if (!TLI.isTypeLegal(Src.getValueType()))
16068 return SDValue();
16069
16070 // Don't do this for f16 with Zfhmin and not Zfh.
16071 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16072 return SDValue();
16073
16074 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16075 // If the result is invalid, we didn't find a foldable instruction.
16076 if (FRM == RISCVFPRndMode::Invalid)
16077 return SDValue();
16078
16079 SDLoc DL(N);
16080 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16081 EVT VT = N->getValueType(0);
16082
16083 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16084 MVT SrcVT = Src.getSimpleValueType();
16085 MVT SrcContainerVT = SrcVT;
16086 MVT ContainerVT = VT.getSimpleVT();
16087 SDValue XVal = Src.getOperand(0);
16088
16089 // For widening and narrowing conversions we just combine it into a
16090 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16091 // end up getting lowered to their appropriate pseudo instructions based on
16092 // their operand types
16093 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16094 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16095 return SDValue();
16096
16097 // Make fixed-length vectors scalable first
16098 if (SrcVT.isFixedLengthVector()) {
16099 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16100 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16101 ContainerVT =
16102 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16103 }
16104
16105 auto [Mask, VL] =
16106 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16107
16108 SDValue FpToInt;
16109 if (FRM == RISCVFPRndMode::RTZ) {
16110 // Use the dedicated trunc static rounding mode if we're truncating so we
16111 // don't need to generate calls to fsrmi/fsrm
16112 unsigned Opc =
16113          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16114      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16115 } else {
16116 unsigned Opc =
16117          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16118      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16119 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16120 }
16121
16122 // If converted from fixed-length to scalable, convert back
16123 if (VT.isFixedLengthVector())
16124 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16125
16126 return FpToInt;
16127 }
16128
16129 // Only handle XLen or i32 types. Other types narrower than XLen will
16130 // eventually be legalized to XLenVT.
16131 if (VT != MVT::i32 && VT != XLenVT)
16132 return SDValue();
16133
16134 unsigned Opc;
16135 if (VT == XLenVT)
16136 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16137 else
16138    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16139
16140 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16141 DAG.getTargetConstant(FRM, DL, XLenVT));
16142 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16143}
16144
16145// Fold
16146// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16147// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16148// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16149// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16150// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16151// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16154 const RISCVSubtarget &Subtarget) {
16155 SelectionDAG &DAG = DCI.DAG;
16156 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16157 MVT XLenVT = Subtarget.getXLenVT();
16158
16159 // Only handle XLen types. Other types narrower than XLen will eventually be
16160 // legalized to XLenVT.
16161 EVT DstVT = N->getValueType(0);
16162 if (DstVT != XLenVT)
16163 return SDValue();
16164
16165 SDValue Src = N->getOperand(0);
16166
16167 // Don't do this for strict-fp Src.
16168 if (Src->isStrictFPOpcode())
16169 return SDValue();
16170
16171 // Ensure the FP type is also legal.
16172 if (!TLI.isTypeLegal(Src.getValueType()))
16173 return SDValue();
16174
16175 // Don't do this for f16 with Zfhmin and not Zfh.
16176 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16177 return SDValue();
16178
16179 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16180
16181 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16182 if (FRM == RISCVFPRndMode::Invalid)
16183 return SDValue();
16184
16185 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16186
16187 unsigned Opc;
16188 if (SatVT == DstVT)
16189 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16190 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16191    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16192  else
16193 return SDValue();
16194 // FIXME: Support other SatVTs by clamping before or after the conversion.
16195
16196 Src = Src.getOperand(0);
16197
16198 SDLoc DL(N);
16199 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16200 DAG.getTargetConstant(FRM, DL, XLenVT));
16201
16202 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16203 // extend.
16204 if (Opc == RISCVISD::FCVT_WU_RV64)
16205 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16206
16207 // RISC-V FP-to-int conversions saturate to the destination register size, but
16208 // don't produce 0 for nan.
16209 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16210 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16211}
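// [Editor's note: illustrative sketch, not part of the upstream file.] Scalar
// model of the select emitted above: fcvt already saturates out-of-range
// inputs, but it does not produce 0 for NaN, hence the SETUO (X unordered with
// itself) compare. 'FcvtResult' is a hypothetical stand-in for the value an
// fcvt with the matched rounding mode would produce.
static inline int32_t fpToInt32SatModel(float X, int32_t FcvtResult) {
  return (X != X) ? 0 : FcvtResult; // X != X is true only for NaN
}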
16212
16213// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16214// smaller than XLenVT.
16216 const RISCVSubtarget &Subtarget) {
16217 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16218
16219 SDValue Src = N->getOperand(0);
16220 if (Src.getOpcode() != ISD::BSWAP)
16221 return SDValue();
16222
16223 EVT VT = N->getValueType(0);
16224 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16225 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16226 return SDValue();
16227
16228 SDLoc DL(N);
16229 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16230}
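// [Editor's note: illustrative sketch, not part of the upstream file.] Why the
// fold is sound: a full bit reverse also reverses byte order, so composing it
// with bswap restores the byte order and leaves only the per-byte bit reversal
// that brev8 performs. Self-checking i16 model (assumes <cstdint>):
static constexpr uint16_t swapBytes16(uint16_t X) {
  return uint16_t((X << 8) | (X >> 8));
}
static constexpr uint16_t reverseBits16(uint16_t X) {
  uint16_t R = 0;
  for (int I = 0; I < 16; ++I)
    if (X & (1u << I))
      R = uint16_t(R | (1u << (15 - I)));
  return R;
}
static constexpr uint16_t brev8Model16(uint16_t X) {
  uint16_t R = 0;
  for (int Byte = 0; Byte < 2; ++Byte)
    for (int I = 0; I < 8; ++I)
      if (X & (1u << (Byte * 8 + I)))
        R = uint16_t(R | (1u << (Byte * 8 + 7 - I)));
  return R;
}
static_assert(reverseBits16(swapBytes16(0x12F0)) == brev8Model16(0x12F0),
              "bitreverse(bswap(x)) is a per-byte bit reverse");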
16231
16233 const RISCVSubtarget &Subtarget) {
16234 // Fold:
16235 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
16236
16237 // Check if its first operand is a vp.load.
16238 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16239 if (!VPLoad)
16240 return SDValue();
16241
16242 EVT LoadVT = VPLoad->getValueType(0);
16243 // We do not have a strided_load version for masks, and the evl of vp.reverse
16244 // and vp.load should always be the same.
16245 if (!LoadVT.getVectorElementType().isByteSized() ||
16246 N->getOperand(2) != VPLoad->getVectorLength() ||
16247 !N->getOperand(0).hasOneUse())
16248 return SDValue();
16249
16250  // Check if the mask of the outer vp.reverse is all ones.
16251 if (!isOneOrOneSplat(N->getOperand(1)))
16252 return SDValue();
16253
16254 SDValue LoadMask = VPLoad->getMask();
16255 // If Mask is all ones, then load is unmasked and can be reversed.
16256 if (!isOneOrOneSplat(LoadMask)) {
16257 // If the mask is not all ones, we can reverse the load if the mask was also
16258 // reversed by an unmasked vp.reverse with the same EVL.
16259 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16260 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16261 LoadMask.getOperand(2) != VPLoad->getVectorLength())
16262 return SDValue();
16263 LoadMask = LoadMask.getOperand(0);
16264 }
16265
16266 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
16267 SDLoc DL(N);
16268 MVT XLenVT = Subtarget.getXLenVT();
16269 SDValue NumElem = VPLoad->getVectorLength();
16270 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16271
16272 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16273 DAG.getConstant(1, DL, XLenVT));
16274 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16275 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16276 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
16277 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16278
16280 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16282 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16283 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16284
16285 SDValue Ret = DAG.getStridedLoadVP(
16286 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
16287 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16288
16289 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16290
16291 return Ret;
16292}
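// [Editor's note: illustrative sketch, not part of the upstream file.] Address
// arithmetic used above: the strided load starts at the last element and walks
// backwards with a negative stride, so lane i reads what lane (EVL-1-i) of the
// original vp.load would have read.
static constexpr uint64_t reversedLaneAddr(uint64_t LoadAddr, uint64_t EVL,
                                           uint64_t ElemBytes, uint64_t Lane) {
  uint64_t Base = LoadAddr + (EVL - 1) * ElemBytes; // address of last element
  return Base - Lane * ElemBytes;                   // stride is -ElemBytes
}
static_assert(reversedLaneAddr(0x1000, 4, 4, 0) == 0x100C,
              "lane 0 of the reversed result is the last loaded element");
static_assert(reversedLaneAddr(0x1000, 4, 4, 3) == 0x1000,
              "the last lane is the first loaded element");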
16293
16294// Convert from one FMA opcode to another based on whether we are negating the
16295// multiply result and/or the accumulator.
16296// NOTE: Only supports RVV operations with VL.
16297static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16298 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16299 if (NegMul) {
16300 // clang-format off
16301 switch (Opcode) {
16302 default: llvm_unreachable("Unexpected opcode");
16303 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16304 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16305 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16306 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16307    case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16308    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16309    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16310    case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16311    }
16312 // clang-format on
16313 }
16314
16315 // Negating the accumulator changes ADD<->SUB.
16316 if (NegAcc) {
16317 // clang-format off
16318 switch (Opcode) {
16319 default: llvm_unreachable("Unexpected opcode");
16320 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16321 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16322 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16323 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16324    case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16325    case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16326    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16327    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16328    }
16329 // clang-format on
16330 }
16331
16332 return Opcode;
16333}
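// [Editor's note: illustrative sketch, not part of the upstream file.] The
// algebra behind the opcode swaps above, ignoring rounding/fusion details:
// negating the product of a*b+c yields -(a*b)+c (VFNMSUB), and negating the
// accumulator yields a*b-c (VFMSUB).
static constexpr double fmaddModel(double A, double B, double C) { return A * B + C; }
static constexpr double fmsubModel(double A, double B, double C) { return A * B - C; }
static constexpr double fnmsubModel(double A, double B, double C) { return -(A * B) + C; }
static_assert(fmaddModel(-2.0, 3.0, 4.0) == fnmsubModel(2.0, 3.0, 4.0),
              "negated multiply turns VFMADD into VFNMSUB");
static_assert(fmaddModel(2.0, 3.0, -4.0) == fmsubModel(2.0, 3.0, 4.0),
              "negated accumulator turns VFMADD into VFMSUB");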
16334
16336 // Fold FNEG_VL into FMA opcodes.
16337 // The first operand of strict-fp is chain.
16338 bool IsStrict =
16339 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16340 unsigned Offset = IsStrict ? 1 : 0;
16341 SDValue A = N->getOperand(0 + Offset);
16342 SDValue B = N->getOperand(1 + Offset);
16343 SDValue C = N->getOperand(2 + Offset);
16344 SDValue Mask = N->getOperand(3 + Offset);
16345 SDValue VL = N->getOperand(4 + Offset);
16346
16347 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16348 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16349 V.getOperand(2) == VL) {
16350 // Return the negated input.
16351 V = V.getOperand(0);
16352 return true;
16353 }
16354
16355 return false;
16356 };
16357
16358 bool NegA = invertIfNegative(A);
16359 bool NegB = invertIfNegative(B);
16360 bool NegC = invertIfNegative(C);
16361
16362 // If no operands are negated, we're done.
16363 if (!NegA && !NegB && !NegC)
16364 return SDValue();
16365
16366 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16367 if (IsStrict)
16368 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16369 {N->getOperand(0), A, B, C, Mask, VL});
16370 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16371 VL);
16372}
16373
16376 const RISCVSubtarget &Subtarget) {
16377 SelectionDAG &DAG = DCI.DAG;
16378
16380 return V;
16381
16382 // FIXME: Ignore strict opcodes for now.
16383 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16384 return SDValue();
16385
16386 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16387}
16388
16390 const RISCVSubtarget &Subtarget) {
16391 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16392
16393 EVT VT = N->getValueType(0);
16394
16395 if (VT != Subtarget.getXLenVT())
16396 return SDValue();
16397
16398 if (!isa<ConstantSDNode>(N->getOperand(1)))
16399 return SDValue();
16400 uint64_t ShAmt = N->getConstantOperandVal(1);
16401
16402 SDValue N0 = N->getOperand(0);
16403
16404 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16405 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16406 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16407 unsigned ExtSize =
16408 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16409 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16410 N0.getOperand(0).hasOneUse() &&
16411 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16412 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16413 if (LShAmt < ExtSize) {
16414 unsigned Size = VT.getSizeInBits();
16415 SDLoc ShlDL(N0.getOperand(0));
16416 SDValue Shl =
16417 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16418 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16419 SDLoc DL(N);
16420 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16421 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16422 }
16423 }
16424 }
16425
16426 if (ShAmt > 32 || VT != MVT::i64)
16427 return SDValue();
16428
16429 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16430 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16431 //
16432 // Also try these folds where an add or sub is in the middle.
16433  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
16434  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
16435 SDValue Shl;
16436 ConstantSDNode *AddC = nullptr;
16437
16438 // We might have an ADD or SUB between the SRA and SHL.
16439 bool IsAdd = N0.getOpcode() == ISD::ADD;
16440 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16441 // Other operand needs to be a constant we can modify.
16442 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16443 if (!AddC)
16444 return SDValue();
16445
16446 // AddC needs to have at least 32 trailing zeros.
16447 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16448 return SDValue();
16449
16450 // All users should be a shift by constant less than or equal to 32. This
16451 // ensures we'll do this optimization for each of them to produce an
16452 // add/sub+sext_inreg they can all share.
16453 for (SDNode *U : N0->users()) {
16454 if (U->getOpcode() != ISD::SRA ||
16455 !isa<ConstantSDNode>(U->getOperand(1)) ||
16456 U->getConstantOperandVal(1) > 32)
16457 return SDValue();
16458 }
16459
16460 Shl = N0.getOperand(IsAdd ? 0 : 1);
16461 } else {
16462 // Not an ADD or SUB.
16463 Shl = N0;
16464 }
16465
16466 // Look for a shift left by 32.
16467 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16468 Shl.getConstantOperandVal(1) != 32)
16469 return SDValue();
16470
16471  // If we didn't look through an add/sub, then the shl should have one use.
16472 // If we did look through an add/sub, the sext_inreg we create is free so
16473 // we're only creating 2 new instructions. It's enough to only remove the
16474 // original sra+add/sub.
16475 if (!AddC && !Shl.hasOneUse())
16476 return SDValue();
16477
16478 SDLoc DL(N);
16479 SDValue In = Shl.getOperand(0);
16480
16481 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16482 // constant.
16483 if (AddC) {
16484 SDValue ShiftedAddC =
16485 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16486 if (IsAdd)
16487 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16488 else
16489 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16490 }
16491
16492 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16493 DAG.getValueType(MVT::i32));
16494 if (ShAmt == 32)
16495 return SExt;
16496
16497 return DAG.getNode(
16498 ISD::SHL, DL, MVT::i64, SExt,
16499 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16500}
16501
16502// Invert (and/or (setcc cc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
16503// the result is used as the condition of a br_cc or select_cc we can invert,
16504// inverting the setcc is free, and Z is 0/1. Caller will invert the
16505// br_cc/select_cc.
16506static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16507  bool IsAnd = Cond.getOpcode() == ISD::AND;
16508 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16509 return SDValue();
16510
16511 if (!Cond.hasOneUse())
16512 return SDValue();
16513
16514 SDValue Setcc = Cond.getOperand(0);
16515 SDValue Xor = Cond.getOperand(1);
16516 // Canonicalize setcc to LHS.
16517 if (Setcc.getOpcode() != ISD::SETCC)
16518 std::swap(Setcc, Xor);
16519 // LHS should be a setcc and RHS should be an xor.
16520 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16521 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16522 return SDValue();
16523
16524 // If the condition is an And, SimplifyDemandedBits may have changed
16525 // (xor Z, 1) to (not Z).
16526 SDValue Xor1 = Xor.getOperand(1);
16527 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16528 return SDValue();
16529
16530 EVT VT = Cond.getValueType();
16531 SDValue Xor0 = Xor.getOperand(0);
16532
16533 // The LHS of the xor needs to be 0/1.
16534  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16535  if (!DAG.MaskedValueIsZero(Xor0, Mask))
16536 return SDValue();
16537
16538 // We can only invert integer setccs.
16539 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16540 if (!SetCCOpVT.isScalarInteger())
16541 return SDValue();
16542
16543 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16544 if (ISD::isIntEqualitySetCC(CCVal)) {
16545 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16546 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16547 Setcc.getOperand(1), CCVal);
16548 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16549 // Invert (setlt 0, X) by converting to (setlt X, 1).
16550 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16551 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16552 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16553    // Invert (setlt X, 1) by converting to (setlt 0, X).
16554 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16555 DAG.getConstant(0, SDLoc(Setcc), VT),
16556 Setcc.getOperand(0), CCVal);
16557 } else
16558 return SDValue();
16559
16560 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16561 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16562}
16563
16564// Perform common combines for BR_CC and SELECT_CC conditions.
16565static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16566 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16567 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16568
16569  // Since an arithmetic right shift always preserves the sign,
16570  // the shift can be omitted.
16571 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16572 // setge (sra X, N), 0 -> setge X, 0
16573 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16574 LHS.getOpcode() == ISD::SRA) {
16575 LHS = LHS.getOperand(0);
16576 return true;
16577 }
16578
16579 if (!ISD::isIntEqualitySetCC(CCVal))
16580 return false;
16581
16582 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16583 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16584 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16585 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16586 // If we're looking for eq 0 instead of ne 0, we need to invert the
16587 // condition.
16588 bool Invert = CCVal == ISD::SETEQ;
16589 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16590 if (Invert)
16591 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16592
16593 RHS = LHS.getOperand(1);
16594 LHS = LHS.getOperand(0);
16595 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16596
16597 CC = DAG.getCondCode(CCVal);
16598 return true;
16599 }
16600
16601 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16602 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16603 RHS = LHS.getOperand(1);
16604 LHS = LHS.getOperand(0);
16605 return true;
16606 }
16607
16608 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16609 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16610 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16611 SDValue LHS0 = LHS.getOperand(0);
16612 if (LHS0.getOpcode() == ISD::AND &&
16613 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16614 uint64_t Mask = LHS0.getConstantOperandVal(1);
16615 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16616 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16617 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16618 CC = DAG.getCondCode(CCVal);
16619
16620 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16621 LHS = LHS0.getOperand(0);
16622 if (ShAmt != 0)
16623 LHS =
16624 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16625 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16626 return true;
16627 }
16628 }
16629 }
16630
16631  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16632 // This can occur when legalizing some floating point comparisons.
16633 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16634 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16635 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16636 CC = DAG.getCondCode(CCVal);
16637 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16638 return true;
16639 }
16640
16641 if (isNullConstant(RHS)) {
16642 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16643 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16644 CC = DAG.getCondCode(CCVal);
16645 LHS = NewCond;
16646 return true;
16647 }
16648 }
16649
16650 return false;
16651}
16652
16653// Fold
16654// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16655// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16656// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16657// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
16658static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16659                                   SDValue TrueVal, SDValue FalseVal,
16660 bool Swapped) {
16661 bool Commutative = true;
16662 unsigned Opc = TrueVal.getOpcode();
16663 switch (Opc) {
16664 default:
16665 return SDValue();
16666 case ISD::SHL:
16667 case ISD::SRA:
16668 case ISD::SRL:
16669 case ISD::SUB:
16670 Commutative = false;
16671 break;
16672 case ISD::ADD:
16673 case ISD::OR:
16674 case ISD::XOR:
16675 break;
16676 }
16677
16678 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16679 return SDValue();
16680
16681 unsigned OpToFold;
16682 if (FalseVal == TrueVal.getOperand(0))
16683 OpToFold = 0;
16684 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16685 OpToFold = 1;
16686 else
16687 return SDValue();
16688
16689 EVT VT = N->getValueType(0);
16690 SDLoc DL(N);
16691 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16692 EVT OtherOpVT = OtherOp.getValueType();
16693 SDValue IdentityOperand =
16694 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16695 if (!Commutative)
16696 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16697 assert(IdentityOperand && "No identity operand!");
16698
16699 if (Swapped)
16700 std::swap(OtherOp, IdentityOperand);
16701 SDValue NewSel =
16702 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16703 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16704}
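// [Editor's note: illustrative sketch, not part of the upstream file.] The
// identity exploited above for the ADD case: selecting between (Y + X) and Y
// is the same as always adding a value selected between X and the identity 0.
static constexpr int selectOfAdd(bool C, int X, int Y) { return C ? (Y + X) : Y; }
static constexpr int addOfSelect(bool C, int X, int Y) { return Y + (C ? X : 0); }
static_assert(selectOfAdd(true, 5, 7) == addOfSelect(true, 5, 7) &&
                  selectOfAdd(false, 5, 7) == addOfSelect(false, 5, 7),
              "(select C, (add Y, X), Y) == (add Y, (select C, X, 0))");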
16705
16706// This tries to get rid of `select` and `icmp` that are being used to handle
16707// targets that do not support `cttz(0)`/`ctlz(0)`.
16708static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16709  SDValue Cond = N->getOperand(0);
16710
16711 // This represents either CTTZ or CTLZ instruction.
16712 SDValue CountZeroes;
16713
16714 SDValue ValOnZero;
16715
16716 if (Cond.getOpcode() != ISD::SETCC)
16717 return SDValue();
16718
16719 if (!isNullConstant(Cond->getOperand(1)))
16720 return SDValue();
16721
16722 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16723 if (CCVal == ISD::CondCode::SETEQ) {
16724 CountZeroes = N->getOperand(2);
16725 ValOnZero = N->getOperand(1);
16726 } else if (CCVal == ISD::CondCode::SETNE) {
16727 CountZeroes = N->getOperand(1);
16728 ValOnZero = N->getOperand(2);
16729 } else {
16730 return SDValue();
16731 }
16732
16733 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16734 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16735 CountZeroes = CountZeroes.getOperand(0);
16736
16737 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16738 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16739 CountZeroes.getOpcode() != ISD::CTLZ &&
16740 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16741 return SDValue();
16742
16743 if (!isNullConstant(ValOnZero))
16744 return SDValue();
16745
16746 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16747 if (Cond->getOperand(0) != CountZeroesArgument)
16748 return SDValue();
16749
16750 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16751 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16752 CountZeroes.getValueType(), CountZeroesArgument);
16753 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16754 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16755 CountZeroes.getValueType(), CountZeroesArgument);
16756 }
16757
16758 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16759 SDValue BitWidthMinusOne =
16760 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16761
16762 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16763 CountZeroes, BitWidthMinusOne);
16764 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16765}
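// [Editor's note: illustrative sketch, not part of the upstream file.] The fold
// above relies on ISD::CTTZ/CTLZ defining a zero input to produce the bit
// width, so masking with (BitWidth - 1) maps the zero case to 0 and leaves
// every other result unchanged. 32-bit model (assumes <cstdint>):
static constexpr unsigned cttz32Model(uint32_t X) {
  unsigned N = 0;
  while (N < 32 && !(X & (1u << N))) // returns 32 for X == 0, like ISD::CTTZ
    ++N;
  return N;
}
static_assert((cttz32Model(0) & 31u) == 0,
              "select(X==0, 0, cttz X) folds to cttz(X) & 31");
static_assert((cttz32Model(40) & 31u) == 3,
              "nonzero inputs are unaffected by the mask");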
16766
16767static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16768                                const RISCVSubtarget &Subtarget) {
16769 SDValue Cond = N->getOperand(0);
16770 SDValue True = N->getOperand(1);
16771 SDValue False = N->getOperand(2);
16772 SDLoc DL(N);
16773 EVT VT = N->getValueType(0);
16774 EVT CondVT = Cond.getValueType();
16775
16776 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16777 return SDValue();
16778
16779  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
16780  // BEXTI, where C is a power of 2.
16781 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16782 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16783 SDValue LHS = Cond.getOperand(0);
16784 SDValue RHS = Cond.getOperand(1);
16785 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16786 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16787 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16788 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16789 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16790 return DAG.getSelect(DL, VT,
16791 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16792 False, True);
16793 }
16794 }
16795 return SDValue();
16796}
16797
16799 const RISCVSubtarget &Subtarget) {
16800 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16801 return Folded;
16802
16803 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16804 return V;
16805
16806 if (Subtarget.hasConditionalMoveFusion())
16807 return SDValue();
16808
16809 SDValue TrueVal = N->getOperand(1);
16810 SDValue FalseVal = N->getOperand(2);
16811 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16812 return V;
16813 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16814}
16815
16816/// If we have a build_vector where each lane is binop X, C, where C
16817/// is a constant (but not necessarily the same constant on all lanes),
16818/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16819/// We assume that materializing a constant build vector will be no more
16820/// expensive than performing O(n) binops.
16822 const RISCVSubtarget &Subtarget,
16823 const RISCVTargetLowering &TLI) {
16824 SDLoc DL(N);
16825 EVT VT = N->getValueType(0);
16826
16827 assert(!VT.isScalableVector() && "unexpected build vector");
16828
16829 if (VT.getVectorNumElements() == 1)
16830 return SDValue();
16831
16832 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16833 if (!TLI.isBinOp(Opcode))
16834 return SDValue();
16835
16836 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16837 return SDValue();
16838
16839 // This BUILD_VECTOR involves an implicit truncation, and sinking
16840 // truncates through binops is non-trivial.
16841 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16842 return SDValue();
16843
16844 SmallVector<SDValue> LHSOps;
16845 SmallVector<SDValue> RHSOps;
16846 for (SDValue Op : N->ops()) {
16847 if (Op.isUndef()) {
16848 // We can't form a divide or remainder from undef.
16849 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16850 return SDValue();
16851
16852 LHSOps.push_back(Op);
16853 RHSOps.push_back(Op);
16854 continue;
16855 }
16856
16857    // TODO: We can handle operations which have a neutral rhs value
16858 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16859 // of profit in a more explicit manner.
16860 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16861 return SDValue();
16862
16863 LHSOps.push_back(Op.getOperand(0));
16864 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16865 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16866 return SDValue();
16867 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16868 // have different LHS and RHS types.
16869 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16870 return SDValue();
16871
16872 RHSOps.push_back(Op.getOperand(1));
16873 }
16874
16875 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16876 DAG.getBuildVector(VT, DL, RHSOps));
16877}
16878
16880 const RISCVSubtarget &Subtarget,
16881 const RISCVTargetLowering &TLI) {
16882 SDValue InVec = N->getOperand(0);
16883 SDValue InVal = N->getOperand(1);
16884 SDValue EltNo = N->getOperand(2);
16885 SDLoc DL(N);
16886
16887 EVT VT = InVec.getValueType();
16888 if (VT.isScalableVector())
16889 return SDValue();
16890
16891 if (!InVec.hasOneUse())
16892 return SDValue();
16893
16894 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16895 // move the insert_vector_elts into the arms of the binop. Note that
16896 // the new RHS must be a constant.
16897 const unsigned InVecOpcode = InVec->getOpcode();
16898 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16899 InVal.hasOneUse()) {
16900 SDValue InVecLHS = InVec->getOperand(0);
16901 SDValue InVecRHS = InVec->getOperand(1);
16902 SDValue InValLHS = InVal->getOperand(0);
16903 SDValue InValRHS = InVal->getOperand(1);
16904
16905    if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
16906      return SDValue();
16907 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16908 return SDValue();
16909 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16910 // have different LHS and RHS types.
16911 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16912 return SDValue();
16913    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16914                              InVecLHS, InValLHS, EltNo);
16915    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16916                              InVecRHS, InValRHS, EltNo);
16917 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16918 }
16919
16920 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16921 // move the insert_vector_elt to the source operand of the concat_vector.
16922 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16923 return SDValue();
16924
16925 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16926 if (!IndexC)
16927 return SDValue();
16928 unsigned Elt = IndexC->getZExtValue();
16929
16930 EVT ConcatVT = InVec.getOperand(0).getValueType();
16931 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16932 return SDValue();
16933 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16934 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16935
16936 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16937 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16938 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16939 ConcatOp, InVal, NewIdx);
16940
16941 SmallVector<SDValue> ConcatOps;
16942 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16943 ConcatOps[ConcatOpIdx] = ConcatOp;
16944 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16945}
16946
16947// If we're concatenating a series of vector loads like
16948// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16949// Then we can turn this into a strided load by widening the vector elements
16950// vlse32 p, stride=n
16952 const RISCVSubtarget &Subtarget,
16953 const RISCVTargetLowering &TLI) {
16954 SDLoc DL(N);
16955 EVT VT = N->getValueType(0);
16956
16957 // Only perform this combine on legal MVTs.
16958 if (!TLI.isTypeLegal(VT))
16959 return SDValue();
16960
16961 // TODO: Potentially extend this to scalable vectors
16962 if (VT.isScalableVector())
16963 return SDValue();
16964
16965 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16966 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16967 !SDValue(BaseLd, 0).hasOneUse())
16968 return SDValue();
16969
16970 EVT BaseLdVT = BaseLd->getValueType(0);
16971
16972 // Go through the loads and check that they're strided
16973  SmallVector<LoadSDNode *> Lds;
16974  Lds.push_back(BaseLd);
16975 Align Align = BaseLd->getAlign();
16976 for (SDValue Op : N->ops().drop_front()) {
16977 auto *Ld = dyn_cast<LoadSDNode>(Op);
16978 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16979 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16980 Ld->getValueType(0) != BaseLdVT)
16981 return SDValue();
16982
16983 Lds.push_back(Ld);
16984
16985 // The common alignment is the most restrictive (smallest) of all the loads
16986 Align = std::min(Align, Ld->getAlign());
16987 }
16988
16989 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16990 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16991 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16992 // If the load ptrs can be decomposed into a common (Base + Index) with a
16993 // common constant stride, then return the constant stride.
16994 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16995 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16996 if (BIO1.equalBaseIndex(BIO2, DAG))
16997 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16998
16999 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17000 SDValue P1 = Ld1->getBasePtr();
17001 SDValue P2 = Ld2->getBasePtr();
17002 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
17003 return {{P2.getOperand(1), false}};
17004 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
17005 return {{P1.getOperand(1), true}};
17006
17007 return std::nullopt;
17008 };
17009
17010 // Get the distance between the first and second loads
17011 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17012 if (!BaseDiff)
17013 return SDValue();
17014
17015 // Check all the loads are the same distance apart
17016 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17017 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17018 return SDValue();
17019
17020 // TODO: At this point, we've successfully matched a generalized gather
17021 // load. Maybe we should emit that, and then move the specialized
17022 // matchers above and below into a DAG combine?
17023
17024  // Get the widened scalar type, e.g. v4i8 -> i32
17025 unsigned WideScalarBitWidth =
17026 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
17027 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
17028
17029  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
17030 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
17031 if (!TLI.isTypeLegal(WideVecVT))
17032 return SDValue();
17033
17034 // Check that the operation is legal
17035 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
17036 return SDValue();
17037
17038 auto [StrideVariant, MustNegateStride] = *BaseDiff;
17039 SDValue Stride =
17040 std::holds_alternative<SDValue>(StrideVariant)
17041 ? std::get<SDValue>(StrideVariant)
17042 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
17043 Lds[0]->getOffset().getValueType());
17044 if (MustNegateStride)
17045 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
17046
17047 SDValue AllOneMask =
17048 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
17049 DAG.getConstant(1, DL, MVT::i1));
17050
17051 uint64_t MemSize;
17052 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17053 ConstStride && ConstStride->getSExtValue() >= 0)
17054 // total size = (elsize * n) + (stride - elsize) * (n-1)
17055 // = elsize + stride * (n-1)
17056 MemSize = WideScalarVT.getSizeInBits() +
17057 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17058 else
17059 // If Stride isn't constant, then we can't know how much it will load
17060    MemSize = MemoryLocation::UnknownSize;
17061
17062  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17063      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17064 Align);
17065
17066 SDValue StridedLoad = DAG.getStridedLoadVP(
17067 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17068 AllOneMask,
17069 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17070
17071 for (SDValue Ld : N->ops())
17072 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17073
17074 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17075}
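// [Editor's note: illustrative sketch, not part of the upstream file.] The
// footprint formula used for the constant-stride MMO above: n elements of
// elsize bits placed stride bits apart cover elsize + stride * (n - 1) bits.
static constexpr uint64_t stridedFootprintBits(uint64_t ElSizeBits,
                                               uint64_t StrideBits, uint64_t N) {
  return ElSizeBits + StrideBits * (N - 1);
}
static_assert(stridedFootprintBits(32, 64, 4) == 224,
              "four 32-bit elements spaced 64 bits apart span 224 bits");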
17076
17078 const RISCVSubtarget &Subtarget) {
17079
17080 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17081
17082 if (N->getValueType(0).isFixedLengthVector())
17083 return SDValue();
17084
17085 SDValue Addend = N->getOperand(0);
17086 SDValue MulOp = N->getOperand(1);
17087
17088 if (N->getOpcode() == RISCVISD::ADD_VL) {
17089 SDValue AddPassthruOp = N->getOperand(2);
17090 if (!AddPassthruOp.isUndef())
17091 return SDValue();
17092 }
17093
17094 auto IsVWMulOpc = [](unsigned Opc) {
17095 switch (Opc) {
17096 case RISCVISD::VWMUL_VL:
17097    case RISCVISD::VWMULU_VL:
17098    case RISCVISD::VWMULSU_VL:
17099      return true;
17100 default:
17101 return false;
17102 }
17103 };
17104
17105 if (!IsVWMulOpc(MulOp.getOpcode()))
17106 std::swap(Addend, MulOp);
17107
17108 if (!IsVWMulOpc(MulOp.getOpcode()))
17109 return SDValue();
17110
17111 SDValue MulPassthruOp = MulOp.getOperand(2);
17112
17113 if (!MulPassthruOp.isUndef())
17114 return SDValue();
17115
17116 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17117 const RISCVSubtarget &Subtarget) {
17118 if (N->getOpcode() == ISD::ADD) {
17119 SDLoc DL(N);
17120 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17121 Subtarget);
17122 }
17123 return std::make_pair(N->getOperand(3), N->getOperand(4));
17124 }(N, DAG, Subtarget);
17125
17126 SDValue MulMask = MulOp.getOperand(3);
17127 SDValue MulVL = MulOp.getOperand(4);
17128
17129 if (AddMask != MulMask || AddVL != MulVL)
17130 return SDValue();
17131
17132 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17133 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17134 "Unexpected opcode after VWMACC_VL");
17135 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17136 "Unexpected opcode after VWMACC_VL!");
17137 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17138 "Unexpected opcode after VWMUL_VL!");
17139 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17140 "Unexpected opcode after VWMUL_VL!");
17141
17142 SDLoc DL(N);
17143 EVT VT = N->getValueType(0);
17144 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17145 AddVL};
17146 return DAG.getNode(Opc, DL, VT, Ops);
17147}
17148
17150 ISD::MemIndexType &IndexType,
17152 if (!DCI.isBeforeLegalize())
17153 return false;
17154
17155 SelectionDAG &DAG = DCI.DAG;
17156 const MVT XLenVT =
17157 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17158
17159 const EVT IndexVT = Index.getValueType();
17160
17161 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17162 // mode, so anything else must be manually legalized.
17163 if (!isIndexTypeSigned(IndexType))
17164 return false;
17165
17166 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17167 // Any index legalization should first promote to XLenVT, so we don't lose
17168 // bits when scaling. This may create an illegal index type so we let
17169 // LLVM's legalization take care of the splitting.
17170 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17171 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17172 IndexVT.changeVectorElementType(XLenVT), Index);
17173 }
17174 IndexType = ISD::UNSIGNED_SCALED;
17175 return true;
17176}
17177
17178/// Match the index vector of a scatter or gather node as the shuffle mask
17179/// which performs the rearrangement if possible. Will only match if
17180/// all lanes are touched, and thus replacing the scatter or gather with
17181/// a unit strided access and shuffle is legal.
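/// For example, with i32 elements (4-byte store size) a constant index vector
/// of <4, 0, 12, 8> corresponds to the shuffle mask <1, 0, 3, 2> and touches
/// every lane of a four-element unit-strided access.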
17182static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17183 SmallVector<int> &ShuffleMask) {
17184 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17185 return false;
17186 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17187 return false;
17188
17189 const unsigned ElementSize = VT.getScalarStoreSize();
17190 const unsigned NumElems = VT.getVectorNumElements();
17191
17192 // Create the shuffle mask and check all bits active
17193 assert(ShuffleMask.empty());
17194 BitVector ActiveLanes(NumElems);
17195 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17196 // TODO: We've found an active bit of UB, and could be
17197 // more aggressive here if desired.
17198 if (Index->getOperand(i)->isUndef())
17199 return false;
17200 uint64_t C = Index->getConstantOperandVal(i);
17201 if (C % ElementSize != 0)
17202 return false;
17203 C = C / ElementSize;
17204 if (C >= NumElems)
17205 return false;
17206 ShuffleMask.push_back(C);
17207 ActiveLanes.set(C);
17208 }
17209 return ActiveLanes.all();
17210}
17211
17212/// Match the index of a gather or scatter operation as an operation
17213/// with twice the element width and half the number of elements. This is
17214/// generally profitable (if legal) because these operations are linear
17215 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17216/// come out ahead.
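/// For example, with i32 elements a constant index vector of <0, 4, 16, 20>
/// can be treated as two i64 elements at byte offsets 0 and 16, halving the
/// number of indexed accesses.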
17217static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17218 Align BaseAlign, const RISCVSubtarget &ST) {
17219 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17220 return false;
17221 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17222 return false;
17223
17224 // Attempt a doubling. If we can use an element type 4x or 8x in
17225 // size, this will happen via multiple iterations of the transform.
17226 const unsigned NumElems = VT.getVectorNumElements();
17227 if (NumElems % 2 != 0)
17228 return false;
17229
17230 const unsigned ElementSize = VT.getScalarStoreSize();
17231 const unsigned WiderElementSize = ElementSize * 2;
17232 if (WiderElementSize > ST.getELen()/8)
17233 return false;
17234
17235 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17236 return false;
17237
17238 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17239 // TODO: We've found an active bit of UB, and could be
17240 // more aggressive here if desired.
17241 if (Index->getOperand(i)->isUndef())
17242 return false;
17243 // TODO: This offset check is too strict if we support fully
17244 // misaligned memory operations.
17245 uint64_t C = Index->getConstantOperandVal(i);
17246 if (i % 2 == 0) {
17247 if (C % WiderElementSize != 0)
17248 return false;
17249 continue;
17250 }
17251 uint64_t Last = Index->getConstantOperandVal(i-1);
17252 if (C != Last + ElementSize)
17253 return false;
17254 }
17255 return true;
17256}
17257
17258// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17259 // This is beneficial for the cases where X and Y are both the same
17260 // low-precision vector type. Since the truncate would be lowered into
17261 // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17262 // restriction, such a pattern would be expanded into a series of "vsetvli"
17263// and "vnsrl" instructions later to reach this point.
17264 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17265 SDValue Mask = N->getOperand(1);
17266 SDValue VL = N->getOperand(2);
17267
17268 bool IsVLMAX = isAllOnesConstant(VL) ||
17269 (isa<RegisterSDNode>(VL) &&
17270 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17271 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17272 Mask.getOperand(0) != VL)
17273 return SDValue();
17274
17275 auto IsTruncNode = [&](SDValue V) {
17276 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17277 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17278 };
17279
17280 SDValue Op = N->getOperand(0);
17281
17282 // We first need to find the innermost TRUNCATE_VECTOR_VL node
17283 // in order to recognize such a pattern.
17284 while (IsTruncNode(Op)) {
17285 if (!Op.hasOneUse())
17286 return SDValue();
17287 Op = Op.getOperand(0);
17288 }
17289
17290 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17291 return SDValue();
17292
17293 SDValue N0 = Op.getOperand(0);
17294 SDValue N1 = Op.getOperand(1);
17295 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17296 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17297 return SDValue();
17298
17299 SDValue N00 = N0.getOperand(0);
17300 SDValue N10 = N1.getOperand(0);
17301 if (!N00.getValueType().isVector() ||
17302 N00.getValueType() != N10.getValueType() ||
17303 N->getValueType(0) != N10.getValueType())
17304 return SDValue();
17305
17306 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17307 SDValue SMin =
17308 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17309 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17310 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17311}
17312
17313// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17314// maximum value for the truncated type.
17315// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17316// is the signed maximum value for the truncated type and C2 is the signed
17317// minimum value.
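// For example, when truncating i16 elements to i8, C is 255 for the vnclipu
// form, while C1 is 127 and C2 is -128 for the vnclip form.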
17318 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17319 const RISCVSubtarget &Subtarget) {
17320 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17321
17322 MVT VT = N->getSimpleValueType(0);
17323
17324 SDValue Mask = N->getOperand(1);
17325 SDValue VL = N->getOperand(2);
17326
17327 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17328 APInt &SplatVal) {
17329 if (V.getOpcode() != Opc &&
17330 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17331 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17332 return SDValue();
17333
17334 SDValue Op = V.getOperand(1);
17335
17336 // Peek through conversion between fixed and scalable vectors.
17337 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17338 isNullConstant(Op.getOperand(2)) &&
17339 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17340 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17341 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17342 isNullConstant(Op.getOperand(1).getOperand(1)))
17343 Op = Op.getOperand(1).getOperand(0);
17344
17345 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17346 return V.getOperand(0);
17347
17348 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17349 Op.getOperand(2) == VL) {
17350 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17351 SplatVal =
17352 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17353 return V.getOperand(0);
17354 }
17355 }
17356
17357 return SDValue();
17358 };
17359
17360 SDLoc DL(N);
17361
17362 auto DetectUSatPattern = [&](SDValue V) {
17363 APInt LoC, HiC;
17364
17365 // Simple case, V is a UMIN.
17366 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17367 if (HiC.isMask(VT.getScalarSizeInBits()))
17368 return UMinOp;
17369
17370 // If we have an SMAX that removes negative numbers first, then we can match
17371 // SMIN instead of UMIN.
17372 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17373 if (SDValue SMaxOp =
17374 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17375 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17376 return SMinOp;
17377
17378 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17379 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17380 // first.
17381 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17382 if (SDValue SMinOp =
17383 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17384 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17385 HiC.uge(LoC))
17386 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17387 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17388 Mask, VL);
17389
17390 return SDValue();
17391 };
17392
17393 auto DetectSSatPattern = [&](SDValue V) {
17394 unsigned NumDstBits = VT.getScalarSizeInBits();
17395 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17396 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17397 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17398
17399 APInt HiC, LoC;
17400 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17401 if (SDValue SMaxOp =
17402 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17403 if (HiC == SignedMax && LoC == SignedMin)
17404 return SMaxOp;
17405
17406 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17407 if (SDValue SMinOp =
17408 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17409 if (HiC == SignedMax && LoC == SignedMin)
17410 return SMinOp;
17411
17412 return SDValue();
17413 };
17414
17415 SDValue Src = N->getOperand(0);
17416
17417 // Look through multiple layers of truncates.
17418 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17419 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17420 Src.hasOneUse())
17421 Src = Src.getOperand(0);
17422
17423 SDValue Val;
17424 unsigned ClipOpc;
17425 if ((Val = DetectUSatPattern(Src)))
17426 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17427 else if ((Val = DetectSSatPattern(Src)))
17428 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17429 else
17430 return SDValue();
17431
17432 MVT ValVT = Val.getSimpleValueType();
17433
17434 do {
17435 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17436 ValVT = ValVT.changeVectorElementType(ValEltVT);
17437 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17438 } while (ValVT != VT);
17439
17440 return Val;
17441}
17442
17443// Convert
17444// (iX ctpop (bitcast (vXi1 A)))
17445// ->
17446// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17447// FIXME: It's complicated to match all the variations of this after type
17448// legalization so we only handle the pre-type legalization pattern, but that
17449// requires the fixed vector type to be legal.
17450 static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17451 const RISCVSubtarget &Subtarget) {
17452 EVT VT = N->getValueType(0);
17453 if (!VT.isScalarInteger())
17454 return SDValue();
17455
17456 SDValue Src = N->getOperand(0);
17457
17458 // Peek through zero_extend. It doesn't change the count.
17459 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17460 Src = Src.getOperand(0);
17461
17462 if (Src.getOpcode() != ISD::BITCAST)
17463 return SDValue();
17464
17465 Src = Src.getOperand(0);
17466 EVT SrcEVT = Src.getValueType();
17467 if (!SrcEVT.isSimple())
17468 return SDValue();
17469
17470 MVT SrcMVT = SrcEVT.getSimpleVT();
17471 // Make sure the input is an i1 vector.
17472 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17473 return SDValue();
17474
17475 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17476 return SDValue();
17477
17478 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17479 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17480
17481 SDLoc DL(N);
17482 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17483
17484 MVT XLenVT = Subtarget.getXLenVT();
17485 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17486 return DAG.getZExtOrTrunc(Pop, DL, VT);
17487}
17488
17489 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17490 DAGCombinerInfo &DCI) const {
17491 SelectionDAG &DAG = DCI.DAG;
17492 const MVT XLenVT = Subtarget.getXLenVT();
17493 SDLoc DL(N);
17494
17495 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17496 // bits are demanded. N will be added to the Worklist if it was not deleted.
17497 // Caller should return SDValue(N, 0) if this returns true.
17498 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17499 SDValue Op = N->getOperand(OpNo);
17500 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17501 if (!SimplifyDemandedBits(Op, Mask, DCI))
17502 return false;
17503
17504 if (N->getOpcode() != ISD::DELETED_NODE)
17505 DCI.AddToWorklist(N);
17506 return true;
17507 };
17508
17509 switch (N->getOpcode()) {
17510 default:
17511 break;
17512 case RISCVISD::SplitF64: {
17513 SDValue Op0 = N->getOperand(0);
17514 // If the input to SplitF64 is just BuildPairF64 then the operation is
17515 // redundant. Instead, use BuildPairF64's operands directly.
17516 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17517 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17518
17519 if (Op0->isUndef()) {
17520 SDValue Lo = DAG.getUNDEF(MVT::i32);
17521 SDValue Hi = DAG.getUNDEF(MVT::i32);
17522 return DCI.CombineTo(N, Lo, Hi);
17523 }
17524
17525 // It's cheaper to materialise two 32-bit integers than to load a double
17526 // from the constant pool and transfer it to integer registers through the
17527 // stack.
17528 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17529 APInt V = C->getValueAPF().bitcastToAPInt();
17530 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17531 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17532 return DCI.CombineTo(N, Lo, Hi);
17533 }
17534
17535 // This is a target-specific version of a DAGCombine performed in
17536 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17537 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17538 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17539 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17540 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17541 break;
17542 SDValue NewSplitF64 =
17543 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17544 Op0.getOperand(0));
17545 SDValue Lo = NewSplitF64.getValue(0);
17546 SDValue Hi = NewSplitF64.getValue(1);
17547 APInt SignBit = APInt::getSignMask(32);
17548 if (Op0.getOpcode() == ISD::FNEG) {
17549 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17550 DAG.getConstant(SignBit, DL, MVT::i32));
17551 return DCI.CombineTo(N, Lo, NewHi);
17552 }
17553 assert(Op0.getOpcode() == ISD::FABS);
17554 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17555 DAG.getConstant(~SignBit, DL, MVT::i32));
17556 return DCI.CombineTo(N, Lo, NewHi);
17557 }
17558 case RISCVISD::SLLW:
17559 case RISCVISD::SRAW:
17560 case RISCVISD::SRLW:
17561 case RISCVISD::RORW:
17562 case RISCVISD::ROLW: {
17563 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17564 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17565 SimplifyDemandedLowBitsHelper(1, 5))
17566 return SDValue(N, 0);
17567
17568 break;
17569 }
17570 case RISCVISD::CLZW:
17571 case RISCVISD::CTZW: {
17572 // Only the lower 32 bits of the first operand are read
17573 if (SimplifyDemandedLowBitsHelper(0, 32))
17574 return SDValue(N, 0);
17575 break;
17576 }
17577 case RISCVISD::FMV_W_X_RV64: {
17578 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17579 // conversion is unnecessary and can be replaced with the
17580 // FMV_X_ANYEXTW_RV64 operand.
17581 SDValue Op0 = N->getOperand(0);
17582 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17583 return Op0.getOperand(0);
17584 break;
17585 }
17586 case RISCVISD::FMV_X_ANYEXTH:
17587 case RISCVISD::FMV_X_ANYEXTW_RV64: {
17588 SDLoc DL(N);
17589 SDValue Op0 = N->getOperand(0);
17590 MVT VT = N->getSimpleValueType(0);
17591
17592 // Constant fold.
17593 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17594 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17595 return DAG.getConstant(Val, DL, VT);
17596 }
17597
17598 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17599 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17600 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17601 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17602 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17603 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17604 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17605 assert(Op0.getOperand(0).getValueType() == VT &&
17606 "Unexpected value type!");
17607 return Op0.getOperand(0);
17608 }
17609
17610 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17611 cast<LoadSDNode>(Op0)->isSimple()) {
17612 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17613 auto *LN0 = cast<LoadSDNode>(Op0);
17614 SDValue Load =
17615 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17616 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17617 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17618 return Load;
17619 }
17620
17621 // This is a target-specific version of a DAGCombine performed in
17622 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17623 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17624 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17625 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17626 !Op0.getNode()->hasOneUse())
17627 break;
17628 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17629 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17630 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17631 if (Op0.getOpcode() == ISD::FNEG)
17632 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17633 DAG.getConstant(SignBit, DL, VT));
17634
17635 assert(Op0.getOpcode() == ISD::FABS);
17636 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17637 DAG.getConstant(~SignBit, DL, VT));
17638 }
17639 case ISD::ABS: {
17640 EVT VT = N->getValueType(0);
17641 SDValue N0 = N->getOperand(0);
17642 // abs (sext) -> zext (abs)
17643 // abs (zext) -> zext (handled elsewhere)
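// This holds even for the most negative narrow value: for an i8 -128 the
// narrow abs wraps to 0x80, and zero-extending 0x80 gives 128, matching abs
// of the sign-extended value.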
17644 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17645 SDValue Src = N0.getOperand(0);
17646 SDLoc DL(N);
17647 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17648 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17649 }
17650 break;
17651 }
17652 case ISD::ADD: {
17653 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17654 return V;
17655 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17656 return V;
17657 return performADDCombine(N, DCI, Subtarget);
17658 }
17659 case ISD::SUB: {
17660 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17661 return V;
17662 return performSUBCombine(N, DAG, Subtarget);
17663 }
17664 case ISD::AND:
17665 return performANDCombine(N, DCI, Subtarget);
17666 case ISD::OR: {
17667 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17668 return V;
17669 return performORCombine(N, DCI, Subtarget);
17670 }
17671 case ISD::XOR:
17672 return performXORCombine(N, DAG, Subtarget);
17673 case ISD::MUL:
17674 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17675 return V;
17676 return performMULCombine(N, DAG, DCI, Subtarget);
17677 case ISD::SDIV:
17678 case ISD::UDIV:
17679 case ISD::SREM:
17680 case ISD::UREM:
17681 if (SDValue V = combineBinOpOfZExt(N, DAG))
17682 return V;
17683 break;
17684 case ISD::FMUL: {
17685 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
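// Multiplying by (copysign 1.0, Y) keeps the magnitude of X and xors in the
// sign of Y, which is exactly what fsgnjx computes.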
17686 SDValue N0 = N->getOperand(0);
17687 SDValue N1 = N->getOperand(1);
17688 if (N0->getOpcode() != ISD::FCOPYSIGN)
17689 std::swap(N0, N1);
17690 if (N0->getOpcode() != ISD::FCOPYSIGN)
17691 return SDValue();
17692 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17693 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17694 return SDValue();
17695 EVT VT = N->getValueType(0);
17696 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17697 return SDValue();
17698 SDValue Sign = N0->getOperand(1);
17699 if (Sign.getValueType() != VT)
17700 return SDValue();
17701 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17702 }
17703 case ISD::FADD:
17704 case ISD::UMAX:
17705 case ISD::UMIN:
17706 case ISD::SMAX:
17707 case ISD::SMIN:
17708 case ISD::FMAXNUM:
17709 case ISD::FMINNUM: {
17710 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17711 return V;
17712 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17713 return V;
17714 return SDValue();
17715 }
17716 case ISD::SETCC:
17717 return performSETCCCombine(N, DAG, Subtarget);
17718 case ISD::SIGN_EXTEND_INREG:
17719 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17720 case ISD::ZERO_EXTEND:
17721 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17722 // type legalization. This is safe because fp_to_uint produces poison if
17723 // it overflows.
17724 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17725 SDValue Src = N->getOperand(0);
17726 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17727 isTypeLegal(Src.getOperand(0).getValueType()))
17728 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17729 Src.getOperand(0));
17730 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17731 isTypeLegal(Src.getOperand(1).getValueType())) {
17732 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17733 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17734 Src.getOperand(0), Src.getOperand(1));
17735 DCI.CombineTo(N, Res);
17736 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17737 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17738 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17739 }
17740 }
17741 return SDValue();
17742 case RISCVISD::TRUNCATE_VECTOR_VL:
17743 if (SDValue V = combineTruncOfSraSext(N, DAG))
17744 return V;
17745 return combineTruncToVnclip(N, DAG, Subtarget);
17746 case ISD::TRUNCATE:
17747 return performTRUNCATECombine(N, DAG, Subtarget);
17748 case ISD::SELECT:
17749 return performSELECTCombine(N, DAG, Subtarget);
17750 case RISCVISD::CZERO_EQZ:
17751 case RISCVISD::CZERO_NEZ: {
17752 SDValue Val = N->getOperand(0);
17753 SDValue Cond = N->getOperand(1);
17754
17755 unsigned Opc = N->getOpcode();
17756
17757 // czero_eqz x, x -> x
17758 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17759 return Val;
17760
17761 unsigned InvOpc =
17762 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17763
17764 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17765 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17766 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17767 SDValue NewCond = Cond.getOperand(0);
17768 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17769 if (DAG.MaskedValueIsZero(NewCond, Mask))
17770 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17771 }
17772 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17773 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17774 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17775 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17776 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17777 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17778 if (ISD::isIntEqualitySetCC(CCVal))
17779 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17780 N->getValueType(0), Val, Cond.getOperand(0));
17781 }
17782 return SDValue();
17783 }
17784 case RISCVISD::SELECT_CC: {
17785 // Transform
17786 SDValue LHS = N->getOperand(0);
17787 SDValue RHS = N->getOperand(1);
17788 SDValue CC = N->getOperand(2);
17789 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17790 SDValue TrueV = N->getOperand(3);
17791 SDValue FalseV = N->getOperand(4);
17792 SDLoc DL(N);
17793 EVT VT = N->getValueType(0);
17794
17795 // If the True and False values are the same, we don't need a select_cc.
17796 if (TrueV == FalseV)
17797 return TrueV;
17798
17799 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17800 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
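// For example, (select (x < 0), 3, 7) becomes ((x >> (XLEN - 1)) & (3 - 7)) + 7:
// the arithmetic shift yields an all-ones or all-zeros mask, giving
// -4 + 7 = 3 when x is negative and 0 + 7 = 7 otherwise, with no branch.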
17801 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17802 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17803 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17804 if (CCVal == ISD::CondCode::SETGE)
17805 std::swap(TrueV, FalseV);
17806
17807 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17808 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17809 // Only handle simm12; if a constant is not in this range, it can be
17810 // treated as a register operand.
17811 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17812 isInt<12>(TrueSImm - FalseSImm)) {
17813 SDValue SRA =
17814 DAG.getNode(ISD::SRA, DL, VT, LHS,
17815 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17816 SDValue AND =
17817 DAG.getNode(ISD::AND, DL, VT, SRA,
17818 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17819 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17820 }
17821
17822 if (CCVal == ISD::CondCode::SETGE)
17823 std::swap(TrueV, FalseV);
17824 }
17825
17826 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17827 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17828 {LHS, RHS, CC, TrueV, FalseV});
17829
17830 if (!Subtarget.hasConditionalMoveFusion()) {
17831 // (select c, -1, y) -> -c | y
17832 if (isAllOnesConstant(TrueV)) {
17833 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17834 SDValue Neg = DAG.getNegative(C, DL, VT);
17835 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17836 }
17837 // (select c, y, -1) -> -!c | y
17838 if (isAllOnesConstant(FalseV)) {
17839 SDValue C =
17840 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17841 SDValue Neg = DAG.getNegative(C, DL, VT);
17842 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17843 }
17844
17845 // (select c, 0, y) -> -!c & y
17846 if (isNullConstant(TrueV)) {
17847 SDValue C =
17848 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17849 SDValue Neg = DAG.getNegative(C, DL, VT);
17850 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17851 }
17852 // (select c, y, 0) -> -c & y
17853 if (isNullConstant(FalseV)) {
17854 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17855 SDValue Neg = DAG.getNegative(C, DL, VT);
17856 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17857 }
17858 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17859 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
17860 if (((isOneConstant(FalseV) && LHS == TrueV &&
17861 CCVal == ISD::CondCode::SETNE) ||
17862 (isOneConstant(TrueV) && LHS == FalseV &&
17863 CCVal == ISD::CondCode::SETEQ)) &&
17864 isNullConstant(RHS)) {
17865 // freeze it to be safe.
17866 LHS = DAG.getFreeze(LHS);
17867 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17868 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17869 }
17870 }
17871
17872 // If both true/false are an xor with 1, pull through the select.
17873 // This can occur after op legalization if both operands are setccs that
17874 // require an xor to invert.
17875 // FIXME: Generalize to other binary ops with identical operand?
17876 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17877 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17878 isOneConstant(TrueV.getOperand(1)) &&
17879 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17880 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17881 TrueV.getOperand(0), FalseV.getOperand(0));
17882 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17883 }
17884
17885 return SDValue();
17886 }
17887 case RISCVISD::BR_CC: {
17888 SDValue LHS = N->getOperand(1);
17889 SDValue RHS = N->getOperand(2);
17890 SDValue CC = N->getOperand(3);
17891 SDLoc DL(N);
17892
17893 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17894 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17895 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17896
17897 return SDValue();
17898 }
17899 case ISD::BITREVERSE:
17900 return performBITREVERSECombine(N, DAG, Subtarget);
17901 case ISD::FP_TO_SINT:
17902 case ISD::FP_TO_UINT:
17903 return performFP_TO_INTCombine(N, DCI, Subtarget);
17904 case ISD::FP_TO_SINT_SAT:
17905 case ISD::FP_TO_UINT_SAT:
17906 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17907 case ISD::FCOPYSIGN: {
17908 EVT VT = N->getValueType(0);
17909 if (!VT.isVector())
17910 break;
17911 // There is a form of VFSGNJ which injects the negated sign of its second
17912 // operand. Try and bubble any FNEG up after the extend/round to produce
17913 // this optimized pattern. Avoid modifying cases where the round is an
17914 // FP_ROUND with TRUNC=1.
17915 SDValue In2 = N->getOperand(1);
17916 // Avoid cases where the extend/round has multiple uses, as duplicating
17917 // those is typically more expensive than removing a fneg.
17918 if (!In2.hasOneUse())
17919 break;
17920 if (In2.getOpcode() != ISD::FP_EXTEND &&
17921 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17922 break;
17923 In2 = In2.getOperand(0);
17924 if (In2.getOpcode() != ISD::FNEG)
17925 break;
17926 SDLoc DL(N);
17927 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17928 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17929 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17930 }
17931 case ISD::MGATHER: {
17932 const auto *MGN = cast<MaskedGatherSDNode>(N);
17933 const EVT VT = N->getValueType(0);
17934 SDValue Index = MGN->getIndex();
17935 SDValue ScaleOp = MGN->getScale();
17936 ISD::MemIndexType IndexType = MGN->getIndexType();
17937 assert(!MGN->isIndexScaled() &&
17938 "Scaled gather/scatter should not be formed");
17939
17940 SDLoc DL(N);
17941 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17942 return DAG.getMaskedGather(
17943 N->getVTList(), MGN->getMemoryVT(), DL,
17944 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17945 MGN->getBasePtr(), Index, ScaleOp},
17946 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17947
17948 if (narrowIndex(Index, IndexType, DAG))
17949 return DAG.getMaskedGather(
17950 N->getVTList(), MGN->getMemoryVT(), DL,
17951 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17952 MGN->getBasePtr(), Index, ScaleOp},
17953 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17954
17955 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17956 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17957 // The sequence will be XLenVT, not the type of Index. Tell
17958 // isSimpleVIDSequence this so we avoid overflow.
17959 if (std::optional<VIDSequence> SimpleVID =
17960 isSimpleVIDSequence(Index, Subtarget.getXLen());
17961 SimpleVID && SimpleVID->StepDenominator == 1) {
17962 const int64_t StepNumerator = SimpleVID->StepNumerator;
17963 const int64_t Addend = SimpleVID->Addend;
17964
17965 // Note: We don't need to check alignment here since (by assumption
17966 // from the existence of the gather), our offsets must be sufficiently
17967 // aligned.
17968
17969 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17970 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17971 assert(IndexType == ISD::UNSIGNED_SCALED);
17972 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17973 DAG.getSignedConstant(Addend, DL, PtrVT));
17974
17975 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17976 VT.getVectorElementCount());
17977 SDValue StridedLoad = DAG.getStridedLoadVP(
17978 VT, DL, MGN->getChain(), BasePtr,
17979 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
17980 EVL, MGN->getMemOperand());
17981 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17982 StridedLoad, MGN->getPassThru(), EVL);
17983 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17984 DL);
17985 }
17986 }
17987
17988 SmallVector<int> ShuffleMask;
17989 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17990 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17991 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17992 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17993 MGN->getMask(), DAG.getUNDEF(VT),
17994 MGN->getMemoryVT(), MGN->getMemOperand(),
17995 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17996 SDValue Shuffle =
17997 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17998 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17999 }
18000
18001 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18002 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18003 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18004 SmallVector<SDValue> NewIndices;
18005 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18006 NewIndices.push_back(Index.getOperand(i));
18007 EVT IndexVT = Index.getValueType()
18008 .getHalfNumVectorElementsVT(*DAG.getContext());
18009 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18010
18011 unsigned ElementSize = VT.getScalarStoreSize();
18012 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18013 auto EltCnt = VT.getVectorElementCount();
18014 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18015 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18016 EltCnt.divideCoefficientBy(2));
18017 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18018 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18019 EltCnt.divideCoefficientBy(2));
18020 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18021
18022 SDValue Gather =
18023 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18024 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18025 Index, ScaleOp},
18026 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18027 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18028 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18029 }
18030 break;
18031 }
18032 case ISD::MSCATTER: {
18033 const auto *MSN = cast<MaskedScatterSDNode>(N);
18034 SDValue Index = MSN->getIndex();
18035 SDValue ScaleOp = MSN->getScale();
18036 ISD::MemIndexType IndexType = MSN->getIndexType();
18037 assert(!MSN->isIndexScaled() &&
18038 "Scaled gather/scatter should not be formed");
18039
18040 SDLoc DL(N);
18041 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18042 return DAG.getMaskedScatter(
18043 N->getVTList(), MSN->getMemoryVT(), DL,
18044 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18045 Index, ScaleOp},
18046 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18047
18048 if (narrowIndex(Index, IndexType, DAG))
18049 return DAG.getMaskedScatter(
18050 N->getVTList(), MSN->getMemoryVT(), DL,
18051 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18052 Index, ScaleOp},
18053 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18054
18055 EVT VT = MSN->getValue()->getValueType(0);
18056 SmallVector<int> ShuffleMask;
18057 if (!MSN->isTruncatingStore() &&
18058 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18059 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18060 DAG.getUNDEF(VT), ShuffleMask);
18061 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18062 DAG.getUNDEF(XLenVT), MSN->getMask(),
18063 MSN->getMemoryVT(), MSN->getMemOperand(),
18064 ISD::UNINDEXED, false);
18065 }
18066 break;
18067 }
18068 case ISD::VP_GATHER: {
18069 const auto *VPGN = cast<VPGatherSDNode>(N);
18070 SDValue Index = VPGN->getIndex();
18071 SDValue ScaleOp = VPGN->getScale();
18072 ISD::MemIndexType IndexType = VPGN->getIndexType();
18073 assert(!VPGN->isIndexScaled() &&
18074 "Scaled gather/scatter should not be formed");
18075
18076 SDLoc DL(N);
18077 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18078 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18079 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18080 ScaleOp, VPGN->getMask(),
18081 VPGN->getVectorLength()},
18082 VPGN->getMemOperand(), IndexType);
18083
18084 if (narrowIndex(Index, IndexType, DAG))
18085 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18086 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18087 ScaleOp, VPGN->getMask(),
18088 VPGN->getVectorLength()},
18089 VPGN->getMemOperand(), IndexType);
18090
18091 break;
18092 }
18093 case ISD::VP_SCATTER: {
18094 const auto *VPSN = cast<VPScatterSDNode>(N);
18095 SDValue Index = VPSN->getIndex();
18096 SDValue ScaleOp = VPSN->getScale();
18097 ISD::MemIndexType IndexType = VPSN->getIndexType();
18098 assert(!VPSN->isIndexScaled() &&
18099 "Scaled gather/scatter should not be formed");
18100
18101 SDLoc DL(N);
18102 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18103 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18104 {VPSN->getChain(), VPSN->getValue(),
18105 VPSN->getBasePtr(), Index, ScaleOp,
18106 VPSN->getMask(), VPSN->getVectorLength()},
18107 VPSN->getMemOperand(), IndexType);
18108
18109 if (narrowIndex(Index, IndexType, DAG))
18110 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18111 {VPSN->getChain(), VPSN->getValue(),
18112 VPSN->getBasePtr(), Index, ScaleOp,
18113 VPSN->getMask(), VPSN->getVectorLength()},
18114 VPSN->getMemOperand(), IndexType);
18115 break;
18116 }
18117 case RISCVISD::SHL_VL:
18118 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18119 return V;
18120 [[fallthrough]];
18121 case RISCVISD::SRA_VL:
18122 case RISCVISD::SRL_VL: {
18123 SDValue ShAmt = N->getOperand(1);
18124 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18125 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18126 SDLoc DL(N);
18127 SDValue VL = N->getOperand(4);
18128 EVT VT = N->getValueType(0);
18129 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18130 ShAmt.getOperand(1), VL);
18131 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18132 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18133 }
18134 break;
18135 }
18136 case ISD::SRA:
18137 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18138 return V;
18139 [[fallthrough]];
18140 case ISD::SRL:
18141 case ISD::SHL: {
18142 if (N->getOpcode() == ISD::SHL) {
18143 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18144 return V;
18145 }
18146 SDValue ShAmt = N->getOperand(1);
18147 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18148 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18149 SDLoc DL(N);
18150 EVT VT = N->getValueType(0);
18151 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18152 ShAmt.getOperand(1),
18153 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18154 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18155 }
18156 break;
18157 }
18158 case RISCVISD::ADD_VL:
18159 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18160 return V;
18161 return combineToVWMACC(N, DAG, Subtarget);
18162 case RISCVISD::VWADD_W_VL:
18163 case RISCVISD::VWADDU_W_VL:
18164 case RISCVISD::VWSUB_W_VL:
18165 case RISCVISD::VWSUBU_W_VL:
18166 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18167 case RISCVISD::SUB_VL:
18168 case RISCVISD::MUL_VL:
18169 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18170 case RISCVISD::VFMADD_VL:
18171 case RISCVISD::VFNMADD_VL:
18172 case RISCVISD::VFMSUB_VL:
18173 case RISCVISD::VFNMSUB_VL:
18174 case RISCVISD::STRICT_VFMADD_VL:
18175 case RISCVISD::STRICT_VFNMADD_VL:
18176 case RISCVISD::STRICT_VFMSUB_VL:
18177 case RISCVISD::STRICT_VFNMSUB_VL:
18178 return performVFMADD_VLCombine(N, DCI, Subtarget);
18179 case RISCVISD::FADD_VL:
18180 case RISCVISD::FSUB_VL:
18181 case RISCVISD::FMUL_VL:
18182 case RISCVISD::VFWADD_W_VL:
18183 case RISCVISD::VFWSUB_W_VL:
18184 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18185 case ISD::LOAD:
18186 case ISD::STORE: {
18187 if (DCI.isAfterLegalizeDAG())
18188 if (SDValue V = performMemPairCombine(N, DCI))
18189 return V;
18190
18191 if (N->getOpcode() != ISD::STORE)
18192 break;
18193
18194 auto *Store = cast<StoreSDNode>(N);
18195 SDValue Chain = Store->getChain();
18196 EVT MemVT = Store->getMemoryVT();
18197 SDValue Val = Store->getValue();
18198 SDLoc DL(N);
18199
18200 bool IsScalarizable =
18201 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18202 Store->isSimple() &&
18203 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18204 isPowerOf2_64(MemVT.getSizeInBits()) &&
18205 MemVT.getSizeInBits() <= Subtarget.getXLen();
18206
18207 // If sufficiently aligned we can scalarize stores of constant vectors of
18208 // any power-of-two size up to XLen bits, provided that they aren't too
18209 // expensive to materialize.
18210 // vsetivli zero, 2, e8, m1, ta, ma
18211 // vmv.v.i v8, 4
18212 // vse64.v v8, (a0)
18213 // ->
18214 // li a1, 1028
18215 // sh a1, 0(a0)
18216 if (DCI.isBeforeLegalize() && IsScalarizable &&
18217 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18218 // Get the constant vector bits
18219 APInt NewC(Val.getValueSizeInBits(), 0);
18220 uint64_t EltSize = Val.getScalarValueSizeInBits();
18221 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18222 if (Val.getOperand(i).isUndef())
18223 continue;
18224 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18225 i * EltSize);
18226 }
18227 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18228
18229 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18230 true) <= 2 &&
18231 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18232 NewVT, *Store->getMemOperand())) {
18233 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18234 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18235 Store->getPointerInfo(), Store->getOriginalAlign(),
18236 Store->getMemOperand()->getFlags());
18237 }
18238 }
18239
18240 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18241 // vsetivli zero, 2, e16, m1, ta, ma
18242 // vle16.v v8, (a0)
18243 // vse16.v v8, (a1)
18244 if (auto *L = dyn_cast<LoadSDNode>(Val);
18245 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18246 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18247 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18248 L->getMemoryVT() == MemVT) {
18249 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18250 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18251 NewVT, *Store->getMemOperand()) &&
18252 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18253 NewVT, *L->getMemOperand())) {
18254 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18255 L->getPointerInfo(), L->getOriginalAlign(),
18256 L->getMemOperand()->getFlags());
18257 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18258 Store->getPointerInfo(), Store->getOriginalAlign(),
18259 Store->getMemOperand()->getFlags());
18260 }
18261 }
18262
18263 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18264 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18265 // any illegal types.
18266 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18267 (DCI.isAfterLegalizeDAG() &&
18268 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18269 isNullConstant(Val.getOperand(1)))) {
18270 SDValue Src = Val.getOperand(0);
18271 MVT VecVT = Src.getSimpleValueType();
18272 // VecVT should be scalable and memory VT should match the element type.
18273 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18274 MemVT == VecVT.getVectorElementType()) {
18275 SDLoc DL(N);
18276 MVT MaskVT = getMaskTypeFor(VecVT);
18277 return DAG.getStoreVP(
18278 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18279 DAG.getConstant(1, DL, MaskVT),
18280 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18281 Store->getMemOperand(), Store->getAddressingMode(),
18282 Store->isTruncatingStore(), /*IsCompress*/ false);
18283 }
18284 }
18285
18286 break;
18287 }
18288 case ISD::SPLAT_VECTOR: {
18289 EVT VT = N->getValueType(0);
18290 // Only perform this combine on legal MVT types.
18291 if (!isTypeLegal(VT))
18292 break;
18293 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18294 DAG, Subtarget))
18295 return Gather;
18296 break;
18297 }
18298 case ISD::BUILD_VECTOR:
18299 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18300 return V;
18301 break;
18302 case ISD::CONCAT_VECTORS:
18303 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18304 return V;
18305 break;
18306 case ISD::INSERT_VECTOR_ELT:
18307 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18308 return V;
18309 break;
18310 case RISCVISD::VFMV_V_F_VL: {
18311 const MVT VT = N->getSimpleValueType(0);
18312 SDValue Passthru = N->getOperand(0);
18313 SDValue Scalar = N->getOperand(1);
18314 SDValue VL = N->getOperand(2);
18315
18316 // If VL is 1, we can use vfmv.s.f.
18317 if (isOneConstant(VL))
18318 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18319 break;
18320 }
18321 case RISCVISD::VMV_V_X_VL: {
18322 const MVT VT = N->getSimpleValueType(0);
18323 SDValue Passthru = N->getOperand(0);
18324 SDValue Scalar = N->getOperand(1);
18325 SDValue VL = N->getOperand(2);
18326
18327 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18328 // scalar input.
18329 unsigned ScalarSize = Scalar.getValueSizeInBits();
18330 unsigned EltWidth = VT.getScalarSizeInBits();
18331 if (ScalarSize > EltWidth && Passthru.isUndef())
18332 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18333 return SDValue(N, 0);
18334
18335 // If VL is 1 and the scalar value won't benefit from immediate, we can
18336 // use vmv.s.x.
18337 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18338 if (isOneConstant(VL) &&
18339 (!Const || Const->isZero() ||
18340 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18341 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18342
18343 break;
18344 }
18345 case RISCVISD::VFMV_S_F_VL: {
18346 SDValue Src = N->getOperand(1);
18347 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18348 // into an undef vector.
18349 // TODO: Could use a vslide or vmv.v.v for non-undef.
18350 if (N->getOperand(0).isUndef() &&
18351 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18352 isNullConstant(Src.getOperand(1)) &&
18353 Src.getOperand(0).getValueType().isScalableVector()) {
18354 EVT VT = N->getValueType(0);
18355 EVT SrcVT = Src.getOperand(0).getValueType();
18356 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18357 // Widths match, just return the original vector.
18358 if (SrcVT == VT)
18359 return Src.getOperand(0);
18360 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18361 }
18362 [[fallthrough]];
18363 }
18364 case RISCVISD::VMV_S_X_VL: {
18365 const MVT VT = N->getSimpleValueType(0);
18366 SDValue Passthru = N->getOperand(0);
18367 SDValue Scalar = N->getOperand(1);
18368 SDValue VL = N->getOperand(2);
18369
18370 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18371 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18372 return Scalar.getOperand(0);
18373
18374 // Use M1 or smaller to avoid over-constraining register allocation.
18375 const MVT M1VT = getLMUL1VT(VT);
18376 if (M1VT.bitsLT(VT)) {
18377 SDValue M1Passthru =
18378 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18379 DAG.getVectorIdxConstant(0, DL));
18380 SDValue Result =
18381 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18382 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18383 DAG.getVectorIdxConstant(0, DL));
18384 return Result;
18385 }
18386
18387 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18388 // higher would involve overly constraining the register allocator for
18389 // no purpose.
18390 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18391 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18392 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18393 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18394
18395 break;
18396 }
18397 case RISCVISD::VMV_X_S: {
18398 SDValue Vec = N->getOperand(0);
18399 MVT VecVT = N->getOperand(0).getSimpleValueType();
18400 const MVT M1VT = getLMUL1VT(VecVT);
18401 if (M1VT.bitsLT(VecVT)) {
18402 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18403 DAG.getVectorIdxConstant(0, DL));
18404 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18405 }
18406 break;
18407 }
18408 case ISD::INTRINSIC_VOID:
18409 case ISD::INTRINSIC_W_CHAIN:
18410 case ISD::INTRINSIC_WO_CHAIN: {
18411 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18412 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18413 switch (IntNo) {
18414 // By default we do not combine any intrinsic.
18415 default:
18416 return SDValue();
18417 case Intrinsic::riscv_vcpop:
18418 case Intrinsic::riscv_vcpop_mask:
18419 case Intrinsic::riscv_vfirst:
18420 case Intrinsic::riscv_vfirst_mask: {
18421 SDValue VL = N->getOperand(2);
18422 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18423 IntNo == Intrinsic::riscv_vfirst_mask)
18424 VL = N->getOperand(3);
18425 if (!isNullConstant(VL))
18426 return SDValue();
18427 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18428 SDLoc DL(N);
18429 EVT VT = N->getValueType(0);
18430 if (IntNo == Intrinsic::riscv_vfirst ||
18431 IntNo == Intrinsic::riscv_vfirst_mask)
18432 return DAG.getAllOnesConstant(DL, VT);
18433 return DAG.getConstant(0, DL, VT);
18434 }
18435 }
18436 }
18437 case ISD::EXPERIMENTAL_VP_REVERSE:
18438 return performVP_REVERSECombine(N, DAG, Subtarget);
18439 case ISD::BITCAST: {
18441 SDValue N0 = N->getOperand(0);
18442 EVT VT = N->getValueType(0);
18443 EVT SrcVT = N0.getValueType();
18444 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18445 unsigned NF = VT.getRISCVVectorTupleNumFields();
18446 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18447 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18448 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18449
18450 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18451
18452 SDValue Result = DAG.getUNDEF(VT);
18453 for (unsigned i = 0; i < NF; ++i)
18454 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18455 DAG.getVectorIdxConstant(i, DL));
18456 return Result;
18457 }
18458 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18459 // type, widen both sides to avoid a trip through memory.
18460 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18461 VT.isScalarInteger()) {
18462 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18463 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18464 Ops[0] = N0;
18465 SDLoc DL(N);
18466 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18467 N0 = DAG.getBitcast(MVT::i8, N0);
18468 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18469 }
18470
18471 return SDValue();
18472 }
18473 case ISD::CTPOP:
18474 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18475 return V;
18476 break;
18477 }
18478
18479 return SDValue();
18480}
18481
18482 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18483 EVT XVT, unsigned KeptBits) const {
18484 // For vectors, we don't have a preference.
18485 if (XVT.isVector())
18486 return false;
18487
18488 if (XVT != MVT::i32 && XVT != MVT::i64)
18489 return false;
18490
18491 // We can use sext.w for RV64 or an srai 31 on RV32.
18492 if (KeptBits == 32 || KeptBits == 64)
18493 return true;
18494
18495 // With Zbb we can use sext.h/sext.b.
18496 return Subtarget.hasStdExtZbb() &&
18497 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18498 KeptBits == 16);
18499}
18500
18501 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18502 const SDNode *N, CombineLevel Level) const {
18503 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18504 N->getOpcode() == ISD::SRL) &&
18505 "Expected shift op");
18506
18507 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18508 // materialised in fewer instructions than `(OP _, c1)`:
18509 //
18510 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18511 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
18512 SDValue N0 = N->getOperand(0);
18513 EVT Ty = N0.getValueType();
18514
18515 // LD/ST will fold a constant offset into its addressing, so when AddNode is
18516 // only used by LD/ST, the folding optimization performed above can still be
18517 // completed there.
18518 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18519 for (SDNode *Use : X->users()) {
18520 // This use is the one we're on right now. Skip it
18521 if (Use == User || Use->getOpcode() == ISD::SELECT)
18522 continue;
18523 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18524 return false;
18525 }
18526 return true;
18527 };
18528
18529 if (Ty.isScalarInteger() &&
18530 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18531 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18532 return isUsedByLdSt(N0.getNode(), N);
18533
18534 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18535 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18536
18537 // Bail if we might break a sh{1,2,3}add pattern.
18538 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18539 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18540 N->user_begin()->getOpcode() == ISD::ADD &&
18541 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18542 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18543 return false;
18544
18545 if (C1 && C2) {
18546 const APInt &C1Int = C1->getAPIntValue();
18547 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18548
18549 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18550 // and the combine should happen, to potentially allow further combines
18551 // later.
18552 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18553 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18554 return true;
18555
18556 // We can materialise `c1` in an add immediate, so it's "free", and the
18557 // combine should be prevented.
18558 if (C1Int.getSignificantBits() <= 64 &&
18559 isLegalAddImmediate(C1Int.getSExtValue()))
18560 return false;
18561
18562 // Neither constant will fit into an immediate, so find materialisation
18563 // costs.
18564 int C1Cost =
18565 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18566 /*CompressionCost*/ true);
18567 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18568 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18569 /*CompressionCost*/ true);
18570
18571 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18572 // combine should be prevented.
18573 if (C1Cost < ShiftedC1Cost)
18574 return false;
18575 }
18576 }
18577
18578 if (!N0->hasOneUse())
18579 return false;
18580
18581 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18582 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18583 !N0->getOperand(0)->hasOneUse())
18584 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18585
18586 return true;
18587}
18588
18589 bool RISCVTargetLowering::targetShrinkDemandedConstant(
18590 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18591 TargetLoweringOpt &TLO) const {
18592 // Delay this optimization as late as possible.
18593 if (!TLO.LegalOps)
18594 return false;
18595
18596 EVT VT = Op.getValueType();
18597 if (VT.isVector())
18598 return false;
18599
18600 unsigned Opcode = Op.getOpcode();
18601 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18602 return false;
18603
18604 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18605 if (!C)
18606 return false;
18607
18608 const APInt &Mask = C->getAPIntValue();
18609
18610 // Clear all non-demanded bits initially.
18611 APInt ShrunkMask = Mask & DemandedBits;
18612
18613 // Try to make a smaller immediate by setting undemanded bits.
18614
18615 APInt ExpandedMask = Mask | ~DemandedBits;
18616
18617 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18618 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18619 };
18620 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18621 if (NewMask == Mask)
18622 return true;
18623 SDLoc DL(Op);
18624 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18625 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18626 Op.getOperand(0), NewC);
18627 return TLO.CombineTo(Op, NewOp);
18628 };
18629
18630 // If the shrunk mask fits in sign extended 12 bits, let the target
18631 // independent code apply it.
18632 if (ShrunkMask.isSignedIntN(12))
18633 return false;
18634
18635 // AND has a few special cases for zext.
18636 if (Opcode == ISD::AND) {
18637 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18638 // otherwise use SLLI + SRLI.
18639 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18640 if (IsLegalMask(NewMask))
18641 return UseMask(NewMask);
18642
18643 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18644 if (VT == MVT::i64) {
18645 APInt NewMask = APInt(64, 0xffffffff);
18646 if (IsLegalMask(NewMask))
18647 return UseMask(NewMask);
18648 }
18649 }
18650
18651 // For the remaining optimizations, we need to be able to make a negative
18652 // number through a combination of mask and undemanded bits.
18653 if (!ExpandedMask.isNegative())
18654 return false;
18655
18656 // Determine the fewest number of bits needed to represent the negative number.
18657 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18658
18659 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18660 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18661 // If we can't create a simm12, we shouldn't change opaque constants.
18662 APInt NewMask = ShrunkMask;
18663 if (MinSignedBits <= 12)
18664 NewMask.setBitsFrom(11);
18665 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18666 NewMask.setBitsFrom(31);
18667 else
18668 return false;
18669
18670 // Check that our new mask is a subset of the demanded mask.
18671 assert(IsLegalMask(NewMask));
18672 return UseMask(NewMask);
18673}
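// Illustrative sketch (not part of the LLVM source): the ShrunkMask /
// ExpandedMask bounds used above, modelled with plain 64-bit arithmetic. A
// candidate immediate is usable iff it keeps every demanded set bit (it is a
// superset of ShrunkMask) and only adds bits the user never reads (it is a
// subset of ExpandedMask). The helper name and the constants below are made
// up for the example, chosen so the simm12 path fails but the zext.h-style
// 0xffff mask succeeds.
#include <cassert>
#include <cstdint>

static bool fitsBetween(uint64_t Shrunk, uint64_t Expanded, uint64_t Cand) {
  return (Shrunk & ~Cand) == 0 && (Cand & ~Expanded) == 0;
}

int main() {
  uint64_t Mask = 0x00ffffff;           // original AND immediate, needs >1 instruction
  uint64_t Demanded = 0x0000ffff;       // only the low 16 bits are read afterwards
  uint64_t Shrunk = Mask & Demanded;    // 0xffff, does not fit in a simm12
  uint64_t Expanded = Mask | ~Demanded; // bits outside Demanded may be set freely
  assert(fitsBetween(Shrunk, Expanded, 0xffff));     // -> AND X, 0xffff (zext.h)
  assert(fitsBetween(Shrunk, Expanded, 0xffffffff)); // zext.w-style mask is also legal
  return 0;
}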
18674
18675static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18676 static const uint64_t GREVMasks[] = {
18677 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18678 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18679
18680 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18681 unsigned Shift = 1 << Stage;
18682 if (ShAmt & Shift) {
18683 uint64_t Mask = GREVMasks[Stage];
18684 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18685 if (IsGORC)
18686 Res |= x;
18687 x = Res;
18688 }
18689 }
18690
18691 return x;
18692}
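// Illustrative sketch (not part of the LLVM source): what a control value of
// 7 means for the helper above. grevOrGorc is a local copy of
// computeGREVOrGORC so this snippet compiles on its own.
#include <cassert>
#include <cstdint>

static uint64_t grevOrGorc(uint64_t x, unsigned ShAmt, bool IsGORC) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = Masks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
      x = IsGORC ? (Res | x) : Res;
    }
  }
  return x;
}

int main() {
  // GREV with control 7 reverses the bits inside every byte (brev8).
  assert(grevOrGorc(0x0102030400000000ULL, 7, false) == 0x8040C02000000000ULL);
  // GORC with control 7 turns every non-zero byte into 0xFF (orc.b).
  assert(grevOrGorc(0x0102030400000000ULL, 7, true) == 0xFFFFFFFF00000000ULL);
  return 0;
}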
18693
18695 KnownBits &Known,
18696 const APInt &DemandedElts,
18697 const SelectionDAG &DAG,
18698 unsigned Depth) const {
18699 unsigned BitWidth = Known.getBitWidth();
18700 unsigned Opc = Op.getOpcode();
18701 assert((Opc >= ISD::BUILTIN_OP_END ||
18702 Opc == ISD::INTRINSIC_WO_CHAIN ||
18703 Opc == ISD::INTRINSIC_W_CHAIN ||
18704 Opc == ISD::INTRINSIC_VOID) &&
18705 "Should use MaskedValueIsZero if you don't know whether Op"
18706 " is a target node!");
18707
18708 Known.resetAll();
18709 switch (Opc) {
18710 default: break;
18711 case RISCVISD::SELECT_CC: {
18712 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18713 // If we don't know any bits, early out.
18714 if (Known.isUnknown())
18715 break;
18716 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18717
18718 // Only known if known in both the LHS and RHS.
18719 Known = Known.intersectWith(Known2);
18720 break;
18721 }
18724 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18725 // Result is either all zero or operand 0. We can propagate zeros, but not
18726 // ones.
18727 Known.One.clearAllBits();
18728 break;
18729 case RISCVISD::REMUW: {
18730 KnownBits Known2;
18731 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18732 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18733 // We only care about the lower 32 bits.
18734 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18735 // Restore the original width by sign extending.
18736 Known = Known.sext(BitWidth);
18737 break;
18738 }
18739 case RISCVISD::DIVUW: {
18740 KnownBits Known2;
18741 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18742 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18743 // We only care about the lower 32 bits.
18744 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18745 // Restore the original width by sign extending.
18746 Known = Known.sext(BitWidth);
18747 break;
18748 }
18749 case RISCVISD::SLLW: {
18750 KnownBits Known2;
18751 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18752 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18753 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18754 // Restore the original width by sign extending.
18755 Known = Known.sext(BitWidth);
18756 break;
18757 }
18758 case RISCVISD::CTZW: {
18759 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18760 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18761 unsigned LowBits = llvm::bit_width(PossibleTZ);
18762 Known.Zero.setBitsFrom(LowBits);
18763 break;
18764 }
18765 case RISCVISD::CLZW: {
18766 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18767 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18768 unsigned LowBits = llvm::bit_width(PossibleLZ);
18769 Known.Zero.setBitsFrom(LowBits);
18770 break;
18771 }
18772 case RISCVISD::BREV8:
18773 case RISCVISD::ORC_B: {
18774 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18775 // control value of 7 is equivalent to brev8 and orc.b.
18776 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18777 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18778 // To compute zeros, we need to invert the value and invert it back after.
18779 Known.Zero =
18780 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18781 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18782 break;
18783 }
18784 case RISCVISD::READ_VLENB: {
18785 // We can use the minimum and maximum VLEN values to bound VLENB. We
18786 // know VLEN must be a power of two.
18787 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18788 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18789 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18790 Known.Zero.setLowBits(Log2_32(MinVLenB));
18791 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18792 if (MaxVLenB == MinVLenB)
18793 Known.One.setBit(Log2_32(MinVLenB));
18794 break;
18795 }
18796 case RISCVISD::FCLASS: {
18797 // fclass will only set one of the low 10 bits.
18798 Known.Zero.setBitsFrom(10);
18799 break;
18800 }
18803 unsigned IntNo =
18804 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18805 switch (IntNo) {
18806 default:
18807 // We can't do anything for most intrinsics.
18808 break;
18809 case Intrinsic::riscv_vsetvli:
18810 case Intrinsic::riscv_vsetvlimax: {
18811 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18812 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18813 RISCVII::VLMUL VLMUL =
18814 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18815 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18816 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18817 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18818 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
18819
18820 // Result of vsetvli must not be larger than AVL.
18821 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18822 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18823
18824 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18825 if (BitWidth > KnownZeroFirstBit)
18826 Known.Zero.setBitsFrom(KnownZeroFirstBit);
18827 break;
18828 }
18829 }
18830 break;
18831 }
18832 }
18833}
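// Illustrative sketch (not part of the LLVM source): the READ_VLENB bounds
// above, worked with concrete numbers. Assume a core with Zvl128b
// (min VLEN = 128) and an implementation cap of VLEN = 512; both figures and
// the log2u helper are made up for the example.
#include <cassert>

static unsigned log2u(unsigned V) { unsigned L = 0; while (V >>= 1) ++L; return L; }

int main() {
  const unsigned MinVLenB = 128 / 8; // 16
  const unsigned MaxVLenB = 512 / 8; // 64
  // VLENB is a power of two in [16, 64], so bits [0,3] and bits [7,63] are
  // known zero; only bits 4..6 remain unknown.
  unsigned KnownZeroLow  = log2u(MinVLenB);     // 4
  unsigned KnownZeroFrom = log2u(MaxVLenB) + 1; // 7
  assert(KnownZeroLow == 4 && KnownZeroFrom == 7);
  return 0;
}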
18834
18836 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18837 unsigned Depth) const {
18838 switch (Op.getOpcode()) {
18839 default:
18840 break;
18841 case RISCVISD::SELECT_CC: {
18842 unsigned Tmp =
18843 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18844 if (Tmp == 1) return 1; // Early out.
18845 unsigned Tmp2 =
18846 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18847 return std::min(Tmp, Tmp2);
18848 }
18851 // Output is either all zero or operand 0. We can propagate sign bit count
18852 // from operand 0.
18853 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18854 case RISCVISD::ABSW: {
18855 // We expand this at isel to negw+max. The result will have 33 sign bits
18856 // if the input has at least 33 sign bits.
18857 unsigned Tmp =
18858 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18859 if (Tmp < 33) return 1;
18860 return 33;
18861 }
18862 case RISCVISD::SLLW:
18863 case RISCVISD::SRAW:
18864 case RISCVISD::SRLW:
18865 case RISCVISD::DIVW:
18866 case RISCVISD::DIVUW:
18867 case RISCVISD::REMUW:
18868 case RISCVISD::ROLW:
18869 case RISCVISD::RORW:
18874 // TODO: As the result is sign-extended, this is conservatively correct. A
18875 // more precise answer could be calculated for SRAW depending on known
18876 // bits in the shift amount.
18877 return 33;
18878 case RISCVISD::VMV_X_S: {
18879 // The number of sign bits of the scalar result is computed by obtaining the
18880 // element type of the input vector operand, subtracting its width from the
18881 // XLEN, and then adding one (sign bit within the element type). If the
18882 // element type is wider than XLen, the least-significant XLEN bits are
18883 // taken.
18884 unsigned XLen = Subtarget.getXLen();
18885 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
18886 if (EltBits <= XLen)
18887 return XLen - EltBits + 1;
18888 break;
18889 }
18891 unsigned IntNo = Op.getConstantOperandVal(1);
18892 switch (IntNo) {
18893 default:
18894 break;
18895 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
18896 case Intrinsic::riscv_masked_atomicrmw_add_i64:
18897 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
18898 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
18899 case Intrinsic::riscv_masked_atomicrmw_max_i64:
18900 case Intrinsic::riscv_masked_atomicrmw_min_i64:
18901 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
18902 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
18903 case Intrinsic::riscv_masked_cmpxchg_i64:
18904 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
18905 // narrow atomic operation. These are implemented using atomic
18906 // operations at the minimum supported atomicrmw/cmpxchg width whose
18907 // result is then sign extended to XLEN. With +A, the minimum width is
18908 // 32 for both RV64 and RV32.
18909 assert(Subtarget.getXLen() == 64);
18911 assert(Subtarget.hasStdExtA());
18912 return 33;
18913 }
18914 break;
18915 }
18916 }
18917
18918 return 1;
18919}
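// Illustrative sketch (not part of the LLVM source): why the W-suffixed nodes
// report 33 sign bits. A W op computes a 32-bit result and sign-extends it to
// 64 bits, so bits 31..63 (33 bits) all carry the same value. The sllw and
// numSignBits helpers are hypothetical stand-ins written for this example.
#include <cassert>
#include <cstdint>

static int64_t sllw(int64_t A, int64_t B) {
  return (int64_t)(int32_t)((uint32_t)A << (B & 31)); // emulate RV64 sllw
}

static unsigned numSignBits(int64_t V) {
  uint64_t U = (uint64_t)V;
  unsigned SignBit = (unsigned)(U >> 63);
  unsigned N = 1;
  while (N < 64 && ((U >> (63 - N)) & 1) == SignBit)
    ++N;
  return N;
}

int main() {
  assert(numSignBits(sllw(0x12345678, 8)) >= 33);
  assert(numSignBits(sllw(-1, 4)) >= 33);
  return 0;
}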
18920
18922 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18923 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
18924
18925 // TODO: Add more target nodes.
18926 switch (Op.getOpcode()) {
18928 // Integer select_cc cannot create poison.
18929 // TODO: What are the FP poison semantics?
18930 // TODO: This instruction blocks poison from the unselected operand, can
18931 // we do anything with that?
18932 return !Op.getValueType().isInteger();
18933 }
18935 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
18936}
18937
18938const Constant *
18940 assert(Ld && "Unexpected null LoadSDNode");
18941 if (!ISD::isNormalLoad(Ld))
18942 return nullptr;
18943
18944 SDValue Ptr = Ld->getBasePtr();
18945
18946 // Only constant pools with no offset are supported.
18947 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
18948 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
18949 if (!CNode || CNode->isMachineConstantPoolEntry() ||
18950 CNode->getOffset() != 0)
18951 return nullptr;
18952
18953 return CNode;
18954 };
18955
18956 // Simple case, LLA.
18957 if (Ptr.getOpcode() == RISCVISD::LLA) {
18958 auto *CNode = GetSupportedConstantPool(Ptr);
18959 if (!CNode || CNode->getTargetFlags() != 0)
18960 return nullptr;
18961
18962 return CNode->getConstVal();
18963 }
18964
18965 // Look for a HI and ADD_LO pair.
18966 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
18967 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
18968 return nullptr;
18969
18970 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
18971 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
18972
18973 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
18974 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
18975 return nullptr;
18976
18977 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
18978 return nullptr;
18979
18980 return CNodeLo->getConstVal();
18981}
18982
18984 MachineBasicBlock *BB) {
18985 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
18986
18987 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
18988 // Should the count have wrapped while it was being read, we need to try
18989 // again.
18990 // For example:
18991 // ```
18992 // read:
18993 // csrrs x3, counterh # load high word of counter
18994 // csrrs x2, counter # load low word of counter
18995 // csrrs x4, counterh # load high word of counter
18996 // bne x3, x4, read # check if high word reads match, otherwise try again
18997 // ```
18998
18999 MachineFunction &MF = *BB->getParent();
19000 const BasicBlock *LLVMBB = BB->getBasicBlock();
19002
19003 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19004 MF.insert(It, LoopMBB);
19005
19006 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19007 MF.insert(It, DoneMBB);
19008
19009 // Transfer the remainder of BB and its successor edges to DoneMBB.
19010 DoneMBB->splice(DoneMBB->begin(), BB,
19011 std::next(MachineBasicBlock::iterator(MI)), BB->end());
19013
19014 BB->addSuccessor(LoopMBB);
19015
19017 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19018 Register LoReg = MI.getOperand(0).getReg();
19019 Register HiReg = MI.getOperand(1).getReg();
19020 int64_t LoCounter = MI.getOperand(2).getImm();
19021 int64_t HiCounter = MI.getOperand(3).getImm();
19022 DebugLoc DL = MI.getDebugLoc();
19023
19025 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19026 .addImm(HiCounter)
19027 .addReg(RISCV::X0);
19028 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19029 .addImm(LoCounter)
19030 .addReg(RISCV::X0);
19031 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19032 .addImm(HiCounter)
19033 .addReg(RISCV::X0);
19034
19035 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19036 .addReg(HiReg)
19037 .addReg(ReadAgainReg)
19038 .addMBB(LoopMBB);
19039
19040 LoopMBB->addSuccessor(LoopMBB);
19041 LoopMBB->addSuccessor(DoneMBB);
19042
19043 MI.eraseFromParent();
19044
19045 return DoneMBB;
19046}
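// Illustrative sketch (not part of the LLVM source): the same hi/lo/hi retry
// idea in plain C++. FakeCounter, readCounterHi and readCounterLo are made-up
// names that stand in for the counter/counterh CSR pair; the increment inside
// readCounterLo just simulates the counter ticking between reads.
#include <cstdint>
#include <cstdio>

static uint64_t FakeCounter = 0x00000001FFFFFFFFULL; // about to wrap its low word
static uint32_t readCounterHi() { return (uint32_t)(FakeCounter >> 32); }
static uint32_t readCounterLo() { ++FakeCounter; return (uint32_t)FakeCounter; }

static uint64_t readCounter64() {
  uint32_t Hi, Lo;
  do {
    Hi = readCounterHi();
    Lo = readCounterLo();
  } while (readCounterHi() != Hi); // high half changed mid-read: try again
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  printf("counter = 0x%llx\n", (unsigned long long)readCounter64());
  return 0;
}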
19047
19050 const RISCVSubtarget &Subtarget) {
19051 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19052
19053 MachineFunction &MF = *BB->getParent();
19054 DebugLoc DL = MI.getDebugLoc();
19057 Register LoReg = MI.getOperand(0).getReg();
19058 Register HiReg = MI.getOperand(1).getReg();
19059 Register SrcReg = MI.getOperand(2).getReg();
19060
19061 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
19062 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19063
19064 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19065 RI, Register());
19067 MachineMemOperand *MMOLo =
19071 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19072 .addFrameIndex(FI)
19073 .addImm(0)
19074 .addMemOperand(MMOLo);
19075 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19076 .addFrameIndex(FI)
19077 .addImm(4)
19078 .addMemOperand(MMOHi);
19079 MI.eraseFromParent(); // The pseudo instruction is gone now.
19080 return BB;
19081}
19082
19085 const RISCVSubtarget &Subtarget) {
19086 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19087 "Unexpected instruction");
19088
19089 MachineFunction &MF = *BB->getParent();
19090 DebugLoc DL = MI.getDebugLoc();
19093 Register DstReg = MI.getOperand(0).getReg();
19094 Register LoReg = MI.getOperand(1).getReg();
19095 Register HiReg = MI.getOperand(2).getReg();
19096
19097 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19098 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19099
19101 MachineMemOperand *MMOLo =
19105 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19106 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19107 .addFrameIndex(FI)
19108 .addImm(0)
19109 .addMemOperand(MMOLo);
19110 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19111 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19112 .addFrameIndex(FI)
19113 .addImm(4)
19114 .addMemOperand(MMOHi);
19115 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19116 MI.eraseFromParent(); // The pseudo instruction is gone now.
19117 return BB;
19118}
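// Illustrative sketch (not part of the LLVM source): what the two pseudos
// above do at the value level. On RV32 with D but no direct FPR<->GPR-pair
// move, the value takes a round trip through a 64-bit stack slot: one half at
// offset 0, the other at offset 4, then a reload in the other register class.
// splitF64/buildPairF64 are hypothetical names, and the word order assumes a
// little-endian host, matching RISC-V.
#include <cassert>
#include <cstdint>
#include <cstring>

static void splitF64(double D, uint32_t &Lo, uint32_t &Hi) {
  uint32_t Words[2];
  std::memcpy(Words, &D, sizeof(D)); // "store FPR64, then two LW"
  Lo = Words[0];                     // low word at offset 0
  Hi = Words[1];                     // high word at offset 4
}

static double buildPairF64(uint32_t Lo, uint32_t Hi) {
  uint32_t Words[2] = {Lo, Hi};      // "two SW, then FLD"
  double D;
  std::memcpy(&D, Words, sizeof(D));
  return D;
}

int main() {
  uint32_t Lo, Hi;
  splitF64(1.5, Lo, Hi);
  assert(buildPairF64(Lo, Hi) == 1.5);
  return 0;
}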
19119
19121 switch (MI.getOpcode()) {
19122 default:
19123 return false;
19124 case RISCV::Select_GPR_Using_CC_GPR:
19125 case RISCV::Select_GPR_Using_CC_Imm:
19126 case RISCV::Select_FPR16_Using_CC_GPR:
19127 case RISCV::Select_FPR16INX_Using_CC_GPR:
19128 case RISCV::Select_FPR32_Using_CC_GPR:
19129 case RISCV::Select_FPR32INX_Using_CC_GPR:
19130 case RISCV::Select_FPR64_Using_CC_GPR:
19131 case RISCV::Select_FPR64INX_Using_CC_GPR:
19132 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19133 return true;
19134 }
19135}
19136
19138 unsigned RelOpcode, unsigned EqOpcode,
19139 const RISCVSubtarget &Subtarget) {
19140 DebugLoc DL = MI.getDebugLoc();
19141 Register DstReg = MI.getOperand(0).getReg();
19142 Register Src1Reg = MI.getOperand(1).getReg();
19143 Register Src2Reg = MI.getOperand(2).getReg();
19145 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19147
19148 // Save the current FFLAGS.
19149 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19150
19151 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19152 .addReg(Src1Reg)
19153 .addReg(Src2Reg);
19156
19157 // Restore the FFLAGS.
19158 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19159 .addReg(SavedFFlags, RegState::Kill);
19160
19161 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19162 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19163 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19164 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19167
19168 // Erase the pseudoinstruction.
19169 MI.eraseFromParent();
19170 return BB;
19171}
19172
19173static MachineBasicBlock *
19175 MachineBasicBlock *ThisMBB,
19176 const RISCVSubtarget &Subtarget) {
19177 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19178 // Without this, custom-inserter would have generated:
19179 //
19180 // A
19181 // | \
19182 // | B
19183 // | /
19184 // C
19185 // | \
19186 // | D
19187 // | /
19188 // E
19189 //
19190 // A: X = ...; Y = ...
19191 // B: empty
19192 // C: Z = PHI [X, A], [Y, B]
19193 // D: empty
19194 // E: PHI [X, C], [Z, D]
19195 //
19196 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19197 //
19198 // A
19199 // | \
19200 // | C
19201 // | /|
19202 // |/ |
19203 // | |
19204 // | D
19205 // | /
19206 // E
19207 //
19208 // A: X = ...; Y = ...
19209 // D: empty
19210 // E: PHI [X, A], [X, C], [Y, D]
19211
19212 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19213 const DebugLoc &DL = First.getDebugLoc();
19214 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19215 MachineFunction *F = ThisMBB->getParent();
19216 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19217 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19218 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19219 MachineFunction::iterator It = ++ThisMBB->getIterator();
19220 F->insert(It, FirstMBB);
19221 F->insert(It, SecondMBB);
19222 F->insert(It, SinkMBB);
19223
19224 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19225 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19227 ThisMBB->end());
19228 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19229
19230 // Fallthrough block for ThisMBB.
19231 ThisMBB->addSuccessor(FirstMBB);
19232 // Fallthrough block for FirstMBB.
19233 FirstMBB->addSuccessor(SecondMBB);
19234 ThisMBB->addSuccessor(SinkMBB);
19235 FirstMBB->addSuccessor(SinkMBB);
19236 // This is fallthrough.
19237 SecondMBB->addSuccessor(SinkMBB);
19238
19239 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19240 Register FLHS = First.getOperand(1).getReg();
19241 Register FRHS = First.getOperand(2).getReg();
19242 // Insert appropriate branch.
19243 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19244 .addReg(FLHS)
19245 .addReg(FRHS)
19246 .addMBB(SinkMBB);
19247
19248 Register SLHS = Second.getOperand(1).getReg();
19249 Register SRHS = Second.getOperand(2).getReg();
19250 Register Op1Reg4 = First.getOperand(4).getReg();
19251 Register Op1Reg5 = First.getOperand(5).getReg();
19252
19253 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19254 // Insert appropriate branch.
19255 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19256 .addReg(SLHS)
19257 .addReg(SRHS)
19258 .addMBB(SinkMBB);
19259
19260 Register DestReg = Second.getOperand(0).getReg();
19261 Register Op2Reg4 = Second.getOperand(4).getReg();
19262 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19263 .addReg(Op2Reg4)
19264 .addMBB(ThisMBB)
19265 .addReg(Op1Reg4)
19266 .addMBB(FirstMBB)
19267 .addReg(Op1Reg5)
19268 .addMBB(SecondMBB);
19269
19270 // Now remove the Select_FPRX_s.
19271 First.eraseFromParent();
19272 Second.eraseFromParent();
19273 return SinkMBB;
19274}
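// Illustrative sketch (not part of the LLVM source): roughly the source-level
// shape that can produce the cascaded pair handled above. The false value of
// the outer select is itself a select, and the single-pass emission turns the
// two stacked diamonds into one three-way PHI. Whether a given compiler run
// actually produces the cascaded Select_FPRX_ pattern depends on the exact
// lowering, so treat this only as an illustration.
#include <cassert>

static double pick(double A, double B, double X, double Y, double Z) {
  return (A < B) ? X : ((A > B) ? Y : Z); // inner select feeds the outer one's false operand
}

int main() {
  assert(pick(1.0, 2.0, 10, 20, 30) == 10);
  assert(pick(3.0, 2.0, 10, 20, 30) == 20);
  assert(pick(2.0, 2.0, 10, 20, 30) == 30);
  return 0;
}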
19275
19278 const RISCVSubtarget &Subtarget) {
19279 // To "insert" Select_* instructions, we actually have to insert the triangle
19280 // control-flow pattern. The incoming instructions know the destination vreg
19281 // to set, the condition code register to branch on, the true/false values to
19282 // select between, and the condcode to use to select the appropriate branch.
19283 //
19284 // We produce the following control flow:
19285 // HeadMBB
19286 // | \
19287 // | IfFalseMBB
19288 // | /
19289 // TailMBB
19290 //
19291 // When we find a sequence of selects we attempt to optimize their emission
19292 // by sharing the control flow. Currently we only handle cases where we have
19293 // multiple selects with the exact same condition (same LHS, RHS and CC).
19294 // The selects may be interleaved with other instructions if the other
19295 // instructions meet some requirements we deem safe:
19296 // - They are not pseudo instructions.
19297 // - They are debug instructions. Otherwise,
19298 // - They do not have side-effects, do not access memory and their inputs do
19299 // not depend on the results of the select pseudo-instructions.
19300 // The TrueV/FalseV operands of the selects cannot depend on the result of
19301 // previous selects in the sequence.
19302 // These conditions could be further relaxed. See the X86 target for a
19303 // related approach and more information.
19304 //
19305 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19306 // is checked here and handled by a separate function -
19307 // EmitLoweredCascadedSelect.
19308
19309 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19310 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19311 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19312 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19313 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19314 Next->getOperand(5).isKill())
19315 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19316
19317 Register LHS = MI.getOperand(1).getReg();
19318 Register RHS;
19319 if (MI.getOperand(2).isReg())
19320 RHS = MI.getOperand(2).getReg();
19321 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19322
19323 SmallVector<MachineInstr *, 4> SelectDebugValues;
19324 SmallSet<Register, 4> SelectDests;
19325 SelectDests.insert(MI.getOperand(0).getReg());
19326
19327 MachineInstr *LastSelectPseudo = &MI;
19328 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19329 SequenceMBBI != E; ++SequenceMBBI) {
19330 if (SequenceMBBI->isDebugInstr())
19331 continue;
19332 if (isSelectPseudo(*SequenceMBBI)) {
19333 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19334 !SequenceMBBI->getOperand(2).isReg() ||
19335 SequenceMBBI->getOperand(2).getReg() != RHS ||
19336 SequenceMBBI->getOperand(3).getImm() != CC ||
19337 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19338 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19339 break;
19340 LastSelectPseudo = &*SequenceMBBI;
19341 SequenceMBBI->collectDebugValues(SelectDebugValues);
19342 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19343 continue;
19344 }
19345 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19346 SequenceMBBI->mayLoadOrStore() ||
19347 SequenceMBBI->usesCustomInsertionHook())
19348 break;
19349 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19350 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19351 }))
19352 break;
19353 }
19354
19355 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19356 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19357 DebugLoc DL = MI.getDebugLoc();
19359
19360 MachineBasicBlock *HeadMBB = BB;
19361 MachineFunction *F = BB->getParent();
19362 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19363 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19364
19365 F->insert(I, IfFalseMBB);
19366 F->insert(I, TailMBB);
19367
19368 // Set the call frame size on entry to the new basic blocks.
19369 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19370 IfFalseMBB->setCallFrameSize(CallFrameSize);
19371 TailMBB->setCallFrameSize(CallFrameSize);
19372
19373 // Transfer debug instructions associated with the selects to TailMBB.
19374 for (MachineInstr *DebugInstr : SelectDebugValues) {
19375 TailMBB->push_back(DebugInstr->removeFromParent());
19376 }
19377
19378 // Move all instructions after the sequence to TailMBB.
19379 TailMBB->splice(TailMBB->end(), HeadMBB,
19380 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19381 // Update machine-CFG edges by transferring all successors of the current
19382 // block to the new block which will contain the Phi nodes for the selects.
19383 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19384 // Set the successors for HeadMBB.
19385 HeadMBB->addSuccessor(IfFalseMBB);
19386 HeadMBB->addSuccessor(TailMBB);
19387
19388 // Insert appropriate branch.
19389 if (MI.getOperand(2).isImm())
19390 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19391 .addReg(LHS)
19392 .addImm(MI.getOperand(2).getImm())
19393 .addMBB(TailMBB);
19394 else
19395 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19396 .addReg(LHS)
19397 .addReg(RHS)
19398 .addMBB(TailMBB);
19399
19400 // IfFalseMBB just falls through to TailMBB.
19401 IfFalseMBB->addSuccessor(TailMBB);
19402
19403 // Create PHIs for all of the select pseudo-instructions.
19404 auto SelectMBBI = MI.getIterator();
19405 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19406 auto InsertionPoint = TailMBB->begin();
19407 while (SelectMBBI != SelectEnd) {
19408 auto Next = std::next(SelectMBBI);
19409 if (isSelectPseudo(*SelectMBBI)) {
19410 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19411 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19412 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19413 .addReg(SelectMBBI->getOperand(4).getReg())
19414 .addMBB(HeadMBB)
19415 .addReg(SelectMBBI->getOperand(5).getReg())
19416 .addMBB(IfFalseMBB);
19417 SelectMBBI->eraseFromParent();
19418 }
19419 SelectMBBI = Next;
19420 }
19421
19422 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19423 return TailMBB;
19424}
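// Illustrative sketch (not part of the LLVM source): the "several selects with
// the same condition" case merged above. Both results hinge on the single
// comparison A < B, so one conditional branch plus two PHIs in the shared
// tail block cover the whole group. minMax is a made-up example; with Zbb the
// compiler may pick min/max instructions instead, so this only illustrates
// the select-sequence shape.
#include <cassert>

static void minMax(int A, int B, int &Min, int &Max) {
  Min = (A < B) ? A : B; // select #1
  Max = (A < B) ? B : A; // select #2, same LHS/RHS/CC as #1
}

int main() {
  int Min, Max;
  minMax(3, 7, Min, Max);
  assert(Min == 3 && Max == 7);
  return 0;
}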
19425
19426// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19427static const RISCV::RISCVMaskedPseudoInfo *
19428lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19430 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19431 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19433 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19434 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19435 return Masked;
19436}
19437
19440 unsigned CVTXOpc) {
19441 DebugLoc DL = MI.getDebugLoc();
19442
19444
19446 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19447
19448 // Save the old value of FFLAGS.
19449 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19450
19451 assert(MI.getNumOperands() == 7);
19452
19453 // Emit a VFCVT_X_F
19454 const TargetRegisterInfo *TRI =
19456 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19457 Register Tmp = MRI.createVirtualRegister(RC);
19458 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19459 .add(MI.getOperand(1))
19460 .add(MI.getOperand(2))
19461 .add(MI.getOperand(3))
19462 .add(MachineOperand::CreateImm(7)) // frm = DYN
19463 .add(MI.getOperand(4))
19464 .add(MI.getOperand(5))
19465 .add(MI.getOperand(6))
19466 .add(MachineOperand::CreateReg(RISCV::FRM,
19467 /*IsDef*/ false,
19468 /*IsImp*/ true));
19469
19470 // Emit a VFCVT_F_X
19471 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19472 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19473 // There is no E8 variant for VFCVT_F_X.
19474 assert(Log2SEW >= 4);
19475 unsigned CVTFOpc =
19476 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19477 ->MaskedPseudo;
19478
19479 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19480 .add(MI.getOperand(0))
19481 .add(MI.getOperand(1))
19482 .addReg(Tmp)
19483 .add(MI.getOperand(3))
19484 .add(MachineOperand::CreateImm(7)) // frm = DYN
19485 .add(MI.getOperand(4))
19486 .add(MI.getOperand(5))
19487 .add(MI.getOperand(6))
19488 .add(MachineOperand::CreateReg(RISCV::FRM,
19489 /*IsDef*/ false,
19490 /*IsImp*/ true));
19491
19492 // Restore FFLAGS.
19493 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19494 .addReg(SavedFFLAGS, RegState::Kill);
19495
19496 // Erase the pseudoinstruction.
19497 MI.eraseFromParent();
19498 return BB;
19499}
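// Illustrative sketch (not part of the LLVM source): the save/convert/convert/
// restore dance above, written for a single float with <cfenv>. The real
// pseudo is a masked vector operation; this scalar version only shows why the
// flag state is captured first and restored afterwards, so the INEXACT raised
// by the int round trip does not leak. roundNoExcept is a made-up name.
#include <cassert>
#include <cfenv>
#include <cmath>

static float roundNoExcept(float X) {
  std::fexcept_t Saved;
  std::fegetexceptflag(&Saved, FE_ALL_EXCEPT); // ReadFFLAGS
  long I = std::lrint(X);                      // VFCVT_X_F (frm = DYN)
  float R = (float)I;                          // VFCVT_F_X (frm = DYN)
  std::fesetexceptflag(&Saved, FE_ALL_EXCEPT); // WriteFFLAGS
  return R;
}

int main() {
  assert(roundNoExcept(2.25f) == 2.0f); // rounded to integral, flags unchanged
  return 0;
}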
19500
19502 const RISCVSubtarget &Subtarget) {
19503 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19504 const TargetRegisterClass *RC;
19505 switch (MI.getOpcode()) {
19506 default:
19507 llvm_unreachable("Unexpected opcode");
19508 case RISCV::PseudoFROUND_H:
19509 CmpOpc = RISCV::FLT_H;
19510 F2IOpc = RISCV::FCVT_W_H;
19511 I2FOpc = RISCV::FCVT_H_W;
19512 FSGNJOpc = RISCV::FSGNJ_H;
19513 FSGNJXOpc = RISCV::FSGNJX_H;
19514 RC = &RISCV::FPR16RegClass;
19515 break;
19516 case RISCV::PseudoFROUND_H_INX:
19517 CmpOpc = RISCV::FLT_H_INX;
19518 F2IOpc = RISCV::FCVT_W_H_INX;
19519 I2FOpc = RISCV::FCVT_H_W_INX;
19520 FSGNJOpc = RISCV::FSGNJ_H_INX;
19521 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19522 RC = &RISCV::GPRF16RegClass;
19523 break;
19524 case RISCV::PseudoFROUND_S:
19525 CmpOpc = RISCV::FLT_S;
19526 F2IOpc = RISCV::FCVT_W_S;
19527 I2FOpc = RISCV::FCVT_S_W;
19528 FSGNJOpc = RISCV::FSGNJ_S;
19529 FSGNJXOpc = RISCV::FSGNJX_S;
19530 RC = &RISCV::FPR32RegClass;
19531 break;
19532 case RISCV::PseudoFROUND_S_INX:
19533 CmpOpc = RISCV::FLT_S_INX;
19534 F2IOpc = RISCV::FCVT_W_S_INX;
19535 I2FOpc = RISCV::FCVT_S_W_INX;
19536 FSGNJOpc = RISCV::FSGNJ_S_INX;
19537 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19538 RC = &RISCV::GPRF32RegClass;
19539 break;
19540 case RISCV::PseudoFROUND_D:
19541 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19542 CmpOpc = RISCV::FLT_D;
19543 F2IOpc = RISCV::FCVT_L_D;
19544 I2FOpc = RISCV::FCVT_D_L;
19545 FSGNJOpc = RISCV::FSGNJ_D;
19546 FSGNJXOpc = RISCV::FSGNJX_D;
19547 RC = &RISCV::FPR64RegClass;
19548 break;
19549 case RISCV::PseudoFROUND_D_INX:
19550 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19551 CmpOpc = RISCV::FLT_D_INX;
19552 F2IOpc = RISCV::FCVT_L_D_INX;
19553 I2FOpc = RISCV::FCVT_D_L_INX;
19554 FSGNJOpc = RISCV::FSGNJ_D_INX;
19555 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19556 RC = &RISCV::GPRRegClass;
19557 break;
19558 }
19559
19560 const BasicBlock *BB = MBB->getBasicBlock();
19561 DebugLoc DL = MI.getDebugLoc();
19563
19565 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19566 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19567
19568 F->insert(I, CvtMBB);
19569 F->insert(I, DoneMBB);
19570 // Move all instructions after the sequence to DoneMBB.
19571 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19572 MBB->end());
19573 // Update machine-CFG edges by transferring all successors of the current
19574 // block to the new block which will contain the Phi nodes for the selects.
19576 // Set the successors for MBB.
19577 MBB->addSuccessor(CvtMBB);
19578 MBB->addSuccessor(DoneMBB);
19579
19580 Register DstReg = MI.getOperand(0).getReg();
19581 Register SrcReg = MI.getOperand(1).getReg();
19582 Register MaxReg = MI.getOperand(2).getReg();
19583 int64_t FRM = MI.getOperand(3).getImm();
19584
19585 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19587
19588 Register FabsReg = MRI.createVirtualRegister(RC);
19589 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19590
19591 // Compare the FP value to the max value.
19592 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19593 auto MIB =
19594 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19597
19598 // Insert branch.
19599 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19600 .addReg(CmpReg)
19601 .addReg(RISCV::X0)
19602 .addMBB(DoneMBB);
19603
19604 CvtMBB->addSuccessor(DoneMBB);
19605
19606 // Convert to integer.
19607 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19608 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19611
19612 // Convert back to FP.
19613 Register I2FReg = MRI.createVirtualRegister(RC);
19614 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19617
19618 // Restore the sign bit.
19619 Register CvtReg = MRI.createVirtualRegister(RC);
19620 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19621
19622 // Merge the results.
19623 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19624 .addReg(SrcReg)
19625 .addMBB(MBB)
19626 .addReg(CvtReg)
19627 .addMBB(CvtMBB);
19628
19629 MI.eraseFromParent();
19630 return DoneMBB;
19631}
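// Illustrative sketch (not part of the LLVM source): the control flow emitted
// above, as plain C++ for a float. Values whose magnitude is at least 2^23
// are already integral and bypass the int round trip; otherwise convert to an
// integer and back, then restore the sign so e.g. -0.25 still rounds to -0.0.
// The 2^23 threshold is an assumption of this sketch for f32; the real pseudo
// receives the bound through its MaxReg operand. froundImpl is a made-up name.
#include <cassert>
#include <cmath>

static float froundImpl(float Src) {
  const float Max = 0x1p23f;          // smallest f32 magnitude with no fraction bits
  if (!(std::fabs(Src) < Max))        // BEQ on the FLT result: skip CvtMBB
    return Src;
  long I = std::lrint(Src);           // FCVT with dynamic rounding
  float Cvt = (float)I;               // convert back to FP
  return std::copysign(Cvt, Src);     // FSGNJ restores the sign bit
}

int main() {
  assert(froundImpl(2.5f) == 2.0f);         // nearest-even
  assert(std::signbit(froundImpl(-0.25f))); // -0.25 -> -0.0
  return 0;
}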
19632
19635 MachineBasicBlock *BB) const {
19636 switch (MI.getOpcode()) {
19637 default:
19638 llvm_unreachable("Unexpected instr type to insert");
19639 case RISCV::ReadCounterWide:
19640 assert(!Subtarget.is64Bit() &&
19641 "ReadCounterWide is only to be used on riscv32");
19642 return emitReadCounterWidePseudo(MI, BB);
19643 case RISCV::Select_GPR_Using_CC_GPR:
19644 case RISCV::Select_GPR_Using_CC_Imm:
19645 case RISCV::Select_FPR16_Using_CC_GPR:
19646 case RISCV::Select_FPR16INX_Using_CC_GPR:
19647 case RISCV::Select_FPR32_Using_CC_GPR:
19648 case RISCV::Select_FPR32INX_Using_CC_GPR:
19649 case RISCV::Select_FPR64_Using_CC_GPR:
19650 case RISCV::Select_FPR64INX_Using_CC_GPR:
19651 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19652 return emitSelectPseudo(MI, BB, Subtarget);
19653 case RISCV::BuildPairF64Pseudo:
19654 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19655 case RISCV::SplitF64Pseudo:
19656 return emitSplitF64Pseudo(MI, BB, Subtarget);
19657 case RISCV::PseudoQuietFLE_H:
19658 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19659 case RISCV::PseudoQuietFLE_H_INX:
19660 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19661 case RISCV::PseudoQuietFLT_H:
19662 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19663 case RISCV::PseudoQuietFLT_H_INX:
19664 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19665 case RISCV::PseudoQuietFLE_S:
19666 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19667 case RISCV::PseudoQuietFLE_S_INX:
19668 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19669 case RISCV::PseudoQuietFLT_S:
19670 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19671 case RISCV::PseudoQuietFLT_S_INX:
19672 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19673 case RISCV::PseudoQuietFLE_D:
19674 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19675 case RISCV::PseudoQuietFLE_D_INX:
19676 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19677 case RISCV::PseudoQuietFLE_D_IN32X:
19678 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19679 Subtarget);
19680 case RISCV::PseudoQuietFLT_D:
19681 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19682 case RISCV::PseudoQuietFLT_D_INX:
19683 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19684 case RISCV::PseudoQuietFLT_D_IN32X:
19685 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19686 Subtarget);
19687
19688 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19689 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19690 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19691 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19692 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19693 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19694 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19695 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19696 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19697 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19698 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19699 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19700 case RISCV::PseudoFROUND_H:
19701 case RISCV::PseudoFROUND_H_INX:
19702 case RISCV::PseudoFROUND_S:
19703 case RISCV::PseudoFROUND_S_INX:
19704 case RISCV::PseudoFROUND_D:
19705 case RISCV::PseudoFROUND_D_INX:
19706 case RISCV::PseudoFROUND_D_IN32X:
19707 return emitFROUND(MI, BB, Subtarget);
19708 case TargetOpcode::STATEPOINT:
19709 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19710 // while jal call instruction (where statepoint will be lowered at the end)
19711 // has implicit def. This def is early-clobber as it will be set at
19712 // the moment of the call and earlier than any use is read.
19713 // Add this implicit dead def here as a workaround.
19714 MI.addOperand(*MI.getMF(),
19716 RISCV::X1, /*isDef*/ true,
19717 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19718 /*isUndef*/ false, /*isEarlyClobber*/ true));
19719 [[fallthrough]];
19720 case TargetOpcode::STACKMAP:
19721 case TargetOpcode::PATCHPOINT:
19722 if (!Subtarget.is64Bit())
19723 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19724 "supported on 64-bit targets");
19725 return emitPatchPoint(MI, BB);
19726 }
19727}
19728
19730 SDNode *Node) const {
19731 // Add FRM dependency to any instructions with dynamic rounding mode.
19732 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19733 if (Idx < 0) {
19734 // Vector pseudos have FRM index indicated by TSFlags.
19735 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19736 if (Idx < 0)
19737 return;
19738 }
19739 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19740 return;
19741 // If the instruction already reads FRM, don't add another read.
19742 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19743 return;
19744 MI.addOperand(
19745 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19746}
19747
19748void RISCVTargetLowering::analyzeInputArgs(
19749 MachineFunction &MF, CCState &CCInfo,
19750 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19751 RISCVCCAssignFn Fn) const {
19752 unsigned NumArgs = Ins.size();
19754
19755 for (unsigned i = 0; i != NumArgs; ++i) {
19756 MVT ArgVT = Ins[i].VT;
19757 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19758
19759 Type *ArgTy = nullptr;
19760 if (IsRet)
19761 ArgTy = FType->getReturnType();
19762 else if (Ins[i].isOrigArg())
19763 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19764
19765 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19766 /*IsFixed=*/true, IsRet, ArgTy)) {
19767 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19768 << ArgVT << '\n');
19769 llvm_unreachable(nullptr);
19770 }
19771 }
19772}
19773
19774void RISCVTargetLowering::analyzeOutputArgs(
19775 MachineFunction &MF, CCState &CCInfo,
19776 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19777 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19778 unsigned NumArgs = Outs.size();
19779
19780 for (unsigned i = 0; i != NumArgs; i++) {
19781 MVT ArgVT = Outs[i].VT;
19782 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19783 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19784
19785 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19786 Outs[i].IsFixed, IsRet, OrigTy)) {
19787 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19788 << ArgVT << "\n");
19789 llvm_unreachable(nullptr);
19790 }
19791 }
19792}
19793
19794// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19795// values.
19797 const CCValAssign &VA, const SDLoc &DL,
19798 const RISCVSubtarget &Subtarget) {
19799 if (VA.needsCustom()) {
19800 if (VA.getLocVT().isInteger() &&
19801 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19802 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19803 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19804 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19806 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19807 llvm_unreachable("Unexpected Custom handling.");
19808 }
19809
19810 switch (VA.getLocInfo()) {
19811 default:
19812 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19813 case CCValAssign::Full:
19814 break;
19815 case CCValAssign::BCvt:
19816 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19817 break;
19818 }
19819 return Val;
19820}
19821
19822// The caller is responsible for loading the full value if the argument is
19823// passed with CCValAssign::Indirect.
19825 const CCValAssign &VA, const SDLoc &DL,
19826 const ISD::InputArg &In,
19827 const RISCVTargetLowering &TLI) {
19830 EVT LocVT = VA.getLocVT();
19831 SDValue Val;
19832 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19833 Register VReg = RegInfo.createVirtualRegister(RC);
19834 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19835 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19836
19837 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19838 if (In.isOrigArg()) {
19839 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19840 if (OrigArg->getType()->isIntegerTy()) {
19841 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19842 // An input zero extended from i31 can also be considered sign extended.
19843 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19844 (BitWidth < 32 && In.Flags.isZExt())) {
19846 RVFI->addSExt32Register(VReg);
19847 }
19848 }
19849 }
19850
19852 return Val;
19853
19854 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19855}
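// Illustrative sketch (not part of the LLVM source): why a value zero extended
// from fewer than 32 bits also counts as sign extended from 32 bits. If every
// bit from position 31 upwards is zero, bits 32..63 already match bit 31,
// which is the property the SExtWRemoval pass looks for. isSExt32 is a
// made-up helper for this example.
#include <cassert>
#include <cstdint>

static bool isSExt32(uint64_t V) {
  return (int64_t)(int32_t)V == (int64_t)V;
}

int main() {
  assert(isSExt32(0x7FFFFFFFULL));   // largest value zero extended from 31 bits
  assert(!isSExt32(0x80000000ULL));  // a true 32-bit zero extension is not sign extended
  return 0;
}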
19856
19858 const CCValAssign &VA, const SDLoc &DL,
19859 const RISCVSubtarget &Subtarget) {
19860 EVT LocVT = VA.getLocVT();
19861
19862 if (VA.needsCustom()) {
19863 if (LocVT.isInteger() &&
19864 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19865 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19866 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19867 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19868 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19869 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19870 llvm_unreachable("Unexpected Custom handling.");
19871 }
19872
19873 switch (VA.getLocInfo()) {
19874 default:
19875 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19876 case CCValAssign::Full:
19877 break;
19878 case CCValAssign::BCvt:
19879 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19880 break;
19881 }
19882 return Val;
19883}
19884
19885// The caller is responsible for loading the full value if the argument is
19886// passed with CCValAssign::Indirect.
19888 const CCValAssign &VA, const SDLoc &DL) {
19890 MachineFrameInfo &MFI = MF.getFrameInfo();
19891 EVT LocVT = VA.getLocVT();
19892 EVT ValVT = VA.getValVT();
19894 if (VA.getLocInfo() == CCValAssign::Indirect) {
19895 // When the value is a scalable vector, we save the pointer which points to
19896 // the scalable vector value in the stack. The ValVT will be the pointer
19897 // type, instead of the scalable vector type.
19898 ValVT = LocVT;
19899 }
19900 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19901 /*IsImmutable=*/true);
19902 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19903 SDValue Val;
19904
19906 switch (VA.getLocInfo()) {
19907 default:
19908 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19909 case CCValAssign::Full:
19911 case CCValAssign::BCvt:
19912 break;
19913 }
19914 Val = DAG.getExtLoad(
19915 ExtType, DL, LocVT, Chain, FIN,
19917 return Val;
19918}
19919
19921 const CCValAssign &VA,
19922 const CCValAssign &HiVA,
19923 const SDLoc &DL) {
19924 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19925 "Unexpected VA");
19927 MachineFrameInfo &MFI = MF.getFrameInfo();
19929
19930 assert(VA.isRegLoc() && "Expected register VA assignment");
19931
19932 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19933 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19934 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19935 SDValue Hi;
19936 if (HiVA.isMemLoc()) {
19937 // Second half of f64 is passed on the stack.
19938 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19939 /*IsImmutable=*/true);
19940 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19941 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19943 } else {
19944 // Second half of f64 is passed in another GPR.
19945 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19946 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19947 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19948 }
19949 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19950}
19951
19952// Transform physical registers into virtual registers.
19954 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19955 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19956 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19957
19959
19960 switch (CallConv) {
19961 default:
19962 report_fatal_error("Unsupported calling convention");
19963 case CallingConv::C:
19964 case CallingConv::Fast:
19966 case CallingConv::GRAAL:
19968 break;
19969 case CallingConv::GHC:
19970 if (Subtarget.hasStdExtE())
19971 report_fatal_error("GHC calling convention is not supported on RVE!");
19972 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19973 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19974 "(Zdinx/D) instruction set extensions");
19975 }
19976
19977 const Function &Func = MF.getFunction();
19978 if (Func.hasFnAttribute("interrupt")) {
19979 if (!Func.arg_empty())
19981 "Functions with the interrupt attribute cannot have arguments!");
19982
19983 StringRef Kind =
19984 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19985
19986 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19988 "Function interrupt attribute argument not supported!");
19989 }
19990
19991 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19992 MVT XLenVT = Subtarget.getXLenVT();
19993 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19994 // Used with varargs to accumulate store chains.
19995 std::vector<SDValue> OutChains;
19996
19997 // Assign locations to all of the incoming arguments.
19999 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20000
20001 if (CallConv == CallingConv::GHC)
20003 else
20004 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20006 : CC_RISCV);
20007
20008 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20009 CCValAssign &VA = ArgLocs[i];
20010 SDValue ArgValue;
20011 // Passing f64 on RV32D with a soft float ABI must be handled as a special
20012 // case.
20013 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20014 assert(VA.needsCustom());
20015 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20016 } else if (VA.isRegLoc())
20017 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20018 else
20019 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20020
20021 if (VA.getLocInfo() == CCValAssign::Indirect) {
20022 // If the original argument was split and passed by reference (e.g. i128
20023 // on RV32), we need to load all parts of it here (using the same
20024 // address). Vectors may be partly split to registers and partly to the
20025 // stack, in which case the base address is partly offset and subsequent
20026 // stores are relative to that.
20027 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20029 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20030 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20031 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20032 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20033 CCValAssign &PartVA = ArgLocs[i + 1];
20034 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20035 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20036 if (PartVA.getValVT().isScalableVector())
20037 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20038 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20039 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20041 ++i;
20042 ++InsIdx;
20043 }
20044 continue;
20045 }
20046 InVals.push_back(ArgValue);
20047 }
20048
20049 if (any_of(ArgLocs,
20050 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20051 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20052
20053 if (IsVarArg) {
20054 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20055 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20056 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20057 MachineFrameInfo &MFI = MF.getFrameInfo();
20058 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20060
20061 // Size of the vararg save area. For now, the varargs save area is either
20062 // zero or large enough to hold a0-a7.
20063 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20064 int FI;
20065
20066 // If all registers are allocated, then all varargs must be passed on the
20067 // stack and we don't need to save any argregs.
20068 if (VarArgsSaveSize == 0) {
20069 int VaArgOffset = CCInfo.getStackSize();
20070 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20071 } else {
20072 int VaArgOffset = -VarArgsSaveSize;
20073 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20074
20075 // If saving an odd number of registers, create an extra stack slot to
20076 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20077 // offsets to even-numbered registers remain 2*XLEN-aligned.
20078 if (Idx % 2) {
20080 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20081 VarArgsSaveSize += XLenInBytes;
20082 }
20083
20084 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20085
20086 // Copy the integer registers that may have been used for passing varargs
20087 // to the vararg save area.
20088 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20089 const Register Reg = RegInfo.createVirtualRegister(RC);
20090 RegInfo.addLiveIn(ArgRegs[I], Reg);
20091 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20092 SDValue Store = DAG.getStore(
20093 Chain, DL, ArgValue, FIN,
20094 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20095 OutChains.push_back(Store);
20096 FIN =
20097 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20098 }
20099 }
20100
20101 // Record the frame index of the first variable argument
20102 // which is a value necessary to VASTART.
20103 RVFI->setVarArgsFrameIndex(FI);
20104 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20105 }
20106
20107 // All stores are grouped in one node to allow the matching between
20108 // the size of Ins and InVals. This only happens for vararg functions.
20109 if (!OutChains.empty()) {
20110 OutChains.push_back(Chain);
20111 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20112 }
20113
20114 return Chain;
20115}
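// Illustrative sketch (not part of the LLVM source): the vararg save-area
// sizing used above, with made-up numbers for RV64 (XLEN = 64) and the eight
// a0-a7 argument registers of the non-E ABIs. If the fixed arguments consume
// Idx registers, the remaining ones are spilled just below the incoming stack
// arguments, plus one padding slot when Idx is odd so the frame stays
// 2*XLEN-aligned.
#include <cassert>

int main() {
  const int XLenInBytes = 8; // RV64
  const int NumArgRegs = 8;  // a0..a7 (fewer with the E ABIs)
  int Idx = 3;               // e.g. three named arguments in a0-a2
  int SaveSize = XLenInBytes * (NumArgRegs - Idx); // 40 bytes for a3-a7
  int Offset = -SaveSize;                          // save area below the incoming SP
  if (Idx % 2) {                                   // odd: add one padding slot
    Offset -= XLenInBytes;
    SaveSize += XLenInBytes;
  }
  assert(SaveSize == 48 && Offset == -48);
  return 0;
}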
20116
20117/// isEligibleForTailCallOptimization - Check whether the call is eligible
20118/// for tail call optimization.
20119/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20120bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20121 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20122 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20123
20124 auto CalleeCC = CLI.CallConv;
20125 auto &Outs = CLI.Outs;
20126 auto &Caller = MF.getFunction();
20127 auto CallerCC = Caller.getCallingConv();
20128
20129 // Exception-handling functions need a special set of instructions to
20130 // indicate a return to the hardware. Tail-calling another function would
20131 // probably break this.
20132 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20133 // should be expanded as new function attributes are introduced.
20134 if (Caller.hasFnAttribute("interrupt"))
20135 return false;
20136
20137 // Do not tail call opt if the stack is used to pass parameters.
20138 if (CCInfo.getStackSize() != 0)
20139 return false;
20140
20141 // Do not tail call opt if any parameters need to be passed indirectly.
20142 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20143 // passed indirectly. So the address of the value will be passed in a
20144 // register, or if not available, then the address is put on the stack. In
20145 // order to pass indirectly, space on the stack often needs to be allocated
20146 // in order to store the value. In this case the CCInfo.getStackSize()
20147 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
20148 // are passed CCValAssign::Indirect.
20149 for (auto &VA : ArgLocs)
20150 if (VA.getLocInfo() == CCValAssign::Indirect)
20151 return false;
20152
20153 // Do not tail call opt if either caller or callee uses struct return
20154 // semantics.
20155 auto IsCallerStructRet = Caller.hasStructRetAttr();
20156 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20157 if (IsCallerStructRet || IsCalleeStructRet)
20158 return false;
20159
20160 // The callee has to preserve all registers the caller needs to preserve.
20161 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20162 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20163 if (CalleeCC != CallerCC) {
20164 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20165 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20166 return false;
20167 }
20168
20169 // Byval parameters hand the function a pointer directly into the stack area
20170 // we want to reuse during a tail call. Working around this *is* possible
20171 // but less efficient and uglier in LowerCall.
20172 for (auto &Arg : Outs)
20173 if (Arg.Flags.isByVal())
20174 return false;
20175
20176 return true;
20177}
20178
20180 return DAG.getDataLayout().getPrefTypeAlign(
20181 VT.getTypeForEVT(*DAG.getContext()));
20182}
20183
20184// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20185// and output parameter nodes.
20187 SmallVectorImpl<SDValue> &InVals) const {
20188 SelectionDAG &DAG = CLI.DAG;
20189 SDLoc &DL = CLI.DL;
20191 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20193 SDValue Chain = CLI.Chain;
20194 SDValue Callee = CLI.Callee;
20195 bool &IsTailCall = CLI.IsTailCall;
20196 CallingConv::ID CallConv = CLI.CallConv;
20197 bool IsVarArg = CLI.IsVarArg;
20198 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20199 MVT XLenVT = Subtarget.getXLenVT();
20200
20202
20203 // Analyze the operands of the call, assigning locations to each operand.
20205 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20206
20207 if (CallConv == CallingConv::GHC) {
20208 if (Subtarget.hasStdExtE())
20209 report_fatal_error("GHC calling convention is not supported on RVE!");
20210 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20211 } else
20212 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20214 : CC_RISCV);
20215
20216 // Check if it's really possible to do a tail call.
20217 if (IsTailCall)
20218 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20219
20220 if (IsTailCall)
20221 ++NumTailCalls;
20222 else if (CLI.CB && CLI.CB->isMustTailCall())
20223 report_fatal_error("failed to perform tail call elimination on a call "
20224 "site marked musttail");
20225
20226 // Get a count of how many bytes are to be pushed on the stack.
20227 unsigned NumBytes = ArgCCInfo.getStackSize();
20228
20229 // Create local copies for byval args
20230 SmallVector<SDValue, 8> ByValArgs;
20231 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20232 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20233 if (!Flags.isByVal())
20234 continue;
20235
20236 SDValue Arg = OutVals[i];
20237 unsigned Size = Flags.getByValSize();
20238 Align Alignment = Flags.getNonZeroByValAlign();
20239
20240 int FI =
20241 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20242 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20243 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20244
20245 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20246 /*IsVolatile=*/false,
20247 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20249 ByValArgs.push_back(FIPtr);
20250 }
20251
20252 if (!IsTailCall)
20253 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20254
20255 // Copy argument values to their designated locations.
20257 SmallVector<SDValue, 8> MemOpChains;
20258 SDValue StackPtr;
20259 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20260 ++i, ++OutIdx) {
20261 CCValAssign &VA = ArgLocs[i];
20262 SDValue ArgValue = OutVals[OutIdx];
20263 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20264
20265 // Handle passing f64 on RV32D with a soft float ABI as a special case.
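// Illustrative example (register choices assume the ilp32 ABI on RV32D): the
// double argument is split by RISCVISD::SplitF64 into two i32 halves; the low
// half goes into the GPR chosen by VA (e.g. a0) and the high half goes either
// into the next GPR (e.g. a1) or, if registers ran out, onto the stack via
// HiVA below.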
20266 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20267 assert(VA.isRegLoc() && "Expected register VA assignment");
20268 assert(VA.needsCustom());
20269 SDValue SplitF64 = DAG.getNode(
20270 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20271 SDValue Lo = SplitF64.getValue(0);
20272 SDValue Hi = SplitF64.getValue(1);
20273
20274 Register RegLo = VA.getLocReg();
20275 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20276
20277 // Get the CCValAssign for the Hi part.
20278 CCValAssign &HiVA = ArgLocs[++i];
20279
20280 if (HiVA.isMemLoc()) {
20281 // Second half of f64 is passed on the stack.
20282 if (!StackPtr.getNode())
20283 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20285 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20286 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20287 // Emit the store.
20288 MemOpChains.push_back(DAG.getStore(
20289 Chain, DL, Hi, Address,
20291 } else {
20292 // Second half of f64 is passed in another GPR.
20293 Register RegHigh = HiVA.getLocReg();
20294 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20295 }
20296 continue;
20297 }
20298
20299 // Promote the value if needed.
20300 // For now, only handle fully promoted and indirect arguments.
20301 if (VA.getLocInfo() == CCValAssign::Indirect) {
20302 // Store the argument in a stack slot and pass its address.
20303 Align StackAlign =
20304 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20305 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20306 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20307 // If the original argument was split (e.g. i128), we need
20308 // to store the required parts of it here (and pass just one address).
20309 // Vectors may be partly split to registers and partly to the stack, in
20310 // which case the base address is partly offset and subsequent stores are
20311 // relative to that.
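// Illustrative sketch (RV32, following the i128 case mentioned above): the
// i128 arrives here as four i32 parts sharing one OrigArgIndex; the loop
// below gathers them, stores them into a single stack temporary, and only
// that temporary's address is passed onward.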
20312 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20313 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20314 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20315 // Calculate the total size to store. We don't have access to what we're
20316 // actually storing other than performing the loop and collecting the
20317 // info.
20319 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20320 SDValue PartValue = OutVals[OutIdx + 1];
20321 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20322 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20323 EVT PartVT = PartValue.getValueType();
20324 if (PartVT.isScalableVector())
20325 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20326 StoredSize += PartVT.getStoreSize();
20327 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20328 Parts.push_back(std::make_pair(PartValue, Offset));
20329 ++i;
20330 ++OutIdx;
20331 }
20332 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20333 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20334 MemOpChains.push_back(
20335 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20337 for (const auto &Part : Parts) {
20338 SDValue PartValue = Part.first;
20339 SDValue PartOffset = Part.second;
20341 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20342 MemOpChains.push_back(
20343 DAG.getStore(Chain, DL, PartValue, Address,
20345 }
20346 ArgValue = SpillSlot;
20347 } else {
20348 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20349 }
20350
20351 // Use local copy if it is a byval arg.
20352 if (Flags.isByVal())
20353 ArgValue = ByValArgs[j++];
20354
20355 if (VA.isRegLoc()) {
20356 // Queue up the argument copies and emit them at the end.
20357 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20358 } else {
20359 assert(VA.isMemLoc() && "Argument not register or memory");
20360 assert(!IsTailCall && "Tail call not allowed if stack is used "
20361 "for passing parameters");
20362
20363 // Work out the address of the stack slot.
20364 if (!StackPtr.getNode())
20365 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20367 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20369
20370 // Emit the store.
20371 MemOpChains.push_back(
20372 DAG.getStore(Chain, DL, ArgValue, Address,
20374 }
20375 }
20376
20377 // Join the stores, which are independent of one another.
20378 if (!MemOpChains.empty())
20379 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20380
20381 SDValue Glue;
20382
20383 // Build a sequence of copy-to-reg nodes, chained and glued together.
20384 for (auto &Reg : RegsToPass) {
20385 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20386 Glue = Chain.getValue(1);
20387 }
20388
20389 // Validate that none of the argument registers have been marked as
20390 // reserved; if any has been, report an error. Do the same for the return
20391 // address register if this is not a tail call.
20392 validateCCReservedRegs(RegsToPass, MF);
20393 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20395 MF.getFunction(),
20396 "Return address register required, but has been reserved."});
20397
20398 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20399 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20400 // split it and then direct call can be matched by PseudoCALL.
20401 bool CalleeIsLargeExternalSymbol = false;
20403 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20404 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20405 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20406 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20407 CalleeIsLargeExternalSymbol = true;
20408 }
20409 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20410 const GlobalValue *GV = S->getGlobal();
20411 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20412 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20413 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20414 }
20415
20416 // The first call operand is the chain and the second is the target address.
20418 Ops.push_back(Chain);
20419 Ops.push_back(Callee);
20420
20421 // Add argument registers to the end of the list so that they are
20422 // known live into the call.
20423 for (auto &Reg : RegsToPass)
20424 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20425
20426 // Add a register mask operand representing the call-preserved registers.
20427 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20428 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20429 assert(Mask && "Missing call preserved mask for calling convention");
20430 Ops.push_back(DAG.getRegisterMask(Mask));
20431
20432 // Glue the call to the argument copies, if any.
20433 if (Glue.getNode())
20434 Ops.push_back(Glue);
20435
20436 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20437 "Unexpected CFI type for a direct call");
20438
20439 // Emit the call.
20440 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20441
20442 // Use a software-guarded branch for large code model non-indirect calls.
20443 // A tail call to an external symbol will have a null CLI.CB, so we need
20444 // another way to determine the call site type.
20445 bool NeedSWGuarded = false;
20447 Subtarget.hasStdExtZicfilp() &&
20448 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20449 NeedSWGuarded = true;
20450
20451 if (IsTailCall) {
20453 unsigned CallOpc =
20454 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20455 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20456 if (CLI.CFIType)
20457 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20458 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20459 return Ret;
20460 }
20461
20462 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20463 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20464 if (CLI.CFIType)
20465 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20466 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20467 Glue = Chain.getValue(1);
20468
20469 // Mark the end of the call, which is glued to the call itself.
20470 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20471 Glue = Chain.getValue(1);
20472
20473 // Assign locations to each value returned by this call.
20475 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20476 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20477
20478 // Copy all of the result registers out of their specified physreg.
20479 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20480 auto &VA = RVLocs[i];
20481 // Copy the value out
20482 SDValue RetValue =
20483 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20484 // Glue the RetValue to the end of the call sequence
20485 Chain = RetValue.getValue(1);
20486 Glue = RetValue.getValue(2);
20487
20488 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20489 assert(VA.needsCustom());
20490 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20491 MVT::i32, Glue);
20492 Chain = RetValue2.getValue(1);
20493 Glue = RetValue2.getValue(2);
20494 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20495 RetValue2);
20496 } else
20497 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20498
20499 InVals.push_back(RetValue);
20500 }
20501
20502 return Chain;
20503}
20504
20505 bool RISCVTargetLowering::CanLowerReturn(
20506 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20507 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20509 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20510
20511 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20512 MVT VT = Outs[i].VT;
20513 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20514 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20515 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20516 return false;
20517 }
20518 return true;
20519}
20520
20521SDValue
20523 bool IsVarArg,
20525 const SmallVectorImpl<SDValue> &OutVals,
20526 const SDLoc &DL, SelectionDAG &DAG) const {
20528 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20529
20530 // Stores the assignment of the return value to a location.
20532
20533 // Info about the registers and stack slot.
20534 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20535 *DAG.getContext());
20536
20537 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20538 nullptr, CC_RISCV);
20539
20540 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20541 report_fatal_error("GHC functions return void only");
20542
20543 SDValue Glue;
20544 SmallVector<SDValue, 4> RetOps(1, Chain);
20545
20546 // Copy the result values into the output registers.
20547 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20548 SDValue Val = OutVals[OutIdx];
20549 CCValAssign &VA = RVLocs[i];
20550 assert(VA.isRegLoc() && "Can only return in registers!");
20551
20552 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20553 // Handle returning f64 on RV32D with a soft float ABI.
20554 assert(VA.isRegLoc() && "Expected return via registers");
20555 assert(VA.needsCustom());
20556 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20557 DAG.getVTList(MVT::i32, MVT::i32), Val);
20558 SDValue Lo = SplitF64.getValue(0);
20559 SDValue Hi = SplitF64.getValue(1);
20560 Register RegLo = VA.getLocReg();
20561 Register RegHi = RVLocs[++i].getLocReg();
20562
20563 if (STI.isRegisterReservedByUser(RegLo) ||
20564 STI.isRegisterReservedByUser(RegHi))
20566 MF.getFunction(),
20567 "Return value register required, but has been reserved."});
20568
20569 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20570 Glue = Chain.getValue(1);
20571 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20572 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20573 Glue = Chain.getValue(1);
20574 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20575 } else {
20576 // Handle a 'normal' return.
20577 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20578 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20579
20580 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20582 MF.getFunction(),
20583 "Return value register required, but has been reserved."});
20584
20585 // Guarantee that all emitted copies are stuck together.
20586 Glue = Chain.getValue(1);
20587 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20588 }
20589 }
20590
20591 RetOps[0] = Chain; // Update chain.
20592
20593 // Add the glue node if we have it.
20594 if (Glue.getNode()) {
20595 RetOps.push_back(Glue);
20596 }
20597
20598 if (any_of(RVLocs,
20599 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20600 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20601
20602 unsigned RetOpc = RISCVISD::RET_GLUE;
20603 // Interrupt service routines use different return instructions.
20604 const Function &Func = DAG.getMachineFunction().getFunction();
20605 if (Func.hasFnAttribute("interrupt")) {
20606 if (!Func.getReturnType()->isVoidTy())
20608 "Functions with the interrupt attribute must have void return type!");
20609
20611 StringRef Kind =
20612 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20613
20614 if (Kind == "supervisor")
20615 RetOpc = RISCVISD::SRET_GLUE;
20616 else
20617 RetOpc = RISCVISD::MRET_GLUE;
20618 }
20619
20620 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20621}
20622
20623void RISCVTargetLowering::validateCCReservedRegs(
20624 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20625 MachineFunction &MF) const {
20626 const Function &F = MF.getFunction();
20627 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20628
20629 if (llvm::any_of(Regs, [&STI](auto Reg) {
20630 return STI.isRegisterReservedByUser(Reg.first);
20631 }))
20632 F.getContext().diagnose(DiagnosticInfoUnsupported{
20633 F, "Argument register required, but has been reserved."});
20634}
20635
20636// Check if the result of the node is only used as a return value, as
20637// otherwise we can't perform a tail-call.
20639 if (N->getNumValues() != 1)
20640 return false;
20641 if (!N->hasNUsesOfValue(1, 0))
20642 return false;
20643
20644 SDNode *Copy = *N->user_begin();
20645
20646 if (Copy->getOpcode() == ISD::BITCAST) {
20647 return isUsedByReturnOnly(Copy, Chain);
20648 }
20649
20650 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20651 // with soft float ABIs.
20652 if (Copy->getOpcode() != ISD::CopyToReg) {
20653 return false;
20654 }
20655
20656 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20657 // isn't safe to perform a tail call.
20658 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20659 return false;
20660
20661 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20662 bool HasRet = false;
20663 for (SDNode *Node : Copy->users()) {
20664 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20665 return false;
20666 HasRet = true;
20667 }
20668 if (!HasRet)
20669 return false;
20670
20671 Chain = Copy->getOperand(0);
20672 return true;
20673}
20674
20676 return CI->isTailCall();
20677}
20678
20679const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20680#define NODE_NAME_CASE(NODE) \
20681 case RISCVISD::NODE: \
20682 return "RISCVISD::" #NODE;
20683 // clang-format off
20684 switch ((RISCVISD::NodeType)Opcode) {
20686 break;
20687 NODE_NAME_CASE(RET_GLUE)
20688 NODE_NAME_CASE(SRET_GLUE)
20689 NODE_NAME_CASE(MRET_GLUE)
20690 NODE_NAME_CASE(CALL)
20691 NODE_NAME_CASE(TAIL)
20692 NODE_NAME_CASE(SELECT_CC)
20693 NODE_NAME_CASE(BR_CC)
20694 NODE_NAME_CASE(BuildGPRPair)
20695 NODE_NAME_CASE(SplitGPRPair)
20696 NODE_NAME_CASE(BuildPairF64)
20697 NODE_NAME_CASE(SplitF64)
20698 NODE_NAME_CASE(ADD_LO)
20699 NODE_NAME_CASE(HI)
20700 NODE_NAME_CASE(LLA)
20701 NODE_NAME_CASE(ADD_TPREL)
20702 NODE_NAME_CASE(MULHSU)
20703 NODE_NAME_CASE(SHL_ADD)
20704 NODE_NAME_CASE(SLLW)
20705 NODE_NAME_CASE(SRAW)
20706 NODE_NAME_CASE(SRLW)
20707 NODE_NAME_CASE(DIVW)
20708 NODE_NAME_CASE(DIVUW)
20709 NODE_NAME_CASE(REMUW)
20710 NODE_NAME_CASE(ROLW)
20711 NODE_NAME_CASE(RORW)
20712 NODE_NAME_CASE(CLZW)
20713 NODE_NAME_CASE(CTZW)
20714 NODE_NAME_CASE(ABSW)
20715 NODE_NAME_CASE(FMV_H_X)
20716 NODE_NAME_CASE(FMV_X_ANYEXTH)
20717 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20718 NODE_NAME_CASE(FMV_W_X_RV64)
20719 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20720 NODE_NAME_CASE(FCVT_X)
20721 NODE_NAME_CASE(FCVT_XU)
20722 NODE_NAME_CASE(FCVT_W_RV64)
20723 NODE_NAME_CASE(FCVT_WU_RV64)
20724 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20725 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20726 NODE_NAME_CASE(FROUND)
20727 NODE_NAME_CASE(FCLASS)
20728 NODE_NAME_CASE(FSGNJX)
20729 NODE_NAME_CASE(FMAX)
20730 NODE_NAME_CASE(FMIN)
20731 NODE_NAME_CASE(FLI)
20732 NODE_NAME_CASE(READ_COUNTER_WIDE)
20733 NODE_NAME_CASE(BREV8)
20734 NODE_NAME_CASE(ORC_B)
20735 NODE_NAME_CASE(ZIP)
20736 NODE_NAME_CASE(UNZIP)
20737 NODE_NAME_CASE(CLMUL)
20738 NODE_NAME_CASE(CLMULH)
20739 NODE_NAME_CASE(CLMULR)
20740 NODE_NAME_CASE(MOPR)
20741 NODE_NAME_CASE(MOPRR)
20742 NODE_NAME_CASE(SHA256SIG0)
20743 NODE_NAME_CASE(SHA256SIG1)
20744 NODE_NAME_CASE(SHA256SUM0)
20745 NODE_NAME_CASE(SHA256SUM1)
20746 NODE_NAME_CASE(SM4KS)
20747 NODE_NAME_CASE(SM4ED)
20748 NODE_NAME_CASE(SM3P0)
20749 NODE_NAME_CASE(SM3P1)
20750 NODE_NAME_CASE(TH_LWD)
20751 NODE_NAME_CASE(TH_LWUD)
20752 NODE_NAME_CASE(TH_LDD)
20753 NODE_NAME_CASE(TH_SWD)
20754 NODE_NAME_CASE(TH_SDD)
20755 NODE_NAME_CASE(VMV_V_V_VL)
20756 NODE_NAME_CASE(VMV_V_X_VL)
20757 NODE_NAME_CASE(VFMV_V_F_VL)
20758 NODE_NAME_CASE(VMV_X_S)
20759 NODE_NAME_CASE(VMV_S_X_VL)
20760 NODE_NAME_CASE(VFMV_S_F_VL)
20761 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20762 NODE_NAME_CASE(READ_VLENB)
20763 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20764 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20765 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20766 NODE_NAME_CASE(VSLIDEUP_VL)
20767 NODE_NAME_CASE(VSLIDE1UP_VL)
20768 NODE_NAME_CASE(VSLIDEDOWN_VL)
20769 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20770 NODE_NAME_CASE(VFSLIDE1UP_VL)
20771 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20772 NODE_NAME_CASE(VID_VL)
20773 NODE_NAME_CASE(VFNCVT_ROD_VL)
20774 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20775 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20776 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20777 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20778 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20779 NODE_NAME_CASE(VECREDUCE_AND_VL)
20780 NODE_NAME_CASE(VECREDUCE_OR_VL)
20781 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20782 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20783 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20784 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20785 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20786 NODE_NAME_CASE(ADD_VL)
20787 NODE_NAME_CASE(AND_VL)
20788 NODE_NAME_CASE(MUL_VL)
20789 NODE_NAME_CASE(OR_VL)
20790 NODE_NAME_CASE(SDIV_VL)
20791 NODE_NAME_CASE(SHL_VL)
20792 NODE_NAME_CASE(SREM_VL)
20793 NODE_NAME_CASE(SRA_VL)
20794 NODE_NAME_CASE(SRL_VL)
20795 NODE_NAME_CASE(ROTL_VL)
20796 NODE_NAME_CASE(ROTR_VL)
20797 NODE_NAME_CASE(SUB_VL)
20798 NODE_NAME_CASE(UDIV_VL)
20799 NODE_NAME_CASE(UREM_VL)
20800 NODE_NAME_CASE(XOR_VL)
20801 NODE_NAME_CASE(AVGFLOORS_VL)
20802 NODE_NAME_CASE(AVGFLOORU_VL)
20803 NODE_NAME_CASE(AVGCEILS_VL)
20804 NODE_NAME_CASE(AVGCEILU_VL)
20805 NODE_NAME_CASE(SADDSAT_VL)
20806 NODE_NAME_CASE(UADDSAT_VL)
20807 NODE_NAME_CASE(SSUBSAT_VL)
20808 NODE_NAME_CASE(USUBSAT_VL)
20809 NODE_NAME_CASE(FADD_VL)
20810 NODE_NAME_CASE(FSUB_VL)
20811 NODE_NAME_CASE(FMUL_VL)
20812 NODE_NAME_CASE(FDIV_VL)
20813 NODE_NAME_CASE(FNEG_VL)
20814 NODE_NAME_CASE(FABS_VL)
20815 NODE_NAME_CASE(FSQRT_VL)
20816 NODE_NAME_CASE(FCLASS_VL)
20817 NODE_NAME_CASE(VFMADD_VL)
20818 NODE_NAME_CASE(VFNMADD_VL)
20819 NODE_NAME_CASE(VFMSUB_VL)
20820 NODE_NAME_CASE(VFNMSUB_VL)
20821 NODE_NAME_CASE(VFWMADD_VL)
20822 NODE_NAME_CASE(VFWNMADD_VL)
20823 NODE_NAME_CASE(VFWMSUB_VL)
20824 NODE_NAME_CASE(VFWNMSUB_VL)
20825 NODE_NAME_CASE(FCOPYSIGN_VL)
20826 NODE_NAME_CASE(SMIN_VL)
20827 NODE_NAME_CASE(SMAX_VL)
20828 NODE_NAME_CASE(UMIN_VL)
20829 NODE_NAME_CASE(UMAX_VL)
20830 NODE_NAME_CASE(BITREVERSE_VL)
20831 NODE_NAME_CASE(BSWAP_VL)
20832 NODE_NAME_CASE(CTLZ_VL)
20833 NODE_NAME_CASE(CTTZ_VL)
20834 NODE_NAME_CASE(CTPOP_VL)
20835 NODE_NAME_CASE(VFMIN_VL)
20836 NODE_NAME_CASE(VFMAX_VL)
20837 NODE_NAME_CASE(MULHS_VL)
20838 NODE_NAME_CASE(MULHU_VL)
20839 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20840 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20841 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20842 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20843 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20844 NODE_NAME_CASE(SINT_TO_FP_VL)
20845 NODE_NAME_CASE(UINT_TO_FP_VL)
20846 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20847 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20848 NODE_NAME_CASE(FP_EXTEND_VL)
20849 NODE_NAME_CASE(FP_ROUND_VL)
20850 NODE_NAME_CASE(STRICT_FADD_VL)
20851 NODE_NAME_CASE(STRICT_FSUB_VL)
20852 NODE_NAME_CASE(STRICT_FMUL_VL)
20853 NODE_NAME_CASE(STRICT_FDIV_VL)
20854 NODE_NAME_CASE(STRICT_FSQRT_VL)
20855 NODE_NAME_CASE(STRICT_VFMADD_VL)
20856 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20857 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20858 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20859 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20860 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20861 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20862 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20863 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20864 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20865 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20866 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20867 NODE_NAME_CASE(STRICT_FSETCC_VL)
20868 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20869 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20870 NODE_NAME_CASE(VWMUL_VL)
20871 NODE_NAME_CASE(VWMULU_VL)
20872 NODE_NAME_CASE(VWMULSU_VL)
20873 NODE_NAME_CASE(VWADD_VL)
20874 NODE_NAME_CASE(VWADDU_VL)
20875 NODE_NAME_CASE(VWSUB_VL)
20876 NODE_NAME_CASE(VWSUBU_VL)
20877 NODE_NAME_CASE(VWADD_W_VL)
20878 NODE_NAME_CASE(VWADDU_W_VL)
20879 NODE_NAME_CASE(VWSUB_W_VL)
20880 NODE_NAME_CASE(VWSUBU_W_VL)
20881 NODE_NAME_CASE(VWSLL_VL)
20882 NODE_NAME_CASE(VFWMUL_VL)
20883 NODE_NAME_CASE(VFWADD_VL)
20884 NODE_NAME_CASE(VFWSUB_VL)
20885 NODE_NAME_CASE(VFWADD_W_VL)
20886 NODE_NAME_CASE(VFWSUB_W_VL)
20887 NODE_NAME_CASE(VWMACC_VL)
20888 NODE_NAME_CASE(VWMACCU_VL)
20889 NODE_NAME_CASE(VWMACCSU_VL)
20890 NODE_NAME_CASE(SETCC_VL)
20891 NODE_NAME_CASE(VMERGE_VL)
20892 NODE_NAME_CASE(VMAND_VL)
20893 NODE_NAME_CASE(VMOR_VL)
20894 NODE_NAME_CASE(VMXOR_VL)
20895 NODE_NAME_CASE(VMCLR_VL)
20896 NODE_NAME_CASE(VMSET_VL)
20897 NODE_NAME_CASE(VRGATHER_VX_VL)
20898 NODE_NAME_CASE(VRGATHER_VV_VL)
20899 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20900 NODE_NAME_CASE(VSEXT_VL)
20901 NODE_NAME_CASE(VZEXT_VL)
20902 NODE_NAME_CASE(VCPOP_VL)
20903 NODE_NAME_CASE(VFIRST_VL)
20904 NODE_NAME_CASE(READ_CSR)
20905 NODE_NAME_CASE(WRITE_CSR)
20906 NODE_NAME_CASE(SWAP_CSR)
20907 NODE_NAME_CASE(CZERO_EQZ)
20908 NODE_NAME_CASE(CZERO_NEZ)
20909 NODE_NAME_CASE(SW_GUARDED_BRIND)
20910 NODE_NAME_CASE(SW_GUARDED_CALL)
20911 NODE_NAME_CASE(SW_GUARDED_TAIL)
20912 NODE_NAME_CASE(TUPLE_INSERT)
20913 NODE_NAME_CASE(TUPLE_EXTRACT)
20914 NODE_NAME_CASE(SF_VC_XV_SE)
20915 NODE_NAME_CASE(SF_VC_IV_SE)
20916 NODE_NAME_CASE(SF_VC_VV_SE)
20917 NODE_NAME_CASE(SF_VC_FV_SE)
20918 NODE_NAME_CASE(SF_VC_XVV_SE)
20919 NODE_NAME_CASE(SF_VC_IVV_SE)
20920 NODE_NAME_CASE(SF_VC_VVV_SE)
20921 NODE_NAME_CASE(SF_VC_FVV_SE)
20922 NODE_NAME_CASE(SF_VC_XVW_SE)
20923 NODE_NAME_CASE(SF_VC_IVW_SE)
20924 NODE_NAME_CASE(SF_VC_VVW_SE)
20925 NODE_NAME_CASE(SF_VC_FVW_SE)
20926 NODE_NAME_CASE(SF_VC_V_X_SE)
20927 NODE_NAME_CASE(SF_VC_V_I_SE)
20928 NODE_NAME_CASE(SF_VC_V_XV_SE)
20929 NODE_NAME_CASE(SF_VC_V_IV_SE)
20930 NODE_NAME_CASE(SF_VC_V_VV_SE)
20931 NODE_NAME_CASE(SF_VC_V_FV_SE)
20932 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20933 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20934 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20935 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20936 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20937 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20938 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20939 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20940 }
20941 // clang-format on
20942 return nullptr;
20943#undef NODE_NAME_CASE
20944}
20945
20946/// getConstraintType - Given a constraint letter, return the type of
20947/// constraint it is for this target.
20950 if (Constraint.size() == 1) {
20951 switch (Constraint[0]) {
20952 default:
20953 break;
20954 case 'f':
20955 case 'R':
20956 return C_RegisterClass;
20957 case 'I':
20958 case 'J':
20959 case 'K':
20960 return C_Immediate;
20961 case 'A':
20962 return C_Memory;
20963 case 's':
20964 case 'S': // A symbolic address
20965 return C_Other;
20966 }
20967 } else {
20968 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
20969 return C_RegisterClass;
20970 if (Constraint == "cr" || Constraint == "cf")
20971 return C_RegisterClass;
20972 }
20973 return TargetLowering::getConstraintType(Constraint);
20974}
20975
20976std::pair<unsigned, const TargetRegisterClass *>
20978 StringRef Constraint,
20979 MVT VT) const {
20980 // First, see if this is a constraint that directly corresponds to a RISC-V
20981 // register class.
20982 if (Constraint.size() == 1) {
20983 switch (Constraint[0]) {
20984 case 'r':
20985 // TODO: Support fixed vectors up to XLen for P extension?
20986 if (VT.isVector())
20987 break;
20988 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20989 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20990 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20991 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20992 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20993 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20994 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20995 case 'f':
20996 if (VT == MVT::f16) {
20997 if (Subtarget.hasStdExtZfhmin())
20998 return std::make_pair(0U, &RISCV::FPR16RegClass);
20999 if (Subtarget.hasStdExtZhinxmin())
21000 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21001 } else if (VT == MVT::f32) {
21002 if (Subtarget.hasStdExtF())
21003 return std::make_pair(0U, &RISCV::FPR32RegClass);
21004 if (Subtarget.hasStdExtZfinx())
21005 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21006 } else if (VT == MVT::f64) {
21007 if (Subtarget.hasStdExtD())
21008 return std::make_pair(0U, &RISCV::FPR64RegClass);
21009 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21010 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21011 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21012 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21013 }
21014 break;
21015 case 'R':
21016 if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
21017 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21018 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21019 default:
21020 break;
21021 }
21022 } else if (Constraint == "vr") {
21023 for (const auto *RC :
21024 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
21025 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
21026 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
21027 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
21028 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
21029 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
21030 &RISCV::VRN2M4RegClass}) {
21031 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21032 return std::make_pair(0U, RC);
21033 }
21034 } else if (Constraint == "vd") {
21035 for (const auto *RC :
21036 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
21037 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
21038 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
21039 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
21040 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
21041 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
21042 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
21043 &RISCV::VRN2M4NoV0RegClass}) {
21044 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21045 return std::make_pair(0U, RC);
21046 }
21047 } else if (Constraint == "vm") {
21048 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
21049 return std::make_pair(0U, &RISCV::VMV0RegClass);
21050 } else if (Constraint == "cr") {
21051 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21052 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21053 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21054 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21055 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21056 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21057 if (!VT.isVector())
21058 return std::make_pair(0U, &RISCV::GPRCRegClass);
21059 } else if (Constraint == "cf") {
21060 if (VT == MVT::f16) {
21061 if (Subtarget.hasStdExtZfhmin())
21062 return std::make_pair(0U, &RISCV::FPR16CRegClass);
21063 if (Subtarget.hasStdExtZhinxmin())
21064 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21065 } else if (VT == MVT::f32) {
21066 if (Subtarget.hasStdExtF())
21067 return std::make_pair(0U, &RISCV::FPR32CRegClass);
21068 if (Subtarget.hasStdExtZfinx())
21069 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21070 } else if (VT == MVT::f64) {
21071 if (Subtarget.hasStdExtD())
21072 return std::make_pair(0U, &RISCV::FPR64CRegClass);
21073 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21074 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21075 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21076 return std::make_pair(0U, &RISCV::GPRCRegClass);
21077 }
21078 }
21079
21080 // Clang will correctly decode the usage of register name aliases into their
21081 // official names. However, other frontends like `rustc` do not. This allows
21082 // users of these frontends to use the ABI names for registers in LLVM-style
21083 // register constraints.
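// For example (IR-level constraint string, illustrative only): the constraint
// "{a0}" in LLVM-style inline asm resolves to RISCV::X10 in the GPR register
// class via the table below, even though the TableGen record is named X10.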
21084 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
21085 .Case("{zero}", RISCV::X0)
21086 .Case("{ra}", RISCV::X1)
21087 .Case("{sp}", RISCV::X2)
21088 .Case("{gp}", RISCV::X3)
21089 .Case("{tp}", RISCV::X4)
21090 .Case("{t0}", RISCV::X5)
21091 .Case("{t1}", RISCV::X6)
21092 .Case("{t2}", RISCV::X7)
21093 .Cases("{s0}", "{fp}", RISCV::X8)
21094 .Case("{s1}", RISCV::X9)
21095 .Case("{a0}", RISCV::X10)
21096 .Case("{a1}", RISCV::X11)
21097 .Case("{a2}", RISCV::X12)
21098 .Case("{a3}", RISCV::X13)
21099 .Case("{a4}", RISCV::X14)
21100 .Case("{a5}", RISCV::X15)
21101 .Case("{a6}", RISCV::X16)
21102 .Case("{a7}", RISCV::X17)
21103 .Case("{s2}", RISCV::X18)
21104 .Case("{s3}", RISCV::X19)
21105 .Case("{s4}", RISCV::X20)
21106 .Case("{s5}", RISCV::X21)
21107 .Case("{s6}", RISCV::X22)
21108 .Case("{s7}", RISCV::X23)
21109 .Case("{s8}", RISCV::X24)
21110 .Case("{s9}", RISCV::X25)
21111 .Case("{s10}", RISCV::X26)
21112 .Case("{s11}", RISCV::X27)
21113 .Case("{t3}", RISCV::X28)
21114 .Case("{t4}", RISCV::X29)
21115 .Case("{t5}", RISCV::X30)
21116 .Case("{t6}", RISCV::X31)
21117 .Default(RISCV::NoRegister);
21118 if (XRegFromAlias != RISCV::NoRegister)
21119 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21120
21121 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21122 // TableGen record rather than the AsmName to choose registers for InlineAsm
21123 // constraints, and we want to match those names to the widest floating point
21124 // register type available, manually select floating point registers here.
21125 //
21126 // The second case is the ABI name of the register, so that frontends can also
21127 // use the ABI names in register constraint lists.
21128 if (Subtarget.hasStdExtF()) {
21129 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21130 .Cases("{f0}", "{ft0}", RISCV::F0_F)
21131 .Cases("{f1}", "{ft1}", RISCV::F1_F)
21132 .Cases("{f2}", "{ft2}", RISCV::F2_F)
21133 .Cases("{f3}", "{ft3}", RISCV::F3_F)
21134 .Cases("{f4}", "{ft4}", RISCV::F4_F)
21135 .Cases("{f5}", "{ft5}", RISCV::F5_F)
21136 .Cases("{f6}", "{ft6}", RISCV::F6_F)
21137 .Cases("{f7}", "{ft7}", RISCV::F7_F)
21138 .Cases("{f8}", "{fs0}", RISCV::F8_F)
21139 .Cases("{f9}", "{fs1}", RISCV::F9_F)
21140 .Cases("{f10}", "{fa0}", RISCV::F10_F)
21141 .Cases("{f11}", "{fa1}", RISCV::F11_F)
21142 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21143 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21144 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21145 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21146 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21147 .Cases("{f17}", "{fa7}", RISCV::F17_F)
21148 .Cases("{f18}", "{fs2}", RISCV::F18_F)
21149 .Cases("{f19}", "{fs3}", RISCV::F19_F)
21150 .Cases("{f20}", "{fs4}", RISCV::F20_F)
21151 .Cases("{f21}", "{fs5}", RISCV::F21_F)
21152 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21153 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21154 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21155 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21156 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21157 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21158 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21159 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21160 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21161 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21162 .Default(RISCV::NoRegister);
21163 if (FReg != RISCV::NoRegister) {
21164 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21165 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21166 unsigned RegNo = FReg - RISCV::F0_F;
21167 unsigned DReg = RISCV::F0_D + RegNo;
21168 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21169 }
21170 if (VT == MVT::f32 || VT == MVT::Other)
21171 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21172 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21173 unsigned RegNo = FReg - RISCV::F0_F;
21174 unsigned HReg = RISCV::F0_H + RegNo;
21175 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21176 }
21177 }
21178 }
21179
21180 if (Subtarget.hasVInstructions()) {
21181 Register VReg = StringSwitch<Register>(Constraint.lower())
21182 .Case("{v0}", RISCV::V0)
21183 .Case("{v1}", RISCV::V1)
21184 .Case("{v2}", RISCV::V2)
21185 .Case("{v3}", RISCV::V3)
21186 .Case("{v4}", RISCV::V4)
21187 .Case("{v5}", RISCV::V5)
21188 .Case("{v6}", RISCV::V6)
21189 .Case("{v7}", RISCV::V7)
21190 .Case("{v8}", RISCV::V8)
21191 .Case("{v9}", RISCV::V9)
21192 .Case("{v10}", RISCV::V10)
21193 .Case("{v11}", RISCV::V11)
21194 .Case("{v12}", RISCV::V12)
21195 .Case("{v13}", RISCV::V13)
21196 .Case("{v14}", RISCV::V14)
21197 .Case("{v15}", RISCV::V15)
21198 .Case("{v16}", RISCV::V16)
21199 .Case("{v17}", RISCV::V17)
21200 .Case("{v18}", RISCV::V18)
21201 .Case("{v19}", RISCV::V19)
21202 .Case("{v20}", RISCV::V20)
21203 .Case("{v21}", RISCV::V21)
21204 .Case("{v22}", RISCV::V22)
21205 .Case("{v23}", RISCV::V23)
21206 .Case("{v24}", RISCV::V24)
21207 .Case("{v25}", RISCV::V25)
21208 .Case("{v26}", RISCV::V26)
21209 .Case("{v27}", RISCV::V27)
21210 .Case("{v28}", RISCV::V28)
21211 .Case("{v29}", RISCV::V29)
21212 .Case("{v30}", RISCV::V30)
21213 .Case("{v31}", RISCV::V31)
21214 .Default(RISCV::NoRegister);
21215 if (VReg != RISCV::NoRegister) {
21216 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21217 return std::make_pair(VReg, &RISCV::VMRegClass);
21218 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21219 return std::make_pair(VReg, &RISCV::VRRegClass);
21220 for (const auto *RC :
21221 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21222 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21223 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21224 return std::make_pair(VReg, RC);
21225 }
21226 }
21227 }
21228 }
21229
21230 std::pair<Register, const TargetRegisterClass *> Res =
21232
21233 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21234 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21235 // Subtarget into account.
21236 if (Res.second == &RISCV::GPRF16RegClass ||
21237 Res.second == &RISCV::GPRF32RegClass ||
21238 Res.second == &RISCV::GPRPairRegClass)
21239 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21240
21241 return Res;
21242}
21243
21246 // Currently only support length 1 constraints.
21247 if (ConstraintCode.size() == 1) {
21248 switch (ConstraintCode[0]) {
21249 case 'A':
21251 default:
21252 break;
21253 }
21254 }
21255
21256 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21257}
21258
21260 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21261 SelectionDAG &DAG) const {
21262 // Currently only support length 1 constraints.
21263 if (Constraint.size() == 1) {
21264 switch (Constraint[0]) {
21265 case 'I':
21266 // Validate & create a 12-bit signed immediate operand.
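// Illustrative use (C level, hypothetical variables dst/src):
//   __asm__ volatile("addi %0, %1, %2" : "=r"(dst) : "r"(src), "I"(2047));
// 2047 fits in a signed 12-bit immediate and is accepted; 2048 would not be.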
21267 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21268 uint64_t CVal = C->getSExtValue();
21269 if (isInt<12>(CVal))
21270 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21271 Subtarget.getXLenVT()));
21272 }
21273 return;
21274 case 'J':
21275 // Validate & create an integer zero operand.
21276 if (isNullConstant(Op))
21277 Ops.push_back(
21278 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21279 return;
21280 case 'K':
21281 // Validate & create a 5-bit unsigned immediate operand.
21282 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21283 uint64_t CVal = C->getZExtValue();
21284 if (isUInt<5>(CVal))
21285 Ops.push_back(
21286 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21287 }
21288 return;
21289 case 'S':
21291 return;
21292 default:
21293 break;
21294 }
21295 }
21296 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21297}
21298
21300 Instruction *Inst,
21301 AtomicOrdering Ord) const {
21302 if (Subtarget.hasStdExtZtso()) {
21303 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21304 return Builder.CreateFence(Ord);
21305 return nullptr;
21306 }
21307
21308 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21309 return Builder.CreateFence(Ord);
21310 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21311 return Builder.CreateFence(AtomicOrdering::Release);
21312 return nullptr;
21313}
21314
21316 Instruction *Inst,
21317 AtomicOrdering Ord) const {
21318 if (Subtarget.hasStdExtZtso()) {
21319 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21320 return Builder.CreateFence(Ord);
21321 return nullptr;
21322 }
21323
21324 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21325 return Builder.CreateFence(AtomicOrdering::Acquire);
21326 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21329 return nullptr;
21330}
21331
21334 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21335 // point operations can't be used in an lr/sc sequence without breaking the
21336 // forward-progress guarantee.
21337 if (AI->isFloatingPointOperation() ||
21343
21344 // Don't expand forced atomics; we want to have __sync libcalls instead.
21345 if (Subtarget.hasForcedAtomics())
21347
21348 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21349 if (AI->getOperation() == AtomicRMWInst::Nand) {
21350 if (Subtarget.hasStdExtZacas() &&
21351 (Size >= 32 || Subtarget.hasStdExtZabha()))
21353 if (Size < 32)
21355 }
21356
21357 if (Size < 32 && !Subtarget.hasStdExtZabha())
21359
21361}
21362
21363static Intrinsic::ID
21365 if (XLen == 32) {
21366 switch (BinOp) {
21367 default:
21368 llvm_unreachable("Unexpected AtomicRMW BinOp");
21370 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21371 case AtomicRMWInst::Add:
21372 return Intrinsic::riscv_masked_atomicrmw_add_i32;
21373 case AtomicRMWInst::Sub:
21374 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21376 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21377 case AtomicRMWInst::Max:
21378 return Intrinsic::riscv_masked_atomicrmw_max_i32;
21379 case AtomicRMWInst::Min:
21380 return Intrinsic::riscv_masked_atomicrmw_min_i32;
21382 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21384 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21385 }
21386 }
21387
21388 if (XLen == 64) {
21389 switch (BinOp) {
21390 default:
21391 llvm_unreachable("Unexpected AtomicRMW BinOp");
21393 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21394 case AtomicRMWInst::Add:
21395 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21396 case AtomicRMWInst::Sub:
21397 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21399 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21400 case AtomicRMWInst::Max:
21401 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21402 case AtomicRMWInst::Min:
21403 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21405 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21407 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21408 }
21409 }
21410
21411 llvm_unreachable("Unexpected XLen\n");
21412}
21413
21415 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21416 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21417 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21418 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21419 // mask, as this produces better code than the LR/SC loop emitted by
21420 // int_riscv_masked_atomicrmw_xchg.
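// Illustrative IR (hypothetical pointer %p): "atomicrmw xchg ptr %p, i8 0
// monotonic" becomes an AtomicRMWInst::And with the inverted byte mask, and
// "atomicrmw xchg ptr %p, i8 -1 monotonic" becomes an AtomicRMWInst::Or with
// the mask, each clearing or setting just the addressed byte in the aligned
// word.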
21421 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21422 isa<ConstantInt>(AI->getValOperand())) {
21423 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21424 if (CVal->isZero())
21425 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21426 Builder.CreateNot(Mask, "Inv_Mask"),
21427 AI->getAlign(), Ord);
21428 if (CVal->isMinusOne())
21429 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21430 AI->getAlign(), Ord);
21431 }
21432
21433 unsigned XLen = Subtarget.getXLen();
21434 Value *Ordering =
21435 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21436 Type *Tys[] = {AlignedAddr->getType()};
21438 AI->getModule(),
21440
21441 if (XLen == 64) {
21442 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21443 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21444 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21445 }
21446
21447 Value *Result;
21448
21449 // Must pass the shift amount needed to sign extend the loaded value prior
21450 // to performing a signed comparison for min/max. ShiftAmt is the number of
21451 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21452 // is the number of bits to left+right shift the value in order to
21453 // sign-extend.
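// Worked example (illustrative numbers): for an i8 atomicrmw min on RV32 with
// the byte at bit offset 16 (ShiftAmt = 16, ValWidth = 8), we pass
//   SextShamt = XLen - ValWidth - ShiftAmt = 32 - 8 - 16 = 8,
// so shifting the loaded word left and then arithmetically right by 8
// sign-extends the 8-bit field in place for the signed comparison.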
21454 if (AI->getOperation() == AtomicRMWInst::Min ||
21456 const DataLayout &DL = AI->getDataLayout();
21457 unsigned ValWidth =
21458 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21459 Value *SextShamt =
21460 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21461 Result = Builder.CreateCall(LrwOpScwLoop,
21462 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21463 } else {
21464 Result =
21465 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21466 }
21467
21468 if (XLen == 64)
21469 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21470 return Result;
21471}
21472
21475 AtomicCmpXchgInst *CI) const {
21476 // Don't expand forced atomics; we want to have __sync libcalls instead.
21477 if (Subtarget.hasForcedAtomics())
21479
21481 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21482 (Size == 8 || Size == 16))
21485}
21486
21488 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21489 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21490 unsigned XLen = Subtarget.getXLen();
21491 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21492 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21493 if (XLen == 64) {
21494 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21495 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21496 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21497 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21498 }
21499 Type *Tys[] = {AlignedAddr->getType()};
21500 Value *Result = Builder.CreateIntrinsic(
21501 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21502 if (XLen == 64)
21503 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21504 return Result;
21505}
21506
21508 EVT DataVT) const {
21509 // We have indexed loads for all supported EEW types. Indices are always
21510 // zero extended.
21511 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21512 isTypeLegal(Extend.getValueType()) &&
21513 isTypeLegal(Extend.getOperand(0).getValueType()) &&
21514 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21515}
21516
21518 EVT VT) const {
21519 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21520 return false;
21521
21522 switch (FPVT.getSimpleVT().SimpleTy) {
21523 case MVT::f16:
21524 return Subtarget.hasStdExtZfhmin();
21525 case MVT::f32:
21526 return Subtarget.hasStdExtF();
21527 case MVT::f64:
21528 return Subtarget.hasStdExtD();
21529 default:
21530 return false;
21531 }
21532}
21533
21535 // If we are using the small code model, we can reduce the size of a jump
21536 // table entry to 4 bytes.
21537 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21540 }
21542}
21543
21545 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21546 unsigned uid, MCContext &Ctx) const {
21547 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21549 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21550}
21551
21553 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21554 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21555 // a power of two as well.
21556 // FIXME: This doesn't work for zve32, but that's already broken
21557 // elsewhere for the same reason.
21558 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21559 static_assert(RISCV::RVVBitsPerBlock == 64,
21560 "RVVBitsPerBlock changed, audit needed");
21561 return true;
21562}
21563
21565 SDValue &Offset,
21567 SelectionDAG &DAG) const {
21568 // Target does not support indexed loads.
21569 if (!Subtarget.hasVendorXTHeadMemIdx())
21570 return false;
21571
21572 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21573 return false;
21574
21575 Base = Op->getOperand(0);
21576 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21577 int64_t RHSC = RHS->getSExtValue();
21578 if (Op->getOpcode() == ISD::SUB)
21579 RHSC = -(uint64_t)RHSC;
21580
21581 // The constants that can be encoded in the THeadMemIdx instructions
21582 // are of the form (sign_extend(imm5) << imm2).
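// Worked example (illustrative constants): 48 = 12 << 2 is encodable because
// isInt<5>(48 >> 2) holds and 48 % 4 == 0, whereas 33 is not, since no shift
// amount in 0..3 gives both a 5-bit signed value and a zero remainder.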
21583 bool isLegalIndexedOffset = false;
21584 for (unsigned i = 0; i < 4; i++)
21585 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21586 isLegalIndexedOffset = true;
21587 break;
21588 }
21589
21590 if (!isLegalIndexedOffset)
21591 return false;
21592
21593 Offset = Op->getOperand(1);
21594 return true;
21595 }
21596
21597 return false;
21598}
21599
21601 SDValue &Offset,
21603 SelectionDAG &DAG) const {
21604 EVT VT;
21605 SDValue Ptr;
21606 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21607 VT = LD->getMemoryVT();
21608 Ptr = LD->getBasePtr();
21609 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21610 VT = ST->getMemoryVT();
21611 Ptr = ST->getBasePtr();
21612 } else
21613 return false;
21614
21615 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21616 return false;
21617
21618 AM = ISD::PRE_INC;
21619 return true;
21620}
21621
21623 SDValue &Base,
21624 SDValue &Offset,
21626 SelectionDAG &DAG) const {
21627 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21628 if (Op->getOpcode() != ISD::ADD)
21629 return false;
21630
21631 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21632 Base = LS->getBasePtr();
21633 else
21634 return false;
21635
21636 if (Base == Op->getOperand(0))
21637 Offset = Op->getOperand(1);
21638 else if (Base == Op->getOperand(1))
21639 Offset = Op->getOperand(0);
21640 else
21641 return false;
21642
21643 AM = ISD::POST_INC;
21644 return true;
21645 }
21646
21647 EVT VT;
21648 SDValue Ptr;
21649 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21650 VT = LD->getMemoryVT();
21651 Ptr = LD->getBasePtr();
21652 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21653 VT = ST->getMemoryVT();
21654 Ptr = ST->getBasePtr();
21655 } else
21656 return false;
21657
21658 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21659 return false;
21660 // Post-indexing updates the base, so it's not a valid transform
21661 // if that's not the same as the load's pointer.
21662 if (Ptr != Base)
21663 return false;
21664
21665 AM = ISD::POST_INC;
21666 return true;
21667}
21668
21670 EVT VT) const {
21671 EVT SVT = VT.getScalarType();
21672
21673 if (!SVT.isSimple())
21674 return false;
21675
21676 switch (SVT.getSimpleVT().SimpleTy) {
21677 case MVT::f16:
21678 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21679 : Subtarget.hasStdExtZfhOrZhinx();
21680 case MVT::f32:
21681 return Subtarget.hasStdExtFOrZfinx();
21682 case MVT::f64:
21683 return Subtarget.hasStdExtDOrZdinx();
21684 default:
21685 break;
21686 }
21687
21688 return false;
21689}
21690
21692 // Zacas will use amocas.w which does not require extension.
21693 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21694}
21695
21697 const Constant *PersonalityFn) const {
21698 return RISCV::X10;
21699}
21700
21702 const Constant *PersonalityFn) const {
21703 return RISCV::X11;
21704}
21705
21707 // Return false to suppress the unnecessary extensions if the LibCall
21708 // argument or return value is a float narrower than XLEN on a soft FP ABI.
21709 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21710 Type.getSizeInBits() < Subtarget.getXLen()))
21711 return false;
21712
21713 return true;
21714}
21715
21717 bool IsSigned) const {
21718 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21719 return true;
21720
21721 return IsSigned;
21722}
21723
21725 SDValue C) const {
21726 // Check integral scalar types.
21727 if (!VT.isScalarInteger())
21728 return false;
21729
21730 // Omit the optimization if the subtarget has the Zmmul extension (implied by
21731 // M) and the data size exceeds XLen.
21732 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21733 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21734 return false;
21735
21736 auto *ConstNode = cast<ConstantSDNode>(C);
21737 const APInt &Imm = ConstNode->getAPIntValue();
21738
21739 // Break the MUL into an SLLI and an ADD/SUB.
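// For example (illustrative): x * 9 can become (x << 3) + x and x * 7 can
// become (x << 3) - x, so such immediates are worth decomposing.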
21740 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21741 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21742 return true;
21743
21744 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
21745 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21746 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21747 (Imm - 8).isPowerOf2()))
21748 return true;
21749
21750 // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm needs
21751 // a pair of LUI/ADDI.
21752 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21753 ConstNode->hasOneUse()) {
21754 APInt ImmS = Imm.ashr(Imm.countr_zero());
21755 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21756 (1 - ImmS).isPowerOf2())
21757 return true;
21758 }
21759
21760 return false;
21761}
21762
21764 SDValue ConstNode) const {
21765 // Let the DAGCombiner decide for vectors.
21766 EVT VT = AddNode.getValueType();
21767 if (VT.isVector())
21768 return true;
21769
21770 // Let the DAGCombiner decide for larger types.
21771 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21772 return true;
21773
21774 // It is worse if c1 is simm12 while c1*c2 is not.
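// Worked example (illustrative constants): for (x + 4) * 2048 the addend 4 is
// simm12 but 4 * 2048 = 8192 is not, so folding the add into the multiply
// would force a separate materialization of 8192; we return false for that
// case and otherwise defer to the DAGCombiner.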
21775 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21776 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21777 const APInt &C1 = C1Node->getAPIntValue();
21778 const APInt &C2 = C2Node->getAPIntValue();
21779 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21780 return false;
21781
21782 // Default to true and let the DAGCombiner decide.
21783 return true;
21784}
21785
21787 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21788 unsigned *Fast) const {
21789 if (!VT.isVector()) {
21790 if (Fast)
21791 *Fast = Subtarget.enableUnalignedScalarMem();
21792 return Subtarget.enableUnalignedScalarMem();
21793 }
21794
21795 // All vector implementations must support element alignment
21796 EVT ElemVT = VT.getVectorElementType();
21797 if (Alignment >= ElemVT.getStoreSize()) {
21798 if (Fast)
21799 *Fast = 1;
21800 return true;
21801 }
21802
21803 // Note: We lower an unmasked unaligned vector access to an equally sized
21804 // e8 element type access. Given this, we effectively support all unmasked
21805 // misaligned accesses. TODO: Work through the codegen implications of
21806 // allowing such accesses to be formed and of considering them fast.
21807 if (Fast)
21808 *Fast = Subtarget.enableUnalignedVectorMem();
21809 return Subtarget.enableUnalignedVectorMem();
21810}
21811
21812
21814 const AttributeList &FuncAttributes) const {
21815 if (!Subtarget.hasVInstructions())
21816 return MVT::Other;
21817
21818 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21819 return MVT::Other;
21820
21821 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21822 // has an expansion threshold, and we want the number of hardware memory
21823 // operations to correspond roughly to that threshold. LMUL>1 operations
21824 // are typically expanded linearly internally, and thus correspond to more
21825 // than one actual memory operation. Note that store merging and load
21826 // combining will typically form larger LMUL operations from the LMUL1
21827 // operations emitted here, and that's okay because combining isn't
21828 // introducing new memory operations; it's just merging existing ones.
21829 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21830 if (Op.size() < MinVLenInBytes)
21831 // TODO: Figure out short memops. For the moment, do the default thing
21832 // which ends up using scalar sequences.
21833 return MVT::Other;
21834
21835 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21836 // fixed vectors.
21837 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21838 return MVT::Other;
21839
21840 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21841 // a large scalar constant and instead use vmv.v.x/i to do the
21842 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21843 // maximize the chance we can encode the size in the vsetvli.
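// Worked example (illustrative, assuming the minimum VLEN is 128, ELEN = 64,
// and adequate alignment or unaligned vector access): MinVLenInBytes is 16,
// so a 16-byte memcpy or zero memset is typed as v2i64 below, while a 16-byte
// non-zero memset is typed as v16i8 so the fill byte can be splatted with
// vmv.v.x/i.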
21844 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21845 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21846
21847 // Do we have sufficient alignment for our preferred VT? If not, revert
21848 // to the largest size allowed by our alignment criteria.
21849 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21850 Align RequiredAlign(PreferredVT.getStoreSize());
21851 if (Op.isFixedDstAlign())
21852 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21853 if (Op.isMemcpy())
21854 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21855 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21856 }
21857 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21858}
21859
21861 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21862 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21863 bool IsABIRegCopy = CC.has_value();
21864 EVT ValueVT = Val.getValueType();
21865
21866 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21867 if ((ValueVT == PairVT ||
21868 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21869 ValueVT == MVT::f64)) &&
21870 NumParts == 1 && PartVT == MVT::Untyped) {
21871 // Pairs in inline assembly, f64 in inline assembly on rv32_zdinx.
21872 MVT XLenVT = Subtarget.getXLenVT();
21873 if (ValueVT == MVT::f64)
21874 Val = DAG.getBitcast(MVT::i64, Val);
21875 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
21876 // Always creating an MVT::Untyped part, so always use
21877 // RISCVISD::BuildGPRPair.
21878 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
21879 return true;
21880 }
21881
21882 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21883 PartVT == MVT::f32) {
21884 // Cast the [b]f16 to i16, extend to i32, pad the upper bits with ones to
21885 // make a float NaN, and cast to f32.
21886 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21887 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21888 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21889 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21890 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21891 Parts[0] = Val;
21892 return true;
21893 }
21894
21895 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
21896#ifndef NDEBUG
21897 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
21898 [[maybe_unused]] unsigned ValLMUL =
21900 ValNF * RISCV::RVVBitsPerBlock);
21901 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
21902 [[maybe_unused]] unsigned PartLMUL =
21904 PartNF * RISCV::RVVBitsPerBlock);
21905 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
21906 "RISC-V vector tuple type only accepts same register class type "
21907 "TUPLE_INSERT");
21908#endif
21909
21910 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
21911 Val, DAG.getVectorIdxConstant(0, DL));
21912 Parts[0] = Val;
21913 return true;
21914 }
21915
21916 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21917 LLVMContext &Context = *DAG.getContext();
21918 EVT ValueEltVT = ValueVT.getVectorElementType();
21919 EVT PartEltVT = PartVT.getVectorElementType();
21920 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21921 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21922 if (PartVTBitSize % ValueVTBitSize == 0) {
21923 assert(PartVTBitSize >= ValueVTBitSize);
21924 // If the element types are different, first widen to a vector that keeps
21925 // ValueVT's element type but has PartVT's size, then bitcast to PartVT.
21926 // For example, to copy a <vscale x 1 x i8> value into
21927 // <vscale x 4 x i16>,
21928 // we insert the <vscale x 1 x i8> into <vscale x 8 x i8> with an insert
21929 // subvector, and then bitcast that to <vscale x 4 x i16>.
21930 if (ValueEltVT != PartEltVT) {
21931 if (PartVTBitSize > ValueVTBitSize) {
21932 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21933 assert(Count != 0 && "The number of elements should not be zero.");
21934 EVT SameEltTypeVT =
21935 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21936 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21937 DAG.getUNDEF(SameEltTypeVT), Val,
21938 DAG.getVectorIdxConstant(0, DL));
21939 }
21940 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21941 } else {
21942 Val =
21943 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21944 Val, DAG.getVectorIdxConstant(0, DL));
21945 }
21946 Parts[0] = Val;
21947 return true;
21948 }
21949 }
21950
21951 return false;
21952}
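// Illustrative sketch (not part of the upstream source): the [b]f16 -> f32
// boxing used above, done on raw bits. Filling the upper 16 bits with ones
// turns the value into an f32 NaN ("NaN-boxing"), which is how a
// soft-promoted half value travels in an f32 part. The helper assumes the
// usual 32-bit unsigned / 16-bit unsigned short and is made up for this
// example.
static unsigned nanBoxHalfBits(unsigned short HalfBits) {
  // Equivalent to: bitcast to i16, any-extend to i32, OR in 0xFFFF0000.
  return 0xFFFF0000u | HalfBits;
}
// For example, 1.0 as an IEEE half is 0x3C00 and is passed as the f32 bit
// pattern 0xFFFF3C00, a quiet NaN.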
21953
21955 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21956 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21957 bool IsABIRegCopy = CC.has_value();
21958
21959 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21960 if ((ValueVT == PairVT ||
21961 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21962 ValueVT == MVT::f64)) &&
21963 NumParts == 1 && PartVT == MVT::Untyped) {
21964 // Pairs in inline assembly, f64 in inline assembly on rv32_zdinx.
21965 MVT XLenVT = Subtarget.getXLenVT();
21966
21967 SDValue Val = Parts[0];
21968 // Always starting with an MVT::Untyped part, so always use
21969 // RISCVISD::SplitGPRPair
21970 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
21971 Val);
21972 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
21973 Val.getValue(1));
21974 if (ValueVT == MVT::f64)
21975 Val = DAG.getBitcast(ValueVT, Val);
21976 return Val;
21977 }
21978
21979 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21980 PartVT == MVT::f32) {
21981 SDValue Val = Parts[0];
21982
21983 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21984 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21985 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21986 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21987 return Val;
21988 }
21989
21990 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21991 LLVMContext &Context = *DAG.getContext();
21992 SDValue Val = Parts[0];
21993 EVT ValueEltVT = ValueVT.getVectorElementType();
21994 EVT PartEltVT = PartVT.getVectorElementType();
21995 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21996 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21997 if (PartVTBitSize % ValueVTBitSize == 0) {
21998 assert(PartVTBitSize >= ValueVTBitSize);
21999 EVT SameEltTypeVT = ValueVT;
22000 // If the element types are different, first bitcast the part to a vector
22001 // with the same element type as ValueVT.
22002 // For example, to copy a <vscale x 1 x i8> value out of
22003 // <vscale x 4 x i16>,
22004 // we bitcast the <vscale x 4 x i16> to <vscale x 8 x i8> first,
22005 // and then extract the <vscale x 1 x i8> subvector.
22006 if (ValueEltVT != PartEltVT) {
22007 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22008 assert(Count != 0 && "The number of elements should not be zero.");
22009 SameEltTypeVT =
22010 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22011 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
22012 }
22013 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
22014 DAG.getVectorIdxConstant(0, DL));
22015 return Val;
22016 }
22017 }
22018 return SDValue();
22019}
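// Illustrative sketch (not part of the upstream source): the element-count
// arithmetic behind the scalable-vector path above, for the example in the
// comments. To copy a <vscale x 1 x i8> out of a <vscale x 4 x i16> part, the
// part is first bitcast to <vscale x Count x i8>, where Count is computed as
// below (the helper is made up for this example).
static unsigned sameEltTypeCount(unsigned PartKnownMinBits,
                                 unsigned ValueEltBits) {
  // <vscale x 4 x i16> has a known-minimum size of 64 bits, so
  // sameEltTypeCount(64, 8) == 8, i.e. <vscale x 8 x i8>.
  return PartKnownMinBits / ValueEltBits;
}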
22020
22022 // When aggressively optimizing for code size, we prefer to use a div
22023 // instruction, as it is usually smaller than the alternative sequence.
22024 // TODO: Add vector division?
22025 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
22026 return OptSize && !VT.isVector();
22027}
22028
22030 // Scalarizing zero_ext and sign_ext might stop them from matching widening
22031 // instructions in some situations.
22032 unsigned Opc = N->getOpcode();
22033 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
22034 return false;
22035 return true;
22036}
22037
22038static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
22039 Module *M = IRB.GetInsertBlock()->getModule();
22040 Function *ThreadPointerFunc =
22041 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
22042 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
22043 IRB.CreateCall(ThreadPointerFunc), Offset);
22044}
22045
22047 // Fuchsia provides a fixed TLS slot for the stack cookie.
22048 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
22049 if (Subtarget.isTargetFuchsia())
22050 return useTpOffset(IRB, -0x10);
22051
22052 // Android provides a fixed TLS slot for the stack cookie. See the definition
22053 // of TLS_SLOT_STACK_GUARD in
22054 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
22055 if (Subtarget.isTargetAndroid())
22056 return useTpOffset(IRB, -0x18);
22057
22058 Module *M = IRB.GetInsertBlock()->getModule();
22059
22060 if (M->getStackProtectorGuard() == "tls") {
22061 // Users must specify the offset explicitly
22062 int Offset = M->getStackProtectorGuardOffset();
22063 return useTpOffset(IRB, Offset);
22064 }
22065
22067}
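// Illustrative sketch (not part of the upstream source): what the GEP built
// by useTpOffset amounts to. The stack guard lives at a small fixed offset
// from the thread pointer (tp), e.g. tp - 0x10 on Fuchsia and tp - 0x18 on
// Android, so loading it is a single tp-relative access. The helper is a
// made-up stand-in for that address computation.
static char *tlsSlotAddr(char *ThreadPointer, int Offset) {
  return ThreadPointer + Offset; // e.g. tlsSlotAddr(tp, -0x10) on Fuchsia
}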
22068
22070 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
22071 const DataLayout &DL) const {
22072 EVT VT = getValueType(DL, VTy);
22073 // Don't lower vlseg/vsseg for vector types that can't be split.
22074 if (!isTypeLegal(VT))
22075 return false;
22076
22078 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
22079 Alignment))
22080 return false;
22081
22082 MVT ContainerVT = VT.getSimpleVT();
22083
22084 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22085 if (!Subtarget.useRVVForFixedLengthVectors())
22086 return false;
22087 // Sometimes the interleaved access pass picks up splats as interleaves of
22088 // one element. Don't lower these.
22089 if (FVTy->getNumElements() < 2)
22090 return false;
22091
22093 } else {
22094 // The intrinsics for scalable vectors are not overloaded on pointer type
22095 // and can only handle the default address space.
22096 if (AddrSpace)
22097 return false;
22098 }
22099
22100 // Need to make sure that EMUL * NFIELDS ≤ 8
22101 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22102 if (Fractional)
22103 return true;
22104 return Factor * LMUL <= 8;
22105}
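// Illustrative sketch (not part of the upstream source): the
// EMUL * NFIELDS <= 8 rule checked above, restated for non-fractional LMUL.
// A factor-4 segment access of an LMUL=2 type is legal (4 * 2 == 8), while a
// factor-5 one is not (5 * 2 == 10). The helper name is made up for this
// example.
static bool segmentRegUsageOK(unsigned Factor, unsigned LMul, bool Fractional) {
  // Fractional LMUL always fits: each field occupies less than one register.
  return Fractional || Factor * LMul <= 8;
}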
22106
22108 Align Alignment) const {
22109 if (!Subtarget.hasVInstructions())
22110 return false;
22111
22112 // Only support fixed vectors if we know the minimum vector size.
22113 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
22114 return false;
22115
22116 EVT ScalarType = DataType.getScalarType();
22117 if (!isLegalElementTypeForRVV(ScalarType))
22118 return false;
22119
22120 if (!Subtarget.enableUnalignedVectorMem() &&
22121 Alignment < ScalarType.getStoreSize())
22122 return false;
22123
22124 return true;
22125}
22126
22128 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22129 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22130 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22131 Intrinsic::riscv_seg8_load};
22132
22133/// Lower an interleaved load into a vlsegN intrinsic.
22134///
22135/// E.g. Lower an interleaved load (Factor = 2):
22136/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22137/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
22138/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
22139///
22140/// Into:
22141/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22142/// %ptr, i64 4)
22143/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22144/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
22147 ArrayRef<unsigned> Indices, unsigned Factor) const {
22148 assert(Indices.size() == Shuffles.size());
22149
22150 IRBuilder<> Builder(LI);
22151
22152 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22153 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22155 LI->getDataLayout()))
22156 return false;
22157
22158 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22159
22160 // If the segment load is going to be performed a segment at a time anyway
22161 // and only one element is used, use a strided load instead. This
22162 // will be equally fast and create less vector register pressure.
22163 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22164 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22165 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22166 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22167 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22168 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22169 Value *VL = Builder.getInt32(VTy->getNumElements());
22170
22171 CallInst *CI =
22172 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22173 {VTy, BasePtr->getType(), Stride->getType()},
22174 {BasePtr, Stride, Mask, VL});
22175 CI->addParamAttr(
22177 Shuffles[0]->replaceAllUsesWith(CI);
22178 return true;
22179 }
22180
22181 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22182
22183 CallInst *VlsegN = Builder.CreateIntrinsic(
22184 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22185 {LI->getPointerOperand(), VL});
22186
22187 for (unsigned i = 0; i < Shuffles.size(); i++) {
22188 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22189 Shuffles[i]->replaceAllUsesWith(SubVec);
22190 }
22191
22192 return true;
22193}
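// Illustrative sketch (not part of the upstream source): the memory semantics
// of the factor-2 case above, written as scalar C++. A vlseg2 splits
// consecutive element pairs into two result vectors in a single pass over
// memory; when only one of the two results is actually used, the strided-load
// special case above touches the same addresses with a stride of Factor
// elements.
static void deinterleave2(const int *Src, int *Even, int *Odd, unsigned N) {
  for (unsigned I = 0; I < N; ++I) {
    Even[I] = Src[2 * I];    // lanes 0, 2, 4, ...
    Odd[I] = Src[2 * I + 1]; // lanes 1, 3, 5, ...
  }
}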
22194
22196 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22197 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22198 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22199 Intrinsic::riscv_seg8_store};
22200
22201/// Lower an interleaved store into a vssegN intrinsic.
22202///
22203/// E.g. Lower an interleaved store (Factor = 3):
22204/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22205/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22206/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22207///
22208/// Into:
22209/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22210/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22211/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22212/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22213/// %ptr, i32 4)
22214///
22215/// Note that the new shufflevectors will be removed and we'll only generate one
22216/// vsseg3 instruction in CodeGen.
22218 ShuffleVectorInst *SVI,
22219 unsigned Factor) const {
22220 IRBuilder<> Builder(SI);
22221 auto Mask = SVI->getShuffleMask();
22222 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22223 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22224 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22225 ShuffleVTy->getNumElements() / Factor);
22226 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22227 SI->getPointerAddressSpace(),
22228 SI->getDataLayout()))
22229 return false;
22230
22231 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22232
22233 unsigned Index;
22234 // If the segment store only has one active lane (i.e., the interleave is
22235 // just a spread shuffle), we can use a strided store instead. This will
22236 // be equally fast and create less vector register pressure.
22237 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22238 isSpreadMask(Mask, Factor, Index)) {
22239 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22240 Value *Data = SVI->getOperand(0);
22241 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22242 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22243 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22244 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22245 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22246 Value *VL = Builder.getInt32(VTy->getNumElements());
22247
22248 CallInst *CI = Builder.CreateIntrinsic(
22249 Intrinsic::experimental_vp_strided_store,
22250 {Data->getType(), BasePtr->getType(), Stride->getType()},
22251 {Data, BasePtr, Stride, Mask, VL});
22252 CI->addParamAttr(
22253 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22254
22255 return true;
22256 }
22257
22259 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22260 {VTy, SI->getPointerOperandType(), XLenTy});
22261
22263
22264 for (unsigned i = 0; i < Factor; i++) {
22265 Value *Shuffle = Builder.CreateShuffleVector(
22266 SVI->getOperand(0), SVI->getOperand(1),
22267 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22268 Ops.push_back(Shuffle);
22269 }
22270 // This VL should be OK (it should be executable in one vsseg instruction,
22271 // potentially under a larger LMUL) because we checked that the fixed vector
22272 // type fits in isLegalInterleavedAccessType.
22273 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22274 Ops.append({SI->getPointerOperand(), VL});
22275
22276 Builder.CreateCall(VssegNFunc, Ops);
22277
22278 return true;
22279}
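// Illustrative sketch (not part of the upstream source): the strided-store
// special case above. When the interleave only spreads a single source
// (lane Index active), the data can be written with a stride of Factor
// elements starting at element Index, instead of a full segment store.
static void spreadStore(const int *Data, int *Base, unsigned N,
                        unsigned Factor, unsigned Index) {
  for (unsigned I = 0; I < N; ++I)
    Base[I * Factor + Index] = Data[I];
}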
22280
22282 IntrinsicInst *DI, LoadInst *LI,
22283 SmallVectorImpl<Instruction *> &DeadInsts) const {
22284 assert(LI->isSimple());
22285 IRBuilder<> Builder(LI);
22286
22287 // Only deinterleave2 supported at present.
22288 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
22289 return false;
22290
22291 const unsigned Factor = 2;
22292
22293 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
22294 const DataLayout &DL = LI->getDataLayout();
22295
22296 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22297 LI->getPointerAddressSpace(), DL))
22298 return false;
22299
22300 Value *Return;
22301 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22302
22303 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22304 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22305 Return =
22306 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22307 {ResVTy, LI->getPointerOperandType(), XLenTy},
22308 {LI->getPointerOperand(), VL});
22309 } else {
22310 static const Intrinsic::ID IntrIds[] = {
22311 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22312 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22313 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22314 Intrinsic::riscv_vlseg8};
22315
22316 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22317 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22318 Type *VecTupTy = TargetExtType::get(
22319 LI->getContext(), "riscv.vector.tuple",
22321 NumElts * SEW / 8),
22322 Factor);
22323
22324 Value *VL = Constant::getAllOnesValue(XLenTy);
22325
22326 Value *Vlseg = Builder.CreateIntrinsic(
22327 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22328 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22329 ConstantInt::get(XLenTy, Log2_64(SEW))});
22330
22331 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22332 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22333 for (unsigned i = 0; i < Factor; ++i) {
22334 Value *VecExtract = Builder.CreateIntrinsic(
22335 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22336 {Vlseg, Builder.getInt32(i)});
22337 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22338 }
22339 }
22340
22341 DI->replaceAllUsesWith(Return);
22342
22343 return true;
22344}
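// Illustrative sketch (not part of the upstream source): the sizing of the
// "riscv.vector.tuple" wrapper built above. Each field is described by a
// <vscale x K x i8> payload, where K is the known-minimum element count of
// the result type times its element size in bytes. The helper is made up for
// this example.
static unsigned tupleI8Elts(unsigned NumElts, unsigned SEW) {
  // For ResVTy == <vscale x 4 x i32>: tupleI8Elts(4, 32) == 16, so the tuple
  // is Factor copies of <vscale x 16 x i8>.
  return NumElts * SEW / 8;
}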
22345
22348 SmallVectorImpl<Instruction *> &DeadInsts) const {
22349 assert(SI->isSimple());
22350 IRBuilder<> Builder(SI);
22351
22352 // Only interleave2 supported at present.
22353 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
22354 return false;
22355
22356 const unsigned Factor = 2;
22357
22358 VectorType *InVTy = cast<VectorType>(II->getArgOperand(0)->getType());
22359 const DataLayout &DL = SI->getDataLayout();
22360
22361 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22362 SI->getPointerAddressSpace(), DL))
22363 return false;
22364
22365 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22366
22367 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22368 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22369 Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
22370 {InVTy, SI->getPointerOperandType(), XLenTy},
22371 {II->getArgOperand(0), II->getArgOperand(1),
22372 SI->getPointerOperand(), VL});
22373 } else {
22374 static const Intrinsic::ID IntrIds[] = {
22375 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22376 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22377 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22378 Intrinsic::riscv_vsseg8};
22379
22380 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22381 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22382 Type *VecTupTy = TargetExtType::get(
22383 SI->getContext(), "riscv.vector.tuple",
22384 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22385 NumElts * SEW / 8),
22386 Factor);
22387
22389 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22390
22391 Value *VL = Constant::getAllOnesValue(XLenTy);
22392
22393 Value *StoredVal = PoisonValue::get(VecTupTy);
22394 for (unsigned i = 0; i < Factor; ++i)
22395 StoredVal = Builder.CreateIntrinsic(
22396 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22397 {StoredVal, II->getArgOperand(i), Builder.getInt32(i)});
22398
22399 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22400 ConstantInt::get(XLenTy, Log2_64(SEW))});
22401 }
22402
22403 return true;
22404}
22405
22409 const TargetInstrInfo *TII) const {
22410 assert(MBBI->isCall() && MBBI->getCFIType() &&
22411 "Invalid call instruction for a KCFI check");
22412 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22413 MBBI->getOpcode()));
22414
22415 MachineOperand &Target = MBBI->getOperand(0);
22416 Target.setIsRenamable(false);
22417
22418 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22419 .addReg(Target.getReg())
22420 .addImm(MBBI->getCFIType())
22421 .getInstr();
22422}
22423
22424#define GET_REGISTER_MATCHER
22425#include "RISCVGenAsmMatcher.inc"
22426
22429 const MachineFunction &MF) const {
22431 if (Reg == RISCV::NoRegister)
22433 if (Reg == RISCV::NoRegister)
22435 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22436 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22437 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22438 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22439 StringRef(RegName) + "\"."));
22440 return Reg;
22441}
22442
22445 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22446
22447 if (NontemporalInfo == nullptr)
22449
22450 // 1 (the default value) works as __RISCV_NTLH_ALL
22451 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22452 // 3 -> __RISCV_NTLH_ALL_PRIVATE
22453 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22454 // 5 -> __RISCV_NTLH_ALL
22455 int NontemporalLevel = 5;
22456 const MDNode *RISCVNontemporalInfo =
22457 I.getMetadata("riscv-nontemporal-domain");
22458 if (RISCVNontemporalInfo != nullptr)
22459 NontemporalLevel =
22460 cast<ConstantInt>(
22461 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22462 ->getValue())
22463 ->getZExtValue();
22464
22465 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22466 "RISC-V target doesn't support this non-temporal domain.");
22467
22468 NontemporalLevel -= 2;
22470 if (NontemporalLevel & 0b1)
22471 Flags |= MONontemporalBit0;
22472 if (NontemporalLevel & 0b10)
22473 Flags |= MONontemporalBit1;
22474
22475 return Flags;
22476}
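// Illustrative sketch (not part of the upstream source): how the nontemporal
// domain level above maps onto the two target MMO flag bits after the
// "NontemporalLevel -= 2" adjustment: 2 -> 00, 3 -> 01, 4 -> 10, 5 -> 11.
// The helper returns the bit pair as a small mask and is made up for this
// example.
static unsigned ntlhFlagBits(int NontemporalLevel) {
  int Adjusted = NontemporalLevel - 2; // 2..5 -> 0..3
  unsigned Bits = 0;
  if (Adjusted & 0b1)
    Bits |= 1; // corresponds to MONontemporalBit0
  if (Adjusted & 0b10)
    Bits |= 2; // corresponds to MONontemporalBit1
  return Bits;
}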
22477
22480
22481 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22483 TargetFlags |= (NodeFlags & MONontemporalBit0);
22484 TargetFlags |= (NodeFlags & MONontemporalBit1);
22485 return TargetFlags;
22486}
22487
22489 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22490 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22491}
22492
22494 if (VT.isScalableVector())
22495 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22496 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22497 return true;
22498 return Subtarget.hasStdExtZbb() &&
22499 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22500}
22501
22503 ISD::CondCode Cond) const {
22504 return isCtpopFast(VT) ? 0 : 1;
22505}
22506
22508 const Instruction *I) const {
22509 if (Subtarget.hasStdExtZalasr()) {
22510 if (Subtarget.hasStdExtZtso()) {
22511 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22512 // should be lowered to plain load/store. The easiest way to do this is
22513 // to say we should insert fences for them, and the fence insertion code
22514 // will just not insert any fences
22515 auto *LI = dyn_cast<LoadInst>(I);
22516 auto *SI = dyn_cast<StoreInst>(I);
22517 if ((LI &&
22518 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22519 (SI &&
22520 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22521 // Here, this is a load or store which is seq_cst and needs a .aq or
22522 // .rl, therefore we shouldn't try to insert fences.
22523 return false;
22524 }
22525 // Here, we are a TSO inst that isn't a seq_cst load/store
22526 return isa<LoadInst>(I) || isa<StoreInst>(I);
22527 }
22528 return false;
22529 }
22530 // Note that one specific case requires fence insertion for an
22531 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22532 // than this hook due to limitations in the interface here.
22533 return isa<LoadInst>(I) || isa<StoreInst>(I);
22534}
22535
22537
22538 // GISel support is in progress or complete for these opcodes.
22539 unsigned Op = Inst.getOpcode();
22540 if (Op == Instruction::Add || Op == Instruction::Sub ||
22541 Op == Instruction::And || Op == Instruction::Or ||
22542 Op == Instruction::Xor || Op == Instruction::InsertElement ||
22543 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22544 Op == Instruction::Freeze || Op == Instruction::Store)
22545 return false;
22546
22547 if (Inst.getType()->isScalableTy())
22548 return true;
22549
22550 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22551 if (Inst.getOperand(i)->getType()->isScalableTy() &&
22552 !isa<ReturnInst>(&Inst))
22553 return true;
22554
22555 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22556 if (AI->getAllocatedType()->isScalableTy())
22557 return true;
22558 }
22559
22560 return false;
22561}
22562
22563SDValue
22564RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22565 SelectionDAG &DAG,
22566 SmallVectorImpl<SDNode *> &Created) const {
22568 if (isIntDivCheap(N->getValueType(0), Attr))
22569 return SDValue(N, 0); // Lower SDIV as SDIV
22570
22571 // Only perform this transform if short forward branch opt is supported.
22572 if (!Subtarget.hasShortForwardBranchOpt())
22573 return SDValue();
22574 EVT VT = N->getValueType(0);
22575 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22576 return SDValue();
22577
22578 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22579 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22580 return SDValue();
22581 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22582}
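// Illustrative sketch (not part of the upstream source): the conditional-move
// based sdiv-by-2^k expansion this hook opts into when short forward branches
// are available. Rounding toward zero requires adding 2^k - 1 to negative
// dividends before the arithmetic shift, which is why the code above insists
// that 2^k - 1 fits in a single addi immediate.
static long long sdivPow2(long long X, unsigned K) {
  long long Adjusted = X < 0 ? X + ((1LL << K) - 1) : X; // the select/CMov
  return Adjusted >> K; // arithmetic shift; e.g. sdivPow2(-3, 1) == -1
}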
22583
22584bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22585 EVT VT, const APInt &AndMask) const {
22586 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22587 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22589}
22590
22591unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22592 return Subtarget.getMinimumJumpTableEntries();
22593}
22594
22597 int JTI,
22598 SelectionDAG &DAG) const {
22599 if (Subtarget.hasStdExtZicfilp()) {
22600 // When Zicfilp is enabled, we need to use a software-guarded branch for the
22601 // jump table branch.
22602 SDValue Chain = Value;
22603 // Jump table debug info is only needed if CodeView is enabled.
22605 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22606 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22607 }
22608 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22609}
22610
22611 // If an output pattern produces multiple instructions, tablegen may pick an
22612 // arbitrary type from an instruction's destination register class to use for the
22613// VT of that MachineSDNode. This VT may be used to look up the representative
22614// register class. If the type isn't legal, the default implementation will
22615// not find a register class.
22616//
22617// Some integer types smaller than XLen are listed in the GPR register class to
22618// support isel patterns for GISel, but are not legal in SelectionDAG. The
22619// arbitrary type tablegen picks may be one of these smaller types.
22620//
22621// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22622// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22623std::pair<const TargetRegisterClass *, uint8_t>
22624RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22625 MVT VT) const {
22626 switch (VT.SimpleTy) {
22627 default:
22628 break;
22629 case MVT::i8:
22630 case MVT::i16:
22631 case MVT::i32:
22633 case MVT::bf16:
22634 case MVT::f16:
22636 }
22637
22639}
22640
22642
22643#define GET_RISCVVIntrinsicsTable_IMPL
22644#include "RISCVGenSearchableTables.inc"
22645
22646} // namespace llvm::RISCVVIntrinsicsTable
22647
22649
22650 // If the function specifically requests inline stack probes, emit them.
22651 if (MF.getFunction().hasFnAttribute("probe-stack"))
22652 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22653 "inline-asm";
22654
22655 return false;
22656}
22657
22659 Align StackAlign) const {
22660 // The default stack probe size is 4096 if the function has no
22661 // stack-probe-size attribute.
22662 const Function &Fn = MF.getFunction();
22663 unsigned StackProbeSize =
22664 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22665 // Round down to the stack alignment.
22666 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
22667 return StackProbeSize ? StackProbeSize : StackAlign.value();
22668}
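// Illustrative sketch (not part of the upstream source): the rounding
// performed above. A "stack-probe-size"="5000" attribute with a 16-byte stack
// alignment yields alignDown(5000, 16) == 4992, and a requested size smaller
// than the alignment rounds down to zero and then falls back to the alignment
// itself. The helper is a made-up stand-in for that computation.
static unsigned roundedProbeSize(unsigned Requested, unsigned StackAlign) {
  unsigned Rounded = Requested / StackAlign * StackAlign; // alignDown
  return Rounded ? Rounded : StackAlign;
}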
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative register names to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1111
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:63
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
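The AtomicRMWInst accessors above are the kind of thing an atomic-expansion hook inspects. A hedged sketch with a purely hypothetical expansion policy; it is not RISC-V's actual rule set.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical policy: expand FP and the wrapping/saturating RMW operations.
bool wantsCmpXchgExpansion(const AtomicRMWInst *AI) {
  if (AI->isFloatingPointOperation())
    return true;
  switch (AI->getOperation()) {
  case AtomicRMWInst::USubSat:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    return true;
  default:
    return false;
  }
}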
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
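These CCState/CCValAssign members follow the usual analyze-then-walk pattern of argument lowering. A sketch under the assumption of a hypothetical assignment function CC_Hypothetical, standing in for a real TableGen-generated CCAssignFn.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/Function.h"
using namespace llvm;

void walkFormalArgs(MachineFunction &MF, CallingConv::ID CC, bool IsVarArg,
                    const SmallVectorImpl<ISD::InputArg> &Ins,
                    CCAssignFn *CC_Hypothetical) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_Hypothetical);

  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      Register Reg = VA.getLocReg();      // argument arrives in a register
      (void)Reg;
    } else if (VA.isMemLoc()) {
      int64_t Off = VA.getLocMemOffset(); // argument arrives on the stack
      (void)Off;
    }
  }
}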
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1887
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1842
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition: IRBuilder.h:867
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
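A short sketch of a few of the IRBuilderBase calls listed above; the emitted IR is arbitrary and unrelated to any particular lowering in this file.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void builderSketch(IRBuilderBase &Builder, Value *V32) {
  // A release fence, of the kind a leading-fence hook might emit.
  Builder.CreateFence(AtomicOrdering::Release);

  // Narrow, bitwise-negate, and widen a value again.
  Value *V8 = Builder.CreateTrunc(V32, Builder.getInt8Ty());
  Value *NotV = Builder.CreateNot(V8);
  Value *Wide = Builder.CreateSExt(NotV, Builder.getInt32Ty());

  // Subtract from a constant built with getIntN.
  Value *Res = Builder.CreateSub(Builder.getIntN(32, 31), Wide);
  (void)Res;
}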
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:68
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
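An illustrative use of the MVT helpers above on a scalable vector type. The header path reflects recent LLVM; it may differ in older releases, and the values are purely for demonstration.

#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

void mvtSketch() {
  // <vscale x 4 x i32>
  MVT VT = MVT::getScalableVectorVT(MVT::i32, 4);
  bool Scalable = VT.isScalableVector();            // true
  ElementCount EC = VT.getVectorElementCount();     // scalable, min 4
  MVT EltVT = VT.getVectorElementType();            // i32
  MVT HalfVT = VT.getHalfNumVectorElementsVT();     // <vscale x 2 x i32>
  MVT AsFP = VT.changeVectorElementType(MVT::f32);  // <vscale x 4 x f32>
  (void)Scalable; (void)EC; (void)EltVT; (void)HalfVT; (void)AsFP;
}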
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
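The MachineBasicBlock/MachineFunction/MachineInstrBuilder members above are the usual ingredients of a custom-inserter expansion. A generic skeleton follows; the branch opcode is a placeholder parameter, not a real RISC-V instruction, and the splitting logic is a sketch of the common pattern rather than any specific expansion in this file.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <iterator>
using namespace llvm;

MachineBasicBlock *splitForCustomInsert(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned PlaceholderBranchOpc,
                                        const TargetInstrInfo *TII) {
  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  // Create the tail block right after BB.
  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(LLVMBB);
  MF->insert(It, TailMBB);

  // Move everything after MI into the tail block and rewire successors/PHIs.
  TailMBB->splice(TailMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  TailMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(TailMBB);

  // Placeholder terminator back to the tail block.
  BuildMI(BB, MI.getDebugLoc(), TII->get(PlaceholderBranchOpc)).addMBB(TailMBB);

  MI.eraseFromParent();
  return TailMBB;
}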
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:404
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
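A small sketch querying the MachineMemOperand flags listed above; the predicate itself is illustrative.

#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// A load that is neither volatile nor non-temporal.
bool isPlainCacheableLoad(const MachineMemOperand &MMO) {
  MachineMemOperand::Flags F = MMO.getFlags();
  return (F & MachineMemOperand::MOLoad) &&
         !(F & MachineMemOperand::MOVolatile) &&
         !(F & MachineMemOperand::MONonTemporal);
}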
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
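Feature queries like the RISCVSubtarget members above typically gate lowering decisions. A hedged sketch; the helper name and the 128-bit VLEN threshold are invented for the example and are not a rule taken from this file.

#include "RISCVSubtarget.h"
using namespace llvm;

static bool canUseWideFixedVectors(const RISCVSubtarget &ST) {
  return ST.hasVInstructions() && ST.useRVVForFixedLengthVectors() &&
         ST.getRealMinVLen() >= 128;
}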
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if the result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
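Most of the RISCVTargetLowering members above are overrides of TargetLowering hooks. As a shape reference only, here is a minimal override of one of the simpler hooks for a hypothetical target; it is not the RISC-V implementation, and the 12-bit range merely mirrors the form such predicates usually take.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/MathExtras.h"

class HypotheticalTargetLowering : public llvm::TargetLowering {
public:
  explicit HypotheticalTargetLowering(const llvm::TargetMachine &TM)
      : TargetLowering(TM) {}

  // Hypothetical rule: accept add immediates that fit in 12 signed bits.
  bool isLegalAddImmediate(int64_t Imm) const override {
    return llvm::isInt<12>(Imm);
  }
};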
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
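A sketch of the node-matching style these SDValue/SDNode accessors support; the pattern recognized here is arbitrary and chosen only for illustration.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Recognize (add x, constant) where the add result has a single user,
// and return the zero-extended constant through CVal.
bool isSingleUseAddOfConstant(SDValue V, uint64_t &CVal) {
  if (V.getOpcode() != ISD::ADD || !V.hasOneUse())
    return false;
  SDValue RHS = V.getOperand(1);
  if (!isa<ConstantSDNode>(RHS))
    return false;
  CVal = V.getConstantOperandVal(1);
  return true;
}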
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:397
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:505
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:906
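As an illustration of the node-building helpers listed here, a minimal sketch (a hypothetical helper, not code from this file) that splats a constant into a vector and adds it, assuming a SelectionDAG and SDLoc supplied by an enclosing lowering hook:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue addSplatOfOne(SelectionDAG &DAG, const SDLoc &DL, SDValue Vec) {
  EVT VT = Vec.getValueType();
  // getSplat picks BUILD_VECTOR or SPLAT_VECTOR depending on whether VT is scalable.
  SDValue One =
      DAG.getSplat(VT, DL, DAG.getConstant(1, DL, VT.getVectorElementType()));
  // Gets or creates the ISD::ADD node computing Vec + splat(1).
  return DAG.getNode(ISD::ADD, DL, VT, Vec, One);
}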
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
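A small usage sketch of these containers (hypothetical helper, mirroring idioms used throughout this file):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void collectEvenIndices(ArrayRef<int> Mask, SmallVectorImpl<int> &Out) {
  SmallSet<int, 8> Seen;     // stays small and cheap while it holds few elements
  Out.reserve(Mask.size());  // avoid reallocation while appending
  for (int M : Mask)
    if (M >= 0 && M % 2 == 0 && Seen.insert(M).second)
      Out.push_back(M);      // keep each even index once, in source order
}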
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
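A minimal StringSwitch sketch (hypothetical helper and mapping, shown only to illustrate the Case/Cases/Default chain):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

static int parseRoundingModeName(StringRef Name) {
  return StringSwitch<int>(Name)
      .Case("rne", 0)
      .Case("rtz", 1)
      .Cases("rdn", "rup", 2)
      .Default(-1); // unrecognized name
}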
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
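These TargetLoweringBase hooks are normally invoked from a target's TargetLowering constructor. A generic sketch of that pattern, with a hypothetical XYZ target and register class (not the actual RISC-V configuration):

// Inside a hypothetical XYZTargetLowering constructor:
addRegisterClass(MVT::i32, &XYZ::GPRRegClass);        // i32 values live in GPRs
setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); // no native select_cc
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::i32, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
setMinFunctionAlignment(Align(4));
setMaxAtomicSizeInBitsSupported(32);
computeRegisterProperties(STI.getRegisterInfo());     // STI: the target's subtarget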
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:752
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:384
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always be performed.
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1259
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
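A brief illustration of these two rewrites (the resulting codes follow directly from the ISD::CondCode definitions):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static void condCodeExamples() {
  ISD::CondCode CC = ISD::SETLT;                           // signed x < y
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i64);  // !(x < y) -> SETGE
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);   // (y op x) -> SETGT
  (void)Inv;
  (void)Swap;
}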
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1568
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1568
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1555
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1651
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns a f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
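A sketch of how the constant-materialization helper is typically queried, assuming an MCSubtargetInfo is already available (hypothetical wrapper name):

#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;

static size_t instCountForImm(int64_t Imm, const MCSubtargetInfo &STI) {
  // Returns the LUI/ADDI(W)/SLLI-style sequence the immediate would need.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  return Seq.size();
}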
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:298
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:348
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
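A few concrete values for the integer helpers above (assert-style illustration):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void bitHelperExamples() {
  assert(isPowerOf2_64(64) && !isPowerOf2_64(48));
  assert(Log2_64(64) == 6);            // floor log2
  assert(PowerOf2Ceil(33) == 64);      // round up to a power of two
  assert(alignDown(37, 8) == 32);      // largest multiple of 8 <= 37
  assert(countr_zero(0b101000u) == 3); // trailing zero count
  assert(bit_width(255u) == 8);        // bits needed to represent the value
}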
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:275
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:405
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
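Illustration of the mask this produces (hypothetical wrapper):

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static SmallVector<int, 16> exampleSequentialMask() {
  // Selects elements 2..5 and leaves the last two lanes undef:
  // <2, 3, 4, 5, -1, -1>
  return createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
}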
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
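A short illustration of the EVT queries above (hypothetical helper; an LLVMContext is only needed for EVTs without a simple MVT form):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void evtExamples(LLVMContext &Ctx) {
  EVT V = EVT::getVectorVT(Ctx, MVT::f32, 4);      // v4f32
  assert(V.isVector() && !V.isScalableVector());
  assert(V.getVectorNumElements() == 4);
  assert(V.getScalarSizeInBits() == 32);
  EVT IntV = V.changeVectorElementTypeToInteger(); // v4i32
  assert(IntV.getVectorElementType() == MVT::i32);
  (void)V;
  (void)IntV;
}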
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
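Constructing KnownBits by hand shows what the queries above report (assert-style illustration):

#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

static void knownBitsExample() {
  KnownBits Known(8);                        // 8-bit value, nothing known yet
  assert(Known.isUnknown());
  Known.Zero.setHighBits(4);                 // top four bits known to be 0
  assert(Known.countMaxActiveBits() == 4);   // any possible value fits in 4 bits
  assert(Known.countMaxLeadingZeros() == 8); // the value could still be 0
  KnownBits Wide = Known.zext(16);           // zero-extension adds known zeros
  assert(Wide.countMinLeadingZeros() == 12);
  (void)Wide;
}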
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)