1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
57 static cl::opt<unsigned> ExtensionMaxWebSize(
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
69 static cl::opt<unsigned> NumRepeatedDivisors(
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75 static cl::opt<int>
76 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
182 if (Size <= RISCV::RVVBitsPerBlock)
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32-bit case is efficient on 64-bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
859 Custom);
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
924 // range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
1070 // Sets common actions for f16 and bf16 for when there's only
1071 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
1109 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
1231 // Operations below are different for between masks and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1309 // range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1400 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1401 // not legal.
1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1531 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1534 if (Subtarget.hasVendorXTHeadMemPair())
1536 if (Subtarget.useRVVForFixedLengthVectors())
1538
1539 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1540 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1541
1542 // Disable strict node mutation.
1543 IsStrictFPEnabled = true;
1544 EnableExtLdPromotion = true;
1545
1546 // Let the subtarget decide if a predictable select is more expensive than the
1547 // corresponding branch. This information is used in CGP/SelectOpt to decide
1548 // when to convert selects into branches.
1549 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1550
1551 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1552 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1553
1555 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1556 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1557
1559 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1560 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1561
1562 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1563 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1564}
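// Hedged illustration (helper names invented here, not part of the upstream
// file): addRegClassForRVV above buckets a scalable type purely by its
// minimum bit width. Assuming RISCV::RVVBitsPerBlock is 64, the mapping works
// out as sketched below: everything up to one vector register worth of bits
// stays in VR, and larger types use the grouped VRM2/VRM4/VRM8 classes.
namespace {
[[maybe_unused]] constexpr unsigned exampleLMULForMinBits(unsigned MinBits) {
  constexpr unsigned BitsPerBlock = 64; // assumed RVVBitsPerBlock
  return MinBits <= BitsPerBlock ? 1 : MinBits / BitsPerBlock;
}
static_assert(exampleLMULForMinBits(32) == 1, "fractional LMUL still uses VR");
static_assert(exampleLMULForMinBits(64) == 1, "nxv1i64 -> VR");
static_assert(exampleLMULForMinBits(128) == 2, "nxv2i64 -> VRM2");
static_assert(exampleLMULForMinBits(512) == 8, "nxv8i64 -> VRM8");
} // end anonymous namespace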
1565
1567 LLVMContext &Context,
1568 EVT VT) const {
1569 if (!VT.isVector())
1570 return getPointerTy(DL);
1571 if (Subtarget.hasVInstructions() &&
1572 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1573 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1575}
1576
1577MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1578 return Subtarget.getXLenVT();
1579}
1580
1581// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1582bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1583 unsigned VF,
1584 bool IsScalable) const {
1585 if (!Subtarget.hasVInstructions())
1586 return true;
1587
1588 if (!IsScalable)
1589 return true;
1590
1591 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1592 return true;
1593
1594 // Don't allow VF=1 if those types aren't legal.
1595 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1596 return true;
1597
1598 // VLEN=32 support is incomplete.
1599 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1600 return true;
1601
1602 // The maximum VF is for the smallest element width with LMUL=8.
1603 // VF must be a power of 2.
1604 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1605 return VF > MaxVF || !isPowerOf2_32(VF);
1606}
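// Hedged worked example (standalone helper, invented for illustration): with
// the smallest element width of 8 bits and LMUL=8, the cap computed above is
// (RVVBitsPerBlock / 8) * 8. Assuming RVVBitsPerBlock is 64 that is 64, so a
// scalable VF of 64 can still be lowered while VF=128 or any non-power-of-two
// VF is expanded.
namespace {
[[maybe_unused]] constexpr unsigned exampleMaxVFForGetVectorLength() {
  constexpr unsigned BitsPerBlock = 64; // assumed RVVBitsPerBlock
  return (BitsPerBlock / 8) * 8;
}
static_assert(exampleMaxVFForGetVectorLength() == 64,
              "LMUL=8 with i8 elements caps VF at 64");
} // end anonymous namespace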
1607
1609 return !Subtarget.hasVInstructions() ||
1610 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1611}
1612
1614 const CallInst &I,
1615 MachineFunction &MF,
1616 unsigned Intrinsic) const {
1617 auto &DL = I.getDataLayout();
1618
1619 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1620 bool IsUnitStrided, bool UsePtrVal = false) {
1622 // We can't use ptrVal if the intrinsic can access memory before the
1623 // pointer. This means we can't use it for strided or indexed intrinsics.
1624 if (UsePtrVal)
1625 Info.ptrVal = I.getArgOperand(PtrOp);
1626 else
1627 Info.fallbackAddressSpace =
1628 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1629 Type *MemTy;
1630 if (IsStore) {
1631 // Store value is the first operand.
1632 MemTy = I.getArgOperand(0)->getType();
1633 } else {
1634 // Use return type. If it's segment load, return type is a struct.
1635 MemTy = I.getType();
1636 if (MemTy->isStructTy())
1637 MemTy = MemTy->getStructElementType(0);
1638 }
1639 if (!IsUnitStrided)
1640 MemTy = MemTy->getScalarType();
1641
1642 Info.memVT = getValueType(DL, MemTy);
1643 if (MemTy->isTargetExtTy()) {
1644 // RISC-V vector tuple type's alignment type should be its element type.
1645 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1646 MemTy = Type::getIntNTy(
1647 MemTy->getContext(),
1648 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1649 ->getZExtValue());
1650 Info.align = DL.getABITypeAlign(MemTy);
1651 } else {
1652 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1653 }
1655 Info.flags |=
1657 return true;
1658 };
1659
1660 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1662
1664 switch (Intrinsic) {
1665 default:
1666 return false;
1667 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1668 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1669 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1670 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1671 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1672 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1675 case Intrinsic::riscv_masked_cmpxchg_i32:
1677 Info.memVT = MVT::i32;
1678 Info.ptrVal = I.getArgOperand(0);
1679 Info.offset = 0;
1680 Info.align = Align(4);
1683 return true;
1684 case Intrinsic::riscv_seg2_load:
1685 case Intrinsic::riscv_seg3_load:
1686 case Intrinsic::riscv_seg4_load:
1687 case Intrinsic::riscv_seg5_load:
1688 case Intrinsic::riscv_seg6_load:
1689 case Intrinsic::riscv_seg7_load:
1690 case Intrinsic::riscv_seg8_load:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1692 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1693 case Intrinsic::riscv_seg2_store:
1694 case Intrinsic::riscv_seg3_store:
1695 case Intrinsic::riscv_seg4_store:
1696 case Intrinsic::riscv_seg5_store:
1697 case Intrinsic::riscv_seg6_store:
1698 case Intrinsic::riscv_seg7_store:
1699 case Intrinsic::riscv_seg8_store:
1700 // Operands are (vec, ..., vec, ptr, vl)
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1704 case Intrinsic::riscv_vle:
1705 case Intrinsic::riscv_vle_mask:
1706 case Intrinsic::riscv_vleff:
1707 case Intrinsic::riscv_vleff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ true,
1711 /*UsePtrVal*/ true);
1712 case Intrinsic::riscv_vse:
1713 case Intrinsic::riscv_vse_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ true,
1717 /*UsePtrVal*/ true);
1718 case Intrinsic::riscv_vlse:
1719 case Intrinsic::riscv_vlse_mask:
1720 case Intrinsic::riscv_vloxei:
1721 case Intrinsic::riscv_vloxei_mask:
1722 case Intrinsic::riscv_vluxei:
1723 case Intrinsic::riscv_vluxei_mask:
1724 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1725 /*IsStore*/ false,
1726 /*IsUnitStrided*/ false);
1727 case Intrinsic::riscv_vsse:
1728 case Intrinsic::riscv_vsse_mask:
1729 case Intrinsic::riscv_vsoxei:
1730 case Intrinsic::riscv_vsoxei_mask:
1731 case Intrinsic::riscv_vsuxei:
1732 case Intrinsic::riscv_vsuxei_mask:
1733 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1734 /*IsStore*/ true,
1735 /*IsUnitStrided*/ false);
1736 case Intrinsic::riscv_vlseg2:
1737 case Intrinsic::riscv_vlseg3:
1738 case Intrinsic::riscv_vlseg4:
1739 case Intrinsic::riscv_vlseg5:
1740 case Intrinsic::riscv_vlseg6:
1741 case Intrinsic::riscv_vlseg7:
1742 case Intrinsic::riscv_vlseg8:
1743 case Intrinsic::riscv_vlseg2ff:
1744 case Intrinsic::riscv_vlseg3ff:
1745 case Intrinsic::riscv_vlseg4ff:
1746 case Intrinsic::riscv_vlseg5ff:
1747 case Intrinsic::riscv_vlseg6ff:
1748 case Intrinsic::riscv_vlseg7ff:
1749 case Intrinsic::riscv_vlseg8ff:
1750 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1751 /*IsStore*/ false,
1752 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1753 case Intrinsic::riscv_vlseg2_mask:
1754 case Intrinsic::riscv_vlseg3_mask:
1755 case Intrinsic::riscv_vlseg4_mask:
1756 case Intrinsic::riscv_vlseg5_mask:
1757 case Intrinsic::riscv_vlseg6_mask:
1758 case Intrinsic::riscv_vlseg7_mask:
1759 case Intrinsic::riscv_vlseg8_mask:
1760 case Intrinsic::riscv_vlseg2ff_mask:
1761 case Intrinsic::riscv_vlseg3ff_mask:
1762 case Intrinsic::riscv_vlseg4ff_mask:
1763 case Intrinsic::riscv_vlseg5ff_mask:
1764 case Intrinsic::riscv_vlseg6ff_mask:
1765 case Intrinsic::riscv_vlseg7ff_mask:
1766 case Intrinsic::riscv_vlseg8ff_mask:
1767 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1768 /*IsStore*/ false,
1769 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1770 case Intrinsic::riscv_vlsseg2:
1771 case Intrinsic::riscv_vlsseg3:
1772 case Intrinsic::riscv_vlsseg4:
1773 case Intrinsic::riscv_vlsseg5:
1774 case Intrinsic::riscv_vlsseg6:
1775 case Intrinsic::riscv_vlsseg7:
1776 case Intrinsic::riscv_vlsseg8:
1777 case Intrinsic::riscv_vloxseg2:
1778 case Intrinsic::riscv_vloxseg3:
1779 case Intrinsic::riscv_vloxseg4:
1780 case Intrinsic::riscv_vloxseg5:
1781 case Intrinsic::riscv_vloxseg6:
1782 case Intrinsic::riscv_vloxseg7:
1783 case Intrinsic::riscv_vloxseg8:
1784 case Intrinsic::riscv_vluxseg2:
1785 case Intrinsic::riscv_vluxseg3:
1786 case Intrinsic::riscv_vluxseg4:
1787 case Intrinsic::riscv_vluxseg5:
1788 case Intrinsic::riscv_vluxseg6:
1789 case Intrinsic::riscv_vluxseg7:
1790 case Intrinsic::riscv_vluxseg8:
1791 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1792 /*IsStore*/ false,
1793 /*IsUnitStrided*/ false);
1794 case Intrinsic::riscv_vlsseg2_mask:
1795 case Intrinsic::riscv_vlsseg3_mask:
1796 case Intrinsic::riscv_vlsseg4_mask:
1797 case Intrinsic::riscv_vlsseg5_mask:
1798 case Intrinsic::riscv_vlsseg6_mask:
1799 case Intrinsic::riscv_vlsseg7_mask:
1800 case Intrinsic::riscv_vlsseg8_mask:
1801 case Intrinsic::riscv_vloxseg2_mask:
1802 case Intrinsic::riscv_vloxseg3_mask:
1803 case Intrinsic::riscv_vloxseg4_mask:
1804 case Intrinsic::riscv_vloxseg5_mask:
1805 case Intrinsic::riscv_vloxseg6_mask:
1806 case Intrinsic::riscv_vloxseg7_mask:
1807 case Intrinsic::riscv_vloxseg8_mask:
1808 case Intrinsic::riscv_vluxseg2_mask:
1809 case Intrinsic::riscv_vluxseg3_mask:
1810 case Intrinsic::riscv_vluxseg4_mask:
1811 case Intrinsic::riscv_vluxseg5_mask:
1812 case Intrinsic::riscv_vluxseg6_mask:
1813 case Intrinsic::riscv_vluxseg7_mask:
1814 case Intrinsic::riscv_vluxseg8_mask:
1815 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1816 /*IsStore*/ false,
1817 /*IsUnitStrided*/ false);
1818 case Intrinsic::riscv_vsseg2:
1819 case Intrinsic::riscv_vsseg3:
1820 case Intrinsic::riscv_vsseg4:
1821 case Intrinsic::riscv_vsseg5:
1822 case Intrinsic::riscv_vsseg6:
1823 case Intrinsic::riscv_vsseg7:
1824 case Intrinsic::riscv_vsseg8:
1825 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1826 /*IsStore*/ true,
1827 /*IsUnitStrided*/ false);
1828 case Intrinsic::riscv_vsseg2_mask:
1829 case Intrinsic::riscv_vsseg3_mask:
1830 case Intrinsic::riscv_vsseg4_mask:
1831 case Intrinsic::riscv_vsseg5_mask:
1832 case Intrinsic::riscv_vsseg6_mask:
1833 case Intrinsic::riscv_vsseg7_mask:
1834 case Intrinsic::riscv_vsseg8_mask:
1835 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1836 /*IsStore*/ true,
1837 /*IsUnitStrided*/ false);
1838 case Intrinsic::riscv_vssseg2:
1839 case Intrinsic::riscv_vssseg3:
1840 case Intrinsic::riscv_vssseg4:
1841 case Intrinsic::riscv_vssseg5:
1842 case Intrinsic::riscv_vssseg6:
1843 case Intrinsic::riscv_vssseg7:
1844 case Intrinsic::riscv_vssseg8:
1845 case Intrinsic::riscv_vsoxseg2:
1846 case Intrinsic::riscv_vsoxseg3:
1847 case Intrinsic::riscv_vsoxseg4:
1848 case Intrinsic::riscv_vsoxseg5:
1849 case Intrinsic::riscv_vsoxseg6:
1850 case Intrinsic::riscv_vsoxseg7:
1851 case Intrinsic::riscv_vsoxseg8:
1852 case Intrinsic::riscv_vsuxseg2:
1853 case Intrinsic::riscv_vsuxseg3:
1854 case Intrinsic::riscv_vsuxseg4:
1855 case Intrinsic::riscv_vsuxseg5:
1856 case Intrinsic::riscv_vsuxseg6:
1857 case Intrinsic::riscv_vsuxseg7:
1858 case Intrinsic::riscv_vsuxseg8:
1859 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1860 /*IsStore*/ true,
1861 /*IsUnitStrided*/ false);
1862 case Intrinsic::riscv_vssseg2_mask:
1863 case Intrinsic::riscv_vssseg3_mask:
1864 case Intrinsic::riscv_vssseg4_mask:
1865 case Intrinsic::riscv_vssseg5_mask:
1866 case Intrinsic::riscv_vssseg6_mask:
1867 case Intrinsic::riscv_vssseg7_mask:
1868 case Intrinsic::riscv_vssseg8_mask:
1869 case Intrinsic::riscv_vsoxseg2_mask:
1870 case Intrinsic::riscv_vsoxseg3_mask:
1871 case Intrinsic::riscv_vsoxseg4_mask:
1872 case Intrinsic::riscv_vsoxseg5_mask:
1873 case Intrinsic::riscv_vsoxseg6_mask:
1874 case Intrinsic::riscv_vsoxseg7_mask:
1875 case Intrinsic::riscv_vsoxseg8_mask:
1876 case Intrinsic::riscv_vsuxseg2_mask:
1877 case Intrinsic::riscv_vsuxseg3_mask:
1878 case Intrinsic::riscv_vsuxseg4_mask:
1879 case Intrinsic::riscv_vsuxseg5_mask:
1880 case Intrinsic::riscv_vsuxseg6_mask:
1881 case Intrinsic::riscv_vsuxseg7_mask:
1882 case Intrinsic::riscv_vsuxseg8_mask:
1883 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1884 /*IsStore*/ true,
1885 /*IsUnitStrided*/ false);
1886 }
1887}
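// Hedged illustration (helper invented for this sketch): the riscv.seg<N>.store
// cases above rely on the operand layout (vec0, ..., vecN-1, ptr, vl), so the
// pointer always sits two slots from the end regardless of how many field
// vectors the intrinsic carries.
namespace {
[[maybe_unused]] constexpr unsigned examplePtrOpForSegStore(unsigned NumFields) {
  unsigned NumArgs = NumFields + 2; // field vectors + pointer + vl
  return NumArgs - 2;               // index of the pointer operand
}
static_assert(examplePtrOpForSegStore(2) == 2,
              "riscv.seg2.store: (vec, vec, ptr, vl)");
static_assert(examplePtrOpForSegStore(8) == 8,
              "riscv.seg8.store: pointer is operand 8");
} // end anonymous namespace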
1888
1890 const AddrMode &AM, Type *Ty,
1891 unsigned AS,
1892 Instruction *I) const {
1893 // No global is ever allowed as a base.
1894 if (AM.BaseGV)
1895 return false;
1896
1897 // None of our addressing modes allows a scalable offset
1898 if (AM.ScalableOffset)
1899 return false;
1900
1901 // RVV instructions only support register addressing.
1902 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1903 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1904
1905 // Require a 12-bit signed offset.
1906 if (!isInt<12>(AM.BaseOffs))
1907 return false;
1908
1909 switch (AM.Scale) {
1910 case 0: // "r+i" or just "i", depending on HasBaseReg.
1911 break;
1912 case 1:
1913 if (!AM.HasBaseReg) // allow "r+i".
1914 break;
1915 return false; // disallow "r+r" or "r+r+i".
1916 default:
1917 return false;
1918 }
1919
1920 return true;
1921}
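// Hedged sketch (standalone helper, not used by the lowering above): for
// scalar memory accesses the rules reduce to "optional base register plus a
// signed 12-bit displacement, no scaled index register".
namespace {
[[maybe_unused]] constexpr bool exampleIsLegalScalarAddrMode(bool HasBaseReg,
                                                             long long Offs,
                                                             int Scale) {
  if (Offs < -2048 || Offs > 2047)
    return false; // must fit a 12-bit signed immediate
  if (Scale == 0)
    return true;  // "r+i" or plain "i"
  return Scale == 1 && !HasBaseReg; // a lone scaled reg degenerates to "r+i"
}
static_assert(exampleIsLegalScalarAddrMode(true, 2047, 0), "lw a0, 2047(a1)");
static_assert(!exampleIsLegalScalarAddrMode(true, 0, 1), "no r+r addressing");
} // end anonymous namespace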
1922
1924 return isInt<12>(Imm);
1925}
1926
1928 return isInt<12>(Imm);
1929}
1930
1931// On RV32, 64-bit integers are split into their high and low parts and held
1932// in two different registers, so the trunc is free since the low register can
1933// just be used.
1934// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1935// isTruncateFree?
1937 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1938 return false;
1939 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1940 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1941 return (SrcBits == 64 && DestBits == 32);
1942}
1943
1945 // We consider i64->i32 free on RV64 since we have good selection of W
1946 // instructions that make promoting operations back to i64 free in many cases.
1947 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1948 !DstVT.isInteger())
1949 return false;
1950 unsigned SrcBits = SrcVT.getSizeInBits();
1951 unsigned DestBits = DstVT.getSizeInBits();
1952 return (SrcBits == 64 && DestBits == 32);
1953}
1954
1956 EVT SrcVT = Val.getValueType();
1957 // free truncate from vnsrl and vnsra
1958 if (Subtarget.hasVInstructions() &&
1959 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1960 SrcVT.isVector() && VT2.isVector()) {
1961 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1962 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1963 if (SrcBits == DestBits * 2) {
1964 return true;
1965 }
1966 }
1967 return TargetLowering::isTruncateFree(Val, VT2);
1968}
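// Hedged example (invented helper): the vector special case above fires only
// when the source element is exactly twice as wide as the destination, i.e.
// the shapes that a single vnsrl/vnsra narrowing shift can produce.
namespace {
[[maybe_unused]] constexpr bool
exampleIsNarrowingShiftShape(unsigned SrcEltBits, unsigned DstEltBits) {
  return SrcEltBits == 2 * DstEltBits;
}
static_assert(exampleIsNarrowingShiftShape(64, 32),
              "i64 -> i32 comes for free with vnsrl/vnsra");
static_assert(!exampleIsNarrowingShiftShape(64, 16),
              "i64 -> i16 needs more than one narrowing step");
} // end anonymous namespace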
1969
1971 // Zexts are free if they can be combined with a load.
1972 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1973 // poorly with type legalization of compares preferring sext.
1974 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1975 EVT MemVT = LD->getMemoryVT();
1976 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1977 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1978 LD->getExtensionType() == ISD::ZEXTLOAD))
1979 return true;
1980 }
1981
1982 return TargetLowering::isZExtFree(Val, VT2);
1983}
1984
1986 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1987}
1988
1990 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1991}
1992
1994 return Subtarget.hasStdExtZbb() ||
1995 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1996}
1997
1999 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2004 const Instruction &AndI) const {
2005 // We expect to be able to match a bit extraction instruction if the Zbs
2006 // extension is supported and the mask is a power of two. However, we
2007 // conservatively return false if the mask would fit in an ANDI instruction,
2008 // on the basis that it's possible the sinking+duplication of the AND in
2009 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2010 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2011 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2012 return false;
2013 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2014 if (!Mask)
2015 return false;
2016 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2017}
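// Hedged illustration (helper invented here): the check above declines masks
// that already fit an ANDI immediate and only asks CodeGenPrepare to sink the
// AND when a single higher bit is being tested, i.e. a BEXTI/th.tst candidate.
namespace {
[[maybe_unused]] constexpr bool exampleWantsBitExtract(unsigned long long Mask) {
  bool FitsANDI = Mask <= 2047; // positive signed 12-bit immediates
  bool SingleBit = Mask != 0 && (Mask & (Mask - 1)) == 0;
  return !FitsANDI && SingleBit;
}
static_assert(!exampleWantsBitExtract(1ULL << 5), "small masks stay ANDI+BNEZ");
static_assert(exampleWantsBitExtract(1ULL << 20), "high single bits favour BEXTI");
} // end anonymous namespace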
2018
2020 EVT VT = Y.getValueType();
2021
2022 // FIXME: Support vectors once we have tests.
2023 if (VT.isVector())
2024 return false;
2025
2026 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2027 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2028}
2029
2031 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2032 if (Subtarget.hasStdExtZbs())
2033 return X.getValueType().isScalarInteger();
2034 auto *C = dyn_cast<ConstantSDNode>(Y);
2035 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2036 if (Subtarget.hasVendorXTHeadBs())
2037 return C != nullptr;
2038 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2039 return C && C->getAPIntValue().ule(10);
2040}
2041
2043 EVT VT) const {
2044 // Only enable for rvv.
2045 if (!VT.isVector() || !Subtarget.hasVInstructions())
2046 return false;
2047
2048 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2049 return false;
2050
2051 return true;
2052}
2053
2055 Type *Ty) const {
2056 assert(Ty->isIntegerTy());
2057
2058 unsigned BitSize = Ty->getIntegerBitWidth();
2059 if (BitSize > Subtarget.getXLen())
2060 return false;
2061
2062 // Fast path, assume 32-bit immediates are cheap.
2063 int64_t Val = Imm.getSExtValue();
2064 if (isInt<32>(Val))
2065 return true;
2066
2067 // A constant pool entry may be more aligned than the load we're trying to
2068 // replace. If we don't support unaligned scalar mem, prefer the constant
2069 // pool.
2070 // TODO: Can the caller pass down the alignment?
2071 if (!Subtarget.enableUnalignedScalarMem())
2072 return true;
2073
2074 // Prefer to keep the load if it would require many instructions.
2075 // This uses the same threshold we use for constant pools but doesn't
2076 // check useConstantPoolForLargeInts.
2077 // TODO: Should we keep the load only when we're definitely going to emit a
2078 // constant pool?
2079
2081 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2082}
2083
2087 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2088 SelectionDAG &DAG) const {
2089 // One interesting pattern that we'd want to form is 'bit extract':
2090 // ((1 >> Y) & 1) ==/!= 0
2091 // But we also need to be careful not to try to reverse that fold.
2092
2093 // Is this '((1 >> Y) & 1)'?
2094 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2095 return false; // Keep the 'bit extract' pattern.
2096
2097 // Will this be '((1 >> Y) & 1)' after the transform?
2098 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2099 return true; // Do form the 'bit extract' pattern.
2100
2101 // If 'X' is a constant, and we transform, then we will immediately
2102 // try to undo the fold, thus causing endless combine loop.
2103 // So only do the transform if X is not a constant. This matches the default
2104 // implementation of this function.
2105 return !XC;
2106}
2107
2109 unsigned Opc = VecOp.getOpcode();
2110
2111 // Assume target opcodes can't be scalarized.
2112 // TODO - do we have any exceptions?
2113 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2114 return false;
2115
2116 // If the vector op is not supported, try to convert to scalar.
2117 EVT VecVT = VecOp.getValueType();
2118 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2119 return true;
2120
2121 // If the vector op is supported, but the scalar op is not, the transform may
2122 // not be worthwhile.
2123 // Permit a vector binary operation to be converted to a scalar binary
2124 // operation that is custom lowered with an illegal type.
2125 EVT ScalarVT = VecVT.getScalarType();
2126 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2127 isOperationCustom(Opc, ScalarVT);
2128}
2129
2131 const GlobalAddressSDNode *GA) const {
2132 // In order to maximise the opportunity for common subexpression elimination,
2133 // keep a separate ADD node for the global address offset instead of folding
2134 // it in the global address node. Later peephole optimisations may choose to
2135 // fold it back in when profitable.
2136 return false;
2137}
2138
2139// Returns 0-31 if the fli instruction is available for the type and this is
2140// legal FP immediate for the type. Returns -1 otherwise.
2142 if (!Subtarget.hasStdExtZfa())
2143 return -1;
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return -1;
2157
2158 return RISCVLoadFPImm::getLoadFPImm(Imm);
2159}
2160
2162 bool ForCodeSize) const {
2163 bool IsLegalVT = false;
2164 if (VT == MVT::f16)
2165 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2166 else if (VT == MVT::f32)
2167 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2168 else if (VT == MVT::f64)
2169 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2170 else if (VT == MVT::bf16)
2171 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2172
2173 if (!IsLegalVT)
2174 return false;
2175
2176 if (getLegalZfaFPImm(Imm, VT) >= 0)
2177 return true;
2178
2179 // Cannot create a 64-bit floating-point immediate value for RV32.
2180 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2181 // td can handle +0.0 or -0.0 already.
2182 // -0.0 can be created by fmv + fneg.
2183 return Imm.isZero();
2184 }
2185
2186 // Special case: fmv + fneg
2187 if (Imm.isNegZero())
2188 return true;
2189
2190 // Building an integer and then converting requires a fmv at the end of
2191 // the integer sequence. The fmv is not required for Zfinx.
2192 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2193 const int Cost =
2194 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2195 Subtarget.getXLen(), Subtarget);
2196 return Cost <= FPImmCost;
2197}
2198
2199// TODO: This is very conservative.
2201 unsigned Index) const {
2203 return false;
2204
2205 // Only support extracting a fixed from a fixed vector for now.
2206 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2207 return false;
2208
2209 EVT EltVT = ResVT.getVectorElementType();
2210 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2211
2212 // The smallest type we can slide is i8.
2213 // TODO: We can extract index 0 from a mask vector without a slide.
2214 if (EltVT == MVT::i1)
2215 return false;
2216
2217 unsigned ResElts = ResVT.getVectorNumElements();
2218 unsigned SrcElts = SrcVT.getVectorNumElements();
2219
2220 unsigned MinVLen = Subtarget.getRealMinVLen();
2221 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2222
2223 // If we're extracting only data from the first VLEN bits of the source
2224 // then we can always do this with an m1 vslidedown.vx. Restricting the
2225 // Index ensures we can use a vslidedown.vi.
2226 // TODO: We can generalize this when the exact VLEN is known.
2227 if (Index + ResElts <= MinVLMAX && Index < 31)
2228 return true;
2229
2230 // Conservatively only handle extracting half of a vector.
2231 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2232 // the upper half of a vector until we have more test coverage.
2233 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2234 // a cheap extract. However, this case is important in practice for
2235 //       shuffled extracts of longer vectors. How should we resolve this?
2236 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2237}
2238
2241 EVT VT) const {
2242 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2243 // We might still end up using a GPR but that will be decided based on ABI.
2244 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2245 !Subtarget.hasStdExtZfhminOrZhinxmin())
2246 return MVT::f32;
2247
2249
2250 return PartVT;
2251}
2252
2253unsigned
2255 std::optional<MVT> RegisterVT) const {
2256 // Pair inline assembly operand
2257 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2258 *RegisterVT == MVT::Untyped)
2259 return 1;
2260
2261 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2262}
2263
2266 EVT VT) const {
2267 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2268 // We might still end up using a GPR but that will be decided based on ABI.
2269 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2270 !Subtarget.hasStdExtZfhminOrZhinxmin())
2271 return 1;
2272
2274}
2275
2277 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2278 unsigned &NumIntermediates, MVT &RegisterVT) const {
2280 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2281
2282 return NumRegs;
2283}
2284
2285// Changes the condition code and swaps operands if necessary, so the SetCC
2286// operation matches one of the comparisons supported directly by branches
2287// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2288// with 1/-1.
2289static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2290 ISD::CondCode &CC, SelectionDAG &DAG) {
2291 // If this is a single bit test that can't be handled by ANDI, shift the
2292 // bit to be tested to the MSB and perform a signed compare with 0.
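// For example, on RV64 (X & 0x800) == 0 cannot use andi (0x800 is not a signed
// 12-bit immediate), so it is rewritten as (X << 52) >= 0 (signed); similarly
// (X & 0xffff) == 0 is rewritten as (X << 48) == 0.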
2293 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2294 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2295 isa<ConstantSDNode>(LHS.getOperand(1))) {
2296 uint64_t Mask = LHS.getConstantOperandVal(1);
2297 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2298 unsigned ShAmt = 0;
2299 if (isPowerOf2_64(Mask)) {
2300 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2301 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2302 } else {
2303 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2304 }
2305
2306 LHS = LHS.getOperand(0);
2307 if (ShAmt != 0)
2308 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2309 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2310 return;
2311 }
2312 }
2313
2314 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2315 int64_t C = RHSC->getSExtValue();
2316 switch (CC) {
2317 default: break;
2318 case ISD::SETGT:
2319 // Convert X > -1 to X >= 0.
2320 if (C == -1) {
2321 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2322 CC = ISD::SETGE;
2323 return;
2324 }
2325 break;
2326 case ISD::SETLT:
2327 // Convert X < 1 to 0 >= X.
2328 if (C == 1) {
2329 RHS = LHS;
2330 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2331 CC = ISD::SETGE;
2332 return;
2333 }
2334 break;
2335 }
2336 }
2337
2338 switch (CC) {
2339 default:
2340 break;
2341 case ISD::SETGT:
2342 case ISD::SETLE:
2343 case ISD::SETUGT:
2344 case ISD::SETULE:
2346 std::swap(LHS, RHS);
2347 break;
2348 }
2349}
2350
2352 if (VT.isRISCVVectorTuple()) {
2353 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2354 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2355 return RISCVII::LMUL_F8;
2356 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2357 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2358 return RISCVII::LMUL_F4;
2359 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2360 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2361 return RISCVII::LMUL_F2;
2362 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2363 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2364 return RISCVII::LMUL_1;
2365 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2366 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2367 return RISCVII::LMUL_2;
2368 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2369 return RISCVII::LMUL_4;
2370 llvm_unreachable("Invalid vector tuple type LMUL.");
2371 }
2372
2373 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2374 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2375 if (VT.getVectorElementType() == MVT::i1)
2376 KnownSize *= 8;
2377
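// For example, nxv4i32 has a known minimum size of 4 x 32 = 128 bits, i.e. two
// 64-bit vector register blocks, so it maps to LMUL_2; an nxv8i1 mask is scaled
// up to 64 bits and maps to LMUL_1.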
2378 switch (KnownSize) {
2379 default:
2380 llvm_unreachable("Invalid LMUL.");
2381 case 8:
2383 case 16:
2385 case 32:
2387 case 64:
2389 case 128:
2391 case 256:
2393 case 512:
2395 }
2396}
2397
2399 switch (LMul) {
2400 default:
2401 llvm_unreachable("Invalid LMUL.");
2406 return RISCV::VRRegClassID;
2408 return RISCV::VRM2RegClassID;
2410 return RISCV::VRM4RegClassID;
2412 return RISCV::VRM8RegClassID;
2413 }
2414}
2415
2416unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2417 RISCVII::VLMUL LMUL = getLMUL(VT);
2418 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2419 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2420 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2421 LMUL == RISCVII::VLMUL::LMUL_1) {
2422 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm1_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2427 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm2_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2432 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm4_0 + Index;
2435 }
2436 llvm_unreachable("Invalid vector type.");
2437}
2438
2440 if (VT.isRISCVVectorTuple()) {
2441 unsigned NF = VT.getRISCVVectorTupleNumFields();
2442 unsigned RegsPerField =
2443 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2444 (NF * RISCV::RVVBitsPerBlock));
2445 switch (RegsPerField) {
2446 case 1:
2447 if (NF == 2)
2448 return RISCV::VRN2M1RegClassID;
2449 if (NF == 3)
2450 return RISCV::VRN3M1RegClassID;
2451 if (NF == 4)
2452 return RISCV::VRN4M1RegClassID;
2453 if (NF == 5)
2454 return RISCV::VRN5M1RegClassID;
2455 if (NF == 6)
2456 return RISCV::VRN6M1RegClassID;
2457 if (NF == 7)
2458 return RISCV::VRN7M1RegClassID;
2459 if (NF == 8)
2460 return RISCV::VRN8M1RegClassID;
2461 break;
2462 case 2:
2463 if (NF == 2)
2464 return RISCV::VRN2M2RegClassID;
2465 if (NF == 3)
2466 return RISCV::VRN3M2RegClassID;
2467 if (NF == 4)
2468 return RISCV::VRN4M2RegClassID;
2469 break;
2470 case 4:
2471 assert(NF == 2);
2472 return RISCV::VRN2M4RegClassID;
2473 default:
2474 break;
2475 }
2476 llvm_unreachable("Invalid vector tuple type RegClass.");
2477 }
2478
2479 if (VT.getVectorElementType() == MVT::i1)
2480 return RISCV::VRRegClassID;
2481 return getRegClassIDForLMUL(getLMUL(VT));
2482}
2483
2484// Attempt to decompose a subvector insert/extract between VecVT and
2485// SubVecVT via subregister indices. Returns the subregister index that
2486// can perform the subvector insert/extract with the given element index, as
2487// well as the index corresponding to any leftover subvectors that must be
2488// further inserted/extracted within the register class for SubVecVT.
2489std::pair<unsigned, unsigned>
2491 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2492 const RISCVRegisterInfo *TRI) {
2493 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2494 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2495 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2496 "Register classes not ordered");
2497 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2498 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2499
2500 // If VecVT is a vector tuple type, either it is the tuple type with the same
2501 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2502 if (VecVT.isRISCVVectorTuple()) {
2503 if (VecRegClassID == SubRegClassID)
2504 return {RISCV::NoSubRegister, 0};
2505
2506 assert(SubVecVT.isScalableVector() &&
2507 "Only allow scalable vector subvector.");
2508 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2509 "Invalid vector tuple insert/extract for vector and subvector with "
2510 "different LMUL.");
2511 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2512 }
2513
2514 // Try to compose a subregister index that takes us from the incoming
2515 // LMUL>1 register class down to the outgoing one. At each step we halve
2516 // the LMUL:
2517 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2518 // Note that this is not guaranteed to find a subregister index, such as
2519 // when we are extracting from one VR type to another.
2520 unsigned SubRegIdx = RISCV::NoSubRegister;
2521 for (const unsigned RCID :
2522 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2523 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2524 VecVT = VecVT.getHalfNumVectorElementsVT();
2525 bool IsHi =
2526 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2527 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2528 getSubregIndexByMVT(VecVT, IsHi));
2529 if (IsHi)
2530 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2531 }
2532 return {SubRegIdx, InsertExtractIdx};
2533}
2534
2535// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2536// stores for those types.
2537bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2538 return !Subtarget.useRVVForFixedLengthVectors() ||
2539 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2540}
2541
2543 if (!ScalarTy.isSimple())
2544 return false;
2545 switch (ScalarTy.getSimpleVT().SimpleTy) {
2546 case MVT::iPTR:
2547 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2548 case MVT::i8:
2549 case MVT::i16:
2550 case MVT::i32:
2551 return true;
2552 case MVT::i64:
2553 return Subtarget.hasVInstructionsI64();
2554 case MVT::f16:
2555 return Subtarget.hasVInstructionsF16Minimal();
2556 case MVT::bf16:
2557 return Subtarget.hasVInstructionsBF16Minimal();
2558 case MVT::f32:
2559 return Subtarget.hasVInstructionsF32();
2560 case MVT::f64:
2561 return Subtarget.hasVInstructionsF64();
2562 default:
2563 return false;
2564 }
2565}
2566
2567
2568unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2569 return NumRepeatedDivisors;
2570}
2571
2573 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2574 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2575 "Unexpected opcode");
2576 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2577 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2579 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2580 if (!II)
2581 return SDValue();
2582 return Op.getOperand(II->VLOperand + 1 + HasChain);
2583}
2584
2586 const RISCVSubtarget &Subtarget) {
2587 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2588 if (!Subtarget.useRVVForFixedLengthVectors())
2589 return false;
2590
2591 // We only support a set of vector types with a consistent maximum fixed size
2592 // across all supported vector element types to avoid legalization issues.
2593 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2594 // fixed-length vector type we support is 1024 bytes.
2595 if (VT.getFixedSizeInBits() > 1024 * 8)
2596 return false;
2597
2598 unsigned MinVLen = Subtarget.getRealMinVLen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601
2602 // Don't use RVV for vectors we cannot scalarize if required.
2603 switch (EltVT.SimpleTy) {
2604 // i1 is supported but has different rules.
2605 default:
2606 return false;
2607 case MVT::i1:
2608 // Masks can only use a single register.
2609 if (VT.getVectorNumElements() > MinVLen)
2610 return false;
2611 MinVLen /= 8;
2612 break;
2613 case MVT::i8:
2614 case MVT::i16:
2615 case MVT::i32:
2616 break;
2617 case MVT::i64:
2618 if (!Subtarget.hasVInstructionsI64())
2619 return false;
2620 break;
2621 case MVT::f16:
2622 if (!Subtarget.hasVInstructionsF16Minimal())
2623 return false;
2624 break;
2625 case MVT::bf16:
2626 if (!Subtarget.hasVInstructionsBF16Minimal())
2627 return false;
2628 break;
2629 case MVT::f32:
2630 if (!Subtarget.hasVInstructionsF32())
2631 return false;
2632 break;
2633 case MVT::f64:
2634 if (!Subtarget.hasVInstructionsF64())
2635 return false;
2636 break;
2637 }
2638
2639 // Reject elements larger than ELEN.
2640 if (EltVT.getSizeInBits() > Subtarget.getELen())
2641 return false;
2642
2643 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2644 // Don't use RVV for types that don't fit.
2645 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2646 return false;
2647
2648 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2649 // the base fixed length RVV support in place.
2650 if (!VT.isPow2VectorType())
2651 return false;
2652
2653 return true;
2654}
2655
2656bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2657 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2658}
2659
2660// Return the largest legal scalable vector type that matches VT's element type.
2662 const RISCVSubtarget &Subtarget) {
2663 // This may be called before legal types are set up.
2664 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2665 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2666 "Expected legal fixed length vector!");
2667
2668 unsigned MinVLen = Subtarget.getRealMinVLen();
2669 unsigned MaxELen = Subtarget.getELen();
2670
2671 MVT EltVT = VT.getVectorElementType();
2672 switch (EltVT.SimpleTy) {
2673 default:
2674 llvm_unreachable("unexpected element type for RVV container");
2675 case MVT::i1:
2676 case MVT::i8:
2677 case MVT::i16:
2678 case MVT::i32:
2679 case MVT::i64:
2680 case MVT::bf16:
2681 case MVT::f16:
2682 case MVT::f32:
2683 case MVT::f64: {
2684 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2685 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2686 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2687 unsigned NumElts =
2689 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2690 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2691 return MVT::getScalableVectorVT(EltVT, NumElts);
2692 }
2693 }
2694}
2695
2697 const RISCVSubtarget &Subtarget) {
2699 Subtarget);
2700}
2701
2703 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2704}
2705
2706// Grow V to consume an entire RVV register.
2708 const RISCVSubtarget &Subtarget) {
2709 assert(VT.isScalableVector() &&
2710 "Expected to convert into a scalable vector!");
2711 assert(V.getValueType().isFixedLengthVector() &&
2712 "Expected a fixed length vector operand!");
2713 SDLoc DL(V);
2714 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2715 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2716}
2717
2718// Shrink V so it's just big enough to maintain a VT's worth of data.
2720 const RISCVSubtarget &Subtarget) {
2722 "Expected to convert into a fixed length vector!");
2723 assert(V.getValueType().isScalableVector() &&
2724 "Expected a scalable vector operand!");
2725 SDLoc DL(V);
2726 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2727 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2728}
2729
2730/// Return the mask type suitable for masking the provided
2731/// vector type. This is simply an i1 element type vector of the same
2732/// (possibly scalable) length.
2733static MVT getMaskTypeFor(MVT VecVT) {
2734 assert(VecVT.isVector());
2736 return MVT::getVectorVT(MVT::i1, EC);
2737}
2738
2739/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2740/// vector length VL.
2741static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2742 SelectionDAG &DAG) {
2743 MVT MaskVT = getMaskTypeFor(VecVT);
2744 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2745}
2746
2747static std::pair<SDValue, SDValue>
2749 const RISCVSubtarget &Subtarget) {
2750 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2751 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2752 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2753 return {Mask, VL};
2754}
2755
2756static std::pair<SDValue, SDValue>
2757getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2758 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2761 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2762 return {Mask, VL};
2763}
2764
2765// Gets the two common "VL" operands: an all-ones mask and the vector length.
2766// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2767// fixed-length, ContainerVT is the scalable container type it is lowered in;
2768// if VecVT is scalable, ContainerVT should be the same as VecVT.
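// For instance, for a v4i32 operation carried out in an nxv2i32 container
// (guaranteed minimum VLEN of 128), this returns an all-ones nxv2i1 mask and a
// VL of 4; for a scalable type the VL is X0, i.e. VLMAX.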
2769static std::pair<SDValue, SDValue>
2770getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2771 const RISCVSubtarget &Subtarget) {
2772 if (VecVT.isFixedLengthVector())
2773 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2774 Subtarget);
2775 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2776 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2777}
2778
2780 SelectionDAG &DAG) const {
2781 assert(VecVT.isScalableVector() && "Expected scalable vector");
2782 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2783 VecVT.getVectorElementCount());
2784}
2785
2786std::pair<unsigned, unsigned>
2788 const RISCVSubtarget &Subtarget) {
2789 assert(VecVT.isScalableVector() && "Expected scalable vector");
2790
2791 unsigned EltSize = VecVT.getScalarSizeInBits();
2792 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2793
2794 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2795 unsigned MaxVLMAX =
2796 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2797
2798 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2799 unsigned MinVLMAX =
2800 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2801
2802 return std::make_pair(MinVLMAX, MaxVLMAX);
2803}
2804
2805// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2806// cases of either are (currently) supported. This can get us into an infinite loop
2807// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2808// as a ..., etc.
2809// Until either (or both) of these can reliably lower any node, reporting that
2810// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2811// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2812// which is not desirable.
2814 EVT VT, unsigned DefinedValues) const {
2815 return false;
2816}
2817
2819 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2820 // implementation-defined.
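// For example, with DLEN = VLEN/2 (DLenFactor = 2) an LMUL=4 type gets a cost
// of 4 * 2 = 8, i.e. eight DLEN-sized chunks.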
2821 if (!VT.isVector())
2823 unsigned DLenFactor = Subtarget.getDLenFactor();
2824 unsigned Cost;
2825 if (VT.isScalableVector()) {
2826 unsigned LMul;
2827 bool Fractional;
2828 std::tie(LMul, Fractional) =
2830 if (Fractional)
2831 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2832 else
2833 Cost = (LMul * DLenFactor);
2834 } else {
2835 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2836 }
2837 return Cost;
2838}
2839
2840
2841/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2842/// is generally quadratic in the number of vregs implied by LMUL. Note that
2843/// the operands (index and possibly mask) are handled separately.
2845 return getLMULCost(VT) * getLMULCost(VT);
2846}
2847
2848/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2849/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2855/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2856/// for the type VT. (This does not cover the vslide1up or vslide1down
2857/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2860 return getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2868 return getLMULCost(VT);
2869}
2870
2872 const RISCVSubtarget &Subtarget) {
2873 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2874 // bf16 conversions are always promoted to f32.
2875 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2876 Op.getValueType() == MVT::bf16) {
2877 bool IsStrict = Op->isStrictFPOpcode();
2878
2879 SDLoc DL(Op);
2880 if (IsStrict) {
2881 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2882 {Op.getOperand(0), Op.getOperand(1)});
2883 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2884 {Op.getValueType(), MVT::Other},
2885 {Val.getValue(1), Val.getValue(0),
2886 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2887 }
2888 return DAG.getNode(
2889 ISD::FP_ROUND, DL, Op.getValueType(),
2890 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2892 }
2893
2894 // Other operations are legal.
2895 return Op;
2896}
2897
2899 const RISCVSubtarget &Subtarget) {
2900 // RISC-V FP-to-int conversions saturate to the destination register size, but
2901 // don't produce 0 for nan. We can use a conversion instruction and fix the
2902 // nan case with a compare and a select.
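// For example, (i64 fp_to_sint_sat f64 %x) on RV64 becomes an FCVT_X node with
// RTZ rounding followed by a select that returns 0 when %x is unordered with
// itself (NaN); out-of-range values are already clamped by the conversion.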
2903 SDValue Src = Op.getOperand(0);
2904
2905 MVT DstVT = Op.getSimpleValueType();
2906 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2907
2908 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2909
2910 if (!DstVT.isVector()) {
2911 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2912 // the result.
2913 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2914 Src.getValueType() == MVT::bf16) {
2915 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2916 }
2917
2918 unsigned Opc;
2919 if (SatVT == DstVT)
2920 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2921 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2923 else
2924 return SDValue();
2925 // FIXME: Support other SatVTs by clamping before or after the conversion.
2926
2927 SDLoc DL(Op);
2928 SDValue FpToInt = DAG.getNode(
2929 Opc, DL, DstVT, Src,
2931
2932 if (Opc == RISCVISD::FCVT_WU_RV64)
2933 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2934
2935 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2936 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2938 }
2939
2940 // Vectors.
2941
2942 MVT DstEltVT = DstVT.getVectorElementType();
2943 MVT SrcVT = Src.getSimpleValueType();
2944 MVT SrcEltVT = SrcVT.getVectorElementType();
2945 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2946 unsigned DstEltSize = DstEltVT.getSizeInBits();
2947
2948 // Only handle saturating to the destination type.
2949 if (SatVT != DstEltVT)
2950 return SDValue();
2951
2952 MVT DstContainerVT = DstVT;
2953 MVT SrcContainerVT = SrcVT;
2954 if (DstVT.isFixedLengthVector()) {
2955 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2956 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2957 assert(DstContainerVT.getVectorElementCount() ==
2958 SrcContainerVT.getVectorElementCount() &&
2959 "Expected same element count");
2960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2961 }
2962
2963 SDLoc DL(Op);
2964
2965 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2966
2967 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2968 {Src, Src, DAG.getCondCode(ISD::SETNE),
2969 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2970
2971 // Need to widen by more than 1 step, promote the FP type, then do a widening
2972 // convert.
2973 if (DstEltSize > (2 * SrcEltSize)) {
2974 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2975 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2976 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2977 }
2978
2979 MVT CvtContainerVT = DstContainerVT;
2980 MVT CvtEltVT = DstEltVT;
2981 if (SrcEltSize > (2 * DstEltSize)) {
2982 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2983 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2984 }
2985
2986 unsigned RVVOpc =
2988 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2989
2990 while (CvtContainerVT != DstContainerVT) {
2991 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2992 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2993 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2994 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2996 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2997 }
2998
2999 SDValue SplatZero = DAG.getNode(
3000 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3001 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3002 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3003 Res, DAG.getUNDEF(DstContainerVT), VL);
3004
3005 if (DstVT.isFixedLengthVector())
3006 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3007
3008 return Res;
3009}
3010
3012 const RISCVSubtarget &Subtarget) {
3013 bool IsStrict = Op->isStrictFPOpcode();
3014 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3015
3016 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3017 // bf16 conversions are always promoted to f32.
3018 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3019 SrcVal.getValueType() == MVT::bf16) {
3020 SDLoc DL(Op);
3021 if (IsStrict) {
3022 SDValue Ext =
3023 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3024 {Op.getOperand(0), SrcVal});
3025 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3026 {Ext.getValue(1), Ext.getValue(0)});
3027 }
3028 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3029 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3030 }
3031
3032 // Other operations are legal.
3033 return Op;
3034}
3035
3037 switch (Opc) {
3038 case ISD::FROUNDEVEN:
3040 case ISD::VP_FROUNDEVEN:
3041 return RISCVFPRndMode::RNE;
3042 case ISD::FTRUNC:
3043 case ISD::STRICT_FTRUNC:
3044 case ISD::VP_FROUNDTOZERO:
3045 return RISCVFPRndMode::RTZ;
3046 case ISD::FFLOOR:
3047 case ISD::STRICT_FFLOOR:
3048 case ISD::VP_FFLOOR:
3049 return RISCVFPRndMode::RDN;
3050 case ISD::FCEIL:
3051 case ISD::STRICT_FCEIL:
3052 case ISD::VP_FCEIL:
3053 return RISCVFPRndMode::RUP;
3054 case ISD::FROUND:
3055 case ISD::STRICT_FROUND:
3056 case ISD::VP_FROUND:
3057 return RISCVFPRndMode::RMM;
3058 case ISD::FRINT:
3059 case ISD::VP_FRINT:
3060 return RISCVFPRndMode::DYN;
3061 }
3062
3064}
3065
3066// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3067// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3068// the integer domain and back. Taking care to avoid converting values that are
3069// nan or already correct.
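// For example, vector FFLOOR is expanded by taking |x|, comparing it against
// 2^(mantissa bits) to form a mask of lanes that actually need rounding (values
// too large to have a fractional part, and NaNs, are excluded), converting those
// lanes to integer with RDN rounding and back to FP, and finally restoring the
// original sign so that -0.0 is preserved.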
3070static SDValue
3072 const RISCVSubtarget &Subtarget) {
3073 MVT VT = Op.getSimpleValueType();
3074 assert(VT.isVector() && "Unexpected type");
3075
3076 SDLoc DL(Op);
3077
3078 SDValue Src = Op.getOperand(0);
3079
3080 MVT ContainerVT = VT;
3081 if (VT.isFixedLengthVector()) {
3082 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3083 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3084 }
3085
3086 SDValue Mask, VL;
3087 if (Op->isVPOpcode()) {
3088 Mask = Op.getOperand(1);
3089 if (VT.isFixedLengthVector())
3090 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3091 Subtarget);
3092 VL = Op.getOperand(2);
3093 } else {
3094 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3095 }
3096
3097 // Freeze the source since we are increasing the number of uses.
3098 Src = DAG.getFreeze(Src);
3099
3100 // We do the conversion on the absolute value and fix the sign at the end.
3101 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3102
3103 // Determine the largest integer that can be represented exactly. This and
3104 // values larger than it don't have any fractional bits so don't need to
3105 // be converted.
3106 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3107 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3108 APFloat MaxVal = APFloat(FltSem);
3109 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3110 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3111 SDValue MaxValNode =
3112 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3113 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3114 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3115
3116 // If abs(Src) was larger than MaxVal or nan, keep it.
3117 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3118 Mask =
3119 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3120 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3121 Mask, Mask, VL});
3122
3123 // Truncate to integer and convert back to FP.
3124 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3125 MVT XLenVT = Subtarget.getXLenVT();
3126 SDValue Truncated;
3127
3128 switch (Op.getOpcode()) {
3129 default:
3130 llvm_unreachable("Unexpected opcode");
3131 case ISD::FRINT:
3132 case ISD::VP_FRINT:
3133 case ISD::FCEIL:
3134 case ISD::VP_FCEIL:
3135 case ISD::FFLOOR:
3136 case ISD::VP_FFLOOR:
3137 case ISD::FROUND:
3138 case ISD::FROUNDEVEN:
3139 case ISD::VP_FROUND:
3140 case ISD::VP_FROUNDEVEN:
3141 case ISD::VP_FROUNDTOZERO: {
3144 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3145 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3146 break;
3147 }
3148 case ISD::FTRUNC:
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3150 Mask, VL);
3151 break;
3152 case ISD::FNEARBYINT:
3153 case ISD::VP_FNEARBYINT:
3154 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3155 Mask, VL);
3156 break;
3157 }
3158
3159 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3160 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3161 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3162 Mask, VL);
3163
3164 // Restore the original sign so that -0.0 is preserved.
3165 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3166 Src, Src, Mask, VL);
3167
3168 if (!VT.isFixedLengthVector())
3169 return Truncated;
3170
3171 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3172}
3173
3174// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3175// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3176// qNaNs and converting the new source to integer and back to FP.
3177static SDValue
3179 const RISCVSubtarget &Subtarget) {
3180 SDLoc DL(Op);
3181 MVT VT = Op.getSimpleValueType();
3182 SDValue Chain = Op.getOperand(0);
3183 SDValue Src = Op.getOperand(1);
3184
3185 MVT ContainerVT = VT;
3186 if (VT.isFixedLengthVector()) {
3187 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3188 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3189 }
3190
3191 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3192
3193 // Freeze the source since we are increasing the number of uses.
3194 Src = DAG.getFreeze(Src);
3195
3196 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3197 MVT MaskVT = Mask.getSimpleValueType();
3199 DAG.getVTList(MaskVT, MVT::Other),
3200 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3201 DAG.getUNDEF(MaskVT), Mask, VL});
3202 Chain = Unorder.getValue(1);
3204 DAG.getVTList(ContainerVT, MVT::Other),
3205 {Chain, Src, Src, Src, Unorder, VL});
3206 Chain = Src.getValue(1);
3207
3208 // We do the conversion on the absolute value and fix the sign at the end.
3209 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3210
3211 // Determine the largest integer that can be represented exactly. This and
3212 // values larger than it don't have any fractional bits so don't need to
3213 // be converted.
3214 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3215 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3216 APFloat MaxVal = APFloat(FltSem);
3217 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3218 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3219 SDValue MaxValNode =
3220 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3221 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3222 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3223
3224 // If abs(Src) was larger than MaxVal or nan, keep it.
3225 Mask = DAG.getNode(
3226 RISCVISD::SETCC_VL, DL, MaskVT,
3227 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3228
3229 // Truncate to integer and convert back to FP.
3230 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3231 MVT XLenVT = Subtarget.getXLenVT();
3232 SDValue Truncated;
3233
3234 switch (Op.getOpcode()) {
3235 default:
3236 llvm_unreachable("Unexpected opcode");
3237 case ISD::STRICT_FCEIL:
3238 case ISD::STRICT_FFLOOR:
3239 case ISD::STRICT_FROUND:
3243 Truncated = DAG.getNode(
3244 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3245 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3246 break;
3247 }
3248 case ISD::STRICT_FTRUNC:
3249 Truncated =
3251 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3252 break;
3255 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3256 Mask, VL);
3257 break;
3258 }
3259 Chain = Truncated.getValue(1);
3260
3261 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3262 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3263 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3264 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3265 Truncated, Mask, VL);
3266 Chain = Truncated.getValue(1);
3267 }
3268
3269 // Restore the original sign so that -0.0 is preserved.
3270 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3271 Src, Src, Mask, VL);
3272
3273 if (VT.isFixedLengthVector())
3274 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3275 return DAG.getMergeValues({Truncated, Chain}, DL);
3276}
3277
3278static SDValue
3280 const RISCVSubtarget &Subtarget) {
3281 MVT VT = Op.getSimpleValueType();
3282 if (VT.isVector())
3283 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3284
3285 if (DAG.shouldOptForSize())
3286 return SDValue();
3287
3288 SDLoc DL(Op);
3289 SDValue Src = Op.getOperand(0);
3290
3291 // Create an integer the size of the mantissa with the MSB set. This and all
3292 // values larger than it don't have any fractional bits so don't need to be
3293 // converted.
3294 const fltSemantics &FltSem = VT.getFltSemantics();
3295 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3296 APFloat MaxVal = APFloat(FltSem);
3297 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3298 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3299 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3300
3302 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3303 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3304}
3305
3306// Expand vector LRINT and LLRINT by converting to the integer domain.
3308 const RISCVSubtarget &Subtarget) {
3309 MVT VT = Op.getSimpleValueType();
3310 assert(VT.isVector() && "Unexpected type");
3311
3312 SDLoc DL(Op);
3313 SDValue Src = Op.getOperand(0);
3314 MVT ContainerVT = VT;
3315
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3322 SDValue Truncated = DAG.getNode(
3323 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3325 VL);
3326
3327 if (!VT.isFixedLengthVector())
3328 return Truncated;
3329
3330 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3331}
3332
3333static SDValue
3335 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3336 SDValue Offset, SDValue Mask, SDValue VL,
3338 if (Passthru.isUndef())
3340 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3341 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3342 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3343}
3344
3345static SDValue
3346getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3347 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3348 SDValue VL,
3350 if (Passthru.isUndef())
3352 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3353 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3354 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3355}
3356
3357static MVT getLMUL1VT(MVT VT) {
3359 "Unexpected vector MVT");
3363}
3364
3368 int64_t Addend;
3369};
3370
3371static std::optional<APInt> getExactInteger(const APFloat &APF,
3373 // We will use a SINT_TO_FP to materialize this constant so we should use a
3374 // signed APSInt here.
3375 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3376 // We use an arbitrary rounding mode here. If a floating-point value is an
3377 // exact integer (e.g., 1.0), the rounding mode does not affect the output. If
3378 // the rounding mode changes the output value, then it is not an exact
3379 // integer.
3381 bool IsExact;
3382 // If it is out of signed integer range, it will return an invalid operation.
3383 // If it is not an exact integer, IsExact is false.
3384 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3386 !IsExact)
3387 return std::nullopt;
3388 return ValInt.extractBits(BitWidth, 0);
3389}
3390
3391// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3392// to the (non-zero) step S and start value X. This can be then lowered as the
3393// RVV sequence (VID * S) + X, for example.
3394// The step S is represented as an integer numerator divided by a positive
3395// denominator. Note that the implementation currently only identifies
3396// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3397// cannot detect 2/3, for example.
3398// Note that this method will also match potentially unappealing index
3399// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3400// determine whether this is worth generating code for.
3401//
3402// EltSizeInBits is the size of the type that the sequence will be calculated
3403// in, i.e. SEW for build_vectors or XLEN for address calculations.
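// For example, <0, 2, 4, 6> is matched with step numerator 2, denominator 1 and
// addend 0, while <1, 1, 2, 2, 3, 3> is matched with numerator 1, denominator 2
// and addend 1.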
3404static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3405 unsigned EltSizeInBits) {
3406 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3407 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3408 return std::nullopt;
3409 bool IsInteger = Op.getValueType().isInteger();
3410
3411 std::optional<unsigned> SeqStepDenom;
3412 std::optional<APInt> SeqStepNum;
3413 std::optional<APInt> SeqAddend;
3414 std::optional<std::pair<APInt, unsigned>> PrevElt;
3415 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3416
3417 // First extract the ops into a list of constant integer values. This may not
3418 // be possible for floats if they're not all representable as integers.
3420 const unsigned OpSize = Op.getScalarValueSizeInBits();
3421 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3422 if (Elt.isUndef()) {
3423 Elts[Idx] = std::nullopt;
3424 continue;
3425 }
3426 if (IsInteger) {
3427 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3428 } else {
3429 auto ExactInteger =
3430 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3431 if (!ExactInteger)
3432 return std::nullopt;
3433 Elts[Idx] = *ExactInteger;
3434 }
3435 }
3436
3437 for (auto [Idx, Elt] : enumerate(Elts)) {
3438 // Assume undef elements match the sequence; we just have to be careful
3439 // when interpolating across them.
3440 if (!Elt)
3441 continue;
3442
3443 if (PrevElt) {
3444 // Calculate the step since the last non-undef element, and ensure
3445 // it's consistent across the entire sequence.
3446 unsigned IdxDiff = Idx - PrevElt->second;
3447 APInt ValDiff = *Elt - PrevElt->first;
3448
3449 // A zero value difference means that we're somewhere in the middle
3450 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3451 // step change before evaluating the sequence.
3452 if (ValDiff == 0)
3453 continue;
3454
3455 int64_t Remainder = ValDiff.srem(IdxDiff);
3456 // Normalize the step if it's greater than 1.
3457 if (Remainder != ValDiff.getSExtValue()) {
3458 // The difference must cleanly divide the element span.
3459 if (Remainder != 0)
3460 return std::nullopt;
3461 ValDiff = ValDiff.sdiv(IdxDiff);
3462 IdxDiff = 1;
3463 }
3464
3465 if (!SeqStepNum)
3466 SeqStepNum = ValDiff;
3467 else if (ValDiff != SeqStepNum)
3468 return std::nullopt;
3469
3470 if (!SeqStepDenom)
3471 SeqStepDenom = IdxDiff;
3472 else if (IdxDiff != *SeqStepDenom)
3473 return std::nullopt;
3474 }
3475
3476 // Record this non-undef element for later.
3477 if (!PrevElt || PrevElt->first != *Elt)
3478 PrevElt = std::make_pair(*Elt, Idx);
3479 }
3480
3481 // We need to have logged a step for this to count as a legal index sequence.
3482 if (!SeqStepNum || !SeqStepDenom)
3483 return std::nullopt;
3484
3485 // Loop back through the sequence and validate elements we might have skipped
3486 // while waiting for a valid step. While doing this, log any sequence addend.
3487 for (auto [Idx, Elt] : enumerate(Elts)) {
3488 if (!Elt)
3489 continue;
3490 APInt ExpectedVal =
3491 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3492 *SeqStepNum)
3493 .sdiv(*SeqStepDenom);
3494
3495 APInt Addend = *Elt - ExpectedVal;
3496 if (!SeqAddend)
3497 SeqAddend = Addend;
3498 else if (Addend != SeqAddend)
3499 return std::nullopt;
3500 }
3501
3502 assert(SeqAddend && "Must have an addend if we have a step");
3503
3504 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3505 SeqAddend->getSExtValue()};
3506}
3507
3508// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3509// and lower it as a VRGATHER_VX_VL from the source vector.
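// For example, a splat of (extractelt %vec, %idx) becomes a vrgather.vx of %vec
// with scalar index %idx, broadcasting the selected element without first moving
// it through a scalar register.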
3510static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3511 SelectionDAG &DAG,
3512 const RISCVSubtarget &Subtarget) {
3513 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3514 return SDValue();
3515 SDValue Vec = SplatVal.getOperand(0);
3516 // Don't perform this optimization for i1 vectors, or if the element types are
3517 // different
3518 // FIXME: Support i1 vectors, maybe by promoting to i8?
3519 MVT EltTy = VT.getVectorElementType();
3520 if (EltTy == MVT::i1 ||
3522 return SDValue();
3523 SDValue Idx = SplatVal.getOperand(1);
3524 // The index must be a legal type.
3525 if (Idx.getValueType() != Subtarget.getXLenVT())
3526 return SDValue();
3527
3528 // Check that Index lies within VT
3529 // TODO: Can we check if the Index is constant and known in-bounds?
3531 return SDValue();
3532
3533 MVT ContainerVT = VT;
3534 if (VT.isFixedLengthVector())
3535 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3536
3537 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3538 DAG.getUNDEF(ContainerVT), Vec,
3539 DAG.getVectorIdxConstant(0, DL));
3540
3541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3542
3543 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3544 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3545
3546 if (!VT.isFixedLengthVector())
3547 return Gather;
3548
3549 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3550}
3551
3552/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3553/// which constitute a large proportion of the elements. In such cases we can
3554/// splat a vector with the dominant element and make up the shortfall with
3555/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3556/// Note that this includes vectors of 2 elements by association. The
3557/// upper-most element is the "dominant" one, allowing us to use a splat to
3558/// "insert" the upper element, and an insert of the lower element at position
3559/// 0, which improves codegen.
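/// For example, given <1, 1, 1, 7> the dominant value 1 is splatted and the one
/// remaining element 7 is inserted; since 7 is the trailing element this uses
/// the v(f)slide1down form described below.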
3561 const RISCVSubtarget &Subtarget) {
3562 MVT VT = Op.getSimpleValueType();
3563 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3564
3565 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3566
3567 SDLoc DL(Op);
3568 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3569
3570 MVT XLenVT = Subtarget.getXLenVT();
3571 unsigned NumElts = Op.getNumOperands();
3572
3573 SDValue DominantValue;
3574 unsigned MostCommonCount = 0;
3575 DenseMap<SDValue, unsigned> ValueCounts;
3576 unsigned NumUndefElts =
3577 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3578
3579 // Track the number of scalar loads we know we'd be inserting, estimated as
3580 // any non-zero floating-point constant. Other kinds of element are either
3581 // already in registers or are materialized on demand. The threshold at which
3582 // a vector load is more desirable than several scalar materializion and
3583 // vector-insertion instructions is not known.
3584 unsigned NumScalarLoads = 0;
3585
3586 for (SDValue V : Op->op_values()) {
3587 if (V.isUndef())
3588 continue;
3589
3590 unsigned &Count = ValueCounts[V];
3591 if (0 == Count)
3592 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3593 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3594
3595 // Is this value dominant? In case of a tie, prefer the highest element as
3596 // it's cheaper to insert near the beginning of a vector than it is at the
3597 // end.
3598 if (++Count >= MostCommonCount) {
3599 DominantValue = V;
3600 MostCommonCount = Count;
3601 }
3602 }
3603
3604 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3605 unsigned NumDefElts = NumElts - NumUndefElts;
3606 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3607
3608 // Don't perform this optimization when optimizing for size, since
3609 // materializing elements and inserting them tends to cause code bloat.
3610 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3611 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3612 ((MostCommonCount > DominantValueCountThreshold) ||
3613 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3614 // Start by splatting the most common element.
3615 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3616
3617 DenseSet<SDValue> Processed{DominantValue};
3618
3619 // We can handle an insert into the last element (of a splat) via
3620 // v(f)slide1down. This is slightly better than the vslideup insert
3621 // lowering as it avoids the need for a vector group temporary. It
3622 // is also better than using vmerge.vx as it avoids the need to
3623 // materialize the mask in a vector register.
3624 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3625 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3626 LastOp != DominantValue) {
3627 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3628 auto OpCode =
3630 if (!VT.isFloatingPoint())
3631 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3632 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3633 LastOp, Mask, VL);
3634 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3635 Processed.insert(LastOp);
3636 }
3637
3638 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3639 for (const auto &OpIdx : enumerate(Op->ops())) {
3640 const SDValue &V = OpIdx.value();
3641 if (V.isUndef() || !Processed.insert(V).second)
3642 continue;
3643 if (ValueCounts[V] == 1) {
3644 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3645 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3646 } else {
3647 // Blend in all instances of this value using a VSELECT, using a
3648 // mask where each bit signals whether that element is the one
3649 // we're after.
3651 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3652 return DAG.getConstant(V == V1, DL, XLenVT);
3653 });
3654 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3655 DAG.getBuildVector(SelMaskTy, DL, Ops),
3656 DAG.getSplatBuildVector(VT, DL, V), Vec);
3657 }
3658 }
3659
3660 return Vec;
3661 }
3662
3663 return SDValue();
3664}
3665
3667 const RISCVSubtarget &Subtarget) {
3668 MVT VT = Op.getSimpleValueType();
3669 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3670
3671 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3672
3673 SDLoc DL(Op);
3674 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3675
3676 MVT XLenVT = Subtarget.getXLenVT();
3677 unsigned NumElts = Op.getNumOperands();
3678
3679 if (VT.getVectorElementType() == MVT::i1) {
3680 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3681 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3682 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3683 }
3684
3685 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3686 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3687 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3688 }
3689
3690 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3691 // scalar integer chunks whose bit-width depends on the number of mask
3692 // bits and XLEN.
3693 // First, determine the most appropriate scalar integer type to use. This
3694 // is at most XLenVT, but may be shrunk to a smaller vector element type
3695 // according to the size of the final vector - use i8 chunks rather than
3696 // XLenVT if we're producing a v8i1. This results in more consistent
3697 // codegen across RV32 and RV64.
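// For example, a constant v8i1 mask is materialized as a single i8 whose bits
// are the mask elements, built as a v1i8 vector and bitcast to v8i1; a v4i1
// mask likewise uses a v1i8, is bitcast to v8i1, and the low v4i1 is extracted.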
3698 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3699 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3700 // If we have to use more than one INSERT_VECTOR_ELT then this
3701 // optimization is likely to increase code size; avoid performing it in
3702 // such a case. We can use a load from a constant pool in this case.
3703 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3704 return SDValue();
3705 // Now we can create our integer vector type. Note that it may be larger
3706 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3707 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3708 MVT IntegerViaVecVT =
3709 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3710 IntegerViaVecElts);
3711
3712 uint64_t Bits = 0;
3713 unsigned BitPos = 0, IntegerEltIdx = 0;
3714 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3715
3716 for (unsigned I = 0; I < NumElts;) {
3717 SDValue V = Op.getOperand(I);
3718 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3719 Bits |= ((uint64_t)BitValue << BitPos);
3720 ++BitPos;
3721 ++I;
3722
3723 // Once we accumulate enough bits to fill our scalar type or process the
3724 // last element, insert into our vector and clear our accumulated data.
3725 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3726 if (NumViaIntegerBits <= 32)
3727 Bits = SignExtend64<32>(Bits);
3728 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3729 Elts[IntegerEltIdx] = Elt;
3730 Bits = 0;
3731 BitPos = 0;
3732 IntegerEltIdx++;
3733 }
3734 }
3735
3736 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3737
3738 if (NumElts < NumViaIntegerBits) {
3739 // If we're producing a smaller vector than our minimum legal integer
3740 // type, bitcast to the equivalent (known-legal) mask type, and extract
3741 // our final mask.
3742 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3743 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3744 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3745 DAG.getConstant(0, DL, XLenVT));
3746 } else {
3747 // Else we must have produced an integer type with the same size as the
3748 // mask type; bitcast for the final result.
3749 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3750 Vec = DAG.getBitcast(VT, Vec);
3751 }
3752
3753 return Vec;
3754 }
3755
3756 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3757 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3759 if (!VT.isFloatingPoint())
3760 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3761 Splat =
3762 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3763 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3764 }
3765
3766 // Try and match index sequences, which we can lower to the vid instruction
3767 // with optional modifications. An all-undef vector is matched by
3768 // getSplatValue, above.
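// For example, <0, 2, 4, 6> is lowered as (vid << 1) and <3, 4, 5, 6> as
// (vid + 3): a shift is used when the step is a power of two, and an add
// handles the addend.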
3769 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3770 int64_t StepNumerator = SimpleVID->StepNumerator;
3771 unsigned StepDenominator = SimpleVID->StepDenominator;
3772 int64_t Addend = SimpleVID->Addend;
3773
3774 assert(StepNumerator != 0 && "Invalid step");
3775 bool Negate = false;
3776 int64_t SplatStepVal = StepNumerator;
3777 unsigned StepOpcode = ISD::MUL;
3778 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3779 // anyway as the shift of 63 won't fit in uimm5.
3780 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3781 isPowerOf2_64(std::abs(StepNumerator))) {
3782 Negate = StepNumerator < 0;
3783 StepOpcode = ISD::SHL;
3784 SplatStepVal = Log2_64(std::abs(StepNumerator));
3785 }
3786
3787 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3788 // threshold since it's the immediate value many RVV instructions accept.
3789 // There is no vmul.vi instruction, so ensure the multiply constant can
3790 // fit in a single addi instruction.
3791 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3792 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3793 isPowerOf2_32(StepDenominator) &&
3794 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3795 MVT VIDVT =
3796 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3797 MVT VIDContainerVT =
3798 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3799 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3800 // Convert right out of the scalable type so we can use standard ISD
3801 // nodes for the rest of the computation. If we used scalable types with
3802 // these, we'd lose the fixed-length vector info and generate worse
3803 // vsetvli code.
3804 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3805 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3806 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3807 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3808 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3809 }
3810 if (StepDenominator != 1) {
3811 SDValue SplatStep =
3812 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3813 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3814 }
3815 if (Addend != 0 || Negate) {
3816 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3817 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3818 VID);
3819 }
3820 if (VT.isFloatingPoint()) {
3821 // TODO: Use vfwcvt to reduce register pressure.
3822 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3823 }
3824 return VID;
3825 }
3826 }
3827
3828 // For very small build_vectors, use a single scalar insert of a constant.
3829 // TODO: Base this on constant rematerialization cost, not size.
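// Illustrative example: v4i8 <1,2,3,4> amalgamates (element 0 in the low
// byte) to the i32 constant 0x04030201, which is inserted into a one-element
// i32 vector and bitcast back to v4i8.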
3830 const unsigned EltBitSize = VT.getScalarSizeInBits();
3831 if (VT.getSizeInBits() <= 32 &&
3832 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3833 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3834 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3835 "Unexpected sequence type");
3836 // If we can use the original VL with the modified element type, this
3837 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3838 // be moved into InsertVSETVLI?
3839 unsigned ViaVecLen =
3840 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3841 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3842
3843 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3844 uint64_t SplatValue = 0;
3845 // Construct the amalgamated value at this larger vector type.
3846 for (const auto &OpIdx : enumerate(Op->op_values())) {
3847 const auto &SeqV = OpIdx.value();
3848 if (!SeqV.isUndef())
3849 SplatValue |=
3850 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3851 }
3852
3853 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3854 // achieve better constant materialization.
3855 // On RV32, we need to sign-extend to use getSignedConstant.
3856 if (ViaIntVT == MVT::i32)
3857 SplatValue = SignExtend64<32>(SplatValue);
3858
3859 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3860 DAG.getUNDEF(ViaVecVT),
3861 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3862 DAG.getVectorIdxConstant(0, DL));
3863 if (ViaVecLen != 1)
3864 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3865 MVT::getVectorVT(ViaIntVT, 1), Vec,
3866 DAG.getConstant(0, DL, XLenVT));
3867 return DAG.getBitcast(VT, Vec);
3868 }
3869
3870
3871 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3872 // when re-interpreted as a vector with a larger element type. For example,
3873 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3874 // could be instead splat as
3875 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3876 // TODO: This optimization could also work on non-constant splats, but it
3877 // would require bit-manipulation instructions to construct the splat value.
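// Illustrative example: for the v4i16 case above, the repeated sequence
// {0, 1} amalgamates to the i32 value 0x00010000, which is splatted with a
// single vmv.v.x at EEW=32 and bitcast back to v4i16.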
3878 SmallVector<SDValue> Sequence;
3879 const auto *BV = cast<BuildVectorSDNode>(Op);
3880 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3881 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3882 BV->getRepeatedSequence(Sequence) &&
3883 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3884 unsigned SeqLen = Sequence.size();
3885 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3886 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3887 ViaIntVT == MVT::i64) &&
3888 "Unexpected sequence type");
3889
3890 // If we can use the original VL with the modified element type, this
3891 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3892 // be moved into InsertVSETVLI?
3893 const unsigned RequiredVL = NumElts / SeqLen;
3894 const unsigned ViaVecLen =
3895 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3896 NumElts : RequiredVL;
3897 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3898
3899 unsigned EltIdx = 0;
3900 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3901 uint64_t SplatValue = 0;
3902 // Construct the amalgamated value which can be splatted as this larger
3903 // vector type.
3904 for (const auto &SeqV : Sequence) {
3905 if (!SeqV.isUndef())
3906 SplatValue |=
3907 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3908 EltIdx++;
3909 }
3910
3911 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3912 // achieve better constant materialization.
3913 // On RV32, we need to sign-extend to use getSignedConstant.
3914 if (ViaIntVT == MVT::i32)
3915 SplatValue = SignExtend64<32>(SplatValue);
3916
3917 // Since we can't introduce illegal i64 types at this stage, we can only
3918 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3919 // way we can use RVV instructions to splat.
3920 assert((ViaIntVT.bitsLE(XLenVT) ||
3921 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3922 "Unexpected bitcast sequence");
3923 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3924 SDValue ViaVL =
3925 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3926 MVT ViaContainerVT =
3927 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3928 SDValue Splat =
3929 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3930 DAG.getUNDEF(ViaContainerVT),
3931 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3932 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3933 if (ViaVecLen != RequiredVL)
3934 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3935 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3936 DAG.getConstant(0, DL, XLenVT));
3937 return DAG.getBitcast(VT, Splat);
3938 }
3939 }
3940
3941 // If the number of signbits allows, see if we can lower as a <N x i8>.
3942 // Our main goal here is to reduce LMUL (and thus work) required to
3943 // build the constant, but we will also narrow if the resulting
3944 // narrow vector is known to materialize cheaply.
3945 // TODO: We really should be costing the smaller vector. There are
3946 // profitable cases this misses.
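// Illustrative example: v4i16 <1, -1, 2, -3> has at most 8 significant bits
// per element, so it is first built as a v4i8 constant and then sign-extended
// back to the i16 element type via VSEXT_VL (vsext.vf2).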
3947 if (EltBitSize > 8 && VT.isInteger() &&
3948 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3949 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3950 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3951 DL, Op->ops());
3952 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3953 Source, DAG, Subtarget);
3954 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3955 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3956 }
3957
3958 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3959 return Res;
3960
3961 // For constant vectors, use generic constant pool lowering. Otherwise,
3962 // we'd have to materialize constants in GPRs just to move them into the
3963 // vector.
3964 return SDValue();
3965}
3966
3967static unsigned getPACKOpcode(unsigned DestBW,
3968 const RISCVSubtarget &Subtarget) {
3969 switch (DestBW) {
3970 default:
3971 llvm_unreachable("Unsupported pack size");
3972 case 16:
3973 return RISCV::PACKH;
3974 case 32:
3975 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3976 case 64:
3977 assert(Subtarget.is64Bit());
3978 return RISCV::PACK;
3979 }
3980}
3981
3982/// Double the element size of the build vector to reduce the number
3983 /// of vslide1down operations in the build vector chain. In the worst case,
3984 /// this trades three scalar operations for one vector operation. Scalar
3985/// operations are generally lower latency, and for out-of-order cores
3986/// we also benefit from additional parallelism.
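/// Illustrative example: for v8i8 with Zbkb, adjacent elements (A, B) are
/// combined with packh into the i16 value (B << 8) | A, so the build_vector
/// becomes a v4i16 build of packed scalars and needs half as many
/// vslide1down steps.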
3987 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3988 const RISCVSubtarget &Subtarget) {
3989 SDLoc DL(Op);
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992 MVT ElemVT = VT.getVectorElementType();
3993 if (!ElemVT.isInteger())
3994 return SDValue();
3995
3996 // TODO: Relax these architectural restrictions, possibly with costing
3997 // of the actual instructions required.
3998 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3999 return SDValue();
4000
4001 unsigned NumElts = VT.getVectorNumElements();
4002 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4003 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4004 NumElts % 2 != 0)
4005 return SDValue();
4006
4007 // Produce [B,A] packed into a type twice as wide. Note that all
4008 // scalars are XLenVT, possibly masked (see below).
4009 MVT XLenVT = Subtarget.getXLenVT();
4010 SDValue Mask = DAG.getConstant(
4011 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4012 auto pack = [&](SDValue A, SDValue B) {
4013 // Bias the scheduling of the inserted operations to near the
4014 // definition of the element - this tends to reduce register
4015 // pressure overall.
4016 SDLoc ElemDL(B);
4017 if (Subtarget.hasStdExtZbkb())
4018 // Note that we're relying on the high bits of the result being
4019 // don't care. For PACKW, the result is *sign* extended.
4020 return SDValue(
4021 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4022 ElemDL, XLenVT, A, B),
4023 0);
4024
4025 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4026 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4027 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4028 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4029 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4030 SDNodeFlags::Disjoint);
4031 };
4032
4033 SmallVector<SDValue> NewOperands;
4034 NewOperands.reserve(NumElts / 2);
4035 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4036 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4037 assert(NumElts == NewOperands.size() * 2);
4038 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4039 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4040 return DAG.getNode(ISD::BITCAST, DL, VT,
4041 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4042}
4043
4044 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4045 const RISCVSubtarget &Subtarget) {
4046 MVT VT = Op.getSimpleValueType();
4047 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4048
4049 MVT EltVT = VT.getVectorElementType();
4050 MVT XLenVT = Subtarget.getXLenVT();
4051
4052 SDLoc DL(Op);
4053
4054 // Proper support for f16 requires Zvfh. bf16 always requires special
4055 // handling. We need to cast the scalar to integer and create an integer
4056 // build_vector.
4057 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4058 MVT IVT = VT.changeVectorElementType(MVT::i16);
4059 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4060 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4061 SDValue Elem = Op.getOperand(I);
4062 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4063 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4064 // Called by LegalizeDAG, we need to use XLenVT operations since we
4065 // can't create illegal types.
4066 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4067 // Manually constant fold so the integer build_vector can be lowered
4068 // better. Waiting for DAGCombine will be too late.
4069 APInt V =
4070 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4071 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4072 } else {
4073 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4074 }
4075 } else {
4076 // Called by scalar type legalizer, we can use i16.
4077 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4078 }
4079 }
4080 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4081 return DAG.getBitcast(VT, Res);
4082 }
4083
4084 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4085 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4086 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4087
4088 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4089
4090 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4091
4092 if (VT.getVectorElementType() == MVT::i1) {
4093 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4094 // vector type, we have a legal equivalently-sized i8 type, so we can use
4095 // that.
4096 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4097 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4098
4099 SDValue WideVec;
4100 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4101 // For a splat, perform a scalar truncate before creating the wider
4102 // vector.
4103 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4104 DAG.getConstant(1, DL, Splat.getValueType()));
4105 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4106 } else {
4107 SmallVector<SDValue, 8> Ops(Op->op_values());
4108 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4109 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4110 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4111 }
4112
4113 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4114 }
4115
4116 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4117 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4118 return Gather;
4119 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4120 : RISCVISD::VMV_V_X_VL;
4121 if (!VT.isFloatingPoint())
4122 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4123 Splat =
4124 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4125 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4126 }
4127
4128 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4129 return Res;
4130
4131 // If we're compiling for an exact VLEN value, we can split our work per
4132 // register in the register group.
4133 if (const auto VLen = Subtarget.getRealVLen();
4134 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4135 MVT ElemVT = VT.getVectorElementType();
4136 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4137 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4138 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4139 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4140 assert(M1VT == getLMUL1VT(M1VT));
4141
4142 // The following semantically builds up a fixed length concat_vector
4143 // of the component build_vectors. We eagerly lower to scalable and
4144 // insert_subvector here to avoid DAG combining it back to a large
4145 // build_vector.
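// Illustrative example (assuming VLEN=128): a v8i64 build_vector (an m4
// value) is split into four v2i64 build_vectors, each lowered independently
// and then inserted one vector register at a time into the scalable
// container.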
4146 SmallVector<SDValue> BuildVectorOps(Op->ops());
4147 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4148 SDValue Vec = DAG.getUNDEF(ContainerVT);
4149 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4150 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4151 SDValue SubBV =
4152 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4153 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4154 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4155 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4156 DAG.getVectorIdxConstant(InsertIdx, DL));
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159 }
4160
4161 // If we're about to resort to vslide1down (or stack usage), pack our
4162 // elements into the widest scalar type we can. This will force a VL/VTYPE
4163 // toggle, but reduces the critical path, the number of vslide1down ops
4164 // required, and possibly enables scalar folds of the values.
4165 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4166 return Res;
4167
4168 // For m1 vectors, if we have non-undef values in both halves of our vector,
4169 // split the vector into low and high halves, build them separately, then
4170 // use a vselect to combine them. For long vectors, this cuts the critical
4171 // path of the vslide1down sequence in half, and gives us an opportunity
4172 // to special case each half independently. Note that we don't change the
4173 // length of the sub-vectors here, so if both fallback to the generic
4174 // vslide1down path, we should be able to fold the vselect into the final
4175 // vslidedown (for the undef tail) for the first half w/ masking.
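// Illustrative example: v8i32 <a,b,c,d,e,f,g,h> is built as
// <a,b,c,d,u,u,u,u> and <u,u,u,u,e,f,g,h> (u = undef), which are then
// combined with a vselect whose mask is <1,1,1,1,0,0,0,0>.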
4176 unsigned NumElts = VT.getVectorNumElements();
4177 unsigned NumUndefElts =
4178 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4179 unsigned NumDefElts = NumElts - NumUndefElts;
4180 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4181 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4182 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4183 SmallVector<SDValue> MaskVals;
4184 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4185 SubVecAOps.reserve(NumElts);
4186 SubVecBOps.reserve(NumElts);
4187 for (unsigned i = 0; i < NumElts; i++) {
4188 SDValue Elem = Op->getOperand(i);
4189 if (i < NumElts / 2) {
4190 SubVecAOps.push_back(Elem);
4191 SubVecBOps.push_back(UndefElem);
4192 } else {
4193 SubVecAOps.push_back(UndefElem);
4194 SubVecBOps.push_back(Elem);
4195 }
4196 bool SelectMaskVal = (i < NumElts / 2);
4197 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4198 }
4199 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4200 MaskVals.size() == NumElts);
4201
4202 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4203 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4204 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4205 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4206 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4207 }
4208
4209 // Cap the cost at a value linear in the number of elements in the vector.
4210 // The default lowering is to use the stack. The vector store + scalar loads
4211 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4212 // being (at least) linear in LMUL. As a result, using the vslidedown
4213 // lowering for every element ends up being VL*LMUL.
4214 // TODO: Should we be directly costing the stack alternative? Doing so might
4215 // give us a more accurate upper bound.
4216 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4217
4218 // TODO: unify with TTI getSlideCost.
4219 InstructionCost PerSlideCost = 1;
4220 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4221 default: break;
4222 case RISCVII::VLMUL::LMUL_2:
4223 PerSlideCost = 2;
4224 break;
4225 case RISCVII::VLMUL::LMUL_4:
4226 PerSlideCost = 4;
4227 break;
4228 case RISCVII::VLMUL::LMUL_8:
4229 PerSlideCost = 8;
4230 break;
4231 }
4232
4233 // TODO: Should we be using the build instseq then cost + evaluate scheme
4234 // we use for integer constants here?
4235 unsigned UndefCount = 0;
4236 for (const SDValue &V : Op->ops()) {
4237 if (V.isUndef()) {
4238 UndefCount++;
4239 continue;
4240 }
4241 if (UndefCount) {
4242 LinearBudget -= PerSlideCost;
4243 UndefCount = 0;
4244 }
4245 LinearBudget -= PerSlideCost;
4246 }
4247 if (UndefCount) {
4248 LinearBudget -= PerSlideCost;
4249 }
4250
4251 if (LinearBudget < 0)
4252 return SDValue();
4253
4254 assert((!VT.isFloatingPoint() ||
4255 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4256 "Illegal type which will result in reserved encoding");
4257
4258 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4259
4260 SDValue Vec;
4261 UndefCount = 0;
4262 for (SDValue V : Op->ops()) {
4263 if (V.isUndef()) {
4264 UndefCount++;
4265 continue;
4266 }
4267
4268 // Start our sequence with a TA splat in the hopes that hardware is able to
4269 // recognize there's no dependency on the prior value of our temporary
4270 // register.
4271 if (!Vec) {
4272 Vec = DAG.getSplatVector(VT, DL, V);
4273 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4274 UndefCount = 0;
4275 continue;
4276 }
4277
4278 if (UndefCount) {
4279 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4280 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4281 Vec, Offset, Mask, VL, Policy);
4282 UndefCount = 0;
4283 }
4284 auto OpCode =
4285 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4286 if (!VT.isFloatingPoint())
4287 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4288 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4289 V, Mask, VL);
4290 }
4291 if (UndefCount) {
4292 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4293 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4294 Vec, Offset, Mask, VL, Policy);
4295 }
4296 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4297}
4298
4299static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4300 SDValue Lo, SDValue Hi, SDValue VL,
4301 SelectionDAG &DAG) {
4302 if (!Passthru)
4303 Passthru = DAG.getUNDEF(VT);
4304 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4305 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4306 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4307 // If the Hi constant is simply Lo's sign bit replicated, lower this as a custom
4308 // node in order to try and match RVV vector/scalar instructions.
4309 if ((LoC >> 31) == HiC)
4310 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4311
4312 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4313 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4314 // vlmax vsetvli or vsetivli to change the VL.
4315 // FIXME: Support larger constants?
4316 // FIXME: Support non-constant VLs by saturating?
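// Illustrative example: splatting the i64 constant 0x1234567812345678 on
// RV32 has LoC == HiC == 0x12345678, so it can be emitted as a single
// vmv.v.x over an i32 vector with twice the element count.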
4317 if (LoC == HiC) {
4318 SDValue NewVL;
4319 if (isAllOnesConstant(VL) ||
4320 (isa<RegisterSDNode>(VL) &&
4321 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4322 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4323 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4324 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4325
4326 if (NewVL) {
4327 MVT InterVT =
4328 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4329 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4330 DAG.getUNDEF(InterVT), Lo, NewVL);
4331 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4332 }
4333 }
4334 }
4335
4336 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4337 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4338 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4339 Hi.getConstantOperandVal(1) == 31)
4340 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4341
4342 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4343 // even if it might be sign extended.
4344 if (Hi.isUndef())
4345 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4346
4347 // Fall back to a stack store and stride x0 vector load.
4348 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4349 Hi, VL);
4350}
4351
4352// Called by type legalization to handle splat of i64 on RV32.
4353// FIXME: We can optimize this when the type has sign or zero bits in one
4354// of the halves.
4355static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4356 SDValue Scalar, SDValue VL,
4357 SelectionDAG &DAG) {
4358 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4359 SDValue Lo, Hi;
4360 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4361 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4362}
4363
4364// This function lowers a splat of a scalar operand Splat with the vector
4365// length VL. It ensures the final sequence is type legal, which is useful when
4366// lowering a splat after type legalization.
4367static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4368 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4369 const RISCVSubtarget &Subtarget) {
4370 bool HasPassthru = Passthru && !Passthru.isUndef();
4371 if (!HasPassthru && !Passthru)
4372 Passthru = DAG.getUNDEF(VT);
4373
4374 MVT EltVT = VT.getVectorElementType();
4375 MVT XLenVT = Subtarget.getXLenVT();
4376
4377 if (VT.isFloatingPoint()) {
4378 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4379 EltVT == MVT::bf16) {
4380 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4381 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4382 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4383 else
4384 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4385 MVT IVT = VT.changeVectorElementType(MVT::i16);
4386 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4387 SDValue Splat =
4388 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4389 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4390 }
4391 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4392 }
4393
4394 // Simplest case is that the operand needs to be promoted to XLenVT.
4395 if (Scalar.getValueType().bitsLE(XLenVT)) {
4396 // If the operand is a constant, sign extend to increase our chances
4397 // of being able to use a .vi instruction. ANY_EXTEND would become a
4398 // zero extend and the simm5 check in isel would fail.
4399 // FIXME: Should we ignore the upper bits in isel instead?
4400 unsigned ExtOpc =
4401 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4402 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4403 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4404 }
4405
4406 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4407 "Unexpected scalar for splat lowering!");
4408
4409 if (isOneConstant(VL) && isNullConstant(Scalar))
4410 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4411 DAG.getConstant(0, DL, XLenVT), VL);
4412
4413 // Otherwise use the more complicated splatting algorithm.
4414 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4415}
4416
4417// This function lowers an insert of a scalar operand Scalar into lane
4418// 0 of the vector regardless of the value of VL. The contents of the
4419// remaining lanes of the result vector are unspecified. VL is assumed
4420// to be non-zero.
4421 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4422 const SDLoc &DL, SelectionDAG &DAG,
4423 const RISCVSubtarget &Subtarget) {
4424 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4425
4426 const MVT XLenVT = Subtarget.getXLenVT();
4427 SDValue Passthru = DAG.getUNDEF(VT);
4428
4429 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4430 isNullConstant(Scalar.getOperand(1))) {
4431 SDValue ExtractedVal = Scalar.getOperand(0);
4432 // The element types must be the same.
4433 if (ExtractedVal.getValueType().getVectorElementType() ==
4434 VT.getVectorElementType()) {
4435 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4436 MVT ExtractedContainerVT = ExtractedVT;
4437 if (ExtractedContainerVT.isFixedLengthVector()) {
4438 ExtractedContainerVT = getContainerForFixedLengthVector(
4439 DAG, ExtractedContainerVT, Subtarget);
4440 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4441 ExtractedVal, DAG, Subtarget);
4442 }
4443 if (ExtractedContainerVT.bitsLE(VT))
4444 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4445 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4446 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4447 DAG.getVectorIdxConstant(0, DL));
4448 }
4449 }
4450
4451
4452 if (VT.isFloatingPoint())
4453 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4454 DAG.getUNDEF(VT), Scalar, VL);
4455
4456 // Avoid the tricky legalization cases by falling back to using the
4457 // splat code which already handles it gracefully.
4458 if (!Scalar.getValueType().bitsLE(XLenVT))
4459 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4460 DAG.getConstant(1, DL, XLenVT),
4461 VT, DL, DAG, Subtarget);
4462
4463 // If the operand is a constant, sign extend to increase our chances
4464 // of being able to use a .vi instruction. ANY_EXTEND would become a
4465 // zero extend and the simm5 check in isel would fail.
4466 // FIXME: Should we ignore the upper bits in isel instead?
4467 unsigned ExtOpc =
4468 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4469 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4470 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4471 VL);
4472}
4473
4474// Can this shuffle be performed on exactly one (possibly larger) input?
4475static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4476 SDValue V2) {
4477
4478 if (V2.isUndef() &&
4479 RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4480 return V1;
4481
4482 // Both inputs must be extracts.
4483 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4484 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4485 return SDValue();
4486
4487 // Extracting from the same source.
4488 SDValue Src = V1.getOperand(0);
4489 if (Src != V2.getOperand(0))
4490 return SDValue();
4491
4492 // Src needs to have twice the number of elements.
4493 unsigned NumElts = VT.getVectorNumElements();
4494 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4495 return SDValue();
4496
4497 // The extracts must extract the two halves of the source.
4498 if (V1.getConstantOperandVal(1) != 0 ||
4499 V2.getConstantOperandVal(1) != NumElts)
4500 return SDValue();
4501
4502 return Src;
4503}
4504
4505/// Is this shuffle interleaving contiguous elements from one vector into the
4506/// even elements and contiguous elements from another vector into the odd
4507 /// elements. \p EvenSrc will contain the index of the element that should be
4508 /// in the first even element. \p OddSrc will contain the index of the element
4509 /// that should be in the first odd element. These can be the first element in
4510 /// a source or the element half way through the source.
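/// Illustrative example: for v8i8, the mask <0,8,1,9,2,10,3,11> interleaves
/// the low half of V1 with the low half of V2, giving EvenSrc = 0 and
/// OddSrc = 8.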
4511static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4512 int &OddSrc, const RISCVSubtarget &Subtarget) {
4513 // We need to be able to widen elements to the next larger integer type.
4514 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4515 return false;
4516
4517 int Size = Mask.size();
4518 int NumElts = VT.getVectorNumElements();
4519 assert(Size == (int)NumElts && "Unexpected mask size");
4520
4521 SmallVector<unsigned, 2> StartIndexes;
4522 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4523 return false;
4524
4525 EvenSrc = StartIndexes[0];
4526 OddSrc = StartIndexes[1];
4527
4528 // One source should be the low half of the first vector.
4529 if (EvenSrc != 0 && OddSrc != 0)
4530 return false;
4531
4532 // Subvectors will be extracted from either the start of the two input
4533 // vectors, or from the start and middle of the first vector if it's a unary
4534 // interleave.
4535 // In both cases, HalfNumElts will be extracted.
4536 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4537 // we'll create an illegal extract_subvector.
4538 // FIXME: We could support other values using a slidedown first.
4539 int HalfNumElts = NumElts / 2;
4540 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4541}
4542
4543/// Match shuffles that concatenate two vectors, rotate the concatenation,
4544/// and then extract the original number of elements from the rotated result.
4545/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4546/// returned rotation amount is for a rotate right, where elements move from
4547/// higher elements to lower elements. \p LoSrc indicates the first source
4548/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4549/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4550/// 0 or 1 if a rotation is found.
4551///
4552/// NOTE: We talk about rotate to the right which matches how bit shift and
4553/// rotate instructions are described where LSBs are on the right, but LLVM IR
4554/// and the table below write vectors with the lowest elements on the left.
4555static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4556 int Size = Mask.size();
4557
4558 // We need to detect various ways of spelling a rotation:
4559 // [11, 12, 13, 14, 15, 0, 1, 2]
4560 // [-1, 12, 13, 14, -1, -1, 1, -1]
4561 // [-1, -1, -1, -1, -1, -1, 1, 2]
4562 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4563 // [-1, 4, 5, 6, -1, -1, 9, -1]
4564 // [-1, 4, 5, 6, -1, -1, -1, -1]
4565 int Rotation = 0;
4566 LoSrc = -1;
4567 HiSrc = -1;
4568 for (int i = 0; i != Size; ++i) {
4569 int M = Mask[i];
4570 if (M < 0)
4571 continue;
4572
4573 // Determine where a rotate vector would have started.
4574 int StartIdx = i - (M % Size);
4575 // The identity rotation isn't interesting, stop.
4576 if (StartIdx == 0)
4577 return -1;
4578
4579 // If we found the tail of a vector the rotation must be the missing
4580 // front. If we found the head of a vector, it must be how much of the
4581 // head.
4582 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4583
4584 if (Rotation == 0)
4585 Rotation = CandidateRotation;
4586 else if (Rotation != CandidateRotation)
4587 // The rotations don't match, so we can't match this mask.
4588 return -1;
4589
4590 // Compute which value this mask is pointing at.
4591 int MaskSrc = M < Size ? 0 : 1;
4592
4593 // Compute which of the two target values this index should be assigned to.
4594 // This reflects whether the high elements are remaining or the low elements
4595 // are remaining.
4596 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4597
4598 // Either set up this value if we've not encountered it before, or check
4599 // that it remains consistent.
4600 if (TargetSrc < 0)
4601 TargetSrc = MaskSrc;
4602 else if (TargetSrc != MaskSrc)
4603 // This may be a rotation, but it pulls from the inputs in some
4604 // unsupported interleaving.
4605 return -1;
4606 }
4607
4608 // Check that we successfully analyzed the mask, and normalize the results.
4609 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4610 assert((LoSrc >= 0 || HiSrc >= 0) &&
4611 "Failed to find a rotated input vector!");
4612
4613 return Rotation;
4614}
4615
4616// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4617// 2, 4, 8 and the integer type Factor-times larger than VT's
4618// element type must be a legal element type.
4619// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4620// -> [p, q, r, s] (Factor=2, Index=1)
4621 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4622 SDValue Src, unsigned Factor,
4623 unsigned Index, SelectionDAG &DAG) {
4624 unsigned EltBits = VT.getScalarSizeInBits();
4625 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4626 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4627 SrcEC.divideCoefficientBy(Factor));
4628 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4629 SrcEC.divideCoefficientBy(Factor));
4630 Src = DAG.getBitcast(WideSrcVT, Src);
4631
4632 unsigned Shift = Index * EltBits;
4633 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4634 DAG.getConstant(Shift, DL, WideSrcVT));
4635 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4636 MVT IntVT = VT.changeVectorElementTypeToInteger();
4637 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4638 DAG.getVectorIdxConstant(0, DL));
4639 return DAG.getBitcast(VT, Res);
4640}
4641
4642// Lower the following shuffle to vslidedown.
4643// a)
4644// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4645// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4646// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4647// b)
4648// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4649// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4650// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4651// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4652// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4653// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4654 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4655 SDValue V1, SDValue V2,
4656 ArrayRef<int> Mask,
4657 const RISCVSubtarget &Subtarget,
4658 SelectionDAG &DAG) {
4659 auto findNonEXTRACT_SUBVECTORParent =
4660 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4661 uint64_t Offset = 0;
4662 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4663 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4664 // a scalable vector. But we don't want to match the case.
4665 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4666 Offset += Parent.getConstantOperandVal(1);
4667 Parent = Parent.getOperand(0);
4668 }
4669 return std::make_pair(Parent, Offset);
4670 };
4671
4672 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4673 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4674
4675 // Extracting from the same source.
4676 SDValue Src = V1Src;
4677 if (Src != V2Src)
4678 return SDValue();
4679
4680 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4681 SmallVector<int, 16> NewMask(Mask);
4682 for (size_t i = 0; i != NewMask.size(); ++i) {
4683 if (NewMask[i] == -1)
4684 continue;
4685
4686 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4687 NewMask[i] = NewMask[i] + V1IndexOffset;
4688 } else {
4689 // Minus NewMask.size() is needed. Otherwise, the b case would be
4690 // <5,6,7,12> instead of <5,6,7,8>.
4691 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4692 }
4693 }
4694
4695 // First index must be known and non-zero. It will be used as the slidedown
4696 // amount.
4697 if (NewMask[0] <= 0)
4698 return SDValue();
4699
4700 // NewMask must also be contiguous.
4701 for (unsigned i = 1; i != NewMask.size(); ++i)
4702 if (NewMask[i - 1] + 1 != NewMask[i])
4703 return SDValue();
4704
4705 MVT XLenVT = Subtarget.getXLenVT();
4706 MVT SrcVT = Src.getSimpleValueType();
4707 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4708 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4709 SDValue Slidedown =
4710 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4711 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4712 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4713 return DAG.getNode(
4714 ISD::EXTRACT_SUBVECTOR, DL, VT,
4715 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4716 DAG.getConstant(0, DL, XLenVT));
4717}
4718
4719// Because vslideup leaves the destination elements at the start intact, we can
4720// use it to perform shuffles that insert subvectors:
4721//
4722// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4723// ->
4724// vsetvli zero, 8, e8, mf2, ta, ma
4725// vslideup.vi v8, v9, 4
4726//
4727// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4728// ->
4729// vsetvli zero, 5, e8, mf2, tu, ma
4730 // vslideup.vi v8, v9, 2
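//
// When the subvector is inserted at index 0, e.g.
// vector_shuffle v8:v8i8, v9:v8i8, <8, 9, 10, 11, 4, 5, 6, 7>
// a tail-undisturbed vmv.v.v with VL=4 is used instead of a slideup.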
4731 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4732 SDValue V1, SDValue V2,
4733 ArrayRef<int> Mask,
4734 const RISCVSubtarget &Subtarget,
4735 SelectionDAG &DAG) {
4736 unsigned NumElts = VT.getVectorNumElements();
4737 int NumSubElts, Index;
4738 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4739 Index))
4740 return SDValue();
4741
4742 bool OpsSwapped = Mask[Index] < (int)NumElts;
4743 SDValue InPlace = OpsSwapped ? V2 : V1;
4744 SDValue ToInsert = OpsSwapped ? V1 : V2;
4745
4746 MVT XLenVT = Subtarget.getXLenVT();
4747 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4748 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4749 // We slide up by the index that the subvector is being inserted at, and set
4750 // VL to the index + the number of elements being inserted.
4751 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4752 // If we're adding a suffix to the in place vector, i.e. inserting right
4753 // up to the very end of it, then we don't actually care about the tail.
4754 if (NumSubElts + Index >= (int)NumElts)
4755 Policy |= RISCVII::TAIL_AGNOSTIC;
4756
4757 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4758 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4759 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4760
4761 SDValue Res;
4762 // If we're inserting into the lowest elements, use a tail undisturbed
4763 // vmv.v.v.
4764 if (Index == 0)
4765 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4766 VL);
4767 else
4768 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4769 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4770 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4771}
4772
4773/// Match v(f)slide1up/down idioms. These operations involve sliding
4774/// N-1 elements to make room for an inserted scalar at one end.
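/// Illustrative example: with V1 = splat(s), the v4i32 mask <5, 6, 7, 3>
/// matches vslide1down.vx (result = <V2[1], V2[2], V2[3], s>), while
/// <0, 4, 5, 6> matches vslide1up.vx (result = <s, V2[0], V2[1], V2[2]>).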
4775 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4776 SDValue V1, SDValue V2,
4777 ArrayRef<int> Mask,
4778 const RISCVSubtarget &Subtarget,
4779 SelectionDAG &DAG) {
4780 bool OpsSwapped = false;
4781 if (!isa<BuildVectorSDNode>(V1)) {
4782 if (!isa<BuildVectorSDNode>(V2))
4783 return SDValue();
4784 std::swap(V1, V2);
4785 OpsSwapped = true;
4786 }
4787 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4788 if (!Splat)
4789 return SDValue();
4790
4791 // Return true if the mask could describe a slide of Mask.size() - 1
4792 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4793 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4794 const unsigned S = (Offset > 0) ? 0 : -Offset;
4795 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4796 for (unsigned i = S; i != E; ++i)
4797 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4798 return false;
4799 return true;
4800 };
4801
4802 const unsigned NumElts = VT.getVectorNumElements();
4803 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4804 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4805 return SDValue();
4806
4807 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4808 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4809 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4810 return SDValue();
4811
4812 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4813 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4814
4815 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4816 // vslide1{down,up}.vx instead.
4817 if (VT.getVectorElementType() == MVT::bf16 ||
4818 (VT.getVectorElementType() == MVT::f16 &&
4819 !Subtarget.hasVInstructionsF16())) {
4820 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4821 Splat =
4822 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4823 V2 = DAG.getBitcast(
4824 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4825 SDValue Vec = DAG.getNode(
4826 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4827 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4828 Vec = DAG.getBitcast(ContainerVT, Vec);
4829 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4830 }
4831
4832 auto OpCode = IsVSlidedown ?
4833 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4834 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4835 if (!VT.isFloatingPoint())
4836 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4837 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4838 DAG.getUNDEF(ContainerVT),
4839 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4840 Splat, TrueMask, VL);
4841 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4842}
4843
4844// Match a mask which "spreads" the leading elements of a vector evenly
4845// across the result. Factor is the spread amount, and Index is the
4846 // offset applied (on success, Index < Factor). This is the inverse
4847// of a deinterleave with the same Factor and Index. This is analogous
4848// to an interleave, except that all but one lane is undef.
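// Illustrative example: <0, -1, 1, -1, 2, -1, 3, -1> is a spread with
// Factor=2 and Index=0, and <-1, 0, -1, 1, -1, 2, -1, 3> is the same spread
// with Index=1.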
4849static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4850 SmallVector<bool> LaneIsUndef(Factor, true);
4851 for (unsigned i = 0; i < Mask.size(); i++)
4852 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4853
4854 bool Found = false;
4855 for (unsigned i = 0; i < Factor; i++) {
4856 if (LaneIsUndef[i])
4857 continue;
4858 if (Found)
4859 return false;
4860 Index = i;
4861 Found = true;
4862 }
4863 if (!Found)
4864 return false;
4865
4866 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4867 unsigned j = i * Factor + Index;
4868 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4869 return false;
4870 }
4871 return true;
4872}
4873
4874// Given a vector a, b, c, d return a vector Factor times longer
4875// with Factor-1 undef's between elements. Ex:
4876// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4877// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4878static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4879 const SDLoc &DL, SelectionDAG &DAG) {
4880
4881 MVT VT = V.getSimpleValueType();
4882 unsigned EltBits = VT.getScalarSizeInBits();
4883 ElementCount EC = VT.getVectorElementCount();
4884 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4885
4886 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4887
4888 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4889 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4890 // allow the SHL to fold away if Index is 0.
4891 if (Index != 0)
4892 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4893 DAG.getConstant(EltBits * Index, DL, WideVT));
4894 // Make sure to use original element type
4895 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4896 EC.multiplyCoefficientBy(Factor));
4897 return DAG.getBitcast(ResultVT, Result);
4898}
4899
4900// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4901// to create an interleaved vector of <[vscale x] n*2 x ty>.
4902// This requires that the size of ty is less than the subtarget's maximum ELEN.
4903 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4904 const SDLoc &DL, SelectionDAG &DAG,
4905 const RISCVSubtarget &Subtarget) {
4906
4907 // FIXME: Not only does this optimize the code, it fixes some correctness
4908 // issues because MIR does not have freeze.
4909 if (EvenV.isUndef())
4910 return getWideningSpread(OddV, 2, 1, DL, DAG);
4911 if (OddV.isUndef())
4912 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4913
4914 MVT VecVT = EvenV.getSimpleValueType();
4915 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4916 // Convert fixed vectors to scalable if needed
4917 if (VecContainerVT.isFixedLengthVector()) {
4918 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4919 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4920 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4921 }
4922
4923 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4924
4925 // We're working with a vector of the same size as the resulting
4926 // interleaved vector, but with half the number of elements and
4927 // twice the SEW (Hence the restriction on not using the maximum
4928 // ELEN)
4929 MVT WideVT =
4930 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4931 VecVT.getVectorElementCount());
4932 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4933 if (WideContainerVT.isFixedLengthVector())
4934 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4935
4936 // Bitcast the input vectors to integers in case they are FP
4937 VecContainerVT = VecContainerVT.changeTypeToInteger();
4938 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4939 OddV = DAG.getBitcast(VecContainerVT, OddV);
4940
4941 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4942 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4943
4944 SDValue Interleaved;
4945 if (Subtarget.hasStdExtZvbb()) {
4946 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4947 SDValue OffsetVec =
4948 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4949 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4950 OffsetVec, Passthru, Mask, VL);
4951 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4952 Interleaved, EvenV, Passthru, Mask, VL);
4953 } else {
4954 // FIXME: We should freeze the odd vector here. We already handled the case
4955 // of provably undef/poison above.
4956
4957 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4958 // vwaddu.vv
4959 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4960 OddV, Passthru, Mask, VL);
4961
4962 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. an all-ones value
4963 SDValue AllOnesVec = DAG.getSplatVector(
4964 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4965 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4966 OddV, AllOnesVec, Passthru, Mask, VL);
4967
4968 // Add the two together so we get
4969 // (OddV * 0xff...ff) + (OddV + EvenV)
4970 // = (OddV * 0x100...00) + EvenV
4971 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4972 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
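// Illustrative example with SEW=8: EvenV=0x34, OddV=0x12 gives
// vwaddu.vv = 0x0046 and OddV * 0xFF = 0x11EE; their sum is 0x1234,
// i.e. (OddV << 8) | EvenV.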
4973 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4974 Interleaved, OddsMul, Passthru, Mask, VL);
4975 }
4976
4977 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4978 MVT ResultContainerVT = MVT::getVectorVT(
4979 VecVT.getVectorElementType(), // Make sure to use original type
4980 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4981 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4982
4983 // Convert back to a fixed vector if needed
4984 MVT ResultVT =
4985 MVT::getVectorVT(VecVT.getVectorElementType(),
4986 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4987 if (ResultVT.isFixedLengthVector())
4988 Interleaved =
4989 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4990
4991 return Interleaved;
4992}
4993
4994// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4995// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
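// When the mask doesn't fill the larger element exactly, e.g. v4i1, the bits
// are first widened (here to v8i1), bitreversed as v1i8, and the result is
// shifted right by the 4 padding bits before extracting the v4i1 back out.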
4996 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4997 SelectionDAG &DAG,
4998 const RISCVSubtarget &Subtarget) {
4999 SDLoc DL(SVN);
5000 MVT VT = SVN->getSimpleValueType(0);
5001 SDValue V = SVN->getOperand(0);
5002 unsigned NumElts = VT.getVectorNumElements();
5003
5004 assert(VT.getVectorElementType() == MVT::i1);
5005
5006 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5007 SVN->getMask().size()) ||
5008 !SVN->getOperand(1).isUndef())
5009 return SDValue();
5010
5011 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5012 EVT ViaVT = EVT::getVectorVT(
5013 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5014 EVT ViaBitVT =
5015 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5016
5017 // If we don't have zvbb or the larger element type > ELEN, the operation will
5018 // be illegal.
5019 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5020 ViaVT) ||
5021 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5022 return SDValue();
5023
5024 // If the bit vector doesn't fit exactly into the larger element type, we need
5025 // to insert it into the larger vector and then shift up the reversed bits
5026 // afterwards to get rid of the gap introduced.
5027 if (ViaEltSize > NumElts)
5028 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5029 V, DAG.getVectorIdxConstant(0, DL));
5030
5031 SDValue Res =
5032 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5033
5034 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5035 // element type.
5036 if (ViaEltSize > NumElts)
5037 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5038 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5039
5040 Res = DAG.getBitcast(ViaBitVT, Res);
5041
5042 if (ViaEltSize > NumElts)
5043 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5044 DAG.getVectorIdxConstant(0, DL));
5045 return Res;
5046}
5047
5048 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5049 SelectionDAG &DAG,
5050 const RISCVSubtarget &Subtarget,
5051 MVT &RotateVT, unsigned &RotateAmt) {
5052 SDLoc DL(SVN);
5053
5054 EVT VT = SVN->getValueType(0);
5055 unsigned NumElts = VT.getVectorNumElements();
5056 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5057 unsigned NumSubElts;
5058 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5059 NumElts, NumSubElts, RotateAmt))
5060 return false;
5061 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5062 NumElts / NumSubElts);
5063
5064 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5065 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5066}
5067
5068// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5069// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5070// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
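// Illustrative example: the v8i8 mask <1, 0, 3, 2, 5, 4, 7, 6> becomes a
// v4i16 rotate by 8, which is canonicalized to a byteswap (vrev8.v) below.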
5071 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5072 SelectionDAG &DAG,
5073 const RISCVSubtarget &Subtarget) {
5074 SDLoc DL(SVN);
5075
5076 EVT VT = SVN->getValueType(0);
5077 unsigned RotateAmt;
5078 MVT RotateVT;
5079 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5080 return SDValue();
5081
5082 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5083
5084 SDValue Rotate;
5085 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5086 // so canonicalize to vrev8.
5087 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5088 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5089 else
5090 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5091 DAG.getConstant(RotateAmt, DL, RotateVT));
5092
5093 return DAG.getBitcast(VT, Rotate);
5094}
5095
5096// If compiling with an exactly known VLEN, see if we can split a
5097// shuffle on m2 or larger into a small number of m1 sized shuffles
5098 // which write each destination register exactly once.
5099 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5100 SelectionDAG &DAG,
5101 const RISCVSubtarget &Subtarget) {
5102 SDLoc DL(SVN);
5103 MVT VT = SVN->getSimpleValueType(0);
5104 SDValue V1 = SVN->getOperand(0);
5105 SDValue V2 = SVN->getOperand(1);
5106 ArrayRef<int> Mask = SVN->getMask();
5107
5108 // If we don't know exact data layout, not much we can do. If this
5109 // is already m1 or smaller, no point in splitting further.
5110 const auto VLen = Subtarget.getRealVLen();
5111 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5112 return SDValue();
5113
5114 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5115 // expansion for.
5116 unsigned RotateAmt;
5117 MVT RotateVT;
5118 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5119 return SDValue();
5120
5121 MVT ElemVT = VT.getVectorElementType();
5122 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5123
5124 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5125 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5126 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5127 assert(M1VT == getLMUL1VT(M1VT));
5128 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5129 unsigned NormalizedVF = ContainerVT.getVectorMinNumElements();
5130 unsigned NumOfSrcRegs = NormalizedVF / NumOpElts;
5131 unsigned NumOfDestRegs = NormalizedVF / NumOpElts;
5132 // The following semantically builds up a fixed length concat_vector
5133 // of the component shuffle_vectors. We eagerly lower to scalable here
5134 // to avoid DAG combining it back to a large shuffle_vector again.
5135 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5136 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5137 SmallVector<SDValue> SubRegs(NumOfDestRegs);
5138 unsigned RegCnt = 0;
5139 unsigned PrevCnt = 0;
5140 processShuffleMasks(
5141 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5142 [&]() {
5143 PrevCnt = RegCnt;
5144 ++RegCnt;
5145 },
5146 [&, &DAG = DAG](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx,
5147 unsigned DstVecIdx) {
5148 SDValue SrcVec = SrcVecIdx >= NumOfSrcRegs ? V2 : V1;
5149 unsigned ExtractIdx = (SrcVecIdx % NumOfSrcRegs) * NumOpElts;
5150 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5151 DAG.getVectorIdxConstant(ExtractIdx, DL));
5152 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5153 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5154 SubRegs[RegCnt] = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5155 PrevCnt = RegCnt;
5156 ++RegCnt;
5157 },
5158 [&, &DAG = DAG](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2) {
5159 if (PrevCnt + 1 == RegCnt)
5160 ++RegCnt;
5161 SDValue SubVec1 = SubRegs[PrevCnt + 1];
5162 if (!SubVec1) {
5163 SDValue SrcVec = Idx1 >= NumOfSrcRegs ? V2 : V1;
5164 unsigned ExtractIdx = (Idx1 % NumOfSrcRegs) * NumOpElts;
5165 SubVec1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5166 DAG.getVectorIdxConstant(ExtractIdx, DL));
5167 }
5168 SubVec1 = convertFromScalableVector(OneRegVT, SubVec1, DAG, Subtarget);
5169 SDValue SrcVec = Idx2 >= NumOfSrcRegs ? V2 : V1;
5170 unsigned ExtractIdx = (Idx2 % NumOfSrcRegs) * NumOpElts;
5171 SDValue SubVec2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5172 DAG.getVectorIdxConstant(ExtractIdx, DL));
5173 SubVec2 = convertFromScalableVector(OneRegVT, SubVec2, DAG, Subtarget);
5174 SubVec1 =
5175 DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, SrcSubMask);
5176 SubVec1 = convertToScalableVector(M1VT, SubVec1, DAG, Subtarget);
5177 SubRegs[PrevCnt + 1] = SubVec1;
5178 });
5179 assert(RegCnt == NumOfDestRegs && "Whole vector must be processed");
5180 SDValue Vec = DAG.getUNDEF(ContainerVT);
5181 for (auto [I, V] : enumerate(SubRegs)) {
5182 if (!V)
5183 continue;
5184 unsigned InsertIdx = I * NumOpElts;
5185
5186 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5187 DAG.getVectorIdxConstant(InsertIdx, DL));
5188 }
5189 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5190}
5191
5192// Matches a subset of compress masks with a contiguous prefix of output
5193// elements. This could be extended to allow gaps by deciding which
5194// source elements to spuriously demand.
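// For example, <0, 2, 3, -1> compresses elements 0, 2 and 3 into a contiguous
// prefix and is accepted, while <2, 0, 3, 1> is rejected because the defined
// indices are not strictly increasing.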
5195 static bool isCompressMask(ArrayRef<int> Mask) {
5196 int Last = -1;
5197 bool SawUndef = false;
5198 for (unsigned i = 0; i < Mask.size(); i++) {
5199 if (Mask[i] == -1) {
5200 SawUndef = true;
5201 continue;
5202 }
5203 if (SawUndef)
5204 return false;
5205 if (i > (unsigned)Mask[i])
5206 return false;
5207 if (Mask[i] <= Last)
5208 return false;
5209 Last = Mask[i];
5210 }
5211 return true;
5212}
5213
5214/// Given a shuffle where the indices are disjoint between the two sources,
5215/// e.g.:
5216///
5217/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5218///
5219/// Merge the two sources into one and do a single source shuffle:
5220///
5221/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5222/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5223///
5224/// A vselect will either be merged into a masked instruction or be lowered as a
5225/// vmerge.vvm, which is cheaper than a vrgather.vv.
5226 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5227 SelectionDAG &DAG,
5228 const RISCVSubtarget &Subtarget) {
5229 MVT VT = SVN->getSimpleValueType(0);
5230 MVT XLenVT = Subtarget.getXLenVT();
5231 SDLoc DL(SVN);
5232
5233 const ArrayRef<int> Mask = SVN->getMask();
5234
5235 // Work out which source each lane will come from.
5236 SmallVector<int, 16> Srcs(Mask.size(), -1);
5237
5238 for (int Idx : Mask) {
5239 if (Idx == -1)
5240 continue;
5241 unsigned SrcIdx = Idx % Mask.size();
5242 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5243 if (Srcs[SrcIdx] == -1)
5244 // Mark this source as using this lane.
5245 Srcs[SrcIdx] = Src;
5246 else if (Srcs[SrcIdx] != Src)
5247 // The other source is using this lane: not disjoint.
5248 return SDValue();
5249 }
5250
5251 SmallVector<SDValue> SelectMaskVals;
5252 for (int Lane : Srcs) {
5253 if (Lane == -1)
5254 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5255 else
5256 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5257 }
5258 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5259 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5260 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5261 SVN->getOperand(0), SVN->getOperand(1));
5262
5263 // Move all indices relative to the first source.
5264 SmallVector<int> NewMask(Mask.size());
5265 for (unsigned I = 0; I < Mask.size(); I++) {
5266 if (Mask[I] == -1)
5267 NewMask[I] = -1;
5268 else
5269 NewMask[I] = Mask[I] % Mask.size();
5270 }
5271
5272 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5273}
5274
5275 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5276 const RISCVSubtarget &Subtarget) {
5277 SDValue V1 = Op.getOperand(0);
5278 SDValue V2 = Op.getOperand(1);
5279 SDLoc DL(Op);
5280 MVT XLenVT = Subtarget.getXLenVT();
5281 MVT VT = Op.getSimpleValueType();
5282 unsigned NumElts = VT.getVectorNumElements();
5283 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5284
5285 if (VT.getVectorElementType() == MVT::i1) {
5286 // Lower to a vror.vi of a larger element type if possible before we promote
5287 // i1s to i8s.
5288 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5289 return V;
5290 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5291 return V;
5292
5293 // Promote i1 shuffle to i8 shuffle.
5294 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5295 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5296 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5297 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5298 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5299 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5300 ISD::SETNE);
5301 }
5302
5303 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5304
5305 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5306
5307 if (SVN->isSplat()) {
5308 const int Lane = SVN->getSplatIndex();
5309 if (Lane >= 0) {
5310 MVT SVT = VT.getVectorElementType();
5311
5312 // Turn splatted vector load into a strided load with an X0 stride.
5313 SDValue V = V1;
5314 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5315 // with undef.
5316 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5317 int Offset = Lane;
5318 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5319 int OpElements =
5320 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5321 V = V.getOperand(Offset / OpElements);
5322 Offset %= OpElements;
5323 }
5324
5325 // We need to ensure the load isn't atomic or volatile.
5326 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5327 auto *Ld = cast<LoadSDNode>(V);
5328 Offset *= SVT.getStoreSize();
5329 SDValue NewAddr = DAG.getMemBasePlusOffset(
5330 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5331
5332 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5333 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5334 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5335 SDValue IntID =
5336 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5337 SDValue Ops[] = {Ld->getChain(),
5338 IntID,
5339 DAG.getUNDEF(ContainerVT),
5340 NewAddr,
5341 DAG.getRegister(RISCV::X0, XLenVT),
5342 VL};
5343 SDValue NewLoad = DAG.getMemIntrinsicNode(
5344 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5345 DAG.getMachineFunction().getMachineMemOperand(
5346 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5347 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5348 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5349 }
5350
5351 MVT SplatVT = ContainerVT;
5352
5353 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5354 if (SVT == MVT::bf16 ||
5355 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5356 SVT = MVT::i16;
5357 SplatVT = ContainerVT.changeVectorElementType(SVT);
5358 }
5359
5360 // Otherwise use a scalar load and splat. This will give the best
5361 // opportunity to fold a splat into the operation. ISel can turn it into
5362 // the x0 strided load if we aren't able to fold away the select.
5363 if (SVT.isFloatingPoint())
5364 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5365 Ld->getPointerInfo().getWithOffset(Offset),
5366 Ld->getOriginalAlign(),
5367 Ld->getMemOperand()->getFlags());
5368 else
5369 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5370 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5371 Ld->getOriginalAlign(),
5372 Ld->getMemOperand()->getFlags());
5373 DAG.makeEquivalentMemoryOrdering(Ld, V);
5374
5375 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5376 : RISCVISD::VMV_V_X_VL;
5377 SDValue Splat =
5378 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5379 Splat = DAG.getBitcast(ContainerVT, Splat);
5380 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5381 }
5382
5383 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5384 assert(Lane < (int)NumElts && "Unexpected lane!");
5385 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5386 V1, DAG.getConstant(Lane, DL, XLenVT),
5387 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5388 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5389 }
5390 }
5391
5392 // For exact VLEN m2 or greater, try to split to m1 operations if we
5393 // can split cleanly.
5394 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5395 return V;
5396
5397 ArrayRef<int> Mask = SVN->getMask();
5398
5399 if (SDValue V =
5400 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5401 return V;
5402
5403 if (SDValue V =
5404 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5405 return V;
5406
5407 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5408 // available.
5409 if (Subtarget.hasStdExtZvkb())
5410 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5411 return V;
5412
5413 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5414 // be undef, which can be handled with a single SLIDEDOWN/UP.
5415 int LoSrc, HiSrc;
5416 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5417 if (Rotation > 0) {
5418 SDValue LoV, HiV;
5419 if (LoSrc >= 0) {
5420 LoV = LoSrc == 0 ? V1 : V2;
5421 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5422 }
5423 if (HiSrc >= 0) {
5424 HiV = HiSrc == 0 ? V1 : V2;
5425 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5426 }
5427
5428 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5429 // to slide LoV up by (NumElts - Rotation).
5430 unsigned InvRotate = NumElts - Rotation;
5431
5432 SDValue Res = DAG.getUNDEF(ContainerVT);
5433 if (HiV) {
5434 // Even though we could use a smaller VL, don't, so that we avoid a vsetivli
5435 // toggle.
5436 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5437 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5438 }
5439 if (LoV)
5440 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5441 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5442 RISCVII::TAIL_AGNOSTIC);
5443
5444 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5445 }
5446
5447 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5448 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5449
5450 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5451 // use shift and truncate to perform the shuffle.
5452 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5453 // shift-and-trunc reducing total cost for everything except an mf8 result.
5454 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5455 // to do the entire operation.
5456 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5457 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5458 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5459 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5460 unsigned Index = 0;
5461 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5462 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5463 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5464 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5465 }
5466 }
5467 }
5468
5469 if (SDValue V =
5470 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5471 return V;
5472
5473 // Detect an interleave shuffle and lower to
5474 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
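// For example, for v8i8 the mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the
// low halves of the two sources.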
5475 int EvenSrc, OddSrc;
5476 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5477 // Extract the halves of the vectors.
5478 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5479
5480 // Recognize if one half is actually undef; the matching above will
5481 // otherwise reuse the even stream for the undef one. This improves
5482 // spread(2) shuffles.
5483 bool LaneIsUndef[2] = { true, true};
5484 for (unsigned i = 0; i < Mask.size(); i++)
5485 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5486
5487 int Size = Mask.size();
5488 SDValue EvenV, OddV;
5489 if (LaneIsUndef[0]) {
5490 EvenV = DAG.getUNDEF(HalfVT);
5491 } else {
5492 assert(EvenSrc >= 0 && "Undef source?");
5493 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5494 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5495 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5496 }
5497
5498 if (LaneIsUndef[1]) {
5499 OddV = DAG.getUNDEF(HalfVT);
5500 } else {
5501 assert(OddSrc >= 0 && "Undef source?");
5502 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5503 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5504 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5505 }
5506
5507 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5508 }
5509
5510
5511 // Handle any remaining single source shuffles
5512 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5513 if (V2.isUndef()) {
5514 // We might be able to express the shuffle as a bitrotate. But even if we
5515 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5516 // shifts and a vor will have a higher throughput than a vrgather.
5517 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5518 return V;
5519
5520 // Can we generate a vcompress instead of a vrgather? These scale better
5521 // at high LMUL, at the cost of not being able to fold a following select
5522 // into them. The mask constants are also smaller than the index vector
5523 // constants, and thus easier to materialize.
5524 if (isCompressMask(Mask)) {
5525 SmallVector<SDValue> MaskVals(NumElts,
5526 DAG.getConstant(false, DL, XLenVT));
5527 for (auto Idx : Mask) {
5528 if (Idx == -1)
5529 break;
5530 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5531 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5532 }
5533 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5534 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5535 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5536 DAG.getUNDEF(VT));
5537 }
5538
5539 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5540 // is fully covered in interleave(2) above, so it is ignored here.
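// A spread(4) mask with index 0 places source element i in lane 4*i, e.g.
// <0, -1, -1, -1, 1, -1, -1, -1> for eight lanes.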
5541 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5542 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5543 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5544 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5545 unsigned Index;
5546 if (isSpreadMask(Mask, Factor, Index)) {
5547 MVT NarrowVT =
5548 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5549 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5550 DAG.getVectorIdxConstant(0, DL));
5551 return getWideningSpread(Src, Factor, Index, DL, DAG);
5552 }
5553 }
5554 }
5555
5556 if (VT.getScalarSizeInBits() == 8 &&
5557 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5558 // On such a vector we're unable to use i8 as the index type.
5559 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5560 // may involve vector splitting if we're already at LMUL=8, or our
5561 // user-supplied maximum fixed-length LMUL.
5562 return SDValue();
5563 }
5564
5565 // Base case for the two operand recursion below - handle the worst case
5566 // single source shuffle.
5567 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5568 MVT IndexVT = VT.changeTypeToInteger();
5569 // Since we can't introduce illegal index types at this stage, use i16 and
5570 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5571 // than XLenVT.
5572 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5573 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5574 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5575 }
5576
5577 // If the mask allows, we can do all the index computation in 16 bits. This
5578 // requires less work and less register pressure at high LMUL, and creates
5579 // smaller constants which may be cheaper to materialize.
5580 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5581 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5582 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5583 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5584 }
5585
5586 MVT IndexContainerVT =
5587 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5588
5589 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5590 SmallVector<SDValue> GatherIndicesLHS;
5591 for (int MaskIndex : Mask) {
5592 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5593 GatherIndicesLHS.push_back(IsLHSIndex
5594 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5595 : DAG.getUNDEF(XLenVT));
5596 }
5597 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5598 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5599 Subtarget);
5600 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5601 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5602 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5603 }
5604
5605 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5606 // merged with a second vrgather.
5607 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5608
5609 // Now construct the mask that will be used by the blended vrgather operation.
5610 // Construct the appropriate indices into each vector.
5611 for (int MaskIndex : Mask) {
5612 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5613 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5614 ? MaskIndex : -1);
5615 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5616 }
5617
5618 // If the mask indices are disjoint between the two sources, we can lower it
5619 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5620 // operands may end up being lowered to something cheaper than a vrgather.vv.
5621 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5622 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5623 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5624 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5625 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5626 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5627 return V;
5628
5629 // Try to pick a profitable operand order.
5630 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5631 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5632
5633 // Recursively invoke lowering for each operand if we had two
5634 // independent single source shuffles, and then combine the result via a
5635 // vselect. Note that the vselect will likely be folded back into the
5636 // second permute (vrgather, or other) by the post-isel combine.
5637 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5638 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5639
5640 SmallVector<SDValue> MaskVals;
5641 for (int MaskIndex : Mask) {
5642 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5643 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5644 }
5645
5646 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5647 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5648 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5649
5650 if (SwapOps)
5651 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5652 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5653}
5654
5655 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5656 // Support splats for any type. These should type legalize well.
5657 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5658 return true;
5659
5660 // Only support legal VTs for other shuffles for now.
5661 if (!isTypeLegal(VT))
5662 return false;
5663
5664 MVT SVT = VT.getSimpleVT();
5665
5666 // Not for i1 vectors.
5667 if (SVT.getScalarType() == MVT::i1)
5668 return false;
5669
5670 int Dummy1, Dummy2;
5671 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5672 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5673}
5674
5675// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5676// the exponent.
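// For example, for i32 x = 16, uitofp(x) = 2^4 and the biased exponent field
// is 4 + 127 = 131, so cttz(16) = 131 - 127 = 4 and
// ctlz(16) = (127 + 31) - 131 = 27.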
5677SDValue
5678RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5679 SelectionDAG &DAG) const {
5680 MVT VT = Op.getSimpleValueType();
5681 unsigned EltSize = VT.getScalarSizeInBits();
5682 SDValue Src = Op.getOperand(0);
5683 SDLoc DL(Op);
5684 MVT ContainerVT = VT;
5685
5686 SDValue Mask, VL;
5687 if (Op->isVPOpcode()) {
5688 Mask = Op.getOperand(1);
5689 if (VT.isFixedLengthVector())
5690 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5691 Subtarget);
5692 VL = Op.getOperand(2);
5693 }
5694
5695 // We choose an FP type that can represent the value exactly, if possible.
5696 // Otherwise, we use a round-toward-zero conversion to keep the exponent of the result correct.
5697 // TODO: Use f16 for i8 when possible?
5698 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5699 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5700 FloatEltVT = MVT::f32;
5701 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5702
5703 // Legal types should have been checked in the RISCVTargetLowering
5704 // constructor.
5705 // TODO: Splitting may make sense in some cases.
5706 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5707 "Expected legal float type!");
5708
5709 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5710 // The trailing zero count is equal to log2 of this single bit value.
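// For example, with Src = 12 (0b1100), Src & -Src = 4 = 2^2 and cttz(12) = 2.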
5711 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5712 SDValue Neg = DAG.getNegative(Src, DL, VT);
5713 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5714 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5715 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5716 Src, Mask, VL);
5717 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5718 }
5719
5720 // We have a legal FP type, convert to it.
5721 SDValue FloatVal;
5722 if (FloatVT.bitsGT(VT)) {
5723 if (Op->isVPOpcode())
5724 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5725 else
5726 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5727 } else {
5728 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5729 if (VT.isFixedLengthVector()) {
5730 ContainerVT = getContainerForFixedLengthVector(VT);
5731 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5732 }
5733 if (!Op->isVPOpcode())
5734 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5735 SDValue RTZRM =
5736 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5737 MVT ContainerFloatVT =
5738 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5739 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5740 Src, Mask, RTZRM, VL);
5741 if (VT.isFixedLengthVector())
5742 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5743 }
5744 // Bitcast to integer and shift the exponent to the LSB.
5745 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5746 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5747 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5748
5749 SDValue Exp;
5750 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5751 if (Op->isVPOpcode()) {
5752 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5753 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5754 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5755 } else {
5756 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5757 DAG.getConstant(ShiftAmt, DL, IntVT));
5758 if (IntVT.bitsLT(VT))
5759 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5760 else if (IntVT.bitsGT(VT))
5761 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5762 }
5763
5764 // The exponent contains log2 of the value in biased form.
5765 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5766 // For trailing zeros, we just need to subtract the bias.
5767 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5768 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5769 DAG.getConstant(ExponentBias, DL, VT));
5770 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5771 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5772 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5773
5774 // For leading zeros, we need to remove the bias and convert from log2 to
5775 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5776 unsigned Adjust = ExponentBias + (EltSize - 1);
5777 SDValue Res;
5778 if (Op->isVPOpcode())
5779 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5780 Mask, VL);
5781 else
5782 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5783
5784 // With a zero input, the result above equals Adjust, which is greater than
5785 // EltSize. Hence, we can take min(Res, EltSize) for CTLZ.
5786 if (Op.getOpcode() == ISD::CTLZ)
5787 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5788 else if (Op.getOpcode() == ISD::VP_CTLZ)
5789 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5790 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5791 return Res;
5792}
5793
5794SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5795 SelectionDAG &DAG) const {
5796 SDLoc DL(Op);
5797 MVT XLenVT = Subtarget.getXLenVT();
5798 SDValue Source = Op->getOperand(0);
5799 MVT SrcVT = Source.getSimpleValueType();
5800 SDValue Mask = Op->getOperand(1);
5801 SDValue EVL = Op->getOperand(2);
5802
5803 if (SrcVT.isFixedLengthVector()) {
5804 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5805 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5806 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5807 Subtarget);
5808 SrcVT = ContainerVT;
5809 }
5810
5811 // Convert to boolean vector.
5812 if (SrcVT.getScalarType() != MVT::i1) {
5813 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5814 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5815 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5816 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5817 DAG.getUNDEF(SrcVT), Mask, EVL});
5818 }
5819
5820 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5821 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5822 // In this case, we can interpret poison as -1, so there is nothing further to do.
5823 return Res;
5824
5825 // Convert -1 to VL.
5826 SDValue SetCC =
5827 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5828 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5829 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5830}
5831
5832// While RVV has alignment restrictions, we should always be able to load as a
5833// legal equivalently-sized byte-typed vector instead. This method is
5834 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5835 // the load is already correctly aligned, it returns SDValue().
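// For example, a byte-aligned load of nxv2i32 is re-expressed as an nxv8i8
// load of the same size followed by a bitcast back to nxv2i32.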
5836SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5837 SelectionDAG &DAG) const {
5838 auto *Load = cast<LoadSDNode>(Op);
5839 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5840
5841 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5842 Load->getMemoryVT(),
5843 *Load->getMemOperand()))
5844 return SDValue();
5845
5846 SDLoc DL(Op);
5847 MVT VT = Op.getSimpleValueType();
5848 unsigned EltSizeBits = VT.getScalarSizeInBits();
5849 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5850 "Unexpected unaligned RVV load type");
5851 MVT NewVT =
5852 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5853 assert(NewVT.isValid() &&
5854 "Expecting equally-sized RVV vector types to be legal");
5855 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5856 Load->getPointerInfo(), Load->getOriginalAlign(),
5857 Load->getMemOperand()->getFlags());
5858 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5859}
5860
5861// While RVV has alignment restrictions, we should always be able to store as a
5862// legal equivalently-sized byte-typed vector instead. This method is
5863 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5864// returns SDValue() if the store is already correctly aligned.
5865SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5866 SelectionDAG &DAG) const {
5867 auto *Store = cast<StoreSDNode>(Op);
5868 assert(Store && Store->getValue().getValueType().isVector() &&
5869 "Expected vector store");
5870
5871 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5872 Store->getMemoryVT(),
5873 *Store->getMemOperand()))
5874 return SDValue();
5875
5876 SDLoc DL(Op);
5877 SDValue StoredVal = Store->getValue();
5878 MVT VT = StoredVal.getSimpleValueType();
5879 unsigned EltSizeBits = VT.getScalarSizeInBits();
5880 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5881 "Unexpected unaligned RVV store type");
5882 MVT NewVT =
5883 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5884 assert(NewVT.isValid() &&
5885 "Expecting equally-sized RVV vector types to be legal");
5886 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5887 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5888 Store->getPointerInfo(), Store->getOriginalAlign(),
5889 Store->getMemOperand()->getFlags());
5890}
5891
5892 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5893 const RISCVSubtarget &Subtarget) {
5894 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5895
5896 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5897
5898 // All simm32 constants should be handled by isel.
5899 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5900 // this check redundant, but small immediates are common, so checking them
5901 // first gives better compile time.
5902 if (isInt<32>(Imm))
5903 return Op;
5904
5905 // We only need to cost the immediate if constant pool lowering is enabled.
5906 if (!Subtarget.useConstantPoolForLargeInts())
5907 return Op;
5908
5909 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5910 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5911 return Op;
5912
5913 // Optimizations below are disabled for opt size. If we're optimizing for
5914 // size, use a constant pool.
5915 if (DAG.shouldOptForSize())
5916 return SDValue();
5917
5918 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5919 // do that if it will avoid a constant pool.
5920 // It will require an extra temporary register though.
5921 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5922 // low and high 32 bits are the same and bit 31 and 63 are set.
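// For example, 0x1234567812345678 can be built as X = 0x12345678 followed by
// (ADD (SLLI X, 32), X), trading the constant pool load for an extra register.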
5923 unsigned ShiftAmt, AddOpc;
5924 RISCVMatInt::InstSeq SeqLo =
5925 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5926 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5927 return Op;
5928
5929 return SDValue();
5930}
5931
5932SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
5933 SelectionDAG &DAG) const {
5934 MVT VT = Op.getSimpleValueType();
5935 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
5936
5937 // Can this constant be selected by a Zfa FLI instruction?
5938 bool Negate = false;
5939 int Index = getLegalZfaFPImm(Imm, VT);
5940
5941 // If the constant is negative, try negating.
5942 if (Index < 0 && Imm.isNegative()) {
5943 Index = getLegalZfaFPImm(-Imm, VT);
5944 Negate = true;
5945 }
5946
5947 // If we couldn't find a FLI lowering, fall back to generic code.
5948 if (Index < 0)
5949 return SDValue();
5950
5951 // Emit an FLI+FNEG. We use a custom node to hide it from constant folding.
5952 SDLoc DL(Op);
5953 SDValue Const =
5954 DAG.getNode(RISCVISD::FLI, DL, VT,
5955 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
5956 if (!Negate)
5957 return Const;
5958
5959 return DAG.getNode(ISD::FNEG, DL, VT, Const);
5960}
5961
5962 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5963 const RISCVSubtarget &Subtarget) {
5964 SDLoc dl(Op);
5965 AtomicOrdering FenceOrdering =
5966 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5967 SyncScope::ID FenceSSID =
5968 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5969
5970 if (Subtarget.hasStdExtZtso()) {
5971 // The only fence that needs an instruction is a sequentially-consistent
5972 // cross-thread fence.
5973 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5974 FenceSSID == SyncScope::System)
5975 return Op;
5976
5977 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5978 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5979 }
5980
5981 // singlethread fences only synchronize with signal handlers on the same
5982 // thread and thus only need to preserve instruction order, not actually
5983 // enforce memory ordering.
5984 if (FenceSSID == SyncScope::SingleThread)
5985 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5986 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5987
5988 return Op;
5989}
5990
5991SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5992 SelectionDAG &DAG) const {
5993 SDLoc DL(Op);
5994 MVT VT = Op.getSimpleValueType();
5995 MVT XLenVT = Subtarget.getXLenVT();
5996 unsigned Check = Op.getConstantOperandVal(1);
5997 unsigned TDCMask = 0;
5998 if (Check & fcSNan)
5999 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6000 if (Check & fcQNan)
6001 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6002 if (Check & fcPosInf)
6003 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6004 if (Check & fcNegInf)
6005 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6006 if (Check & fcPosNormal)
6007 TDCMask |= RISCV::FPMASK_Positive_Normal;
6008 if (Check & fcNegNormal)
6009 TDCMask |= RISCV::FPMASK_Negative_Normal;
6010 if (Check & fcPosSubnormal)
6011 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6012 if (Check & fcNegSubnormal)
6013 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6014 if (Check & fcPosZero)
6015 TDCMask |= RISCV::FPMASK_Positive_Zero;
6016 if (Check & fcNegZero)
6017 TDCMask |= RISCV::FPMASK_Negative_Zero;
6018
6019 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6020
6021 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6022
6023 if (VT.isVector()) {
6024 SDValue Op0 = Op.getOperand(0);
6025 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6026
6027 if (VT.isScalableVector()) {
6028 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6029 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6030 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6031 Mask = Op.getOperand(2);
6032 VL = Op.getOperand(3);
6033 }
6034 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6035 VL, Op->getFlags());
6036 if (IsOneBitMask)
6037 return DAG.getSetCC(DL, VT, FPCLASS,
6038 DAG.getConstant(TDCMask, DL, DstVT),
6039 ISD::SETEQ);
6040 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6041 DAG.getConstant(TDCMask, DL, DstVT));
6042 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6043 ISD::SETNE);
6044 }
6045
6046 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6047 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6048 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6049 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6050 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6051 Mask = Op.getOperand(2);
6052 MVT MaskContainerVT =
6053 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6054 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6055 VL = Op.getOperand(3);
6056 }
6057 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6058
6059 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6060 Mask, VL, Op->getFlags());
6061
6062 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6063 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6064 if (IsOneBitMask) {
6065 SDValue VMSEQ =
6066 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6067 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6068 DAG.getUNDEF(ContainerVT), Mask, VL});
6069 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6070 }
6071 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6072 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6073
6074 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6075 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6076 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6077
6078 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6079 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6080 DAG.getUNDEF(ContainerVT), Mask, VL});
6081 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6082 }
6083
6084 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6085 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6086 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6087 ISD::SETNE);
6088 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6089}
6090
6091// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6092// operations propagate nans.
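// For example, fmaximum(2.0, NaN) is NaN, whereas fmax would return 2.0 since
// it only produces NaN when both inputs are NaN.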
6093 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6094 const RISCVSubtarget &Subtarget) {
6095 SDLoc DL(Op);
6096 MVT VT = Op.getSimpleValueType();
6097
6098 SDValue X = Op.getOperand(0);
6099 SDValue Y = Op.getOperand(1);
6100
6101 if (!VT.isVector()) {
6102 MVT XLenVT = Subtarget.getXLenVT();
6103
6104 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6105 // ensures that when one input is a nan, the other will also be a nan,
6106 // allowing the nan to propagate. If both inputs are nan, this will swap the
6107 // inputs, which is harmless.
6108
6109 SDValue NewY = Y;
6110 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6111 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6112 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6113 }
6114
6115 SDValue NewX = X;
6116 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6117 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6118 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6119 }
6120
6121 unsigned Opc =
6122 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6123 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6124 }
6125
6126 // Check for no NaNs before converting the fixed-length vectors to scalable.
6127 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6128 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6129
6130 MVT ContainerVT = VT;
6131 if (VT.isFixedLengthVector()) {
6132 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6133 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6134 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6135 }
6136
6137 SDValue Mask, VL;
6138 if (Op->isVPOpcode()) {
6139 Mask = Op.getOperand(2);
6140 if (VT.isFixedLengthVector())
6141 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6142 Subtarget);
6143 VL = Op.getOperand(3);
6144 } else {
6145 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6146 }
6147
6148 SDValue NewY = Y;
6149 if (!XIsNeverNan) {
6150 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6151 {X, X, DAG.getCondCode(ISD::SETOEQ),
6152 DAG.getUNDEF(ContainerVT), Mask, VL});
6153 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6154 DAG.getUNDEF(ContainerVT), VL);
6155 }
6156
6157 SDValue NewX = X;
6158 if (!YIsNeverNan) {
6159 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6160 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6161 DAG.getUNDEF(ContainerVT), Mask, VL});
6162 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6163 DAG.getUNDEF(ContainerVT), VL);
6164 }
6165
6166 unsigned Opc =
6167 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6168 ? RISCVISD::VFMAX_VL
6169 : RISCVISD::VFMIN_VL;
6170 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6171 DAG.getUNDEF(ContainerVT), Mask, VL);
6172 if (VT.isFixedLengthVector())
6173 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6174 return Res;
6175}
6176
6177 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6178 const RISCVSubtarget &Subtarget) {
6179 bool IsFABS = Op.getOpcode() == ISD::FABS;
6180 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6181 "Wrong opcode for lowering FABS or FNEG.");
6182
6183 MVT XLenVT = Subtarget.getXLenVT();
6184 MVT VT = Op.getSimpleValueType();
6185 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6186
6187 SDLoc DL(Op);
6188 SDValue Fmv =
6189 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6190
6191 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6192 Mask = Mask.sext(Subtarget.getXLen());
6193
6194 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6195 SDValue Logic =
6196 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6197 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6198}
6199
6200 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6201 const RISCVSubtarget &Subtarget) {
6202 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6203
6204 MVT XLenVT = Subtarget.getXLenVT();
6205 MVT VT = Op.getSimpleValueType();
6206 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6207
6208 SDValue Mag = Op.getOperand(0);
6209 SDValue Sign = Op.getOperand(1);
6210
6211 SDLoc DL(Op);
6212
6213 // Get sign bit into an integer value.
6214 SDValue SignAsInt;
6215 unsigned SignSize = Sign.getValueSizeInBits();
6216 if (SignSize == Subtarget.getXLen()) {
6217 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6218 } else if (SignSize == 16) {
6219 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6220 } else if (SignSize == 32) {
6221 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6222 } else if (SignSize == 64) {
6223 assert(XLenVT == MVT::i32 && "Unexpected type");
6224 // Copy the upper word to integer.
6225 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6226 .getValue(1);
6227 SignSize = 32;
6228 } else
6229 llvm_unreachable("Unexpected sign size");
6230
6231 // Get the signbit at the right position for MagAsInt.
6232 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6233 if (ShiftAmount > 0) {
6234 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6235 DAG.getConstant(ShiftAmount, DL, XLenVT));
6236 } else if (ShiftAmount < 0) {
6237 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6238 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6239 }
6240
6241 // Mask the sign bit and any bits above it. The extra bits will be dropped
6242 // when we convert back to FP.
6243 SDValue SignMask = DAG.getConstant(
6244 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6245 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6246
6247 // Transform Mag value to integer, and clear the sign bit.
6248 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6249 SDValue ClearSignMask = DAG.getConstant(
6250 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6251 SDValue ClearedSign =
6252 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6253
6254 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6256
6257 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6258}
6259
6260/// Get a RISC-V target specified VL op for a given SDNode.
6261static unsigned getRISCVVLOp(SDValue Op) {
6262#define OP_CASE(NODE) \
6263 case ISD::NODE: \
6264 return RISCVISD::NODE##_VL;
6265#define VP_CASE(NODE) \
6266 case ISD::VP_##NODE: \
6267 return RISCVISD::NODE##_VL;
6268 // clang-format off
6269 switch (Op.getOpcode()) {
6270 default:
6271 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6272 OP_CASE(ADD)
6273 OP_CASE(SUB)
6274 OP_CASE(MUL)
6275 OP_CASE(MULHS)
6276 OP_CASE(MULHU)
6277 OP_CASE(SDIV)
6278 OP_CASE(SREM)
6279 OP_CASE(UDIV)
6280 OP_CASE(UREM)
6281 OP_CASE(SHL)
6282 OP_CASE(SRA)
6283 OP_CASE(SRL)
6284 OP_CASE(ROTL)
6285 OP_CASE(ROTR)
6286 OP_CASE(BSWAP)
6287 OP_CASE(CTTZ)
6288 OP_CASE(CTLZ)
6289 OP_CASE(CTPOP)
6290 OP_CASE(BITREVERSE)
6291 OP_CASE(SADDSAT)
6292 OP_CASE(UADDSAT)
6293 OP_CASE(SSUBSAT)
6294 OP_CASE(USUBSAT)
6295 OP_CASE(AVGFLOORS)
6296 OP_CASE(AVGFLOORU)
6297 OP_CASE(AVGCEILS)
6298 OP_CASE(AVGCEILU)
6299 OP_CASE(FADD)
6300 OP_CASE(FSUB)
6301 OP_CASE(FMUL)
6302 OP_CASE(FDIV)
6303 OP_CASE(FNEG)
6304 OP_CASE(FABS)
6305 OP_CASE(FSQRT)
6306 OP_CASE(SMIN)
6307 OP_CASE(SMAX)
6308 OP_CASE(UMIN)
6309 OP_CASE(UMAX)
6310 OP_CASE(STRICT_FADD)
6311 OP_CASE(STRICT_FSUB)
6312 OP_CASE(STRICT_FMUL)
6313 OP_CASE(STRICT_FDIV)
6314 OP_CASE(STRICT_FSQRT)
6315 VP_CASE(ADD) // VP_ADD
6316 VP_CASE(SUB) // VP_SUB
6317 VP_CASE(MUL) // VP_MUL
6318 VP_CASE(SDIV) // VP_SDIV
6319 VP_CASE(SREM) // VP_SREM
6320 VP_CASE(UDIV) // VP_UDIV
6321 VP_CASE(UREM) // VP_UREM
6322 VP_CASE(SHL) // VP_SHL
6323 VP_CASE(FADD) // VP_FADD
6324 VP_CASE(FSUB) // VP_FSUB
6325 VP_CASE(FMUL) // VP_FMUL
6326 VP_CASE(FDIV) // VP_FDIV
6327 VP_CASE(FNEG) // VP_FNEG
6328 VP_CASE(FABS) // VP_FABS
6329 VP_CASE(SMIN) // VP_SMIN
6330 VP_CASE(SMAX) // VP_SMAX
6331 VP_CASE(UMIN) // VP_UMIN
6332 VP_CASE(UMAX) // VP_UMAX
6333 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6334 VP_CASE(SETCC) // VP_SETCC
6335 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6336 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6337 VP_CASE(BITREVERSE) // VP_BITREVERSE
6338 VP_CASE(SADDSAT) // VP_SADDSAT
6339 VP_CASE(UADDSAT) // VP_UADDSAT
6340 VP_CASE(SSUBSAT) // VP_SSUBSAT
6341 VP_CASE(USUBSAT) // VP_USUBSAT
6342 VP_CASE(BSWAP) // VP_BSWAP
6343 VP_CASE(CTLZ) // VP_CTLZ
6344 VP_CASE(CTTZ) // VP_CTTZ
6345 VP_CASE(CTPOP) // VP_CTPOP
6346 case ISD::CTLZ_ZERO_UNDEF:
6347 case ISD::VP_CTLZ_ZERO_UNDEF:
6348 return RISCVISD::CTLZ_VL;
6349 case ISD::CTTZ_ZERO_UNDEF:
6350 case ISD::VP_CTTZ_ZERO_UNDEF:
6351 return RISCVISD::CTTZ_VL;
6352 case ISD::FMA:
6353 case ISD::VP_FMA:
6354 return RISCVISD::VFMADD_VL;
6355 case ISD::STRICT_FMA:
6356 return RISCVISD::STRICT_VFMADD_VL;
6357 case ISD::AND:
6358 case ISD::VP_AND:
6359 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6360 return RISCVISD::VMAND_VL;
6361 return RISCVISD::AND_VL;
6362 case ISD::OR:
6363 case ISD::VP_OR:
6364 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6365 return RISCVISD::VMOR_VL;
6366 return RISCVISD::OR_VL;
6367 case ISD::XOR:
6368 case ISD::VP_XOR:
6369 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6370 return RISCVISD::VMXOR_VL;
6371 return RISCVISD::XOR_VL;
6372 case ISD::VP_SELECT:
6373 case ISD::VP_MERGE:
6374 return RISCVISD::VMERGE_VL;
6375 case ISD::VP_SRA:
6376 return RISCVISD::SRA_VL;
6377 case ISD::VP_SRL:
6378 return RISCVISD::SRL_VL;
6379 case ISD::VP_SQRT:
6380 return RISCVISD::FSQRT_VL;
6381 case ISD::VP_SIGN_EXTEND:
6382 return RISCVISD::VSEXT_VL;
6383 case ISD::VP_ZERO_EXTEND:
6384 return RISCVISD::VZEXT_VL;
6385 case ISD::VP_FP_TO_SINT:
6386 return RISCVISD::VFCVT_RTZ_X_F_VL;
6387 case ISD::VP_FP_TO_UINT:
6388 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6389 case ISD::FMINNUM:
6390 case ISD::VP_FMINNUM:
6391 return RISCVISD::VFMIN_VL;
6392 case ISD::FMAXNUM:
6393 case ISD::VP_FMAXNUM:
6394 return RISCVISD::VFMAX_VL;
6395 case ISD::LRINT:
6396 case ISD::VP_LRINT:
6397 case ISD::LLRINT:
6398 case ISD::VP_LLRINT:
6400 }
6401 // clang-format on
6402#undef OP_CASE
6403#undef VP_CASE
6404}
6405
6406/// Return true if a RISC-V target specified op has a passthru operand.
6407static bool hasPassthruOp(unsigned Opcode) {
6408 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6410 "not a RISC-V target specific op");
6411 static_assert(
6414 "adding target specific op should update this function");
6415 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6416 return true;
6417 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6418 return true;
6419 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6420 return true;
6421 if (Opcode == RISCVISD::SETCC_VL)
6422 return true;
6423 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6424 return true;
6425 if (Opcode == RISCVISD::VMERGE_VL)
6426 return true;
6427 return false;
6428}
6429
6430/// Return true if a RISC-V target specified op has a mask operand.
6431static bool hasMaskOp(unsigned Opcode) {
6432 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6434 "not a RISC-V target specific op");
6435 static_assert(
6438 "adding target specific op should update this function");
6439 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6440 return true;
6441 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6442 return true;
6443 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6445 return true;
6446 return false;
6447}
6448
6449 static bool isPromotedOpNeedingSplit(SDValue Op,
6450 const RISCVSubtarget &Subtarget) {
6451 if (Op.getValueType() == MVT::nxv32f16 &&
6452 (Subtarget.hasVInstructionsF16Minimal() &&
6453 !Subtarget.hasVInstructionsF16()))
6454 return true;
6455 if (Op.getValueType() == MVT::nxv32bf16)
6456 return true;
6457 return false;
6458}
6459
6459
6460 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6461 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6462 SDLoc DL(Op);
6463
6464 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6465 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6466
6467 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6468 if (!Op.getOperand(j).getValueType().isVector()) {
6469 LoOperands[j] = Op.getOperand(j);
6470 HiOperands[j] = Op.getOperand(j);
6471 continue;
6472 }
6473 std::tie(LoOperands[j], HiOperands[j]) =
6474 DAG.SplitVector(Op.getOperand(j), DL);
6475 }
6476
6477 SDValue LoRes =
6478 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6479 SDValue HiRes =
6480 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6481
6482 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6483}
6484
6485 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6486 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6487 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6488 SDLoc DL(Op);
6489
6490 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6491 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6492
6493 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6494 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6495 std::tie(LoOperands[j], HiOperands[j]) =
6496 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6497 continue;
6498 }
6499 if (!Op.getOperand(j).getValueType().isVector()) {
6500 LoOperands[j] = Op.getOperand(j);
6501 HiOperands[j] = Op.getOperand(j);
6502 continue;
6503 }
6504 std::tie(LoOperands[j], HiOperands[j]) =
6505 DAG.SplitVector(Op.getOperand(j), DL);
6506 }
6507
6508 SDValue LoRes =
6509 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6510 SDValue HiRes =
6511 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6512
6513 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6514}
6515
6516 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6517 SDLoc DL(Op);
6518
6519 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6520 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6521 auto [EVLLo, EVLHi] =
6522 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6523
6524 SDValue ResLo =
6525 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6526 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6527 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6528 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6529}
6530
6531 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6532
6533 assert(Op->isStrictFPOpcode());
6534
6535 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6536
6537 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6538 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6539
6540 SDLoc DL(Op);
6541
6542 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6543 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6544
6545 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6546 if (!Op.getOperand(j).getValueType().isVector()) {
6547 LoOperands[j] = Op.getOperand(j);
6548 HiOperands[j] = Op.getOperand(j);
6549 continue;
6550 }
6551 std::tie(LoOperands[j], HiOperands[j]) =
6552 DAG.SplitVector(Op.getOperand(j), DL);
6553 }
6554
6555 SDValue LoRes =
6556 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6557 HiOperands[0] = LoRes.getValue(1);
6558 SDValue HiRes =
6559 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6560
6561 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6562 LoRes.getValue(0), HiRes.getValue(0));
6563 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6564}
6565
6566 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6567 SelectionDAG &DAG) const {
6568 switch (Op.getOpcode()) {
6569 default:
6570 report_fatal_error("unimplemented operand");
6571 case ISD::ATOMIC_FENCE:
6572 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6573 case ISD::GlobalAddress:
6574 return lowerGlobalAddress(Op, DAG);
6575 case ISD::BlockAddress:
6576 return lowerBlockAddress(Op, DAG);
6577 case ISD::ConstantPool:
6578 return lowerConstantPool(Op, DAG);
6579 case ISD::JumpTable:
6580 return lowerJumpTable(Op, DAG);
6581 case ISD::GlobalTLSAddress:
6582 return lowerGlobalTLSAddress(Op, DAG);
6583 case ISD::Constant:
6584 return lowerConstant(Op, DAG, Subtarget);
6585 case ISD::ConstantFP:
6586 return lowerConstantFP(Op, DAG);
6587 case ISD::SELECT:
6588 return lowerSELECT(Op, DAG);
6589 case ISD::BRCOND:
6590 return lowerBRCOND(Op, DAG);
6591 case ISD::VASTART:
6592 return lowerVASTART(Op, DAG);
6593 case ISD::FRAMEADDR:
6594 return lowerFRAMEADDR(Op, DAG);
6595 case ISD::RETURNADDR:
6596 return lowerRETURNADDR(Op, DAG);
6597 case ISD::SHL_PARTS:
6598 return lowerShiftLeftParts(Op, DAG);
6599 case ISD::SRA_PARTS:
6600 return lowerShiftRightParts(Op, DAG, true);
6601 case ISD::SRL_PARTS:
6602 return lowerShiftRightParts(Op, DAG, false);
6603 case ISD::ROTL:
6604 case ISD::ROTR:
6605 if (Op.getValueType().isFixedLengthVector()) {
6606 assert(Subtarget.hasStdExtZvkb());
6607 return lowerToScalableOp(Op, DAG);
6608 }
6609 assert(Subtarget.hasVendorXTHeadBb() &&
6610 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6611 "Unexpected custom legalization");
6612 // XTHeadBb only supports rotate by constant.
6613 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6614 return SDValue();
6615 return Op;
6616 case ISD::BITCAST: {
6617 SDLoc DL(Op);
6618 EVT VT = Op.getValueType();
6619 SDValue Op0 = Op.getOperand(0);
6620 EVT Op0VT = Op0.getValueType();
6621 MVT XLenVT = Subtarget.getXLenVT();
6622 if (Op0VT == MVT::i16 &&
6623 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6624 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6625 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6626 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6627 }
6628 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6629 Subtarget.hasStdExtFOrZfinx()) {
6630 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6631 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6632 }
6633 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6634 Subtarget.hasStdExtDOrZdinx()) {
6635 SDValue Lo, Hi;
6636 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6637 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6638 }
6639
6640 // Consider other scalar<->scalar casts as legal if the types are legal.
6641 // Otherwise expand them.
6642 if (!VT.isVector() && !Op0VT.isVector()) {
6643 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6644 return Op;
6645 return SDValue();
6646 }
6647
6648 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6649 "Unexpected types");
6650
6651 if (VT.isFixedLengthVector()) {
6652 // We can handle fixed length vector bitcasts with a simple replacement
6653 // in isel.
6654 if (Op0VT.isFixedLengthVector())
6655 return Op;
6656 // When bitcasting from scalar to fixed-length vector, insert the scalar
6657 // into a one-element vector of the result type, and perform a vector
6658 // bitcast.
6659 if (!Op0VT.isVector()) {
6660 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6661 if (!isTypeLegal(BVT))
6662 return SDValue();
6663 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6664 DAG.getUNDEF(BVT), Op0,
6665 DAG.getVectorIdxConstant(0, DL)));
6666 }
6667 return SDValue();
6668 }
6669 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6670 // thus: bitcast the vector to a one-element vector type whose element type
6671 // is the same as the result type, and extract the first element.
6672 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6673 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6674 if (!isTypeLegal(BVT))
6675 return SDValue();
6676 SDValue BVec = DAG.getBitcast(BVT, Op0);
6677 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6678 DAG.getVectorIdxConstant(0, DL));
6679 }
6680 return SDValue();
6681 }
6682 case ISD::INTRINSIC_WO_CHAIN:
6683 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6684 case ISD::INTRINSIC_W_CHAIN:
6685 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6686 case ISD::INTRINSIC_VOID:
6687 return LowerINTRINSIC_VOID(Op, DAG);
6688 case ISD::IS_FPCLASS:
6689 return LowerIS_FPCLASS(Op, DAG);
6690 case ISD::BITREVERSE: {
6691 MVT VT = Op.getSimpleValueType();
6692 if (VT.isFixedLengthVector()) {
6693 assert(Subtarget.hasStdExtZvbb());
6694 return lowerToScalableOp(Op, DAG);
6695 }
6696 SDLoc DL(Op);
6697 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6698 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6699 // Expand bitreverse to a bswap(rev8) followed by brev8.
6700 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6701 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6702 }
6703 case ISD::TRUNCATE:
6704 case ISD::TRUNCATE_SSAT_S:
6705 case ISD::TRUNCATE_USAT_U:
6706 // Only custom-lower vector truncates
6707 if (!Op.getSimpleValueType().isVector())
6708 return Op;
6709 return lowerVectorTruncLike(Op, DAG);
6710 case ISD::ANY_EXTEND:
6711 case ISD::ZERO_EXTEND:
6712 if (Op.getOperand(0).getValueType().isVector() &&
6713 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6714 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6715 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6716 case ISD::SIGN_EXTEND:
6717 if (Op.getOperand(0).getValueType().isVector() &&
6718 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6719 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6720 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6721 case ISD::SPLAT_VECTOR_PARTS:
6722 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6723 case ISD::INSERT_VECTOR_ELT:
6724 return lowerINSERT_VECTOR_ELT(Op, DAG);
6725 case ISD::EXTRACT_VECTOR_ELT:
6726 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6727 case ISD::SCALAR_TO_VECTOR: {
6728 MVT VT = Op.getSimpleValueType();
6729 SDLoc DL(Op);
6730 SDValue Scalar = Op.getOperand(0);
6731 if (VT.getVectorElementType() == MVT::i1) {
6732 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6733 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6734 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6735 }
6736 MVT ContainerVT = VT;
6737 if (VT.isFixedLengthVector())
6738 ContainerVT = getContainerForFixedLengthVector(VT);
6739 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6740
6741 SDValue V;
6742 if (VT.isFloatingPoint()) {
6743 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6744 DAG.getUNDEF(ContainerVT), Scalar, VL);
6745 } else {
6746 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6747 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6748 DAG.getUNDEF(ContainerVT), Scalar, VL);
6749 }
6750 if (VT.isFixedLengthVector())
6751 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6752 return V;
6753 }
6754 case ISD::VSCALE: {
6755 MVT XLenVT = Subtarget.getXLenVT();
6756 MVT VT = Op.getSimpleValueType();
6757 SDLoc DL(Op);
6758 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6759 // We define our scalable vector types for lmul=1 to use a 64 bit known
6760 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6761 // vscale as VLENB / 8.
6762 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6763 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6764 report_fatal_error("Support for VLEN==32 is incomplete.");
6765 // We assume VLENB is a multiple of 8. We manually choose the best shift
6766 // here because SimplifyDemandedBits isn't always able to simplify it.
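// As a rough illustration of the cases below (with vscale == VLENB / 8):
//   vscale * 2  -> VLENB >> 2        (power of 2, Log2 < 3)
//   vscale * 16 -> VLENB << 1        (power of 2, Log2 > 3)
//   vscale * 24 -> VLENB * 3         (multiple of 8)
//   vscale * 5  -> (VLENB >> 3) * 5  (general case)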
6767 uint64_t Val = Op.getConstantOperandVal(0);
6768 if (isPowerOf2_64(Val)) {
6769 uint64_t Log2 = Log2_64(Val);
6770 if (Log2 < 3)
6771 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6772 DAG.getConstant(3 - Log2, DL, VT));
6773 else if (Log2 > 3)
6774 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6775 DAG.getConstant(Log2 - 3, DL, XLenVT));
6776 } else if ((Val % 8) == 0) {
6777 // If the multiplier is a multiple of 8, scale it down to avoid needing
6778 // to shift the VLENB value.
6779 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6780 DAG.getConstant(Val / 8, DL, XLenVT));
6781 } else {
6782 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6783 DAG.getConstant(3, DL, XLenVT));
6784 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6785 DAG.getConstant(Val, DL, XLenVT));
6786 }
6787 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6788 }
6789 case ISD::FPOWI: {
6790 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6791 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6792 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6793 Op.getOperand(1).getValueType() == MVT::i32) {
6794 SDLoc DL(Op);
6795 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6796 SDValue Powi =
6797 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6798 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6799 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6800 }
6801 return SDValue();
6802 }
6803 case ISD::FMAXIMUM:
6804 case ISD::FMINIMUM:
6805 if (isPromotedOpNeedingSplit(Op, Subtarget))
6806 return SplitVectorOp(Op, DAG);
6807 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6808 case ISD::FP_EXTEND:
6809 case ISD::FP_ROUND:
6810 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6811 case ISD::STRICT_FP_EXTEND:
6812 case ISD::STRICT_FP_ROUND:
6813 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6814 case ISD::SINT_TO_FP:
6815 case ISD::UINT_TO_FP:
6816 if (Op.getValueType().isVector() &&
6817 ((Op.getValueType().getScalarType() == MVT::f16 &&
6818 (Subtarget.hasVInstructionsF16Minimal() &&
6819 !Subtarget.hasVInstructionsF16())) ||
6820 Op.getValueType().getScalarType() == MVT::bf16)) {
6821 if (isPromotedOpNeedingSplit(Op, Subtarget))
6822 return SplitVectorOp(Op, DAG);
6823 // int -> f32
6824 SDLoc DL(Op);
6825 MVT NVT =
6826 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6827 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6828 // f32 -> [b]f16
6829 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6830 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6831 }
6832 [[fallthrough]];
6833 case ISD::FP_TO_SINT:
6834 case ISD::FP_TO_UINT:
6835 if (SDValue Op1 = Op.getOperand(0);
6836 Op1.getValueType().isVector() &&
6837 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6838 (Subtarget.hasVInstructionsF16Minimal() &&
6839 !Subtarget.hasVInstructionsF16())) ||
6840 Op1.getValueType().getScalarType() == MVT::bf16)) {
6841 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6842 return SplitVectorOp(Op, DAG);
6843 // [b]f16 -> f32
6844 SDLoc DL(Op);
6845 MVT NVT = MVT::getVectorVT(MVT::f32,
6846 Op1.getValueType().getVectorElementCount());
6847 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6848 // f32 -> int
6849 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6850 }
6851 [[fallthrough]];
6852 case ISD::STRICT_FP_TO_SINT:
6853 case ISD::STRICT_FP_TO_UINT:
6854 case ISD::STRICT_SINT_TO_FP:
6855 case ISD::STRICT_UINT_TO_FP: {
6856 // RVV can only do fp<->int conversions to types half/double the size as
6857 // the source. We custom-lower any conversions that do two hops into
6858 // sequences.
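// For instance, an i8 -> f32 element conversion is emitted as an i8 -> i16
// extend followed by a single widening i16 -> f32 convert, while an
// f64 -> i8 conversion is emitted as a narrowing f64 -> i32 convert
// followed by an i32 -> i8 truncate.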
6859 MVT VT = Op.getSimpleValueType();
6860 if (VT.isScalarInteger())
6861 return lowerFP_TO_INT(Op, DAG, Subtarget);
6862 bool IsStrict = Op->isStrictFPOpcode();
6863 SDValue Src = Op.getOperand(0 + IsStrict);
6864 MVT SrcVT = Src.getSimpleValueType();
6865 if (SrcVT.isScalarInteger())
6866 return lowerINT_TO_FP(Op, DAG, Subtarget);
6867 if (!VT.isVector())
6868 return Op;
6869 SDLoc DL(Op);
6870 MVT EltVT = VT.getVectorElementType();
6871 MVT SrcEltVT = SrcVT.getVectorElementType();
6872 unsigned EltSize = EltVT.getSizeInBits();
6873 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6874 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6875 "Unexpected vector element types");
6876
6877 bool IsInt2FP = SrcEltVT.isInteger();
6878 // Widening conversions
6879 if (EltSize > (2 * SrcEltSize)) {
6880 if (IsInt2FP) {
6881 // Do a regular integer sign/zero extension then convert to float.
6882 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6883 VT.getVectorElementCount());
6884 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6885 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6886 ? ISD::ZERO_EXTEND
6887 : ISD::SIGN_EXTEND;
6888 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6889 if (IsStrict)
6890 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6891 Op.getOperand(0), Ext);
6892 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6893 }
6894 // FP2Int
6895 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6896 // Do one doubling fp_extend then complete the operation by converting
6897 // to int.
6898 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6899 if (IsStrict) {
6900 auto [FExt, Chain] =
6901 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6902 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6903 }
6904 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6905 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6906 }
6907
6908 // Narrowing conversions
6909 if (SrcEltSize > (2 * EltSize)) {
6910 if (IsInt2FP) {
6911 // One narrowing int_to_fp, then an fp_round.
6912 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6913 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6914 if (IsStrict) {
6915 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6916 DAG.getVTList(InterimFVT, MVT::Other),
6917 Op.getOperand(0), Src);
6918 SDValue Chain = Int2FP.getValue(1);
6919 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6920 }
6921 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6922 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6923 }
6924 // FP2Int
6925 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6926 // representable by the integer, the result is poison.
6927 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6928 VT.getVectorElementCount());
6929 if (IsStrict) {
6930 SDValue FP2Int =
6931 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6932 Op.getOperand(0), Src);
6933 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6934 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6935 }
6936 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6937 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6938 }
6939
6940 // Scalable vectors can exit here. Patterns will handle equally-sized
6941 // conversions and halving/doubling ones.
6942 if (!VT.isFixedLengthVector())
6943 return Op;
6944
6945 // For fixed-length vectors we lower to a custom "VL" node.
6946 unsigned RVVOpc = 0;
6947 switch (Op.getOpcode()) {
6948 default:
6949 llvm_unreachable("Impossible opcode");
6950 case ISD::FP_TO_SINT:
6951 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6952 break;
6953 case ISD::FP_TO_UINT:
6954 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6955 break;
6956 case ISD::SINT_TO_FP:
6957 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6958 break;
6959 case ISD::UINT_TO_FP:
6960 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6961 break;
6962 case ISD::STRICT_FP_TO_SINT:
6963 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6964 break;
6965 case ISD::STRICT_FP_TO_UINT:
6966 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6967 break;
6968 case ISD::STRICT_SINT_TO_FP:
6969 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6970 break;
6971 case ISD::STRICT_UINT_TO_FP:
6972 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6973 break;
6974 }
6975
6976 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6977 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6978 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6979 "Expected same element count");
6980
6981 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6982
6983 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6984 if (IsStrict) {
6985 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6986 Op.getOperand(0), Src, Mask, VL);
6987 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6988 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6989 }
6990 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6991 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6992 }
6993 case ISD::FP_TO_SINT_SAT:
6994 case ISD::FP_TO_UINT_SAT:
6995 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6996 case ISD::FP_TO_BF16: {
6997 // Custom lower to ensure the libcall return is passed in an FPR on hard
6998 // float ABIs.
6999 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7000 SDLoc DL(Op);
7001 MakeLibCallOptions CallOptions;
7002 RTLIB::Libcall LC =
7003 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7004 SDValue Res =
7005 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7006 if (Subtarget.is64Bit())
7007 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7008 return DAG.getBitcast(MVT::i32, Res);
7009 }
7010 case ISD::BF16_TO_FP: {
7011 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7012 MVT VT = Op.getSimpleValueType();
7013 SDLoc DL(Op);
7014 Op = DAG.getNode(
7015 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7016 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7017 SDValue Res = Subtarget.is64Bit()
7018 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7019 : DAG.getBitcast(MVT::f32, Op);
7020 // fp_extend if the target VT is bigger than f32.
7021 if (VT != MVT::f32)
7022 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7023 return Res;
7024 }
7025 case ISD::STRICT_FP_TO_FP16:
7026 case ISD::FP_TO_FP16: {
7027 // Custom lower to ensure the libcall return is passed in an FPR on hard
7028 // float ABIs.
7029 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7030 SDLoc DL(Op);
7031 MakeLibCallOptions CallOptions;
7032 bool IsStrict = Op->isStrictFPOpcode();
7033 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7034 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7035 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7036 SDValue Res;
7037 std::tie(Res, Chain) =
7038 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7039 if (Subtarget.is64Bit())
7040 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7041 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7042 if (IsStrict)
7043 return DAG.getMergeValues({Result, Chain}, DL);
7044 return Result;
7045 }
7046 case ISD::STRICT_FP16_TO_FP:
7047 case ISD::FP16_TO_FP: {
7048 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7049 // float ABIs.
7050 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7051 SDLoc DL(Op);
7052 MakeLibCallOptions CallOptions;
7053 bool IsStrict = Op->isStrictFPOpcode();
7054 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7055 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7056 SDValue Arg = Subtarget.is64Bit()
7057 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7058 : DAG.getBitcast(MVT::f32, Op0);
7059 SDValue Res;
7060 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7061 CallOptions, DL, Chain);
7062 if (IsStrict)
7063 return DAG.getMergeValues({Res, Chain}, DL);
7064 return Res;
7065 }
7066 case ISD::FTRUNC:
7067 case ISD::FCEIL:
7068 case ISD::FFLOOR:
7069 case ISD::FNEARBYINT:
7070 case ISD::FRINT:
7071 case ISD::FROUND:
7072 case ISD::FROUNDEVEN:
7073 if (isPromotedOpNeedingSplit(Op, Subtarget))
7074 return SplitVectorOp(Op, DAG);
7075 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7076 case ISD::LRINT:
7077 case ISD::LLRINT:
7078 if (Op.getValueType().isVector())
7079 return lowerVectorXRINT(Op, DAG, Subtarget);
7080 [[fallthrough]];
7081 case ISD::LROUND:
7082 case ISD::LLROUND: {
7083 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7084 "Unexpected custom legalisation");
7085 SDLoc DL(Op);
7086 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7087 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7088 }
7089 case ISD::STRICT_LRINT:
7090 case ISD::STRICT_LLRINT:
7091 case ISD::STRICT_LROUND:
7092 case ISD::STRICT_LLROUND: {
7093 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7094 "Unexpected custom legalisation");
7095 SDLoc DL(Op);
7096 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7097 {Op.getOperand(0), Op.getOperand(1)});
7098 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7099 {Ext.getValue(1), Ext.getValue(0)});
7100 }
7101 case ISD::VECREDUCE_ADD:
7102 case ISD::VECREDUCE_UMAX:
7103 case ISD::VECREDUCE_SMAX:
7104 case ISD::VECREDUCE_UMIN:
7105 case ISD::VECREDUCE_SMIN:
7106 return lowerVECREDUCE(Op, DAG);
7107 case ISD::VECREDUCE_AND:
7108 case ISD::VECREDUCE_OR:
7109 case ISD::VECREDUCE_XOR:
7110 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7111 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7112 return lowerVECREDUCE(Op, DAG);
7113 case ISD::VECREDUCE_FADD:
7114 case ISD::VECREDUCE_SEQ_FADD:
7115 case ISD::VECREDUCE_FMIN:
7116 case ISD::VECREDUCE_FMAX:
7117 case ISD::VECREDUCE_FMINIMUM:
7118 case ISD::VECREDUCE_FMAXIMUM:
7119 return lowerFPVECREDUCE(Op, DAG);
7120 case ISD::VP_REDUCE_ADD:
7121 case ISD::VP_REDUCE_UMAX:
7122 case ISD::VP_REDUCE_SMAX:
7123 case ISD::VP_REDUCE_UMIN:
7124 case ISD::VP_REDUCE_SMIN:
7125 case ISD::VP_REDUCE_FADD:
7126 case ISD::VP_REDUCE_SEQ_FADD:
7127 case ISD::VP_REDUCE_FMIN:
7128 case ISD::VP_REDUCE_FMAX:
7129 case ISD::VP_REDUCE_FMINIMUM:
7130 case ISD::VP_REDUCE_FMAXIMUM:
7131 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7132 return SplitVectorReductionOp(Op, DAG);
7133 return lowerVPREDUCE(Op, DAG);
7134 case ISD::VP_REDUCE_AND:
7135 case ISD::VP_REDUCE_OR:
7136 case ISD::VP_REDUCE_XOR:
7137 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7138 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7139 return lowerVPREDUCE(Op, DAG);
7140 case ISD::VP_CTTZ_ELTS:
7141 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7142 return lowerVPCttzElements(Op, DAG);
7143 case ISD::UNDEF: {
7144 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7145 return convertFromScalableVector(Op.getSimpleValueType(),
7146 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7147 }
7148 case ISD::INSERT_SUBVECTOR:
7149 return lowerINSERT_SUBVECTOR(Op, DAG);
7150 case ISD::EXTRACT_SUBVECTOR:
7151 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7152 case ISD::VECTOR_DEINTERLEAVE:
7153 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7154 case ISD::VECTOR_INTERLEAVE:
7155 return lowerVECTOR_INTERLEAVE(Op, DAG);
7156 case ISD::STEP_VECTOR:
7157 return lowerSTEP_VECTOR(Op, DAG);
7158 case ISD::VECTOR_REVERSE:
7159 return lowerVECTOR_REVERSE(Op, DAG);
7160 case ISD::VECTOR_SPLICE:
7161 return lowerVECTOR_SPLICE(Op, DAG);
7162 case ISD::BUILD_VECTOR:
7163 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7164 case ISD::SPLAT_VECTOR: {
7165 MVT VT = Op.getSimpleValueType();
7166 MVT EltVT = VT.getVectorElementType();
7167 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7168 EltVT == MVT::bf16) {
7169 SDLoc DL(Op);
7170 SDValue Elt;
7171 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7172 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7173 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7174 Op.getOperand(0));
7175 else
7176 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7177 MVT IVT = VT.changeVectorElementType(MVT::i16);
7178 return DAG.getNode(ISD::BITCAST, DL, VT,
7179 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7180 }
7181
7182 if (EltVT == MVT::i1)
7183 return lowerVectorMaskSplat(Op, DAG);
7184 return SDValue();
7185 }
7186 case ISD::VECTOR_SHUFFLE:
7187 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7188 case ISD::CONCAT_VECTORS: {
7189 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7190 // better than going through the stack, as the default expansion does.
7191 SDLoc DL(Op);
7192 MVT VT = Op.getSimpleValueType();
7193 MVT ContainerVT = VT;
7194 if (VT.isFixedLengthVector())
7195 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7196
7197 // Recursively split concat_vectors with more than 2 operands:
7198 //
7199 // concat_vector op1, op2, op3, op4
7200 // ->
7201 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7202 //
7203 // This reduces the length of the chain of vslideups and allows us to
7204 // perform the vslideups at a smaller LMUL, limited to MF2.
7205 if (Op.getNumOperands() > 2 &&
7206 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7207 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7209 size_t HalfNumOps = Op.getNumOperands() / 2;
7210 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7211 Op->ops().take_front(HalfNumOps));
7212 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7213 Op->ops().drop_front(HalfNumOps));
7214 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7215 }
7216
7217 unsigned NumOpElts =
7218 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7219 SDValue Vec = DAG.getUNDEF(VT);
7220 for (const auto &OpIdx : enumerate(Op->ops())) {
7221 SDValue SubVec = OpIdx.value();
7222 // Don't insert undef subvectors.
7223 if (SubVec.isUndef())
7224 continue;
7225 Vec =
7226 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7227 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7228 }
7229 return Vec;
7230 }
7231 case ISD::LOAD: {
7232 auto *Load = cast<LoadSDNode>(Op);
7233 EVT VecTy = Load->getMemoryVT();
7234 // Handle normal vector tuple load.
7235 if (VecTy.isRISCVVectorTuple()) {
7236 SDLoc DL(Op);
7237 MVT XLenVT = Subtarget.getXLenVT();
7238 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7239 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7240 unsigned NumElts = Sz / (NF * 8);
7241 int Log2LMUL = Log2_64(NumElts) - 3;
7242
7243 auto Flag = SDNodeFlags();
7244 Flag.setNoUnsignedWrap(true);
7245 SDValue Ret = DAG.getUNDEF(VecTy);
7246 SDValue BasePtr = Load->getBasePtr();
7247 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7248 VROffset =
7249 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7250 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7251 SmallVector<SDValue, 8> OutChains;
7252
7253 // Load NF vector registers and combine them to a vector tuple.
7254 for (unsigned i = 0; i < NF; ++i) {
7255 SDValue LoadVal = DAG.getLoad(
7256 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7257 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7258 OutChains.push_back(LoadVal.getValue(1));
7259 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7260 DAG.getVectorIdxConstant(i, DL));
7261 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7262 }
7263 return DAG.getMergeValues(
7264 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7265 }
7266
7267 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7268 return V;
7269 if (Op.getValueType().isFixedLengthVector())
7270 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7271 return Op;
7272 }
7273 case ISD::STORE: {
7274 auto *Store = cast<StoreSDNode>(Op);
7275 SDValue StoredVal = Store->getValue();
7276 EVT VecTy = StoredVal.getValueType();
7277 // Handle normal vector tuple store.
7278 if (VecTy.isRISCVVectorTuple()) {
7279 SDLoc DL(Op);
7280 MVT XLenVT = Subtarget.getXLenVT();
7281 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7282 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7283 unsigned NumElts = Sz / (NF * 8);
7284 int Log2LMUL = Log2_64(NumElts) - 3;
7285
7286 auto Flag = SDNodeFlags();
7287 Flag.setNoUnsignedWrap(true);
7288 SDValue Ret;
7289 SDValue Chain = Store->getChain();
7290 SDValue BasePtr = Store->getBasePtr();
7291 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7292 VROffset =
7293 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7294 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7295
7296 // Extract subregisters in a vector tuple and store them individually.
7297 for (unsigned i = 0; i < NF; ++i) {
7298 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7299 MVT::getScalableVectorVT(MVT::i8, NumElts),
7300 StoredVal, DAG.getVectorIdxConstant(i, DL));
7301 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7302 MachinePointerInfo(Store->getAddressSpace()),
7303 Store->getOriginalAlign(),
7304 Store->getMemOperand()->getFlags());
7305 Chain = Ret.getValue(0);
7306 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7307 }
7308 return Ret;
7309 }
7310
7311 if (auto V = expandUnalignedRVVStore(Op, DAG))
7312 return V;
7313 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7314 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7315 return Op;
7316 }
7317 case ISD::MLOAD:
7318 case ISD::VP_LOAD:
7319 return lowerMaskedLoad(Op, DAG);
7320 case ISD::MSTORE:
7321 case ISD::VP_STORE:
7322 return lowerMaskedStore(Op, DAG);
7323 case ISD::VECTOR_COMPRESS:
7324 return lowerVectorCompress(Op, DAG);
7325 case ISD::SELECT_CC: {
7326 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7327 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7328 // into separate SETCC+SELECT just like LegalizeDAG.
7329 SDValue Tmp1 = Op.getOperand(0);
7330 SDValue Tmp2 = Op.getOperand(1);
7331 SDValue True = Op.getOperand(2);
7332 SDValue False = Op.getOperand(3);
7333 EVT VT = Op.getValueType();
7334 SDValue CC = Op.getOperand(4);
7335 EVT CmpVT = Tmp1.getValueType();
7336 EVT CCVT =
7337 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7338 SDLoc DL(Op);
7339 SDValue Cond =
7340 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7341 return DAG.getSelect(DL, VT, Cond, True, False);
7342 }
7343 case ISD::SETCC: {
7344 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7345 if (OpVT.isScalarInteger()) {
7346 MVT VT = Op.getSimpleValueType();
7347 SDValue LHS = Op.getOperand(0);
7348 SDValue RHS = Op.getOperand(1);
7349 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7350 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7351 "Unexpected CondCode");
7352
7353 SDLoc DL(Op);
7354
7355 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7356 // convert this to the equivalent of (set(u)ge X, C+1) by using
7357 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7358 // in a register.
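// For example, (setgt X, 5) becomes (xori (slti X, 6), 1), and
// (setugt X, 5) becomes (xori (sltiu X, 6), 1).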
7359 if (isa<ConstantSDNode>(RHS)) {
7360 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7361 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7362 // If this is an unsigned compare and the constant is -1, incrementing
7363 // the constant would change behavior. The result should be false.
7364 if (CCVal == ISD::SETUGT && Imm == -1)
7365 return DAG.getConstant(0, DL, VT);
7366 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7367 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7368 SDValue SetCC = DAG.getSetCC(
7369 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7370 return DAG.getLogicalNOT(DL, SetCC, VT);
7371 }
7372 }
7373
7374 // Not a constant we could handle, swap the operands and condition code to
7375 // SETLT/SETULT.
7376 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7377 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7378 }
7379
7380 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7381 return SplitVectorOp(Op, DAG);
7382
7383 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7384 }
7385 case ISD::ADD:
7386 case ISD::SUB:
7387 case ISD::MUL:
7388 case ISD::MULHS:
7389 case ISD::MULHU:
7390 case ISD::AND:
7391 case ISD::OR:
7392 case ISD::XOR:
7393 case ISD::SDIV:
7394 case ISD::SREM:
7395 case ISD::UDIV:
7396 case ISD::UREM:
7397 case ISD::BSWAP:
7398 case ISD::CTPOP:
7399 return lowerToScalableOp(Op, DAG);
7400 case ISD::SHL:
7401 case ISD::SRA:
7402 case ISD::SRL:
7403 if (Op.getSimpleValueType().isFixedLengthVector())
7404 return lowerToScalableOp(Op, DAG);
7405 // This can be called for an i32 shift amount that needs to be promoted.
7406 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7407 "Unexpected custom legalisation");
7408 return SDValue();
7409 case ISD::FABS:
7410 case ISD::FNEG:
7411 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7412 return lowerFABSorFNEG(Op, DAG, Subtarget);
7413 [[fallthrough]];
7414 case ISD::FADD:
7415 case ISD::FSUB:
7416 case ISD::FMUL:
7417 case ISD::FDIV:
7418 case ISD::FSQRT:
7419 case ISD::FMA:
7420 case ISD::FMINNUM:
7421 case ISD::FMAXNUM:
7422 if (isPromotedOpNeedingSplit(Op, Subtarget))
7423 return SplitVectorOp(Op, DAG);
7424 [[fallthrough]];
7425 case ISD::AVGFLOORS:
7426 case ISD::AVGFLOORU:
7427 case ISD::AVGCEILS:
7428 case ISD::AVGCEILU:
7429 case ISD::SMIN:
7430 case ISD::SMAX:
7431 case ISD::UMIN:
7432 case ISD::UMAX:
7433 case ISD::UADDSAT:
7434 case ISD::USUBSAT:
7435 case ISD::SADDSAT:
7436 case ISD::SSUBSAT:
7437 return lowerToScalableOp(Op, DAG);
7438 case ISD::ABDS:
7439 case ISD::ABDU: {
7440 SDLoc dl(Op);
7441 EVT VT = Op->getValueType(0);
7442 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7443 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7444 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7445
7446 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7447 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7448 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7449 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7450 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7451 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7452 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7453 }
7454 case ISD::ABS:
7455 case ISD::VP_ABS:
7456 return lowerABS(Op, DAG);
7457 case ISD::CTLZ:
7458 case ISD::CTLZ_ZERO_UNDEF:
7459 case ISD::CTTZ:
7460 case ISD::CTTZ_ZERO_UNDEF:
7461 if (Subtarget.hasStdExtZvbb())
7462 return lowerToScalableOp(Op, DAG);
7463 assert(Op.getOpcode() != ISD::CTTZ);
7464 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7465 case ISD::VSELECT:
7466 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7467 case ISD::FCOPYSIGN:
7468 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7469 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7470 if (isPromotedOpNeedingSplit(Op, Subtarget))
7471 return SplitVectorOp(Op, DAG);
7472 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7473 case ISD::STRICT_FADD:
7474 case ISD::STRICT_FSUB:
7475 case ISD::STRICT_FMUL:
7476 case ISD::STRICT_FDIV:
7477 case ISD::STRICT_FSQRT:
7478 case ISD::STRICT_FMA:
7479 if (isPromotedOpNeedingSplit(Op, Subtarget))
7480 return SplitStrictFPVectorOp(Op, DAG);
7481 return lowerToScalableOp(Op, DAG);
7482 case ISD::STRICT_FSETCC:
7483 case ISD::STRICT_FSETCCS:
7484 return lowerVectorStrictFSetcc(Op, DAG);
7485 case ISD::STRICT_FCEIL:
7486 case ISD::STRICT_FRINT:
7487 case ISD::STRICT_FFLOOR:
7488 case ISD::STRICT_FTRUNC:
7489 case ISD::STRICT_FNEARBYINT:
7490 case ISD::STRICT_FROUND:
7491 case ISD::STRICT_FROUNDEVEN:
7492 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7493 case ISD::MGATHER:
7494 case ISD::VP_GATHER:
7495 return lowerMaskedGather(Op, DAG);
7496 case ISD::MSCATTER:
7497 case ISD::VP_SCATTER:
7498 return lowerMaskedScatter(Op, DAG);
7499 case ISD::GET_ROUNDING:
7500 return lowerGET_ROUNDING(Op, DAG);
7501 case ISD::SET_ROUNDING:
7502 return lowerSET_ROUNDING(Op, DAG);
7503 case ISD::EH_DWARF_CFA:
7504 return lowerEH_DWARF_CFA(Op, DAG);
7505 case ISD::VP_MERGE:
7506 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7507 return lowerVPMergeMask(Op, DAG);
7508 [[fallthrough]];
7509 case ISD::VP_SELECT:
7510 case ISD::VP_ADD:
7511 case ISD::VP_SUB:
7512 case ISD::VP_MUL:
7513 case ISD::VP_SDIV:
7514 case ISD::VP_UDIV:
7515 case ISD::VP_SREM:
7516 case ISD::VP_UREM:
7517 case ISD::VP_UADDSAT:
7518 case ISD::VP_USUBSAT:
7519 case ISD::VP_SADDSAT:
7520 case ISD::VP_SSUBSAT:
7521 case ISD::VP_LRINT:
7522 case ISD::VP_LLRINT:
7523 return lowerVPOp(Op, DAG);
7524 case ISD::VP_AND:
7525 case ISD::VP_OR:
7526 case ISD::VP_XOR:
7527 return lowerLogicVPOp(Op, DAG);
7528 case ISD::VP_FADD:
7529 case ISD::VP_FSUB:
7530 case ISD::VP_FMUL:
7531 case ISD::VP_FDIV:
7532 case ISD::VP_FNEG:
7533 case ISD::VP_FABS:
7534 case ISD::VP_SQRT:
7535 case ISD::VP_FMA:
7536 case ISD::VP_FMINNUM:
7537 case ISD::VP_FMAXNUM:
7538 case ISD::VP_FCOPYSIGN:
7539 if (isPromotedOpNeedingSplit(Op, Subtarget))
7540 return SplitVPOp(Op, DAG);
7541 [[fallthrough]];
7542 case ISD::VP_SRA:
7543 case ISD::VP_SRL:
7544 case ISD::VP_SHL:
7545 return lowerVPOp(Op, DAG);
7546 case ISD::VP_IS_FPCLASS:
7547 return LowerIS_FPCLASS(Op, DAG);
7548 case ISD::VP_SIGN_EXTEND:
7549 case ISD::VP_ZERO_EXTEND:
7550 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7551 return lowerVPExtMaskOp(Op, DAG);
7552 return lowerVPOp(Op, DAG);
7553 case ISD::VP_TRUNCATE:
7554 return lowerVectorTruncLike(Op, DAG);
7555 case ISD::VP_FP_EXTEND:
7556 case ISD::VP_FP_ROUND:
7557 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7558 case ISD::VP_SINT_TO_FP:
7559 case ISD::VP_UINT_TO_FP:
7560 if (Op.getValueType().isVector() &&
7561 ((Op.getValueType().getScalarType() == MVT::f16 &&
7562 (Subtarget.hasVInstructionsF16Minimal() &&
7563 !Subtarget.hasVInstructionsF16())) ||
7564 Op.getValueType().getScalarType() == MVT::bf16)) {
7565 if (isPromotedOpNeedingSplit(Op, Subtarget))
7566 return SplitVectorOp(Op, DAG);
7567 // int -> f32
7568 SDLoc DL(Op);
7569 MVT NVT =
7570 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7571 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7572 // f32 -> [b]f16
7573 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7574 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7575 }
7576 [[fallthrough]];
7577 case ISD::VP_FP_TO_SINT:
7578 case ISD::VP_FP_TO_UINT:
7579 if (SDValue Op1 = Op.getOperand(0);
7580 Op1.getValueType().isVector() &&
7581 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7582 (Subtarget.hasVInstructionsF16Minimal() &&
7583 !Subtarget.hasVInstructionsF16())) ||
7584 Op1.getValueType().getScalarType() == MVT::bf16)) {
7585 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7586 return SplitVectorOp(Op, DAG);
7587 // [b]f16 -> f32
7588 SDLoc DL(Op);
7589 MVT NVT = MVT::getVectorVT(MVT::f32,
7590 Op1.getValueType().getVectorElementCount());
7591 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7592 // f32 -> int
7593 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7594 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7595 }
7596 return lowerVPFPIntConvOp(Op, DAG);
7597 case ISD::VP_SETCC:
7598 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7599 return SplitVPOp(Op, DAG);
7600 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7601 return lowerVPSetCCMaskOp(Op, DAG);
7602 [[fallthrough]];
7603 case ISD::VP_SMIN:
7604 case ISD::VP_SMAX:
7605 case ISD::VP_UMIN:
7606 case ISD::VP_UMAX:
7607 case ISD::VP_BITREVERSE:
7608 case ISD::VP_BSWAP:
7609 return lowerVPOp(Op, DAG);
7610 case ISD::VP_CTLZ:
7611 case ISD::VP_CTLZ_ZERO_UNDEF:
7612 if (Subtarget.hasStdExtZvbb())
7613 return lowerVPOp(Op, DAG);
7614 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7615 case ISD::VP_CTTZ:
7616 case ISD::VP_CTTZ_ZERO_UNDEF:
7617 if (Subtarget.hasStdExtZvbb())
7618 return lowerVPOp(Op, DAG);
7619 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7620 case ISD::VP_CTPOP:
7621 return lowerVPOp(Op, DAG);
7622 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7623 return lowerVPStridedLoad(Op, DAG);
7624 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7625 return lowerVPStridedStore(Op, DAG);
7626 case ISD::VP_FCEIL:
7627 case ISD::VP_FFLOOR:
7628 case ISD::VP_FRINT:
7629 case ISD::VP_FNEARBYINT:
7630 case ISD::VP_FROUND:
7631 case ISD::VP_FROUNDEVEN:
7632 case ISD::VP_FROUNDTOZERO:
7633 if (isPromotedOpNeedingSplit(Op, Subtarget))
7634 return SplitVPOp(Op, DAG);
7635 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7636 case ISD::VP_FMAXIMUM:
7637 case ISD::VP_FMINIMUM:
7638 if (isPromotedOpNeedingSplit(Op, Subtarget))
7639 return SplitVPOp(Op, DAG);
7640 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7641 case ISD::EXPERIMENTAL_VP_SPLICE:
7642 return lowerVPSpliceExperimental(Op, DAG);
7643 case ISD::EXPERIMENTAL_VP_REVERSE:
7644 return lowerVPReverseExperimental(Op, DAG);
7645 case ISD::EXPERIMENTAL_VP_SPLAT:
7646 return lowerVPSplatExperimental(Op, DAG);
7647 case ISD::CLEAR_CACHE: {
7648 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7649 "llvm.clear_cache only needs custom lower on Linux targets");
7650 SDLoc DL(Op);
7651 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7652 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7653 Op.getOperand(2), Flags, DL);
7654 }
7655 case ISD::INIT_TRAMPOLINE:
7656 return lowerINIT_TRAMPOLINE(Op, DAG);
7657 case ISD::ADJUST_TRAMPOLINE:
7658 return lowerADJUST_TRAMPOLINE(Op, DAG);
7659 }
7660}
7661
7662SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7663 SDValue Start, SDValue End,
7664 SDValue Flags, SDLoc DL) const {
7665 MakeLibCallOptions CallOptions;
7666 std::pair<SDValue, SDValue> CallResult =
7667 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7668 {Start, End, Flags}, CallOptions, DL, InChain);
7669
7670 // This function returns void so only the out chain matters.
7671 return CallResult.second;
7672}
7673
7674SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7675 SelectionDAG &DAG) const {
7676 if (!Subtarget.is64Bit())
7677 llvm::report_fatal_error("Trampolines only implemented for RV64");
7678
7679 // Create an MCCodeEmitter to encode instructions.
7680 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7681 assert(TLO);
7682 MCContext &MCCtx = TLO->getContext();
7683
7684 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7685 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7686
7687 SDValue Root = Op.getOperand(0);
7688 SDValue Trmp = Op.getOperand(1); // trampoline
7689 SDLoc dl(Op);
7690
7691 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7692
7693 // We store in the trampoline buffer the following instructions and data.
7694 // Offset:
7695 // 0: auipc t2, 0
7696 // 4: ld t0, 24(t2)
7697 // 8: ld t2, 16(t2)
7698 // 12: jalr t0
7699 // 16: <StaticChainOffset>
7700 // 24: <FunctionAddressOffset>
7701 // 32:
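// That is, the first 16 bytes hold four 32-bit instructions, and the two
// 8-byte slots at offsets 16 and 24 hold the static chain value and the
// function address respectively.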
7702
7703 constexpr unsigned StaticChainOffset = 16;
7704 constexpr unsigned FunctionAddressOffset = 24;
7705
7706 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7707 assert(STI);
7708 auto GetEncoding = [&](const MCInst &MC) {
7709 SmallVector<char, 32> CB;
7710 SmallVector<MCFixup> Fixups;
7711 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7712 uint32_t Encoding = support::endian::read32le(CB.data());
7713 return Encoding;
7714 };
7715
7716 SDValue OutChains[6];
7717
7718 uint32_t Encodings[] = {
7719 // auipc t2, 0
7720 // Loads the current PC into t2.
7721 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7722 // ld t0, 24(t2)
7723 // Loads the function address into t0. Note that we are using offsets
7724 // pc-relative to the first instruction of the trampoline.
7725 GetEncoding(
7726 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7727 FunctionAddressOffset)),
7728 // ld t2, 16(t2)
7729 // Load the value of the static chain.
7730 GetEncoding(
7731 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7732 StaticChainOffset)),
7733 // jalr t0
7734 // Jump to the function.
7735 GetEncoding(MCInstBuilder(RISCV::JALR)
7736 .addReg(RISCV::X0)
7737 .addReg(RISCV::X5)
7738 .addImm(0))};
7739
7740 // Store encoded instructions.
7741 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7742 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7743 DAG.getConstant(Idx * 4, dl, MVT::i64))
7744 : Trmp;
7745 OutChains[Idx] = DAG.getTruncStore(
7746 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7747 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7748 }
7749
7750 // Now store the variable part of the trampoline.
7751 SDValue FunctionAddress = Op.getOperand(2);
7752 SDValue StaticChain = Op.getOperand(3);
7753
7754 // Store the given static chain and function pointer in the trampoline buffer.
7755 struct OffsetValuePair {
7756 const unsigned Offset;
7757 const SDValue Value;
7758 SDValue Addr = SDValue(); // Used to cache the address.
7759 } OffsetValues[] = {
7760 {StaticChainOffset, StaticChain},
7761 {FunctionAddressOffset, FunctionAddress},
7762 };
7763 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7764 SDValue Addr =
7765 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7766 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7767 OffsetValue.Addr = Addr;
7768 OutChains[Idx + 4] =
7769 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7770 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7771 }
7772
7773 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7774
7775 // The end of the trampoline's instructions coincides with the static chain
7776 // address that we computed earlier.
7777 SDValue EndOfTrmp = OffsetValues[0].Addr;
7778
7779 // Call clear cache on the trampoline instructions.
7780 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7781 Trmp, EndOfTrmp);
7782
7783 return Chain;
7784}
7785
7786SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7787 SelectionDAG &DAG) const {
7788 if (!Subtarget.is64Bit())
7789 llvm::report_fatal_error("Trampolines only implemented for RV64");
7790
7791 return Op.getOperand(0);
7792}
7793
7794 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7795 SelectionDAG &DAG, unsigned Flags) {
7796 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7797}
7798
7799 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7800 SelectionDAG &DAG, unsigned Flags) {
7801 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7802 Flags);
7803}
7804
7805 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7806 SelectionDAG &DAG, unsigned Flags) {
7807 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7808 N->getOffset(), Flags);
7809}
7810
7811 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7812 SelectionDAG &DAG, unsigned Flags) {
7813 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7814}
7815
7816 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
7817 EVT Ty, SelectionDAG &DAG) {
7818 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7819 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7820 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7821 return DAG.getLoad(
7822 Ty, DL, DAG.getEntryNode(), LC,
7823 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7824}
7825
7826 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
7827 EVT Ty, SelectionDAG &DAG) {
7828 RISCVConstantPoolValue *CPV =
7829 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7830 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7831 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7832 return DAG.getLoad(
7833 Ty, DL, DAG.getEntryNode(), LC,
7834 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7835}
7836
7837template <class NodeTy>
7838SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7839 bool IsLocal, bool IsExternWeak) const {
7840 SDLoc DL(N);
7841 EVT Ty = getPointerTy(DAG.getDataLayout());
7842
7843 // When HWASAN is used and tagging of global variables is enabled
7844 // they should be accessed via the GOT, since the tagged address of a global
7845 // is incompatible with existing code models. This also applies to non-pic
7846 // mode.
7847 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7848 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7849 if (IsLocal && !Subtarget.allowTaggedGlobals())
7850 // Use PC-relative addressing to access the symbol. This generates the
7851 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7852 // %pcrel_lo(auipc)).
7853 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7854
7855 // Use PC-relative addressing to access the GOT for this symbol, then load
7856 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7857 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7858 SDValue Load =
7859 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7860 MachineFunction &MF = DAG.getMachineFunction();
7861 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7862 MachinePointerInfo::getGOT(MF),
7863 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7864 MachineMemOperand::MODereferenceable,
7865 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7866 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7867 return Load;
7868 }
7869
7870 switch (getTargetMachine().getCodeModel()) {
7871 default:
7872 report_fatal_error("Unsupported code model for lowering");
7873 case CodeModel::Small: {
7874 // Generate a sequence for accessing addresses within the first 2 GiB of
7875 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7876 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7877 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7878 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7879 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7880 }
7881 case CodeModel::Medium: {
7882 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7883 if (IsExternWeak) {
7884 // An extern weak symbol may be undefined, i.e. have value 0, which may
7885 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7886 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7887 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7888 SDValue Load =
7889 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7890 MachineFunction &MF = DAG.getMachineFunction();
7891 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7892 MachinePointerInfo::getGOT(MF),
7893 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7894 MachineMemOperand::MODereferenceable,
7895 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7896 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7897 return Load;
7898 }
7899
7900 // Generate a sequence for accessing addresses within any 2GiB range within
7901 // the address space. This generates the pattern (PseudoLLA sym), which
7902 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7903 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7904 }
7905 case CodeModel::Large: {
7906 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
7907 return getLargeGlobalAddress(G, DL, Ty, DAG);
7908
7909 // Use PC-relative addressing for other node types.
7910 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7911 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7912 }
7913 }
7914}
7915
7916SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7917 SelectionDAG &DAG) const {
7918 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7919 assert(N->getOffset() == 0 && "unexpected offset in global node");
7920 const GlobalValue *GV = N->getGlobal();
7921 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7922}
7923
7924SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7925 SelectionDAG &DAG) const {
7926 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7927
7928 return getAddr(N, DAG);
7929}
7930
7931SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7932 SelectionDAG &DAG) const {
7933 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7934
7935 return getAddr(N, DAG);
7936}
7937
7938SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7939 SelectionDAG &DAG) const {
7940 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7941
7942 return getAddr(N, DAG);
7943}
7944
7945SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7946 SelectionDAG &DAG,
7947 bool UseGOT) const {
7948 SDLoc DL(N);
7949 EVT Ty = getPointerTy(DAG.getDataLayout());
7950 const GlobalValue *GV = N->getGlobal();
7951 MVT XLenVT = Subtarget.getXLenVT();
7952
7953 if (UseGOT) {
7954 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7955 // load the address from the GOT and add the thread pointer. This generates
7956 // the pattern (PseudoLA_TLS_IE sym), which expands to
7957 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7958 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7959 SDValue Load =
7960 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7961 MachineFunction &MF = DAG.getMachineFunction();
7962 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7963 MachinePointerInfo::getGOT(MF),
7964 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7965 MachineMemOperand::MODereferenceable,
7966 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7967 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7968
7969 // Add the thread pointer.
7970 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7971 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7972 }
7973
7974 // Generate a sequence for accessing the address relative to the thread
7975 // pointer, with the appropriate adjustment for the thread pointer offset.
7976 // This generates the pattern
7977 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7978 SDValue AddrHi =
7979 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7980 SDValue AddrAdd =
7981 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7982 SDValue AddrLo =
7983 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7984
7985 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7986 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7987 SDValue MNAdd =
7988 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7989 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7990}
7991
7992SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7993 SelectionDAG &DAG) const {
7994 SDLoc DL(N);
7995 EVT Ty = getPointerTy(DAG.getDataLayout());
7996 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7997 const GlobalValue *GV = N->getGlobal();
7998
7999 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8000 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8001 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8002 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8003 SDValue Load =
8004 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8005
8006 // Prepare argument list to generate call.
8007 ArgListTy Args;
8008 ArgListEntry Entry;
8009 Entry.Node = Load;
8010 Entry.Ty = CallTy;
8011 Args.push_back(Entry);
8012
8013 // Setup call to __tls_get_addr.
8014 TargetLowering::CallLoweringInfo CLI(DAG);
8015 CLI.setDebugLoc(DL)
8016 .setChain(DAG.getEntryNode())
8017 .setLibCallee(CallingConv::C, CallTy,
8018 DAG.getExternalSymbol("__tls_get_addr", Ty),
8019 std::move(Args));
8020
8021 return LowerCallTo(CLI).first;
8022}
8023
8024SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8025 SelectionDAG &DAG) const {
8026 SDLoc DL(N);
8027 EVT Ty = getPointerTy(DAG.getDataLayout());
8028 const GlobalValue *GV = N->getGlobal();
8029
8030 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8031 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8032 //
8033 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8034 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8035 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8036 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8037 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8038 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8039}
8040
8041SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8042 SelectionDAG &DAG) const {
8043 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8044 assert(N->getOffset() == 0 && "unexpected offset in global node");
8045
8046 if (DAG.getTarget().useEmulatedTLS())
8047 return LowerToTLSEmulatedModel(N, DAG);
8048
8049 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8050
8051 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8052 CallingConv::GHC)
8053 report_fatal_error("In GHC calling convention TLS is not supported");
8054
8055 SDValue Addr;
8056 switch (Model) {
8057 case TLSModel::LocalExec:
8058 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8059 break;
8060 case TLSModel::InitialExec:
8061 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8062 break;
8063 case TLSModel::LocalDynamic:
8064 case TLSModel::GeneralDynamic:
8065 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8066 : getDynamicTLSAddr(N, DAG);
8067 break;
8068 }
8069
8070 return Addr;
8071}
8072
8073// Return true if Val is equal to (setcc LHS, RHS, CC).
8074// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8075// Otherwise, return std::nullopt.
8076static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8077 ISD::CondCode CC, SDValue Val) {
8078 assert(Val->getOpcode() == ISD::SETCC);
8079 SDValue LHS2 = Val.getOperand(0);
8080 SDValue RHS2 = Val.getOperand(1);
8081 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8082
8083 if (LHS == LHS2 && RHS == RHS2) {
8084 if (CC == CC2)
8085 return true;
8086 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8087 return false;
8088 } else if (LHS == RHS2 && RHS == LHS2) {
8089 CC2 = ISD::getSetCCSwappedOperands(CC2);
8090 if (CC == CC2)
8091 return true;
8092 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8093 return false;
8094 }
8095
8096 return std::nullopt;
8097}
8098
8099 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8100 const RISCVSubtarget &Subtarget) {
8101 SDValue CondV = N->getOperand(0);
8102 SDValue TrueV = N->getOperand(1);
8103 SDValue FalseV = N->getOperand(2);
8104 MVT VT = N->getSimpleValueType(0);
8105 SDLoc DL(N);
8106
8107 if (!Subtarget.hasConditionalMoveFusion()) {
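// These folds rely on the scalar condition being a 0/1 value: -c is then
// either 0 or all-ones and (c-1) is the opposite, so a select with a
// constant 0 or -1 arm collapses into a single AND/OR below.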
8108 // (select c, -1, y) -> -c | y
8109 if (isAllOnesConstant(TrueV)) {
8110 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8111 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8112 }
8113 // (select c, y, -1) -> (c-1) | y
8114 if (isAllOnesConstant(FalseV)) {
8115 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8116 DAG.getAllOnesConstant(DL, VT));
8117 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8118 }
8119
8120 // (select c, 0, y) -> (c-1) & y
8121 if (isNullConstant(TrueV)) {
8122 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8123 DAG.getAllOnesConstant(DL, VT));
8124 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8125 }
8126 // (select c, y, 0) -> -c & y
8127 if (isNullConstant(FalseV)) {
8128 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8129 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8130 }
8131 }
8132
8133 // select c, ~x, x --> xor -c, x
8134 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8135 const APInt &TrueVal = TrueV->getAsAPIntVal();
8136 const APInt &FalseVal = FalseV->getAsAPIntVal();
8137 if (~TrueVal == FalseVal) {
8138 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8139 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8140 }
8141 }
8142
8143 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8144 // when both truev and falsev are also setcc.
8145 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8146 FalseV.getOpcode() == ISD::SETCC) {
8147 SDValue LHS = CondV.getOperand(0);
8148 SDValue RHS = CondV.getOperand(1);
8149 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8150
8151 // (select x, x, y) -> x | y
8152 // (select !x, x, y) -> x & y
8153 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8154 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8155 DAG.getFreeze(FalseV));
8156 }
8157 // (select x, y, x) -> x & y
8158 // (select !x, y, x) -> x | y
8159 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8160 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8161 DAG.getFreeze(TrueV), FalseV);
8162 }
8163 }
8164
8165 return SDValue();
8166}
8167
8168// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8169// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8170// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8171// being `0` or `-1`. In such cases we can replace `select` with `and`.
8172// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8173// than `c0`?
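// As a sketch of the intent: (and (select cond, X, 0), 1) can become
// (select cond, (and X, 1), 0), where the constant arm folds away to 0 and
// the resulting select is then cheap to lower.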
8174static SDValue
8175 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8176 const RISCVSubtarget &Subtarget) {
8177 if (Subtarget.hasShortForwardBranchOpt())
8178 return SDValue();
8179
8180 unsigned SelOpNo = 0;
8181 SDValue Sel = BO->getOperand(0);
8182 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8183 SelOpNo = 1;
8184 Sel = BO->getOperand(1);
8185 }
8186
8187 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8188 return SDValue();
8189
8190 unsigned ConstSelOpNo = 1;
8191 unsigned OtherSelOpNo = 2;
8192 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8193 ConstSelOpNo = 2;
8194 OtherSelOpNo = 1;
8195 }
8196 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8197 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8198 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8199 return SDValue();
8200
8201 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8202 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8203 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8204 return SDValue();
8205
8206 SDLoc DL(Sel);
8207 EVT VT = BO->getValueType(0);
8208
8209 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8210 if (SelOpNo == 1)
8211 std::swap(NewConstOps[0], NewConstOps[1]);
8212
8213 SDValue NewConstOp =
8214 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8215 if (!NewConstOp)
8216 return SDValue();
8217
8218 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8219 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8220 return SDValue();
8221
8222 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8223 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8224 if (SelOpNo == 1)
8225 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8226 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8227
8228 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8229 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8230 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8231}
8232
8233SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8234 SDValue CondV = Op.getOperand(0);
8235 SDValue TrueV = Op.getOperand(1);
8236 SDValue FalseV = Op.getOperand(2);
8237 SDLoc DL(Op);
8238 MVT VT = Op.getSimpleValueType();
8239 MVT XLenVT = Subtarget.getXLenVT();
8240
8241 // Lower vector SELECTs to VSELECTs by splatting the condition.
8242 if (VT.isVector()) {
8243 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8244 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8245 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8246 }
8247
8248 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8249 // nodes to implement the SELECT. Performing the lowering here allows for
8250 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8251 // sequence or RISCVISD::SELECT_CC node (branch-based select).
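// (Zicond semantics for reference: czero.eqz rd, rs1, rs2 yields 0 when
// rs2 == 0 and rs1 otherwise; czero.nez yields 0 when rs2 != 0 and rs1
// otherwise. The CZERO_EQZ/CZERO_NEZ nodes below follow that behavior.)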
8252 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8253 VT.isScalarInteger()) {
8254 // (select c, t, 0) -> (czero_eqz t, c)
8255 if (isNullConstant(FalseV))
8256 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8257 // (select c, 0, f) -> (czero_nez f, c)
8258 if (isNullConstant(TrueV))
8259 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8260
8261 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8262 if (TrueV.getOpcode() == ISD::AND &&
8263 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8264 return DAG.getNode(
8265 ISD::OR, DL, VT, TrueV,
8266 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8267 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8268 if (FalseV.getOpcode() == ISD::AND &&
8269 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8270 return DAG.getNode(
8271 ISD::OR, DL, VT, FalseV,
8272 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8273
8274 // Try some other optimizations before falling back to generic lowering.
8275 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8276 return V;
8277
8278 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8279 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
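    // For example, with c1 = 3 and c2 = 7, (select c, 3, 7) becomes
    // (add (czero_nez 4, c), 3): the CZERO_NEZ yields 0 when c is non-zero
    // (so the add produces 3) and 4 when c is zero (producing 7).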
8280 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8281 const APInt &TrueVal = TrueV->getAsAPIntVal();
8282 const APInt &FalseVal = FalseV->getAsAPIntVal();
8283 const int TrueValCost = RISCVMatInt::getIntMatCost(
8284 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8285 const int FalseValCost = RISCVMatInt::getIntMatCost(
8286 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8287 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8288 SDValue LHSVal = DAG.getConstant(
8289 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8290 SDValue RHSVal =
8291 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8292      SDValue CMOV =
8293          DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8294                      DL, VT, LHSVal, CondV);
8295 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8296 }
8297
8298 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8299 // Unless we have the short forward branch optimization.
8300 if (!Subtarget.hasConditionalMoveFusion())
8301 return DAG.getNode(
8302 ISD::OR, DL, VT,
8303 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8304 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8305 }
8306
8307 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8308 return V;
8309
8310 if (Op.hasOneUse()) {
8311 unsigned UseOpc = Op->user_begin()->getOpcode();
8312 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8313 SDNode *BinOp = *Op->user_begin();
8314 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8315 DAG, Subtarget)) {
8316 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8317 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8318 // may return a constant node and cause crash in lowerSELECT.
8319 if (NewSel.getOpcode() == ISD::SELECT)
8320 return lowerSELECT(NewSel, DAG);
8321 return NewSel;
8322 }
8323 }
8324 }
8325
8326 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8327 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8328 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8329 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8330 if (FPTV && FPFV) {
8331 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8332 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8333 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8334 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8335 DAG.getConstant(1, DL, XLenVT));
8336 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8337 }
8338 }
8339
8340 // If the condition is not an integer SETCC which operates on XLenVT, we need
8341 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8342 // (select condv, truev, falsev)
8343 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8344 if (CondV.getOpcode() != ISD::SETCC ||
8345 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8346 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8347 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8348
8349 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8350
8351 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8352 }
8353
8354 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8355 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8356 // advantage of the integer compare+branch instructions. i.e.:
8357 // (select (setcc lhs, rhs, cc), truev, falsev)
8358 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8359 SDValue LHS = CondV.getOperand(0);
8360 SDValue RHS = CondV.getOperand(1);
8361 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8362
8363  // Special case for a select of 2 constants that have a difference of 1.
8364 // Normally this is done by DAGCombine, but if the select is introduced by
8365 // type legalization or op legalization, we miss it. Restricting to SETLT
8366 // case for now because that is what signed saturating add/sub need.
8367 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8368 // but we would probably want to swap the true/false values if the condition
8369 // is SETGE/SETLE to avoid an XORI.
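  // For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4),
  // since the i1 condition materializes as 0 or 1.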
8370 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8371 CCVal == ISD::SETLT) {
8372 const APInt &TrueVal = TrueV->getAsAPIntVal();
8373 const APInt &FalseVal = FalseV->getAsAPIntVal();
8374 if (TrueVal - 1 == FalseVal)
8375 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8376 if (TrueVal + 1 == FalseVal)
8377 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8378 }
8379
8380 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8381 // 1 < x ? x : 1 -> 0 < x ? x : 1
8382 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8383 RHS == TrueV && LHS == FalseV) {
8384 LHS = DAG.getConstant(0, DL, VT);
8385 // 0 <u x is the same as x != 0.
8386 if (CCVal == ISD::SETULT) {
8387 std::swap(LHS, RHS);
8388 CCVal = ISD::SETNE;
8389 }
8390 }
8391
8392 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8393 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8394 RHS == FalseV) {
8395 RHS = DAG.getConstant(0, DL, VT);
8396 }
8397
8398 SDValue TargetCC = DAG.getCondCode(CCVal);
8399
8400 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8401 // (select (setcc lhs, rhs, CC), constant, falsev)
8402 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8403 std::swap(TrueV, FalseV);
8404 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8405 }
8406
8407 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8408 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8409}
8410
8411SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8412 SDValue CondV = Op.getOperand(1);
8413 SDLoc DL(Op);
8414 MVT XLenVT = Subtarget.getXLenVT();
8415
8416 if (CondV.getOpcode() == ISD::SETCC &&
8417 CondV.getOperand(0).getValueType() == XLenVT) {
8418 SDValue LHS = CondV.getOperand(0);
8419 SDValue RHS = CondV.getOperand(1);
8420 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8421
8422 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8423
8424 SDValue TargetCC = DAG.getCondCode(CCVal);
8425 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8426 LHS, RHS, TargetCC, Op.getOperand(2));
8427 }
8428
8429 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8430 CondV, DAG.getConstant(0, DL, XLenVT),
8431 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8432}
8433
8434SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8435  MachineFunction &MF = DAG.getMachineFunction();
8436  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8437
8438  SDLoc DL(Op);
8439  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8440                                 getPointerTy(MF.getDataLayout()));
8441
8442 // vastart just stores the address of the VarArgsFrameIndex slot into the
8443 // memory location argument.
8444 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8445 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8446 MachinePointerInfo(SV));
8447}
8448
8449SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8450 SelectionDAG &DAG) const {
8451 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8452  MachineFunction &MF = DAG.getMachineFunction();
8453  MachineFrameInfo &MFI = MF.getFrameInfo();
8454 MFI.setFrameAddressIsTaken(true);
8455 Register FrameReg = RI.getFrameRegister(MF);
8456 int XLenInBytes = Subtarget.getXLen() / 8;
8457
8458 EVT VT = Op.getValueType();
8459 SDLoc DL(Op);
8460 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8461 unsigned Depth = Op.getConstantOperandVal(0);
8462 while (Depth--) {
8463 int Offset = -(XLenInBytes * 2);
8464 SDValue Ptr = DAG.getNode(
8465 ISD::ADD, DL, VT, FrameAddr,
8466        DAG.getSignedConstant(Offset, DL, VT));
8467    FrameAddr =
8468 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8469 }
8470 return FrameAddr;
8471}
8472
8473SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8474 SelectionDAG &DAG) const {
8475 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8476  MachineFunction &MF = DAG.getMachineFunction();
8477  MachineFrameInfo &MFI = MF.getFrameInfo();
8478 MFI.setReturnAddressIsTaken(true);
8479 MVT XLenVT = Subtarget.getXLenVT();
8480 int XLenInBytes = Subtarget.getXLen() / 8;
8481
8482  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8483    return SDValue();
8484
8485 EVT VT = Op.getValueType();
8486 SDLoc DL(Op);
8487 unsigned Depth = Op.getConstantOperandVal(0);
8488 if (Depth) {
8489 int Off = -XLenInBytes;
8490 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8491 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8492 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8493 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8494                       MachinePointerInfo());
8495  }
8496
8497 // Return the value of the return address register, marking it an implicit
8498 // live-in.
8499 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8500 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8501}
8502
8503SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8504 SelectionDAG &DAG) const {
8505 SDLoc DL(Op);
8506 SDValue Lo = Op.getOperand(0);
8507 SDValue Hi = Op.getOperand(1);
8508 SDValue Shamt = Op.getOperand(2);
8509 EVT VT = Lo.getValueType();
8510
8511 // if Shamt-XLEN < 0: // Shamt < XLEN
8512 // Lo = Lo << Shamt
8513 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8514 // else:
8515 // Lo = 0
8516 // Hi = Lo << (Shamt-XLEN)
8517
8518 SDValue Zero = DAG.getConstant(0, DL, VT);
8519 SDValue One = DAG.getConstant(1, DL, VT);
8520 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8521 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8522 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8523 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8524
8525 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8526 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8527 SDValue ShiftRightLo =
8528 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8529 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8530 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8531 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8532
8533 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8534
8535 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8536 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8537
8538 SDValue Parts[2] = {Lo, Hi};
8539 return DAG.getMergeValues(Parts, DL);
8540}
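// A minimal standalone C++ sketch (illustrative helper name, not part of this
// lowering) of the branchless 2*XLEN shift-left expansion above, assuming
// XLEN == 32 and 0 <= Shamt < 64. RV32 shifts use only the low 5 bits of the
// shift amount, which is modelled here by masking with 31 so the unselected
// results are harmless.
#include <cstdint>

static void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t LoTrue = Lo << (Shamt & 31);
  uint32_t HiTrue = (Hi << (Shamt & 31)) | ((Lo >> 1) >> ((31 - Shamt) & 31));
  uint32_t HiFalse = Lo << ((Shamt - 32) & 31);
  bool ShamtLtXLen = (int)Shamt - 32 < 0; // Shamt < XLEN
  OutLo = ShamtLtXLen ? LoTrue : 0;
  OutHi = ShamtLtXLen ? HiTrue : HiFalse;
}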
8541
8542SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8543 bool IsSRA) const {
8544 SDLoc DL(Op);
8545 SDValue Lo = Op.getOperand(0);
8546 SDValue Hi = Op.getOperand(1);
8547 SDValue Shamt = Op.getOperand(2);
8548 EVT VT = Lo.getValueType();
8549
8550 // SRA expansion:
8551 // if Shamt-XLEN < 0: // Shamt < XLEN
8552 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8553 // Hi = Hi >>s Shamt
8554 // else:
8555 // Lo = Hi >>s (Shamt-XLEN);
8556 // Hi = Hi >>s (XLEN-1)
8557 //
8558 // SRL expansion:
8559 // if Shamt-XLEN < 0: // Shamt < XLEN
8560 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8561 // Hi = Hi >>u Shamt
8562 // else:
8563 // Lo = Hi >>u (Shamt-XLEN);
8564 // Hi = 0;
8565
8566 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8567
8568 SDValue Zero = DAG.getConstant(0, DL, VT);
8569 SDValue One = DAG.getConstant(1, DL, VT);
8570 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8571 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8572 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8573 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8574
8575 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8576 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8577 SDValue ShiftLeftHi =
8578 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8579 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8580 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8581 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8582 SDValue HiFalse =
8583 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8584
8585 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8586
8587 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8588 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8589
8590 SDValue Parts[2] = {Lo, Hi};
8591 return DAG.getMergeValues(Parts, DL);
8592}
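// The corresponding standalone sketch for the arithmetic shift-right expansion
// above, under the same assumptions (XLEN == 32, 0 <= Shamt < 64); sra64Parts
// is an illustrative name only.
#include <cstdint>

static void sra64Parts(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t LoTrue = (Lo >> (Shamt & 31)) | ((Hi << 1) << ((31 - Shamt) & 31));
  uint32_t HiTrue = (uint32_t)((int32_t)Hi >> (Shamt & 31));
  uint32_t LoFalse = (uint32_t)((int32_t)Hi >> ((Shamt - 32) & 31));
  uint32_t HiFalse = (uint32_t)((int32_t)Hi >> 31); // sign-fill
  bool ShamtLtXLen = (int)Shamt - 32 < 0;           // Shamt < XLEN
  OutLo = ShamtLtXLen ? LoTrue : LoFalse;
  OutHi = ShamtLtXLen ? HiTrue : HiFalse;
}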
8593
8594// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8595// legal equivalently-sized i8 type, so we can use that as a go-between.
8596SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8597 SelectionDAG &DAG) const {
8598 SDLoc DL(Op);
8599 MVT VT = Op.getSimpleValueType();
8600 SDValue SplatVal = Op.getOperand(0);
8601 // All-zeros or all-ones splats are handled specially.
8602 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8603 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8604 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8605 }
8606 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8607 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8608 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8609 }
8610 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8611 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8612 DAG.getConstant(1, DL, SplatVal.getValueType()));
8613 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8614 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8615 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8616}
8617
8618// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8619// illegal (currently only vXi64 RV32).
8620// FIXME: We could also catch non-constant sign-extended i32 values and lower
8621// them to VMV_V_X_VL.
8622SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8623 SelectionDAG &DAG) const {
8624 SDLoc DL(Op);
8625 MVT VecVT = Op.getSimpleValueType();
8626 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8627 "Unexpected SPLAT_VECTOR_PARTS lowering");
8628
8629 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8630 SDValue Lo = Op.getOperand(0);
8631 SDValue Hi = Op.getOperand(1);
8632
8633 MVT ContainerVT = VecVT;
8634 if (VecVT.isFixedLengthVector())
8635 ContainerVT = getContainerForFixedLengthVector(VecVT);
8636
8637 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8638
8639 SDValue Res =
8640 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8641
8642 if (VecVT.isFixedLengthVector())
8643 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8644
8645 return Res;
8646}
8647
8648// Custom-lower extensions from mask vectors by using a vselect either with 1
8649// for zero/any-extension or -1 for sign-extension:
8650// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8651// Note that any-extension is lowered identically to zero-extension.
8652SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8653 int64_t ExtTrueVal) const {
8654 SDLoc DL(Op);
8655 MVT VecVT = Op.getSimpleValueType();
8656 SDValue Src = Op.getOperand(0);
8657 // Only custom-lower extensions from mask types
8658 assert(Src.getValueType().isVector() &&
8659 Src.getValueType().getVectorElementType() == MVT::i1);
8660
8661 if (VecVT.isScalableVector()) {
8662 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8663 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8664 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8665 }
8666
8667 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8668 MVT I1ContainerVT =
8669 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8670
8671 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8672
8673 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8674
8675 MVT XLenVT = Subtarget.getXLenVT();
8676 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8677 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8678
8679 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8680 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8681 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8682 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8683 SDValue Select =
8684 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8685 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8686
8687 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8688}
8689
8690SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8691 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8692 MVT ExtVT = Op.getSimpleValueType();
8693 // Only custom-lower extensions from fixed-length vector types.
8694 if (!ExtVT.isFixedLengthVector())
8695 return Op;
8696 MVT VT = Op.getOperand(0).getSimpleValueType();
8697 // Grab the canonical container type for the extended type. Infer the smaller
8698 // type from that to ensure the same number of vector elements, as we know
8699 // the LMUL will be sufficient to hold the smaller type.
8700 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8701 // Get the extended container type manually to ensure the same number of
8702 // vector elements between source and dest.
8703 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8704 ContainerExtVT.getVectorElementCount());
8705
8706 SDValue Op1 =
8707 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8708
8709 SDLoc DL(Op);
8710 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8711
8712 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8713
8714 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8715}
8716
8717// Custom-lower truncations from vectors to mask vectors by using a mask and a
8718// setcc operation:
8719// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8720SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8721 SelectionDAG &DAG) const {
8722 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8723 SDLoc DL(Op);
8724 EVT MaskVT = Op.getValueType();
8725 // Only expect to custom-lower truncations to mask types
8726 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8727 "Unexpected type for vector mask lowering");
8728 SDValue Src = Op.getOperand(0);
8729 MVT VecVT = Src.getSimpleValueType();
8730 SDValue Mask, VL;
8731 if (IsVPTrunc) {
8732 Mask = Op.getOperand(1);
8733 VL = Op.getOperand(2);
8734 }
8735 // If this is a fixed vector, we need to convert it to a scalable vector.
8736 MVT ContainerVT = VecVT;
8737
8738 if (VecVT.isFixedLengthVector()) {
8739 ContainerVT = getContainerForFixedLengthVector(VecVT);
8740 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8741 if (IsVPTrunc) {
8742 MVT MaskContainerVT =
8743 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8744 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8745 }
8746 }
8747
8748 if (!IsVPTrunc) {
8749 std::tie(Mask, VL) =
8750 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8751 }
8752
8753 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8754 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8755
8756 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8757 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8758 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8759 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8760
8761 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8762 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8763 DAG.getUNDEF(ContainerVT), Mask, VL);
8764 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8765 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8766 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8767 if (MaskVT.isFixedLengthVector())
8768 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8769 return Trunc;
8770}
8771
8772SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8773 SelectionDAG &DAG) const {
8774 unsigned Opc = Op.getOpcode();
8775 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8776 SDLoc DL(Op);
8777
8778 MVT VT = Op.getSimpleValueType();
8779 // Only custom-lower vector truncates
8780 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8781
8782 // Truncates to mask types are handled differently
8783 if (VT.getVectorElementType() == MVT::i1)
8784 return lowerVectorMaskTruncLike(Op, DAG);
8785
8786 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8787 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8788 // truncate by one power of two at a time.
8789 MVT DstEltVT = VT.getVectorElementType();
8790
8791 SDValue Src = Op.getOperand(0);
8792 MVT SrcVT = Src.getSimpleValueType();
8793 MVT SrcEltVT = SrcVT.getVectorElementType();
8794
8795 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8796 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8797 "Unexpected vector truncate lowering");
8798
8799 MVT ContainerVT = SrcVT;
8800 SDValue Mask, VL;
8801 if (IsVPTrunc) {
8802 Mask = Op.getOperand(1);
8803 VL = Op.getOperand(2);
8804 }
8805 if (SrcVT.isFixedLengthVector()) {
8806 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8807 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8808 if (IsVPTrunc) {
8809 MVT MaskVT = getMaskTypeFor(ContainerVT);
8810 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8811 }
8812 }
8813
8814 SDValue Result = Src;
8815 if (!IsVPTrunc) {
8816 std::tie(Mask, VL) =
8817 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8818 }
8819
8820 unsigned NewOpc;
8821  if (Opc == ISD::TRUNCATE_SSAT_S)
8822    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8823  else if (Opc == ISD::TRUNCATE_USAT_U)
8824    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8825  else
8826    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8827
8828 do {
8829 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8830 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8831 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8832 } while (SrcEltVT != DstEltVT);
8833
8834 if (SrcVT.isFixedLengthVector())
8835 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8836
8837 return Result;
8838}
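// A small standalone sketch of the SEW-halving loop above: each
// RISCVISD::TRUNCATE_VECTOR_VL step narrows the element type by one power of
// two, so truncating e.g. i64 elements to i8 takes three steps. The helper
// name truncSteps is illustrative only.
static unsigned truncSteps(unsigned SrcEltBits, unsigned DstEltBits) {
  unsigned Steps = 0;
  while (SrcEltBits != DstEltBits) {
    SrcEltBits /= 2; // SEW*2 -> SEW, mirroring the do/while loop above
    ++Steps;
  }
  return Steps; // truncSteps(64, 8) == 3
}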
8839
8840SDValue
8841RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8842 SelectionDAG &DAG) const {
8843 SDLoc DL(Op);
8844 SDValue Chain = Op.getOperand(0);
8845 SDValue Src = Op.getOperand(1);
8846 MVT VT = Op.getSimpleValueType();
8847 MVT SrcVT = Src.getSimpleValueType();
8848 MVT ContainerVT = VT;
8849 if (VT.isFixedLengthVector()) {
8850 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8851 ContainerVT =
8852 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8853 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8854 }
8855
8856 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8857
8858  // RVV can only widen/truncate fp to types double or half the size of the source.
8859 if ((VT.getVectorElementType() == MVT::f64 &&
8860 (SrcVT.getVectorElementType() == MVT::f16 ||
8861 SrcVT.getVectorElementType() == MVT::bf16)) ||
8862 ((VT.getVectorElementType() == MVT::f16 ||
8863 VT.getVectorElementType() == MVT::bf16) &&
8864 SrcVT.getVectorElementType() == MVT::f64)) {
8865 // For double rounding, the intermediate rounding should be round-to-odd.
8866    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8867                                ? RISCVISD::STRICT_FP_EXTEND_VL
8868                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
8869    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8870 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8871 Chain, Src, Mask, VL);
8872 Chain = Src.getValue(1);
8873 }
8874
8875  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8876                         ? RISCVISD::STRICT_FP_EXTEND_VL
8877                         : RISCVISD::STRICT_FP_ROUND_VL;
8878  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8879 Chain, Src, Mask, VL);
8880 if (VT.isFixedLengthVector()) {
8881 // StrictFP operations have two result values. Their lowered result should
8882    // have the same result count.
8883 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8884 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8885 }
8886 return Res;
8887}
8888
8889SDValue
8890RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8891 SelectionDAG &DAG) const {
8892 bool IsVP =
8893 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8894 bool IsExtend =
8895 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8896  // RVV can only truncate fp to types half the size of the source. We
8897 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8898 // conversion instruction.
8899 SDLoc DL(Op);
8900 MVT VT = Op.getSimpleValueType();
8901
8902 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8903
8904 SDValue Src = Op.getOperand(0);
8905 MVT SrcVT = Src.getSimpleValueType();
8906
8907 bool IsDirectExtend =
8908 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8909 (SrcVT.getVectorElementType() != MVT::f16 &&
8910 SrcVT.getVectorElementType() != MVT::bf16));
8911 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8912 VT.getVectorElementType() != MVT::bf16) ||
8913 SrcVT.getVectorElementType() != MVT::f64);
8914
8915 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8916
8917 // Prepare any fixed-length vector operands.
8918 MVT ContainerVT = VT;
8919 SDValue Mask, VL;
8920 if (IsVP) {
8921 Mask = Op.getOperand(1);
8922 VL = Op.getOperand(2);
8923 }
8924 if (VT.isFixedLengthVector()) {
8925 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8926 ContainerVT =
8927 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8928 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8929 if (IsVP) {
8930 MVT MaskVT = getMaskTypeFor(ContainerVT);
8931 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8932 }
8933 }
8934
8935 if (!IsVP)
8936 std::tie(Mask, VL) =
8937 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8938
8939 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8940
8941 if (IsDirectConv) {
8942 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8943 if (VT.isFixedLengthVector())
8944 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8945 return Src;
8946 }
8947
8948 unsigned InterConvOpc =
8949      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8950
8951 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8952 SDValue IntermediateConv =
8953 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8954 SDValue Result =
8955 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8956 if (VT.isFixedLengthVector())
8957 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8958 return Result;
8959}
8960
8961// Given a scalable vector type and an index into it, returns the type for the
8962// smallest subvector that the index fits in. This can be used to reduce LMUL
8963// for operations like vslidedown.
8964//
8965// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8966static std::optional<MVT>
8967getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8968 const RISCVSubtarget &Subtarget) {
8969 assert(VecVT.isScalableVector());
8970 const unsigned EltSize = VecVT.getScalarSizeInBits();
8971 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8972 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8973 MVT SmallerVT;
8974 if (MaxIdx < MinVLMAX)
8975 SmallerVT = getLMUL1VT(VecVT);
8976 else if (MaxIdx < MinVLMAX * 2)
8977 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8978 else if (MaxIdx < MinVLMAX * 4)
8979    SmallerVT = getLMUL1VT(VecVT)
8980                    .getDoubleNumVectorElementsVT()
8981                    .getDoubleNumVectorElementsVT();
8982  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8983 return std::nullopt;
8984 return SmallerVT;
8985}
8986
8987// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8988// first position of a vector, and that vector is slid up to the insert index.
8989// By limiting the active vector length to index+1 and merging with the
8990// original vector (with an undisturbed tail policy for elements >= VL), we
8991// achieve the desired result of leaving all elements untouched except the one
8992// at VL-1, which is replaced with the desired value.
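// For example, inserting into index 2 of a 4-element vector: the new value is
// placed at element 0 of a temporary, VL is limited to 3 (index + 1), and a
// vslideup by 2 with a tail-undisturbed policy writes only element 2, leaving
// elements 0, 1 and 3 of the original vector intact.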
8993SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8994 SelectionDAG &DAG) const {
8995 SDLoc DL(Op);
8996 MVT VecVT = Op.getSimpleValueType();
8997 MVT XLenVT = Subtarget.getXLenVT();
8998 SDValue Vec = Op.getOperand(0);
8999 SDValue Val = Op.getOperand(1);
9000 MVT ValVT = Val.getSimpleValueType();
9001 SDValue Idx = Op.getOperand(2);
9002
9003 if (VecVT.getVectorElementType() == MVT::i1) {
9004 // FIXME: For now we just promote to an i8 vector and insert into that,
9005 // but this is probably not optimal.
9006 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9007 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9008 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9009 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9010 }
9011
9012 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9013 ValVT == MVT::bf16) {
9014 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9015 MVT IntVT = VecVT.changeTypeToInteger();
9016 SDValue IntInsert = DAG.getNode(
9017 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9018 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9019 return DAG.getBitcast(VecVT, IntInsert);
9020 }
9021
9022 MVT ContainerVT = VecVT;
9023 // If the operand is a fixed-length vector, convert to a scalable one.
9024 if (VecVT.isFixedLengthVector()) {
9025 ContainerVT = getContainerForFixedLengthVector(VecVT);
9026 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9027 }
9028
9029 // If we know the index we're going to insert at, we can shrink Vec so that
9030 // we're performing the scalar inserts and slideup on a smaller LMUL.
9031 MVT OrigContainerVT = ContainerVT;
9032 SDValue OrigVec = Vec;
9033 SDValue AlignedIdx;
9034 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9035 const unsigned OrigIdx = IdxC->getZExtValue();
9036 // Do we know an upper bound on LMUL?
9037 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9038 DL, DAG, Subtarget)) {
9039 ContainerVT = *ShrunkVT;
9040 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9041 }
9042
9043 // If we're compiling for an exact VLEN value, we can always perform
9044 // the insert in m1 as we can determine the register corresponding to
9045 // the index in the register group.
9046 const MVT M1VT = getLMUL1VT(ContainerVT);
9047 if (auto VLEN = Subtarget.getRealVLen();
9048 VLEN && ContainerVT.bitsGT(M1VT)) {
9049 EVT ElemVT = VecVT.getVectorElementType();
9050 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9051 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9052 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9053 unsigned ExtractIdx =
9054 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9055 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9056 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9057 ContainerVT = M1VT;
9058 }
9059
9060 if (AlignedIdx)
9061 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9062 AlignedIdx);
9063 }
9064
9065 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9066 // Even i64-element vectors on RV32 can be lowered without scalar
9067 // legalization if the most-significant 32 bits of the value are not affected
9068 // by the sign-extension of the lower 32 bits.
9069 // TODO: We could also catch sign extensions of a 32-bit value.
9070 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9071 const auto *CVal = cast<ConstantSDNode>(Val);
9072 if (isInt<32>(CVal->getSExtValue())) {
9073 IsLegalInsert = true;
9074 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9075 }
9076 }
9077
9078 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9079
9080 SDValue ValInVec;
9081
9082  if (IsLegalInsert) {
9083    unsigned Opc =
9084        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9085    if (isNullConstant(Idx)) {
9086 if (!VecVT.isFloatingPoint())
9087 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9088 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9089
9090 if (AlignedIdx)
9091 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9092 Vec, AlignedIdx);
9093 if (!VecVT.isFixedLengthVector())
9094 return Vec;
9095 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9096 }
9097 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9098 } else {
9099 // On RV32, i64-element vectors must be specially handled to place the
9100 // value at element 0, by using two vslide1down instructions in sequence on
9101 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9102 // this.
9103 SDValue ValLo, ValHi;
9104 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9105 MVT I32ContainerVT =
9106 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9107 SDValue I32Mask =
9108 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9109 // Limit the active VL to two.
9110 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9111 // If the Idx is 0 we can insert directly into the vector.
9112 if (isNullConstant(Idx)) {
9113      // First slide in the lo value, then the hi value above it. We use slide1down
9114 // to avoid the register group overlap constraint of vslide1up.
9115 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9116 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9117 // If the source vector is undef don't pass along the tail elements from
9118 // the previous slide1down.
9119 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9120 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9121 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9122 // Bitcast back to the right container type.
9123 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9124
9125 if (AlignedIdx)
9126 ValInVec =
9127 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9128 ValInVec, AlignedIdx);
9129 if (!VecVT.isFixedLengthVector())
9130 return ValInVec;
9131 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9132 }
9133
9134    // First slide in the lo value, then the hi value above it. We use slide1down
9135 // to avoid the register group overlap constraint of vslide1up.
9136 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9137 DAG.getUNDEF(I32ContainerVT),
9138 DAG.getUNDEF(I32ContainerVT), ValLo,
9139 I32Mask, InsertI64VL);
9140 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9141 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9142 I32Mask, InsertI64VL);
9143 // Bitcast back to the right container type.
9144 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9145 }
9146
9147 // Now that the value is in a vector, slide it into position.
9148 SDValue InsertVL =
9149 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9150
9151 // Use tail agnostic policy if Idx is the last index of Vec.
9152  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9153  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9154 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9155 Policy = RISCVII::TAIL_AGNOSTIC;
9156 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9157 Idx, Mask, InsertVL, Policy);
9158
9159 if (AlignedIdx)
9160 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9161 Slideup, AlignedIdx);
9162 if (!VecVT.isFixedLengthVector())
9163 return Slideup;
9164 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9165}
9166
9167// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9168// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9169// types this is done using VMV_X_S to allow us to glean information about the
9170// sign bits of the result.
9171SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9172 SelectionDAG &DAG) const {
9173 SDLoc DL(Op);
9174 SDValue Idx = Op.getOperand(1);
9175 SDValue Vec = Op.getOperand(0);
9176 EVT EltVT = Op.getValueType();
9177 MVT VecVT = Vec.getSimpleValueType();
9178 MVT XLenVT = Subtarget.getXLenVT();
9179
9180 if (VecVT.getVectorElementType() == MVT::i1) {
9181 // Use vfirst.m to extract the first bit.
9182 if (isNullConstant(Idx)) {
9183 MVT ContainerVT = VecVT;
9184 if (VecVT.isFixedLengthVector()) {
9185 ContainerVT = getContainerForFixedLengthVector(VecVT);
9186 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9187 }
9188 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9189 SDValue Vfirst =
9190 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9191 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9192 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9193 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9194 }
9195 if (VecVT.isFixedLengthVector()) {
9196 unsigned NumElts = VecVT.getVectorNumElements();
9197 if (NumElts >= 8) {
9198 MVT WideEltVT;
9199 unsigned WidenVecLen;
9200 SDValue ExtractElementIdx;
9201 SDValue ExtractBitIdx;
9202 unsigned MaxEEW = Subtarget.getELen();
9203 MVT LargestEltVT = MVT::getIntegerVT(
9204 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9205 if (NumElts <= LargestEltVT.getSizeInBits()) {
9206 assert(isPowerOf2_32(NumElts) &&
9207 "the number of elements should be power of 2");
9208 WideEltVT = MVT::getIntegerVT(NumElts);
9209 WidenVecLen = 1;
9210 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9211 ExtractBitIdx = Idx;
9212 } else {
9213 WideEltVT = LargestEltVT;
9214 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9215 // extract element index = index / element width
9216 ExtractElementIdx = DAG.getNode(
9217 ISD::SRL, DL, XLenVT, Idx,
9218 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9219 // mask bit index = index % element width
9220 ExtractBitIdx = DAG.getNode(
9221 ISD::AND, DL, XLenVT, Idx,
9222 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9223 }
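      // For example, with a v128i1 source, XLEN == 64 and ELEN == 64:
      // WideEltVT is i64, WidenVecLen is 2, and extracting bit 70 becomes
      // word index 70 >> 6 == 1 and bit index 70 & 63 == 6 below.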
9224 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9225 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9226 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9227 Vec, ExtractElementIdx);
9228 // Extract the bit from GPR.
9229 SDValue ShiftRight =
9230 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9231 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9232 DAG.getConstant(1, DL, XLenVT));
9233 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9234 }
9235 }
9236 // Otherwise, promote to an i8 vector and extract from that.
9237 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9238 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9239 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9240 }
9241
9242 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9243 EltVT == MVT::bf16) {
9244 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9245 MVT IntVT = VecVT.changeTypeToInteger();
9246 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9247 SDValue IntExtract =
9248 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9249 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9250 }
9251
9252 // If this is a fixed vector, we need to convert it to a scalable vector.
9253 MVT ContainerVT = VecVT;
9254 if (VecVT.isFixedLengthVector()) {
9255 ContainerVT = getContainerForFixedLengthVector(VecVT);
9256 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9257 }
9258
9259 // If we're compiling for an exact VLEN value and we have a known
9260 // constant index, we can always perform the extract in m1 (or
9261 // smaller) as we can determine the register corresponding to
9262 // the index in the register group.
9263 const auto VLen = Subtarget.getRealVLen();
9264 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9265 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9266 MVT M1VT = getLMUL1VT(ContainerVT);
9267 unsigned OrigIdx = IdxC->getZExtValue();
9268 EVT ElemVT = VecVT.getVectorElementType();
9269 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9270 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9271 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9272 unsigned ExtractIdx =
9273 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9274 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9275 DAG.getVectorIdxConstant(ExtractIdx, DL));
9276 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9277 ContainerVT = M1VT;
9278 }
9279
9280 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9281 // contains our index.
9282 std::optional<uint64_t> MaxIdx;
9283 if (VecVT.isFixedLengthVector())
9284 MaxIdx = VecVT.getVectorNumElements() - 1;
9285 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9286 MaxIdx = IdxC->getZExtValue();
9287 if (MaxIdx) {
9288 if (auto SmallerVT =
9289 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9290 ContainerVT = *SmallerVT;
9291 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9292 DAG.getConstant(0, DL, XLenVT));
9293 }
9294 }
9295
9296 // If after narrowing, the required slide is still greater than LMUL2,
9297 // fallback to generic expansion and go through the stack. This is done
9298 // for a subtle reason: extracting *all* elements out of a vector is
9299 // widely expected to be linear in vector size, but because vslidedown
9300 // is linear in LMUL, performing N extracts using vslidedown becomes
9301 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9302 // seems to have the same problem (the store is linear in LMUL), but the
9303 // generic expansion *memoizes* the store, and thus for many extracts of
9304 // the same vector we end up with one store and a bunch of loads.
9305 // TODO: We don't have the same code for insert_vector_elt because we
9306 // have BUILD_VECTOR and handle the degenerate case there. Should we
9307 // consider adding an inverse BUILD_VECTOR node?
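  // Roughly: N vslidedown-based extracts from an LMUL-8 vector do on the
  // order of N * 8 vector-register moves of work, while the memoized store
  // plus N scalar loads stays linear in the vector size.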
9308 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9309 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9310 return SDValue();
9311
9312 // If the index is 0, the vector is already in the right position.
9313 if (!isNullConstant(Idx)) {
9314 // Use a VL of 1 to avoid processing more elements than we need.
9315 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9316 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9317 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9318 }
9319
9320 if (!EltVT.isInteger()) {
9321 // Floating-point extracts are handled in TableGen.
9322 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9323 DAG.getVectorIdxConstant(0, DL));
9324 }
9325
9326 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9327 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9328}
9329
9330// Some RVV intrinsics may claim that they want an integer operand to be
9331// promoted or expanded.
9332static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9333                                           const RISCVSubtarget &Subtarget) {
9334 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9335 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9336 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9337 "Unexpected opcode");
9338
9339 if (!Subtarget.hasVInstructions())
9340 return SDValue();
9341
9342 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9343 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9344 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9345
9346 SDLoc DL(Op);
9347
9348  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9349      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9350 if (!II || !II->hasScalarOperand())
9351 return SDValue();
9352
9353 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9354  assert(SplatOp < Op.getNumOperands());
9355
9356  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
9357  SDValue &ScalarOp = Operands[SplatOp];
9358  MVT OpVT = ScalarOp.getSimpleValueType();
9359 MVT XLenVT = Subtarget.getXLenVT();
9360
9361 // If this isn't a scalar, or its type is XLenVT we're done.
9362 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9363 return SDValue();
9364
9365 // Simplest case is that the operand needs to be promoted to XLenVT.
9366 if (OpVT.bitsLT(XLenVT)) {
9367 // If the operand is a constant, sign extend to increase our chances
9368 // of being able to use a .vi instruction. ANY_EXTEND would become a
9369    // zero extend and the simm5 check in isel would fail.
9370 // FIXME: Should we ignore the upper bits in isel instead?
9371 unsigned ExtOpc =
9372 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9373 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9374 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9375 }
9376
9377 // Use the previous operand to get the vXi64 VT. The result might be a mask
9378 // VT for compares. Using the previous operand assumes that the previous
9379 // operand will never have a smaller element size than a scalar operand and
9380 // that a widening operation never uses SEW=64.
9381 // NOTE: If this fails the below assert, we can probably just find the
9382 // element count from any operand or result and use it to construct the VT.
9383 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9384 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9385
9386 // The more complex case is when the scalar is larger than XLenVT.
9387 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9388 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9389
9390 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9391 // instruction to sign-extend since SEW>XLEN.
9392 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9393 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9394 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9395 }
9396
9397 switch (IntNo) {
9398 case Intrinsic::riscv_vslide1up:
9399 case Intrinsic::riscv_vslide1down:
9400 case Intrinsic::riscv_vslide1up_mask:
9401 case Intrinsic::riscv_vslide1down_mask: {
9402 // We need to special case these when the scalar is larger than XLen.
9403 unsigned NumOps = Op.getNumOperands();
9404 bool IsMasked = NumOps == 7;
9405
9406 // Convert the vector source to the equivalent nxvXi32 vector.
9407 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9408 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9409 SDValue ScalarLo, ScalarHi;
9410 std::tie(ScalarLo, ScalarHi) =
9411 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9412
9413 // Double the VL since we halved SEW.
9414 SDValue AVL = getVLOperand(Op);
9415 SDValue I32VL;
9416
9417 // Optimize for constant AVL
9418    if (isa<ConstantSDNode>(AVL)) {
9419      const auto [MinVLMAX, MaxVLMAX] =
9420          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9421
9422      uint64_t AVLInt = AVL->getAsZExtVal();
9423 if (AVLInt <= MinVLMAX) {
9424 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9425 } else if (AVLInt >= 2 * MaxVLMAX) {
9426 // Just set vl to VLMAX in this situation
9427 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9428 } else {
9429 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9430 // is related to the hardware implementation.
9431        // So let the following code handle it.
9432 }
9433 }
9434 if (!I32VL) {
9435      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9436      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9437 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9438 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9439 SDValue SETVL =
9440 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9441      // Use the vsetvli instruction to get the actually-used length, which is
9442      // related to the hardware implementation.
9443 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9444 SEW, LMUL);
9445 I32VL =
9446 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9447 }
9448
9449 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9450
9451 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9452 // instructions.
9453 SDValue Passthru;
9454 if (IsMasked)
9455 Passthru = DAG.getUNDEF(I32VT);
9456 else
9457 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9458
9459 if (IntNo == Intrinsic::riscv_vslide1up ||
9460 IntNo == Intrinsic::riscv_vslide1up_mask) {
9461 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9462 ScalarHi, I32Mask, I32VL);
9463 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9464 ScalarLo, I32Mask, I32VL);
9465 } else {
9466 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9467 ScalarLo, I32Mask, I32VL);
9468 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9469 ScalarHi, I32Mask, I32VL);
9470 }
9471
9472 // Convert back to nxvXi64.
9473 Vec = DAG.getBitcast(VT, Vec);
9474
9475 if (!IsMasked)
9476 return Vec;
9477 // Apply mask after the operation.
9478 SDValue Mask = Operands[NumOps - 3];
9479 SDValue MaskedOff = Operands[1];
9480 // Assume Policy operand is the last operand.
9481 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9482 // We don't need to select maskedoff if it's undef.
9483 if (MaskedOff.isUndef())
9484 return Vec;
9485 // TAMU
9486 if (Policy == RISCVII::TAIL_AGNOSTIC)
9487 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9488 DAG.getUNDEF(VT), AVL);
9489 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9490  // It's fine because vmerge does not care about the mask policy.
9491 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9492 MaskedOff, AVL);
9493 }
9494 }
9495
9496 // We need to convert the scalar to a splat vector.
9497 SDValue VL = getVLOperand(Op);
9498 assert(VL.getValueType() == XLenVT);
9499 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9500 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9501}
9502
9503// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9504// scalable vector llvm.get.vector.length for now.
9505//
9506// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9507// (vscale * VF). The vscale and VF are independent of element width. We use
9508// SEW=8 for the vsetvli because it is the only element width that supports all
9509// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9510// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
9511// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9512// SEW and LMUL are better for the surrounding vector instructions.
9513static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9514                                    const RISCVSubtarget &Subtarget) {
9515 MVT XLenVT = Subtarget.getXLenVT();
9516
9517 // The smallest LMUL is only valid for the smallest element width.
9518 const unsigned ElementWidth = 8;
9519
9520 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9521 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9522 // We don't support VF==1 with ELEN==32.
9523 [[maybe_unused]] unsigned MinVF =
9524 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9525
9526 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9527 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9528 "Unexpected VF");
9529
9530 bool Fractional = VF < LMul1VF;
9531 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9532 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9533 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9534
9535 SDLoc DL(N);
9536
9537 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9538 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9539
9540 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9541
9542 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9543 SDValue Res =
9544 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9545 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9546}
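// A standalone sketch (illustrative names only) of the VF -> LMUL mapping used
// above, assuming RVVBitsPerBlock == 64 and ElementWidth == 8, so LMUL 1
// corresponds to VF == 8.
static void mapVFToLMUL(unsigned VF, bool &Fractional, unsigned &LMulVal) {
  const unsigned LMul1VF = 64 / 8; // RVVBitsPerBlock / ElementWidth
  Fractional = VF < LMul1VF;
  LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  // e.g. VF == 2  -> Fractional, LMulVal == 4 (LMUL = 1/4)
  //      VF == 32 -> not fractional, LMulVal == 4 (LMUL = 4)
}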
9547
9548static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9549                             const RISCVSubtarget &Subtarget) {
9550 SDValue Op0 = N->getOperand(1);
9551 MVT OpVT = Op0.getSimpleValueType();
9552 MVT ContainerVT = OpVT;
9553 if (OpVT.isFixedLengthVector()) {
9554 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9555 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9556 }
9557 MVT XLenVT = Subtarget.getXLenVT();
9558 SDLoc DL(N);
9559 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9560 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9561 if (isOneConstant(N->getOperand(2)))
9562 return Res;
9563
9564 // Convert -1 to VL.
9565 SDValue Setcc =
9566 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9567 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9568 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9569}
9570
9571static inline void promoteVCIXScalar(const SDValue &Op,
9572                                     SmallVectorImpl<SDValue> &Operands,
9573                                     SelectionDAG &DAG) {
9574  const RISCVSubtarget &Subtarget =
9575      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9576
9577 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9578 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9579 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9580 SDLoc DL(Op);
9581
9582  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9583      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9584 if (!II || !II->hasScalarOperand())
9585 return;
9586
9587 unsigned SplatOp = II->ScalarOperand + 1;
9588 assert(SplatOp < Op.getNumOperands());
9589
9590 SDValue &ScalarOp = Operands[SplatOp];
9591 MVT OpVT = ScalarOp.getSimpleValueType();
9592 MVT XLenVT = Subtarget.getXLenVT();
9593
9594 // The code below is partially copied from lowerVectorIntrinsicScalars.
9595 // If this isn't a scalar, or its type is XLenVT we're done.
9596 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9597 return;
9598
9599 // Manually emit promote operation for scalar operation.
9600 if (OpVT.bitsLT(XLenVT)) {
9601 unsigned ExtOpc =
9602 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9603 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9604 }
9605}
9606
9607static void processVCIXOperands(SDValue &OrigOp,
9608                                SmallVectorImpl<SDValue> &Operands,
9609                                SelectionDAG &DAG) {
9610 promoteVCIXScalar(OrigOp, Operands, DAG);
9611 const RISCVSubtarget &Subtarget =
9612      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9613  for (SDValue &V : Operands) {
9614 EVT ValType = V.getValueType();
9615 if (ValType.isVector() && ValType.isFloatingPoint()) {
9616 MVT InterimIVT =
9617 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9618 ValType.getVectorElementCount());
9619 V = DAG.getBitcast(InterimIVT, V);
9620 }
9621 if (ValType.isFixedLengthVector()) {
9622 MVT OpContainerVT = getContainerForFixedLengthVector(
9623 DAG, V.getSimpleValueType(), Subtarget);
9624 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9625 }
9626 }
9627}
9628
9629// LMUL * VLEN should be greater than or equal to EGS * SEW
9630static inline bool isValidEGW(int EGS, EVT VT,
9631 const RISCVSubtarget &Subtarget) {
9632 return (Subtarget.getRealMinVLen() *
9633          VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9634         EGS * VT.getScalarSizeInBits();
9635}
9636
9637SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9638 SelectionDAG &DAG) const {
9639 unsigned IntNo = Op.getConstantOperandVal(0);
9640 SDLoc DL(Op);
9641 MVT XLenVT = Subtarget.getXLenVT();
9642
9643 switch (IntNo) {
9644 default:
9645 break; // Don't custom lower most intrinsics.
9646 case Intrinsic::riscv_tuple_insert: {
9647 SDValue Vec = Op.getOperand(1);
9648 SDValue SubVec = Op.getOperand(2);
9649 SDValue Index = Op.getOperand(3);
9650
9651 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9652 SubVec, Index);
9653 }
9654 case Intrinsic::riscv_tuple_extract: {
9655 SDValue Vec = Op.getOperand(1);
9656 SDValue Index = Op.getOperand(2);
9657
9658 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9659 Index);
9660 }
9661 case Intrinsic::thread_pointer: {
9662 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9663 return DAG.getRegister(RISCV::X4, PtrVT);
9664 }
9665 case Intrinsic::riscv_orc_b:
9666 case Intrinsic::riscv_brev8:
9667 case Intrinsic::riscv_sha256sig0:
9668 case Intrinsic::riscv_sha256sig1:
9669 case Intrinsic::riscv_sha256sum0:
9670 case Intrinsic::riscv_sha256sum1:
9671 case Intrinsic::riscv_sm3p0:
9672 case Intrinsic::riscv_sm3p1: {
9673 unsigned Opc;
9674 switch (IntNo) {
9675 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9676 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9677 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9678 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9679 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9680 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9681 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9682 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9683 }
9684
9685 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9686 }
9687 case Intrinsic::riscv_sm4ks:
9688 case Intrinsic::riscv_sm4ed: {
9689 unsigned Opc =
9690 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9691
9692 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9693 Op.getOperand(3));
9694 }
9695 case Intrinsic::riscv_zip:
9696 case Intrinsic::riscv_unzip: {
9697 unsigned Opc =
9698 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9699 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9700 }
9701 case Intrinsic::riscv_mopr:
9702 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9703 Op.getOperand(2));
9704
9705 case Intrinsic::riscv_moprr: {
9706 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9707 Op.getOperand(2), Op.getOperand(3));
9708 }
9709 case Intrinsic::riscv_clmul:
9710 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9711 Op.getOperand(2));
9712 case Intrinsic::riscv_clmulh:
9713 case Intrinsic::riscv_clmulr: {
9714 unsigned Opc =
9715 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9716 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9717 }
9718 case Intrinsic::experimental_get_vector_length:
9719 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9720 case Intrinsic::experimental_cttz_elts:
9721 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9722 case Intrinsic::riscv_vmv_x_s: {
9723 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9724 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9725 }
9726 case Intrinsic::riscv_vfmv_f_s:
9727 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9728 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9729 case Intrinsic::riscv_vmv_v_x:
9730 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9731 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9732 Subtarget);
9733 case Intrinsic::riscv_vfmv_v_f:
9734 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9735 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9736 case Intrinsic::riscv_vmv_s_x: {
9737 SDValue Scalar = Op.getOperand(2);
9738
9739 if (Scalar.getValueType().bitsLE(XLenVT)) {
9740 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9741 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9742 Op.getOperand(1), Scalar, Op.getOperand(3));
9743 }
9744
9745 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9746
9747 // This is an i64 value that lives in two scalar registers. We have to
9748     // insert this in a convoluted way. First we build a vXi64 splat containing
9749     // the two values, which we assemble using some bit math. Next we'll use
9750 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9751 // to merge element 0 from our splat into the source vector.
9752 // FIXME: This is probably not the best way to do this, but it is
9753 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9754 // point.
9755 // sw lo, (a0)
9756 // sw hi, 4(a0)
9757 // vlse vX, (a0)
9758 //
9759 // vid.v vVid
9760 // vmseq.vx mMask, vVid, 0
9761 // vmerge.vvm vDest, vSrc, vVal, mMask
9762 MVT VT = Op.getSimpleValueType();
9763 SDValue Vec = Op.getOperand(1);
9764 SDValue VL = getVLOperand(Op);
9765
9766 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9767 if (Op.getOperand(1).isUndef())
9768 return SplattedVal;
9769 SDValue SplattedIdx =
9770 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9771 DAG.getConstant(0, DL, MVT::i32), VL);
9772
9773 MVT MaskVT = getMaskTypeFor(VT);
9774 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9775 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9776 SDValue SelectCond =
9777 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9778 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9779 DAG.getUNDEF(MaskVT), Mask, VL});
9780 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9781 Vec, DAG.getUNDEF(VT), VL);
9782 }
9783 case Intrinsic::riscv_vfmv_s_f:
9784 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9785 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9786 // EGS * EEW >= 128 bits
9787 case Intrinsic::riscv_vaesdf_vv:
9788 case Intrinsic::riscv_vaesdf_vs:
9789 case Intrinsic::riscv_vaesdm_vv:
9790 case Intrinsic::riscv_vaesdm_vs:
9791 case Intrinsic::riscv_vaesef_vv:
9792 case Intrinsic::riscv_vaesef_vs:
9793 case Intrinsic::riscv_vaesem_vv:
9794 case Intrinsic::riscv_vaesem_vs:
9795 case Intrinsic::riscv_vaeskf1:
9796 case Intrinsic::riscv_vaeskf2:
9797 case Intrinsic::riscv_vaesz_vs:
9798 case Intrinsic::riscv_vsm4k:
9799 case Intrinsic::riscv_vsm4r_vv:
9800 case Intrinsic::riscv_vsm4r_vs: {
9801 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9802 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9803 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9804 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9805 return Op;
9806 }
9807 // EGS * EEW >= 256 bits
9808 case Intrinsic::riscv_vsm3c:
9809 case Intrinsic::riscv_vsm3me: {
9810 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9811 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9812 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9813 return Op;
9814 }
9815 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9816 case Intrinsic::riscv_vsha2ch:
9817 case Intrinsic::riscv_vsha2cl:
9818 case Intrinsic::riscv_vsha2ms: {
9819 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9820 !Subtarget.hasStdExtZvknhb())
9821 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9822 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9823 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9824 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9825 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9826 return Op;
9827 }
9828 case Intrinsic::riscv_sf_vc_v_x:
9829 case Intrinsic::riscv_sf_vc_v_i:
9830 case Intrinsic::riscv_sf_vc_v_xv:
9831 case Intrinsic::riscv_sf_vc_v_iv:
9832 case Intrinsic::riscv_sf_vc_v_vv:
9833 case Intrinsic::riscv_sf_vc_v_fv:
9834 case Intrinsic::riscv_sf_vc_v_xvv:
9835 case Intrinsic::riscv_sf_vc_v_ivv:
9836 case Intrinsic::riscv_sf_vc_v_vvv:
9837 case Intrinsic::riscv_sf_vc_v_fvv:
9838 case Intrinsic::riscv_sf_vc_v_xvw:
9839 case Intrinsic::riscv_sf_vc_v_ivw:
9840 case Intrinsic::riscv_sf_vc_v_vvw:
9841 case Intrinsic::riscv_sf_vc_v_fvw: {
9842 MVT VT = Op.getSimpleValueType();
9843
9844 SmallVector<SDValue> Operands{Op->op_values()};
9845     processVCIXOperands(Op, Operands, DAG);
9846
9847     MVT RetVT = VT;
9848     if (VT.isFixedLengthVector())
9849       RetVT = getContainerForFixedLengthVector(VT);
9850     else if (VT.isFloatingPoint())
9851       RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9852                                VT.getVectorElementCount());
9853
9854 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9855
9856 if (VT.isFixedLengthVector())
9857 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9858 else if (VT.isFloatingPoint())
9859 NewNode = DAG.getBitcast(VT, NewNode);
9860
9861 if (Op == NewNode)
9862 break;
9863
9864 return NewNode;
9865 }
9866 }
9867
9868 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9869}
9870
9871 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9872                                     unsigned Type) {
9873 SDLoc DL(Op);
9874 SmallVector<SDValue> Operands{Op->op_values()};
9875 Operands.erase(Operands.begin() + 1);
9876
9877 const RISCVSubtarget &Subtarget =
9878       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9879   MVT VT = Op.getSimpleValueType();
9880 MVT RetVT = VT;
9881 MVT FloatVT = VT;
9882
9883 if (VT.isFloatingPoint()) {
9884 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9885                              VT.getVectorElementCount());
9886     FloatVT = RetVT;
9887 }
9888 if (VT.isFixedLengthVector())
9889     RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9890                                              Subtarget);
9891
9892   processVCIXOperands(Op, Operands, DAG);
9893
9894 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9895 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9896 SDValue Chain = NewNode.getValue(1);
9897
9898 if (VT.isFixedLengthVector())
9899 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9900 if (VT.isFloatingPoint())
9901 NewNode = DAG.getBitcast(VT, NewNode);
9902
9903 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9904
9905 return NewNode;
9906}
9907
9908 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9909                                   unsigned Type) {
9910 SmallVector<SDValue> Operands{Op->op_values()};
9911 Operands.erase(Operands.begin() + 1);
9912   processVCIXOperands(Op, Operands, DAG);
9913
9914 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9915}
9916
9917SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9918 SelectionDAG &DAG) const {
9919 unsigned IntNo = Op.getConstantOperandVal(1);
9920 switch (IntNo) {
9921 default:
9922 break;
9923 case Intrinsic::riscv_seg2_load:
9924 case Intrinsic::riscv_seg3_load:
9925 case Intrinsic::riscv_seg4_load:
9926 case Intrinsic::riscv_seg5_load:
9927 case Intrinsic::riscv_seg6_load:
9928 case Intrinsic::riscv_seg7_load:
9929 case Intrinsic::riscv_seg8_load: {
9930 SDLoc DL(Op);
9931 static const Intrinsic::ID VlsegInts[7] = {
9932 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9933 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9934 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9935 Intrinsic::riscv_vlseg8};
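    // Illustrative example: a riscv_seg2_load returning two v4i32 values is
    // lowered to a single vlseg2e32.v on the vector tuple type with VL=4; the
    // members are then pulled out with TUPLE_EXTRACT and converted back to
    // fixed-length vectors.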
9936 unsigned NF = Op->getNumValues() - 1;
9937 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9938 MVT XLenVT = Subtarget.getXLenVT();
9939 MVT VT = Op->getSimpleValueType(0);
9940 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9941 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
9942 ContainerVT.getScalarSizeInBits();
9943 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
9944
9945 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9946 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9947 auto *Load = cast<MemIntrinsicSDNode>(Op);
9948
9949 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
9950 SDValue Ops[] = {
9951 Load->getChain(),
9952 IntID,
9953 DAG.getUNDEF(VecTupTy),
9954 Op.getOperand(2),
9955 VL,
9956 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
9957 SDValue Result =
9958         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9959                                 Load->getMemoryVT(), Load->getMemOperand());
9960     SmallVector<SDValue, 9> Results;
9961     for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
9962 SDValue SubVec =
9963 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
9964 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
9965 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
9966 }
9967 Results.push_back(Result.getValue(1));
9968 return DAG.getMergeValues(Results, DL);
9969 }
9970   case Intrinsic::riscv_sf_vc_v_x_se:
9971     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9972   case Intrinsic::riscv_sf_vc_v_i_se:
9973     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9974   case Intrinsic::riscv_sf_vc_v_xv_se:
9975     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9976   case Intrinsic::riscv_sf_vc_v_iv_se:
9977     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9978   case Intrinsic::riscv_sf_vc_v_vv_se:
9979     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9980   case Intrinsic::riscv_sf_vc_v_fv_se:
9981     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9982   case Intrinsic::riscv_sf_vc_v_xvv_se:
9983     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9984   case Intrinsic::riscv_sf_vc_v_ivv_se:
9985     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9986   case Intrinsic::riscv_sf_vc_v_vvv_se:
9987     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9988   case Intrinsic::riscv_sf_vc_v_fvv_se:
9989     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9990   case Intrinsic::riscv_sf_vc_v_xvw_se:
9991     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9992   case Intrinsic::riscv_sf_vc_v_ivw_se:
9993     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9994   case Intrinsic::riscv_sf_vc_v_vvw_se:
9995     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9996   case Intrinsic::riscv_sf_vc_v_fvw_se:
9997     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9998   }
9999
10000 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10001}
10002
10003SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10004 SelectionDAG &DAG) const {
10005 unsigned IntNo = Op.getConstantOperandVal(1);
10006 switch (IntNo) {
10007 default:
10008 break;
10009 case Intrinsic::riscv_seg2_store:
10010 case Intrinsic::riscv_seg3_store:
10011 case Intrinsic::riscv_seg4_store:
10012 case Intrinsic::riscv_seg5_store:
10013 case Intrinsic::riscv_seg6_store:
10014 case Intrinsic::riscv_seg7_store:
10015 case Intrinsic::riscv_seg8_store: {
10016 SDLoc DL(Op);
10017 static const Intrinsic::ID VssegInts[] = {
10018 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10019 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10020 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10021 Intrinsic::riscv_vsseg8};
10022 // Operands are (chain, int_id, vec*, ptr, vl)
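    // Illustrative example: a riscv_seg2_store of two v4i32 operands packs
    // both values into a vector tuple with TUPLE_INSERT and emits a single
    // vsseg2e32.v with VL=4.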
10023 unsigned NF = Op->getNumOperands() - 4;
10024 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10025 MVT XLenVT = Subtarget.getXLenVT();
10026 MVT VT = Op->getOperand(2).getSimpleValueType();
10027 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10028 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10029 ContainerVT.getScalarSizeInBits();
10030 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10031
10032 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10033 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10034 SDValue Ptr = Op->getOperand(NF + 2);
10035
10036 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10037
10038 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10039 for (unsigned i = 0; i < NF; i++)
10040 StoredVal = DAG.getNode(
10041 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10042           convertToScalableVector(
10043             ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10044 DAG.getVectorIdxConstant(i, DL));
10045
10046 SDValue Ops[] = {
10047 FixedIntrinsic->getChain(),
10048 IntID,
10049 StoredVal,
10050 Ptr,
10051 VL,
10052 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10053
10054 return DAG.getMemIntrinsicNode(
10055 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10056 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10057 }
10058   case Intrinsic::riscv_sf_vc_xv_se:
10059     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10060   case Intrinsic::riscv_sf_vc_iv_se:
10061     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10062   case Intrinsic::riscv_sf_vc_vv_se:
10063     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10064   case Intrinsic::riscv_sf_vc_fv_se:
10065     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10066   case Intrinsic::riscv_sf_vc_xvv_se:
10067     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10068   case Intrinsic::riscv_sf_vc_ivv_se:
10069     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10070   case Intrinsic::riscv_sf_vc_vvv_se:
10071     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10072   case Intrinsic::riscv_sf_vc_fvv_se:
10073     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10074   case Intrinsic::riscv_sf_vc_xvw_se:
10075     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10076   case Intrinsic::riscv_sf_vc_ivw_se:
10077     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10078   case Intrinsic::riscv_sf_vc_vvw_se:
10079     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10080   case Intrinsic::riscv_sf_vc_fvw_se:
10081     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10082   }
10083
10084 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10085}
10086
10087static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10088 switch (ISDOpcode) {
10089 default:
10090 llvm_unreachable("Unhandled reduction");
10091   case ISD::VP_REDUCE_ADD:
10092   case ISD::VECREDUCE_ADD:
10093     return RISCVISD::VECREDUCE_ADD_VL;
10094   case ISD::VP_REDUCE_UMAX:
10095   case ISD::VECREDUCE_UMAX:
10096     return RISCVISD::VECREDUCE_UMAX_VL;
10097   case ISD::VP_REDUCE_SMAX:
10098   case ISD::VECREDUCE_SMAX:
10099     return RISCVISD::VECREDUCE_SMAX_VL;
10100   case ISD::VP_REDUCE_UMIN:
10101   case ISD::VECREDUCE_UMIN:
10102     return RISCVISD::VECREDUCE_UMIN_VL;
10103   case ISD::VP_REDUCE_SMIN:
10104   case ISD::VECREDUCE_SMIN:
10105     return RISCVISD::VECREDUCE_SMIN_VL;
10106   case ISD::VP_REDUCE_AND:
10107   case ISD::VECREDUCE_AND:
10108     return RISCVISD::VECREDUCE_AND_VL;
10109   case ISD::VP_REDUCE_OR:
10110   case ISD::VECREDUCE_OR:
10111     return RISCVISD::VECREDUCE_OR_VL;
10112   case ISD::VP_REDUCE_XOR:
10113   case ISD::VECREDUCE_XOR:
10114     return RISCVISD::VECREDUCE_XOR_VL;
10115   case ISD::VP_REDUCE_FADD:
10116     return RISCVISD::VECREDUCE_FADD_VL;
10117   case ISD::VP_REDUCE_SEQ_FADD:
10118     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10119   case ISD::VP_REDUCE_FMAX:
10120   case ISD::VP_REDUCE_FMAXIMUM:
10121     return RISCVISD::VECREDUCE_FMAX_VL;
10122   case ISD::VP_REDUCE_FMIN:
10123   case ISD::VP_REDUCE_FMINIMUM:
10124     return RISCVISD::VECREDUCE_FMIN_VL;
10125   }
10126
10127}
10128
10129SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10130 SelectionDAG &DAG,
10131 bool IsVP) const {
10132 SDLoc DL(Op);
10133 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10134 MVT VecVT = Vec.getSimpleValueType();
10135 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10136 Op.getOpcode() == ISD::VECREDUCE_OR ||
10137 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10138 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10139 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10140 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10141 "Unexpected reduction lowering");
10142
10143 MVT XLenVT = Subtarget.getXLenVT();
10144
10145 MVT ContainerVT = VecVT;
10146 if (VecVT.isFixedLengthVector()) {
10147 ContainerVT = getContainerForFixedLengthVector(VecVT);
10148 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10149 }
10150
10151 SDValue Mask, VL;
10152 if (IsVP) {
10153 Mask = Op.getOperand(2);
10154 VL = Op.getOperand(3);
10155 } else {
10156 std::tie(Mask, VL) =
10157 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10158 }
10159
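  // For example (illustrative), a plain vecreduce_or of a mask becomes roughly
  //   vcpop.m a0, v0
  //   snez    a0, a0
  // while vecreduce_and becomes a vcpop.m of the complemented mask compared
  // against zero with seqz.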
10160   ISD::CondCode CC;
10161   switch (Op.getOpcode()) {
10162 default:
10163 llvm_unreachable("Unhandled reduction");
10164 case ISD::VECREDUCE_AND:
10165 case ISD::VP_REDUCE_AND: {
10166 // vcpop ~x == 0
10167 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10168 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10169 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10170 CC = ISD::SETEQ;
10171 break;
10172 }
10173 case ISD::VECREDUCE_OR:
10174 case ISD::VP_REDUCE_OR:
10175 // vcpop x != 0
10176 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10177 CC = ISD::SETNE;
10178 break;
10179 case ISD::VECREDUCE_XOR:
10180 case ISD::VP_REDUCE_XOR: {
10181 // ((vcpop x) & 1) != 0
10182 SDValue One = DAG.getConstant(1, DL, XLenVT);
10183 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10184 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10185 CC = ISD::SETNE;
10186 break;
10187 }
10188 }
10189
10190 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10191 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10192 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10193
10194 if (!IsVP)
10195 return SetCC;
10196
10197 // Now include the start value in the operation.
10198 // Note that we must return the start value when no elements are operated
10199 // upon. The vcpop instructions we've emitted in each case above will return
10200 // 0 for an inactive vector, and so we've already received the neutral value:
10201 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10202 // can simply include the start value.
10203 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10204 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10205}
10206
10207static bool isNonZeroAVL(SDValue AVL) {
10208 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10209 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10210 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10211 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10212}
10213
10214/// Helper to lower a reduction sequence of the form:
10215/// scalar = reduce_op vec, scalar_start
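/// For example (illustrative), an integer add reduction over nxv4i32 inserts
/// the start value into element 0 of an LMUL=1 vector, performs a vredsum.vs
/// against the source vector, and extracts element 0 of the LMUL=1 result.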
10216static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10217 SDValue StartValue, SDValue Vec, SDValue Mask,
10218 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10219 const RISCVSubtarget &Subtarget) {
10220 const MVT VecVT = Vec.getSimpleValueType();
10221 const MVT M1VT = getLMUL1VT(VecVT);
10222 const MVT XLenVT = Subtarget.getXLenVT();
10223 const bool NonZeroAVL = isNonZeroAVL(VL);
10224
10225 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10226 // or the original VT if fractional.
10227 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10228 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10229 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10230 // be the result of the reduction operation.
10231 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10232 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10233 DAG, Subtarget);
10234 if (M1VT != InnerVT)
10235 InitialValue =
10236 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10237 InitialValue, DAG.getVectorIdxConstant(0, DL));
10238 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10239 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10240 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10241 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10242 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10243 DAG.getVectorIdxConstant(0, DL));
10244}
10245
10246SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10247 SelectionDAG &DAG) const {
10248 SDLoc DL(Op);
10249 SDValue Vec = Op.getOperand(0);
10250 EVT VecEVT = Vec.getValueType();
10251
10252 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10253
10254 // Due to ordering in legalize types we may have a vector type that needs to
10255 // be split. Do that manually so we can get down to a legal type.
10256 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10257          TargetLowering::TypeSplitVector) {
10258     auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10259 VecEVT = Lo.getValueType();
10260 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10261 }
10262
10263 // TODO: The type may need to be widened rather than split. Or widened before
10264 // it can be split.
10265 if (!isTypeLegal(VecEVT))
10266 return SDValue();
10267
10268 MVT VecVT = VecEVT.getSimpleVT();
10269 MVT VecEltVT = VecVT.getVectorElementType();
10270 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10271
10272 MVT ContainerVT = VecVT;
10273 if (VecVT.isFixedLengthVector()) {
10274 ContainerVT = getContainerForFixedLengthVector(VecVT);
10275 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10276 }
10277
10278 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10279
10280 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10281 switch (BaseOpc) {
10282 case ISD::AND:
10283 case ISD::OR:
10284 case ISD::UMAX:
10285 case ISD::UMIN:
10286 case ISD::SMAX:
10287 case ISD::SMIN:
10288 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10289 DAG.getVectorIdxConstant(0, DL));
10290 }
10291 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10292 Mask, VL, DL, DAG, Subtarget);
10293}
10294
10295// Given a reduction op, this function returns the matching reduction opcode,
10296// the vector SDValue and the scalar SDValue required to lower this to a
10297// RISCVISD node.
10298static std::tuple<unsigned, SDValue, SDValue>
10299 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
10300                                const RISCVSubtarget &Subtarget) {
10301 SDLoc DL(Op);
10302 auto Flags = Op->getFlags();
10303 unsigned Opcode = Op.getOpcode();
10304 switch (Opcode) {
10305 default:
10306 llvm_unreachable("Unhandled reduction");
10307 case ISD::VECREDUCE_FADD: {
10308 // Use positive zero if we can. It is cheaper to materialize.
10309 SDValue Zero =
10310 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10311 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10312 }
10313   case ISD::VECREDUCE_SEQ_FADD:
10314     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10315                            Op.getOperand(0));
10316   case ISD::VECREDUCE_FMINIMUM:
10317   case ISD::VECREDUCE_FMAXIMUM:
10318   case ISD::VECREDUCE_FMIN:
10319   case ISD::VECREDUCE_FMAX: {
10320 SDValue Front =
10321 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10322 DAG.getVectorIdxConstant(0, DL));
10323 unsigned RVVOpc =
10324 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10325             ? RISCVISD::VECREDUCE_FMIN_VL
10326             : RISCVISD::VECREDUCE_FMAX_VL;
10327     return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10328 }
10329 }
10330}
10331
10332SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10333 SelectionDAG &DAG) const {
10334 SDLoc DL(Op);
10335 MVT VecEltVT = Op.getSimpleValueType();
10336
10337 unsigned RVVOpcode;
10338 SDValue VectorVal, ScalarVal;
10339 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10340 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10341 MVT VecVT = VectorVal.getSimpleValueType();
10342
10343 MVT ContainerVT = VecVT;
10344 if (VecVT.isFixedLengthVector()) {
10345 ContainerVT = getContainerForFixedLengthVector(VecVT);
10346 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10347 }
10348
10349 MVT ResVT = Op.getSimpleValueType();
10350 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10351 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10352 VL, DL, DAG, Subtarget);
10353 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10354 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10355 return Res;
10356
10357 if (Op->getFlags().hasNoNaNs())
10358 return Res;
10359
10360 // Force output to NaN if any element is Nan.
10361 SDValue IsNan =
10362 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10363 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10364 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10365 MVT XLenVT = Subtarget.getXLenVT();
10366 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10367 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10368 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10369 return DAG.getSelect(
10370 DL, ResVT, NoNaNs, Res,
10371 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10372}
10373
10374SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10375 SelectionDAG &DAG) const {
10376 SDLoc DL(Op);
10377 unsigned Opc = Op.getOpcode();
10378 SDValue Start = Op.getOperand(0);
10379 SDValue Vec = Op.getOperand(1);
10380 EVT VecEVT = Vec.getValueType();
10381 MVT XLenVT = Subtarget.getXLenVT();
10382
10383 // TODO: The type may need to be widened rather than split. Or widened before
10384 // it can be split.
10385 if (!isTypeLegal(VecEVT))
10386 return SDValue();
10387
10388 MVT VecVT = VecEVT.getSimpleVT();
10389 unsigned RVVOpcode = getRVVReductionOp(Opc);
10390
10391 if (VecVT.isFixedLengthVector()) {
10392 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10393 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10394 }
10395
10396 SDValue VL = Op.getOperand(3);
10397 SDValue Mask = Op.getOperand(2);
10398 SDValue Res =
10399 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10400 Vec, Mask, VL, DL, DAG, Subtarget);
10401 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10402 Op->getFlags().hasNoNaNs())
10403 return Res;
10404
10405 // Propagate NaNs.
10406 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10407 // Check if any of the elements in Vec is NaN.
10408 SDValue IsNaN = DAG.getNode(
10409 RISCVISD::SETCC_VL, DL, PredVT,
10410 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10411 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10412 // Check if the start value is NaN.
10413 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10414 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10415 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10416 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10417 MVT ResVT = Res.getSimpleValueType();
10418 return DAG.getSelect(
10419 DL, ResVT, NoNaNs, Res,
10420 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10421}
10422
10423SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10424 SelectionDAG &DAG) const {
10425 SDValue Vec = Op.getOperand(0);
10426 SDValue SubVec = Op.getOperand(1);
10427 MVT VecVT = Vec.getSimpleValueType();
10428 MVT SubVecVT = SubVec.getSimpleValueType();
10429
10430 SDLoc DL(Op);
10431 MVT XLenVT = Subtarget.getXLenVT();
10432 unsigned OrigIdx = Op.getConstantOperandVal(2);
10433 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10434
10435 if (OrigIdx == 0 && Vec.isUndef())
10436 return Op;
10437
10438 // We don't have the ability to slide mask vectors up indexed by their i1
10439 // elements; the smallest we can do is i8. Often we are able to bitcast to
10440 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10441 // into a scalable one, we might not necessarily have enough scalable
10442 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10443 if (SubVecVT.getVectorElementType() == MVT::i1) {
10444 if (VecVT.getVectorMinNumElements() >= 8 &&
10445 SubVecVT.getVectorMinNumElements() >= 8) {
10446 assert(OrigIdx % 8 == 0 && "Invalid index");
10447 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10448 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10449 "Unexpected mask vector lowering");
10450 OrigIdx /= 8;
10451 SubVecVT =
10452 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10453 SubVecVT.isScalableVector());
10454 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10455 VecVT.isScalableVector());
10456 Vec = DAG.getBitcast(VecVT, Vec);
10457 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10458 } else {
10459 // We can't slide this mask vector up indexed by its i1 elements.
10460 // This poses a problem when we wish to insert a scalable vector which
10461 // can't be re-expressed as a larger type. Just choose the slow path and
10462 // extend to a larger type, then truncate back down.
10463 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10464 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10465 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10466 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10467 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10468 Op.getOperand(2));
10469 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10470 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10471 }
10472 }
10473
10474   // If the subvector is a fixed-length type and we don't know VLEN
10475   // exactly, we cannot use subregister manipulation to simplify the codegen; we
10476   // don't know which register of an LMUL group contains the specific subvector
10477 // as we only know the minimum register size. Therefore we must slide the
10478 // vector group up the full amount.
10479 const auto VLen = Subtarget.getRealVLen();
10480 if (SubVecVT.isFixedLengthVector() && !VLen) {
10481 MVT ContainerVT = VecVT;
10482 if (VecVT.isFixedLengthVector()) {
10483 ContainerVT = getContainerForFixedLengthVector(VecVT);
10484 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10485 }
10486
10487 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10488 DAG.getUNDEF(ContainerVT), SubVec,
10489 DAG.getVectorIdxConstant(0, DL));
10490
10491 SDValue Mask =
10492 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10493 // Set the vector length to only the number of elements we care about. Note
10494 // that for slideup this includes the offset.
10495 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10496 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10497
10498 // Use tail agnostic policy if we're inserting over Vec's tail.
10499     unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10500     if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10501 Policy = RISCVII::TAIL_AGNOSTIC;
10502
10503 // If we're inserting into the lowest elements, use a tail undisturbed
10504 // vmv.v.v.
10505 if (OrigIdx == 0) {
10506 SubVec =
10507 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10508 } else {
10509 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10510 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10511 SlideupAmt, Mask, VL, Policy);
10512 }
10513
10514 if (VecVT.isFixedLengthVector())
10515 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10516 return DAG.getBitcast(Op.getValueType(), SubVec);
10517 }
10518
10519 MVT ContainerVecVT = VecVT;
10520 if (VecVT.isFixedLengthVector()) {
10521 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10522 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10523 }
10524
10525 MVT ContainerSubVecVT = SubVecVT;
10526 if (SubVecVT.isFixedLengthVector()) {
10527 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10528 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10529 }
10530
10531 unsigned SubRegIdx;
10532 ElementCount RemIdx;
10533 // insert_subvector scales the index by vscale if the subvector is scalable,
10534 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10535 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10536 if (SubVecVT.isFixedLengthVector()) {
10537 assert(VLen);
10538 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10539 auto Decompose =
10540         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10541             ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10542 SubRegIdx = Decompose.first;
10543 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10544 (OrigIdx % Vscale));
10545 } else {
10546 auto Decompose =
10547         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10548             ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10549 SubRegIdx = Decompose.first;
10550 RemIdx = ElementCount::getScalable(Decompose.second);
10551 }
10552
10553   TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10554   assert(isPowerOf2_64(
10555       Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10556 bool ExactlyVecRegSized =
10557 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10558 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10559
10560 // 1. If the Idx has been completely eliminated and this subvector's size is
10561 // a vector register or a multiple thereof, or the surrounding elements are
10562 // undef, then this is a subvector insert which naturally aligns to a vector
10563 // register. These can easily be handled using subregister manipulation.
10564 // 2. If the subvector isn't an exact multiple of a valid register group size,
10565 // then the insertion must preserve the undisturbed elements of the register.
10566 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10567 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10568 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10569 // of that LMUL=1 type back into the larger vector (resolving to another
10570 // subregister operation). See below for how our VSLIDEUP works. We go via a
10571 // LMUL=1 type to avoid allocating a large register group to hold our
10572 // subvector.
10573 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10574 if (SubVecVT.isFixedLengthVector()) {
10575 // We may get NoSubRegister if inserting at index 0 and the subvec
10576 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10577 if (SubRegIdx == RISCV::NoSubRegister) {
10578 assert(OrigIdx == 0);
10579 return Op;
10580 }
10581
10582 // Use a insert_subvector that will resolve to an insert subreg.
10583 assert(VLen);
10584 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10585 SDValue Insert =
10586 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10587 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10588 if (VecVT.isFixedLengthVector())
10589 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10590 return Insert;
10591 }
10592 return Op;
10593 }
10594
10595 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10596 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10597 // (in our case undisturbed). This means we can set up a subvector insertion
10598 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10599 // size of the subvector.
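  // For example (illustrative), inserting a 2-element subvector at element 2
  // sets VL = 2 + 2 = 4 with a tail-undisturbed policy, so the vslideup leaves
  // elements 0..1 and everything at index >= 4 of the destination unchanged
  // while writing elements 2..3 from the subvector.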
10600 MVT InterSubVT = ContainerVecVT;
10601 SDValue AlignedExtract = Vec;
10602 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10603 if (SubVecVT.isFixedLengthVector()) {
10604 assert(VLen);
10605 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10606 }
10607 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10608 InterSubVT = getLMUL1VT(ContainerVecVT);
10609 // Extract a subvector equal to the nearest full vector register type. This
10610 // should resolve to a EXTRACT_SUBREG instruction.
10611 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10612 DAG.getVectorIdxConstant(AlignedIdx, DL));
10613 }
10614
10615 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10616 DAG.getUNDEF(InterSubVT), SubVec,
10617 DAG.getVectorIdxConstant(0, DL));
10618
10619 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10620
10621 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10622 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10623
10624 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10625   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10626   if (Subtarget.expandVScale(EndIndex) ==
10627 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10628 Policy = RISCVII::TAIL_AGNOSTIC;
10629
10630 // If we're inserting into the lowest elements, use a tail undisturbed
10631 // vmv.v.v.
10632 if (RemIdx.isZero()) {
10633 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10634 SubVec, VL);
10635 } else {
10636 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10637
10638 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10639 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10640
10641 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10642 SlideupAmt, Mask, VL, Policy);
10643 }
10644
10645 // If required, insert this subvector back into the correct vector register.
10646 // This should resolve to an INSERT_SUBREG instruction.
10647 if (ContainerVecVT.bitsGT(InterSubVT))
10648 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10649 DAG.getVectorIdxConstant(AlignedIdx, DL));
10650
10651 if (VecVT.isFixedLengthVector())
10652 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10653
10654 // We might have bitcast from a mask type: cast back to the original type if
10655 // required.
10656 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10657}
10658
10659SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10660 SelectionDAG &DAG) const {
10661 SDValue Vec = Op.getOperand(0);
10662 MVT SubVecVT = Op.getSimpleValueType();
10663 MVT VecVT = Vec.getSimpleValueType();
10664
10665 SDLoc DL(Op);
10666 MVT XLenVT = Subtarget.getXLenVT();
10667 unsigned OrigIdx = Op.getConstantOperandVal(1);
10668 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10669
10670 // With an index of 0 this is a cast-like subvector, which can be performed
10671 // with subregister operations.
10672 if (OrigIdx == 0)
10673 return Op;
10674
10675 // We don't have the ability to slide mask vectors down indexed by their i1
10676 // elements; the smallest we can do is i8. Often we are able to bitcast to
10677 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10678 // from a scalable one, we might not necessarily have enough scalable
10679 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10680 if (SubVecVT.getVectorElementType() == MVT::i1) {
10681 if (VecVT.getVectorMinNumElements() >= 8 &&
10682 SubVecVT.getVectorMinNumElements() >= 8) {
10683 assert(OrigIdx % 8 == 0 && "Invalid index");
10684 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10685 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10686 "Unexpected mask vector lowering");
10687 OrigIdx /= 8;
10688 SubVecVT =
10689 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10690 SubVecVT.isScalableVector());
10691 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10692 VecVT.isScalableVector());
10693 Vec = DAG.getBitcast(VecVT, Vec);
10694 } else {
10695 // We can't slide this mask vector down, indexed by its i1 elements.
10696 // This poses a problem when we wish to extract a scalable vector which
10697 // can't be re-expressed as a larger type. Just choose the slow path and
10698 // extend to a larger type, then truncate back down.
10699 // TODO: We could probably improve this when extracting certain fixed
10700 // from fixed, where we can extract as i8 and shift the correct element
10701 // right to reach the desired subvector?
10702 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10703 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10704 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10705 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10706 Op.getOperand(1));
10707 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10708 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10709 }
10710 }
10711
10712 const auto VLen = Subtarget.getRealVLen();
10713
10714   // If the subvector is a fixed-length type and we don't know VLEN
10715   // exactly, we cannot use subregister manipulation to simplify the codegen; we
10716   // don't know which register of an LMUL group contains the specific subvector
10717 // as we only know the minimum register size. Therefore we must slide the
10718 // vector group down the full amount.
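  // For example (illustrative), extracting a <2 x i64> subvector that starts
  // at element 2 when VLEN is unknown emits a vslidedown by 2 with VL=2 and
  // then a cast-like EXTRACT_SUBVECTOR at index 0.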
10719 if (SubVecVT.isFixedLengthVector() && !VLen) {
10720 MVT ContainerVT = VecVT;
10721 if (VecVT.isFixedLengthVector()) {
10722 ContainerVT = getContainerForFixedLengthVector(VecVT);
10723 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10724 }
10725
10726 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10727 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10728 if (auto ShrunkVT =
10729 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10730 ContainerVT = *ShrunkVT;
10731 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10732 DAG.getVectorIdxConstant(0, DL));
10733 }
10734
10735 SDValue Mask =
10736 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10737 // Set the vector length to only the number of elements we care about. This
10738 // avoids sliding down elements we're going to discard straight away.
10739 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10740 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10741 SDValue Slidedown =
10742 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10743 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10744 // Now we can use a cast-like subvector extract to get the result.
10745 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10746 DAG.getVectorIdxConstant(0, DL));
10747 return DAG.getBitcast(Op.getValueType(), Slidedown);
10748 }
10749
10750 if (VecVT.isFixedLengthVector()) {
10751 VecVT = getContainerForFixedLengthVector(VecVT);
10752 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10753 }
10754
10755 MVT ContainerSubVecVT = SubVecVT;
10756 if (SubVecVT.isFixedLengthVector())
10757 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10758
10759 unsigned SubRegIdx;
10760 ElementCount RemIdx;
10761 // extract_subvector scales the index by vscale if the subvector is scalable,
10762 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10763 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10764 if (SubVecVT.isFixedLengthVector()) {
10765 assert(VLen);
10766 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10767 auto Decompose =
10768         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10769             VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10770 SubRegIdx = Decompose.first;
10771 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10772 (OrigIdx % Vscale));
10773 } else {
10774 auto Decompose =
10775         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10776             VecVT, ContainerSubVecVT, OrigIdx, TRI);
10777 SubRegIdx = Decompose.first;
10778 RemIdx = ElementCount::getScalable(Decompose.second);
10779 }
10780
10781 // If the Idx has been completely eliminated then this is a subvector extract
10782 // which naturally aligns to a vector register. These can easily be handled
10783 // using subregister manipulation. We use an extract_subvector that will
10784 // resolve to an extract subreg.
10785 if (RemIdx.isZero()) {
10786 if (SubVecVT.isFixedLengthVector()) {
10787 assert(VLen);
10788 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10789 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10790 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10791 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10792 }
10793 return Op;
10794 }
10795
10796 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10797 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10798 // divide exactly.
10799 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10800 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10801
10802 // If the vector type is an LMUL-group type, extract a subvector equal to the
10803 // nearest full vector register type.
10804 MVT InterSubVT = VecVT;
10805 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10806 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10807 // we should have successfully decomposed the extract into a subregister.
10808 // We use an extract_subvector that will resolve to a subreg extract.
10809 assert(SubRegIdx != RISCV::NoSubRegister);
10810 (void)SubRegIdx;
10811 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10812 if (SubVecVT.isFixedLengthVector()) {
10813 assert(VLen);
10814 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10815 }
10816 InterSubVT = getLMUL1VT(VecVT);
10817 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10818 DAG.getConstant(Idx, DL, XLenVT));
10819 }
10820
10821 // Slide this vector register down by the desired number of elements in order
10822 // to place the desired subvector starting at element 0.
10823 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10824 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10825 if (SubVecVT.isFixedLengthVector())
10826 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10827 SDValue Slidedown =
10828 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10829 Vec, SlidedownAmt, Mask, VL);
10830
10831 // Now the vector is in the right position, extract our final subvector. This
10832 // should resolve to a COPY.
10833 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10834 DAG.getVectorIdxConstant(0, DL));
10835
10836 // We might have bitcast from a mask type: cast back to the original type if
10837 // required.
10838 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10839}
10840
10841// Widen a vector's operands to i8, then truncate its results back to the
10842// original type, typically i1. All operand and result types must be the same.
10843 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10844                                   SelectionDAG &DAG) {
10845 MVT VT = N.getSimpleValueType();
10846 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10847   SmallVector<SDValue, 4> WideOps;
10848   for (SDValue Op : N->ops()) {
10849 assert(Op.getSimpleValueType() == VT &&
10850 "Operands and result must be same type");
10851 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10852 }
10853
10854 unsigned NumVals = N->getNumValues();
10855
10856   SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10857       NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10858 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10859 SmallVector<SDValue, 4> TruncVals;
10860 for (unsigned I = 0; I < NumVals; I++) {
10861 TruncVals.push_back(
10862 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10863 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10864 }
10865
10866 if (TruncVals.size() > 1)
10867 return DAG.getMergeValues(TruncVals, DL);
10868 return TruncVals.front();
10869}
10870
10871SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10872 SelectionDAG &DAG) const {
10873 SDLoc DL(Op);
10874 MVT VecVT = Op.getSimpleValueType();
10875
10876 assert(VecVT.isScalableVector() &&
10877 "vector_interleave on non-scalable vector!");
10878
10879 // 1 bit element vectors need to be widened to e8
10880 if (VecVT.getVectorElementType() == MVT::i1)
10881 return widenVectorOpsToi8(Op, DL, DAG);
10882
10883 // If the VT is LMUL=8, we need to split and reassemble.
10884 if (VecVT.getSizeInBits().getKnownMinValue() ==
10885 (8 * RISCV::RVVBitsPerBlock)) {
10886 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10887 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10888 EVT SplitVT = Op0Lo.getValueType();
10889
10890     SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10891                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10892     SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10893                                 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10894
10895 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10896 ResLo.getValue(0), ResHi.getValue(0));
10897 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10898 ResHi.getValue(1));
10899 return DAG.getMergeValues({Even, Odd}, DL);
10900 }
10901
10902 // Concatenate the two vectors as one vector to deinterleave
10903   MVT ConcatVT =
10904       MVT::getVectorVT(VecVT.getVectorElementType(),
10905                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10906 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10907 Op.getOperand(0), Op.getOperand(1));
10908
10909 // We can deinterleave through vnsrl.wi if the element type is smaller than
10910 // ELEN
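  // For example (illustrative), deinterleaving two nxv4i32 operands first
  // concatenates them into nxv8i32; viewing that data as nxv4i64, a narrowing
  // shift (vnsrl) by 0 produces the even elements and a narrowing shift by 32
  // produces the odd elements.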
10911 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10912 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
10913 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
10914 return DAG.getMergeValues({Even, Odd}, DL);
10915 }
10916
10917 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
10918 // possibly mask vector, then extract the required subvector. Doing this
10919 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
10920 // creation to be rematerialized during register allocation to reduce
10921 // register pressure if needed.
10922
10923 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
10924
10925 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
10926 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
10927 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
10928 DAG.getVectorIdxConstant(0, DL));
10929
10930 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
10931 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
10932 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
10933 DAG.getVectorIdxConstant(0, DL));
10934
10935 // vcompress the even and odd elements into two separate vectors
10936 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
10937 EvenMask, DAG.getUNDEF(ConcatVT));
10938 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
10939 OddMask, DAG.getUNDEF(ConcatVT));
10940
10941 // Extract the result half of the gather for even and odd
10942 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10943 DAG.getVectorIdxConstant(0, DL));
10944 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10945 DAG.getVectorIdxConstant(0, DL));
10946
10947 return DAG.getMergeValues({Even, Odd}, DL);
10948}
10949
10950SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10951 SelectionDAG &DAG) const {
10952 SDLoc DL(Op);
10953 MVT VecVT = Op.getSimpleValueType();
10954
10955 assert(VecVT.isScalableVector() &&
10956 "vector_interleave on non-scalable vector!");
10957
10958 // i1 vectors need to be widened to i8
10959 if (VecVT.getVectorElementType() == MVT::i1)
10960 return widenVectorOpsToi8(Op, DL, DAG);
10961
10962 MVT XLenVT = Subtarget.getXLenVT();
10963 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10964
10965 // If the VT is LMUL=8, we need to split and reassemble.
10966 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10967 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10968 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10969 EVT SplitVT = Op0Lo.getValueType();
10970
10971     SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10972                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10973     SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10974                                 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10975
10976 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10977 ResLo.getValue(0), ResLo.getValue(1));
10978 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10979 ResHi.getValue(0), ResHi.getValue(1));
10980 return DAG.getMergeValues({Lo, Hi}, DL);
10981 }
10982
10983 SDValue Interleaved;
10984
10985 // If the element type is smaller than ELEN, then we can interleave with
10986 // vwaddu.vv and vwmaccu.vx
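  // Roughly, the widening form computes zext(Even) + (zext(Odd) << SEW) in a
  // 2*SEW-wide element: vwaddu.vv yields Even + Odd, and vwmaccu.vx with an
  // all-ones scalar adds (2^SEW - 1) * Odd, giving Even + (Odd << SEW) overall.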
10987 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10988 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10989 DAG, Subtarget);
10990 } else {
10991 // Otherwise, fallback to using vrgathere16.vv
10992     MVT ConcatVT =
10993         MVT::getVectorVT(VecVT.getVectorElementType(),
10994                          VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10995 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10996 Op.getOperand(0), Op.getOperand(1));
10997
10998 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10999
11000 // 0 1 2 3 4 5 6 7 ...
11001 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11002
11003 // 1 1 1 1 1 1 1 1 ...
11004 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11005
11006 // 1 0 1 0 1 0 1 0 ...
11007 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11008 OddMask = DAG.getSetCC(
11009 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11010 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11011         ISD::CondCode::SETNE);
11012
11013 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11014
11015 // Build up the index vector for interleaving the concatenated vector
11016 // 0 0 1 1 2 2 3 3 ...
11017 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11018 // 0 n 1 n+1 2 n+2 3 n+3 ...
11019 Idx =
11020 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11021
11022 // Then perform the interleave
11023 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11024 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11025 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11026 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11027 }
11028
11029 // Extract the two halves from the interleaved result
11030 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11031 DAG.getVectorIdxConstant(0, DL));
11032 SDValue Hi = DAG.getNode(
11033 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11034       DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11035
11036 return DAG.getMergeValues({Lo, Hi}, DL);
11037}
11038
11039// Lower step_vector to the vid instruction. Any non-identity step value must
11040 // be accounted for by manual expansion.
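// For example (illustrative), a step_vector with a step of 4 becomes a vid.v
// followed by a vsll.vi of 2, while a non-power-of-two step of 3 becomes a
// vid.v followed by a vmul with the splatted constant 3.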
11041SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11042 SelectionDAG &DAG) const {
11043 SDLoc DL(Op);
11044 MVT VT = Op.getSimpleValueType();
11045 assert(VT.isScalableVector() && "Expected scalable vector");
11046 MVT XLenVT = Subtarget.getXLenVT();
11047 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11048 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11049 uint64_t StepValImm = Op.getConstantOperandVal(0);
11050 if (StepValImm != 1) {
11051 if (isPowerOf2_64(StepValImm)) {
11052 SDValue StepVal =
11053 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11054 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11055 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11056 } else {
11057 SDValue StepVal = lowerScalarSplat(
11058 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11059 VL, VT, DL, DAG, Subtarget);
11060 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11061 }
11062 }
11063 return StepVec;
11064}
11065
11066// Implement vector_reverse using vrgather.vv with indices determined by
11067// subtracting the id of each element from (VLMAX-1). This will convert
11068// the indices like so:
11069// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11070// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11071SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11072 SelectionDAG &DAG) const {
11073 SDLoc DL(Op);
11074 MVT VecVT = Op.getSimpleValueType();
11075 if (VecVT.getVectorElementType() == MVT::i1) {
11076 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11077 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11078 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11079 return DAG.getSetCC(DL, VecVT, Op2,
11080 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11081 }
11082
11083 MVT ContainerVT = VecVT;
11084 SDValue Vec = Op.getOperand(0);
11085 if (VecVT.isFixedLengthVector()) {
11086 ContainerVT = getContainerForFixedLengthVector(VecVT);
11087 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11088 }
11089
11090 MVT XLenVT = Subtarget.getXLenVT();
11091 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11092
11093 // On some uarchs vrgather.vv will read from every input register for each
11094 // output register, regardless of the indices. However to reverse a vector
11095 // each output register only needs to read from one register. So decompose it
11096 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
11097 // O(LMUL^2).
11098 //
11099 // vsetvli a1, zero, e64, m4, ta, ma
11100 // vrgatherei16.vv v12, v8, v16
11101 // ->
11102 // vsetvli a1, zero, e64, m1, ta, ma
11103 // vrgather.vv v15, v8, v16
11104 // vrgather.vv v14, v9, v16
11105 // vrgather.vv v13, v10, v16
11106 // vrgather.vv v12, v11, v16
11107 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11108 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11109 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11110 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11111 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11112 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11113
11114 // Fixed length vectors might not fit exactly into their container, and so
11115 // leave a gap in the front of the vector after being reversed. Slide this
11116 // away.
11117 //
11118 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11119 // 0 1 2 3 x x x x <- reverse
11120 // x x x x 0 1 2 3 <- vslidedown.vx
11121 if (VecVT.isFixedLengthVector()) {
11122 SDValue Offset = DAG.getNode(
11123 ISD::SUB, DL, XLenVT,
11124 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11125 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11126 Concat =
11127 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11128 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11129 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11130 }
11131 return Concat;
11132 }
11133
11134 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11135 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11136 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11137 unsigned MaxVLMAX =
11138 VecVT.isFixedLengthVector()
11139 ? VecVT.getVectorNumElements()
11140 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11141
11142 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11143 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11144
11145 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11146 // to use vrgatherei16.vv.
11147 if (MaxVLMAX > 256 && EltSize == 8) {
11148 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11149 // Reverse each half, then reassemble them in reverse order.
11150 // NOTE: It's also possible that, after splitting, VLMAX no longer
11151 // requires vrgatherei16.vv.
11152 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11153 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11154 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11155 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11156 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11157 // Reassemble the low and high pieces reversed.
11158 // FIXME: This is a CONCAT_VECTORS.
11159 SDValue Res =
11160 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11161 DAG.getVectorIdxConstant(0, DL));
11162 return DAG.getNode(
11163 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11164 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11165 }
11166
11167 // Just promote the int type to i16 which will double the LMUL.
11168 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11169 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11170 }
11171
11172 // At LMUL > 1, do the index computation in 16 bits to reduce register
11173 // pressure.
11174 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11175 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11176 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11177 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11178 IntVT = IntVT.changeVectorElementType(MVT::i16);
11179 }
11180
11181 // Calculate VLMAX-1 for the desired SEW.
11182 SDValue VLMinus1 = DAG.getNode(
11183 ISD::SUB, DL, XLenVT,
11184 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11185 DAG.getConstant(1, DL, XLenVT));
11186
11187 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11188 bool IsRV32E64 =
11189 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11190 SDValue SplatVL;
11191 if (!IsRV32E64)
11192 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11193 else
11194 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11195 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11196
11197 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11198 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11199 DAG.getUNDEF(IntVT), Mask, VL);
11200
11201 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11202 DAG.getUNDEF(ContainerVT), Mask, VL);
11203 if (VecVT.isFixedLengthVector())
11204 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11205 return Gather;
11206}
11207
11208SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11209 SelectionDAG &DAG) const {
11210 SDLoc DL(Op);
11211 SDValue V1 = Op.getOperand(0);
11212 SDValue V2 = Op.getOperand(1);
11213 MVT XLenVT = Subtarget.getXLenVT();
11214 MVT VecVT = Op.getSimpleValueType();
11215
11216 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11217
11218 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11219 SDValue DownOffset, UpOffset;
11220 if (ImmValue >= 0) {
11221 // The operand is a TargetConstant; we need to rebuild it as a regular
11222 // constant.
11223 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11224 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11225 } else {
11226 // The operand is a TargetConstant; we need to rebuild it as a regular
11227 // constant rather than negating the original operand.
11228 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11229 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11230 }
11231
11232 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11233
11234 SDValue SlideDown =
11235 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11236 DownOffset, TrueMask, UpOffset);
11237 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11238 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11239 RISCVII::TAIL_AGNOSTIC);
11240}
11241
11242SDValue
11243RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11244 SelectionDAG &DAG) const {
11245 SDLoc DL(Op);
11246 auto *Load = cast<LoadSDNode>(Op);
11247
11248 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11249 Load->getMemoryVT(),
11250 *Load->getMemOperand()) &&
11251 "Expecting a correctly-aligned load");
11252
11253 MVT VT = Op.getSimpleValueType();
11254 MVT XLenVT = Subtarget.getXLenVT();
11255 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11256
11257 // If we know the exact VLEN and our fixed length vector completely fills
11258 // the container, use a whole register load instead.
11259 const auto [MinVLMAX, MaxVLMAX] =
11260 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11261 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11262 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11263 MachineMemOperand *MMO = Load->getMemOperand();
11264 SDValue NewLoad =
11265 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11266 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11267 MMO->getAAInfo(), MMO->getRanges());
11268 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11269 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11270 }
11271
11272 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11273
11274 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11275 SDValue IntID = DAG.getTargetConstant(
11276 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11277 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11278 if (!IsMaskOp)
11279 Ops.push_back(DAG.getUNDEF(ContainerVT));
11280 Ops.push_back(Load->getBasePtr());
11281 Ops.push_back(VL);
11282 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11283 SDValue NewLoad =
11284 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11285 Load->getMemoryVT(), Load->getMemOperand());
11286
11287 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11288 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11289}
11290
11291SDValue
11292RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11293 SelectionDAG &DAG) const {
11294 SDLoc DL(Op);
11295 auto *Store = cast<StoreSDNode>(Op);
11296
11297 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11298 Store->getMemoryVT(),
11299 *Store->getMemOperand()) &&
11300 "Expecting a correctly-aligned store");
11301
11302 SDValue StoreVal = Store->getValue();
11303 MVT VT = StoreVal.getSimpleValueType();
11304 MVT XLenVT = Subtarget.getXLenVT();
11305
11306 // If the size is less than a byte, we need to pad with zeros to make a byte.
11307 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11308 VT = MVT::v8i1;
11309 StoreVal =
11310 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11311 StoreVal, DAG.getVectorIdxConstant(0, DL));
11312 }
11313
11314 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11315
11316 SDValue NewValue =
11317 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11318
11319 // If we know the exact VLEN and our fixed length vector completely fills
11320 // the container, use a whole register store instead.
11321 const auto [MinVLMAX, MaxVLMAX] =
11322 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11323 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11324 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11325 MachineMemOperand *MMO = Store->getMemOperand();
11326 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11327 MMO->getPointerInfo(), MMO->getBaseAlign(),
11328 MMO->getFlags(), MMO->getAAInfo());
11329 }
11330
11331 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11332
11333 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11334 SDValue IntID = DAG.getTargetConstant(
11335 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11336 return DAG.getMemIntrinsicNode(
11337 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11338 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11339 Store->getMemoryVT(), Store->getMemOperand());
11340}
11341
11342SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11343 SelectionDAG &DAG) const {
11344 SDLoc DL(Op);
11345 MVT VT = Op.getSimpleValueType();
11346
11347 const auto *MemSD = cast<MemSDNode>(Op);
11348 EVT MemVT = MemSD->getMemoryVT();
11349 MachineMemOperand *MMO = MemSD->getMemOperand();
11350 SDValue Chain = MemSD->getChain();
11351 SDValue BasePtr = MemSD->getBasePtr();
11352
11353 SDValue Mask, PassThru, VL;
11354 bool IsExpandingLoad = false;
11355 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11356 Mask = VPLoad->getMask();
11357 PassThru = DAG.getUNDEF(VT);
11358 VL = VPLoad->getVectorLength();
11359 } else {
11360 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11361 Mask = MLoad->getMask();
11362 PassThru = MLoad->getPassThru();
11363 IsExpandingLoad = MLoad->isExpandingLoad();
11364 }
11365
11366 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11367
11368 MVT XLenVT = Subtarget.getXLenVT();
11369
11370 MVT ContainerVT = VT;
11371 if (VT.isFixedLengthVector()) {
11372 ContainerVT = getContainerForFixedLengthVector(VT);
11373 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11374 if (!IsUnmasked) {
11375 MVT MaskVT = getMaskTypeFor(ContainerVT);
11376 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11377 }
11378 }
11379
11380 if (!VL)
11381 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11382
11383 SDValue ExpandingVL;
11384 if (!IsUnmasked && IsExpandingLoad) {
11385 ExpandingVL = VL;
11386 VL =
11387 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11388 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11389 }
11390
11391 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11392 : Intrinsic::riscv_vle_mask;
11393 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11394 if (IntID == Intrinsic::riscv_vle)
11395 Ops.push_back(DAG.getUNDEF(ContainerVT));
11396 else
11397 Ops.push_back(PassThru);
11398 Ops.push_back(BasePtr);
11399 if (IntID == Intrinsic::riscv_vle_mask)
11400 Ops.push_back(Mask);
11401 Ops.push_back(VL);
11402 if (IntID == Intrinsic::riscv_vle_mask)
11403 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11404
11405 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11406
11407 SDValue Result =
11408 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11409 Chain = Result.getValue(1);
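// For an expanding load the memory holds only the elements for the active
// mask lanes, so the code above loads vcpop(mask) contiguous elements; the
// viota/vrgather sequence below then moves element viota(mask)[i] of that
// packed data into lane i for every active lane, with inactive lanes taken
// from the passthru.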
11410 if (ExpandingVL) {
11411 MVT IndexVT = ContainerVT;
11412 if (ContainerVT.isFloatingPoint())
11413 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11414
11415 MVT IndexEltVT = IndexVT.getVectorElementType();
11416 bool UseVRGATHEREI16 = false;
11417 // If the index vector is an i8 vector and the element count exceeds 256,
11418 // we should change the element type of the index vector to i16 to avoid
11419 // overflow.
11420 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11421 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11422 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11423 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11424 UseVRGATHEREI16 = true;
11425 }
11426
11427 SDValue Iota =
11428 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11429 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11430 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11431 Result =
11432 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11433 : RISCVISD::VRGATHER_VV_VL,
11434 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11435 }
11436
11437 if (VT.isFixedLengthVector())
11438 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11439
11440 return DAG.getMergeValues({Result, Chain}, DL);
11441}
11442
11443SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11444 SelectionDAG &DAG) const {
11445 SDLoc DL(Op);
11446
11447 const auto *MemSD = cast<MemSDNode>(Op);
11448 EVT MemVT = MemSD->getMemoryVT();
11449 MachineMemOperand *MMO = MemSD->getMemOperand();
11450 SDValue Chain = MemSD->getChain();
11451 SDValue BasePtr = MemSD->getBasePtr();
11452 SDValue Val, Mask, VL;
11453
11454 bool IsCompressingStore = false;
11455 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11456 Val = VPStore->getValue();
11457 Mask = VPStore->getMask();
11458 VL = VPStore->getVectorLength();
11459 } else {
11460 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11461 Val = MStore->getValue();
11462 Mask = MStore->getMask();
11463 IsCompressingStore = MStore->isCompressingStore();
11464 }
11465
11466 bool IsUnmasked =
11467 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11468
11469 MVT VT = Val.getSimpleValueType();
11470 MVT XLenVT = Subtarget.getXLenVT();
11471
11472 MVT ContainerVT = VT;
11473 if (VT.isFixedLengthVector()) {
11474 ContainerVT = getContainerForFixedLengthVector(VT);
11475
11476 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11477 if (!IsUnmasked || IsCompressingStore) {
11478 MVT MaskVT = getMaskTypeFor(ContainerVT);
11479 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11480 }
11481 }
11482
11483 if (!VL)
11484 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11485
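// For a compressing store, vcompress packs the active elements together and
// vcpop(mask) gives the number of elements actually written, so the store
// below can be emitted unmasked.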
11486 if (IsCompressingStore) {
11487 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11488 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11489 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11490 VL =
11491 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11492 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11493 }
11494
11495 unsigned IntID =
11496 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11497 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11498 Ops.push_back(Val);
11499 Ops.push_back(BasePtr);
11500 if (!IsUnmasked)
11501 Ops.push_back(Mask);
11502 Ops.push_back(VL);
11503
11504 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11505 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11506}
11507
11508SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11509 SelectionDAG &DAG) const {
11510 SDLoc DL(Op);
11511 SDValue Val = Op.getOperand(0);
11512 SDValue Mask = Op.getOperand(1);
11513 SDValue Passthru = Op.getOperand(2);
11514
11515 MVT VT = Val.getSimpleValueType();
11516 MVT XLenVT = Subtarget.getXLenVT();
11517 MVT ContainerVT = VT;
11518 if (VT.isFixedLengthVector()) {
11519 ContainerVT = getContainerForFixedLengthVector(VT);
11520 MVT MaskVT = getMaskTypeFor(ContainerVT);
11521 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11522 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11523 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11524 }
11525
11526 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11527 SDValue Res =
11528 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11529 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11530 Passthru, Val, Mask, VL);
11531
11532 if (VT.isFixedLengthVector())
11533 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11534
11535 return Res;
11536}
11537
11538SDValue
11539RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11540 SelectionDAG &DAG) const {
11541 MVT InVT = Op.getOperand(0).getSimpleValueType();
11542 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11543
11544 MVT VT = Op.getSimpleValueType();
11545
11546 SDValue Op1 =
11547 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11548 SDValue Op2 =
11549 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11550
11551 SDLoc DL(Op);
11552 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11553 DAG, Subtarget);
11554 MVT MaskVT = getMaskTypeFor(ContainerVT);
11555
11556 SDValue Cmp =
11557 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11558 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11559
11560 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11561}
11562
11563SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11564 SelectionDAG &DAG) const {
11565 unsigned Opc = Op.getOpcode();
11566 SDLoc DL(Op);
11567 SDValue Chain = Op.getOperand(0);
11568 SDValue Op1 = Op.getOperand(1);
11569 SDValue Op2 = Op.getOperand(2);
11570 SDValue CC = Op.getOperand(3);
11571 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11572 MVT VT = Op.getSimpleValueType();
11573 MVT InVT = Op1.getSimpleValueType();
11574
11575 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11576 // condition codes.
11577 if (Opc == ISD::STRICT_FSETCCS) {
11578 // Expand strict_fsetccs(x, y, oeq) to
11579 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11580 SDVTList VTList = Op->getVTList();
11581 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11582 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11583 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11584 Op2, OLECCVal);
11585 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11586 Op1, OLECCVal);
11587 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11588 Tmp1.getValue(1), Tmp2.getValue(1));
11589 // Tmp1 and Tmp2 might be the same node.
11590 if (Tmp1 != Tmp2)
11591 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11592 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11593 }
11594
11595 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11596 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11597 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11598 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11599 Op2, OEQCCVal);
11600 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11601 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11602 }
11603 }
11604
11605 MVT ContainerInVT = InVT;
11606 if (InVT.isFixedLengthVector()) {
11607 ContainerInVT = getContainerForFixedLengthVector(InVT);
11608 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11609 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11610 }
11611 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11612
11613 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11614
11615 SDValue Res;
11616 if (Opc == ISD::STRICT_FSETCC &&
11617 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11618 CCVal == ISD::SETOLE)) {
11619 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11620 // is only active when both input elements are ordered.
11621 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11622 SDValue OrderMask1 = DAG.getNode(
11623 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11624 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11625 True, VL});
11626 SDValue OrderMask2 = DAG.getNode(
11627 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11628 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11629 True, VL});
11630 Mask =
11631 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11632 // Use Mask as the passthru operand to let the result be 0 if either of the
11633 // inputs is unordered.
11634 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11635 DAG.getVTList(MaskVT, MVT::Other),
11636 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11637 } else {
11638 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11639 : RISCVISD::STRICT_FSETCCS_VL;
11640 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11641 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11642 }
11643
11644 if (VT.isFixedLengthVector()) {
11645 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11646 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11647 }
11648 return Res;
11649}
11650
11651// Lower vector ABS to smax(X, sub(0, X)).
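// For example (illustrative values): X = {-3, 5} gives sub(0, X) = {3, -5}
// and smax(X, sub(0, X)) = {3, 5}; INT_MIN maps to itself under this
// expansion.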
11652SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11653 SDLoc DL(Op);
11654 MVT VT = Op.getSimpleValueType();
11655 SDValue X = Op.getOperand(0);
11656
11657 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11658 "Unexpected type for ISD::ABS");
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11664 }
11665
11666 SDValue Mask, VL;
11667 if (Op->getOpcode() == ISD::VP_ABS) {
11668 Mask = Op->getOperand(1);
11669 if (VT.isFixedLengthVector())
11670 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11671 Subtarget);
11672 VL = Op->getOperand(2);
11673 } else
11674 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11675
11676 SDValue SplatZero = DAG.getNode(
11677 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11678 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11679 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11680 DAG.getUNDEF(ContainerVT), Mask, VL);
11681 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11682 DAG.getUNDEF(ContainerVT), Mask, VL);
11683
11684 if (VT.isFixedLengthVector())
11685 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11686 return Max;
11687}
11688
11689SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11690 SDValue Op, SelectionDAG &DAG) const {
11691 SDLoc DL(Op);
11692 MVT VT = Op.getSimpleValueType();
11693 SDValue Mag = Op.getOperand(0);
11694 SDValue Sign = Op.getOperand(1);
11695 assert(Mag.getValueType() == Sign.getValueType() &&
11696 "Can only handle COPYSIGN with matching types.");
11697
11698 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11699 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11700 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11701
11702 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11703
11704 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11705 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11706
11707 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11708}
11709
11710SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11711 SDValue Op, SelectionDAG &DAG) const {
11712 MVT VT = Op.getSimpleValueType();
11713 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11714
11715 MVT I1ContainerVT =
11716 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11717
11718 SDValue CC =
11719 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11720 SDValue Op1 =
11721 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11722 SDValue Op2 =
11723 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11724
11725 SDLoc DL(Op);
11726 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11727
11728 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11729 Op2, DAG.getUNDEF(ContainerVT), VL);
11730
11731 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11732}
11733
11734SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11735 SelectionDAG &DAG) const {
11736 unsigned NewOpc = getRISCVVLOp(Op);
11737 bool HasPassthruOp = hasPassthruOp(NewOpc);
11738 bool HasMask = hasMaskOp(NewOpc);
11739
11740 MVT VT = Op.getSimpleValueType();
11741 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11742
11743 // Create list of operands by converting existing ones to scalable types.
11744 SmallVector<SDValue, 6> Ops;
11745 for (const SDValue &V : Op->op_values()) {
11746 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11747
11748 // Pass through non-vector operands.
11749 if (!V.getValueType().isVector()) {
11750 Ops.push_back(V);
11751 continue;
11752 }
11753
11754 // "cast" fixed length vector to a scalable vector.
11755 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11756 "Only fixed length vectors are supported!");
11757 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11758 }
11759
11760 SDLoc DL(Op);
11761 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11762 if (HasPassthruOp)
11763 Ops.push_back(DAG.getUNDEF(ContainerVT));
11764 if (HasMask)
11765 Ops.push_back(Mask);
11766 Ops.push_back(VL);
11767
11768 // StrictFP operations have two result values. Their lowered result should
11769 // have the same result count.
11770 if (Op->isStrictFPOpcode()) {
11771 SDValue ScalableRes =
11772 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11773 Op->getFlags());
11774 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11775 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11776 }
11777
11778 SDValue ScalableRes =
11779 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11780 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11781}
11782
11783// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11784// * Operands of each node are assumed to be in the same order.
11785// * The EVL operand is promoted from i32 to i64 on RV64.
11786// * Fixed-length vectors are converted to their scalable-vector container
11787// types.
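// Sketch of the mapping (hypothetical example, not from the source): a
// vp.add on v4i32 becomes RISCVISD::ADD_VL on its scalable container type,
// with an undef passthru inserted ahead of the converted mask, the EVL
// promoted to XLenVT, and the result converted back to v4i32 at the end.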
11788SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11789 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11790 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11791
11792 SDLoc DL(Op);
11793 MVT VT = Op.getSimpleValueType();
11794 SmallVector<SDValue, 4> Ops;
11795
11796 MVT ContainerVT = VT;
11797 if (VT.isFixedLengthVector())
11798 ContainerVT = getContainerForFixedLengthVector(VT);
11799
11800 for (const auto &OpIdx : enumerate(Op->ops())) {
11801 SDValue V = OpIdx.value();
11802 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11803 // Add dummy passthru value before the mask. Or if there isn't a mask,
11804 // before EVL.
11805 if (HasPassthruOp) {
11806 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11807 if (MaskIdx) {
11808 if (*MaskIdx == OpIdx.index())
11809 Ops.push_back(DAG.getUNDEF(ContainerVT));
11810 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11811 OpIdx.index()) {
11812 if (Op.getOpcode() == ISD::VP_MERGE) {
11813 // For VP_MERGE, copy the false operand instead of an undef value.
11814 Ops.push_back(Ops.back());
11815 } else {
11816 assert(Op.getOpcode() == ISD::VP_SELECT);
11817 // For VP_SELECT, add an undef value.
11818 Ops.push_back(DAG.getUNDEF(ContainerVT));
11819 }
11820 }
11821 }
11822 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11823 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11824 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11825 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11826 Subtarget.getXLenVT()));
11827 // Pass through operands which aren't fixed-length vectors.
11828 if (!V.getValueType().isFixedLengthVector()) {
11829 Ops.push_back(V);
11830 continue;
11831 }
11832 // "cast" fixed length vector to a scalable vector.
11833 MVT OpVT = V.getSimpleValueType();
11834 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11835 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11836 "Only fixed length vectors are supported!");
11837 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11838 }
11839
11840 if (!VT.isFixedLengthVector())
11841 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11842
11843 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11844
11845 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11846}
11847
11848SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11849 SelectionDAG &DAG) const {
11850 SDLoc DL(Op);
11851 MVT VT = Op.getSimpleValueType();
11852
11853 SDValue Src = Op.getOperand(0);
11854 // NOTE: Mask is dropped.
11855 SDValue VL = Op.getOperand(2);
11856
11857 MVT ContainerVT = VT;
11858 if (VT.isFixedLengthVector()) {
11859 ContainerVT = getContainerForFixedLengthVector(VT);
11860 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11861 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11862 }
11863
11864 MVT XLenVT = Subtarget.getXLenVT();
11865 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11866 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11867 DAG.getUNDEF(ContainerVT), Zero, VL);
11868
11869 SDValue SplatValue = DAG.getSignedConstant(
11870 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11871 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11872 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11873
11874 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11875 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11876 if (!VT.isFixedLengthVector())
11877 return Result;
11878 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11879}
11880
11881SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11882 SelectionDAG &DAG) const {
11883 SDLoc DL(Op);
11884 MVT VT = Op.getSimpleValueType();
11885
11886 SDValue Op1 = Op.getOperand(0);
11887 SDValue Op2 = Op.getOperand(1);
11888 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11889 // NOTE: Mask is dropped.
11890 SDValue VL = Op.getOperand(4);
11891
11892 MVT ContainerVT = VT;
11893 if (VT.isFixedLengthVector()) {
11894 ContainerVT = getContainerForFixedLengthVector(VT);
11895 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11896 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11897 }
11898
11899 SDValue Result;
11900 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11901
11902 switch (Condition) {
11903 default:
11904 break;
11905 // X != Y --> (X^Y)
11906 case ISD::SETNE:
11907 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11908 break;
11909 // X == Y --> ~(X^Y)
11910 case ISD::SETEQ: {
11911 SDValue Temp =
11912 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11913 Result =
11914 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11915 break;
11916 }
11917 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11918 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11919 case ISD::SETGT:
11920 case ISD::SETULT: {
11921 SDValue Temp =
11922 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11923 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11924 break;
11925 }
11926 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11927 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11928 case ISD::SETLT:
11929 case ISD::SETUGT: {
11930 SDValue Temp =
11931 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11932 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11933 break;
11934 }
11935 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11936 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11937 case ISD::SETGE:
11938 case ISD::SETULE: {
11939 SDValue Temp =
11940 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11941 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11942 break;
11943 }
11944 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11945 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11946 case ISD::SETLE:
11947 case ISD::SETUGE: {
11948 SDValue Temp =
11949 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11950 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11951 break;
11952 }
11953 }
11954
11955 if (!VT.isFixedLengthVector())
11956 return Result;
11957 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11958}
11959
11960// Lower Floating-Point/Integer Type-Convert VP SDNodes
11961SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11962 SelectionDAG &DAG) const {
11963 SDLoc DL(Op);
11964
11965 SDValue Src = Op.getOperand(0);
11966 SDValue Mask = Op.getOperand(1);
11967 SDValue VL = Op.getOperand(2);
11968 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11969
11970 MVT DstVT = Op.getSimpleValueType();
11971 MVT SrcVT = Src.getSimpleValueType();
11972 if (DstVT.isFixedLengthVector()) {
11973 DstVT = getContainerForFixedLengthVector(DstVT);
11974 SrcVT = getContainerForFixedLengthVector(SrcVT);
11975 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11976 MVT MaskVT = getMaskTypeFor(DstVT);
11977 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11978 }
11979
11980 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11981 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11982
11983 SDValue Result;
11984 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11985 if (SrcVT.isInteger()) {
11986 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11987
11988 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11989 ? RISCVISD::VSEXT_VL
11990 : RISCVISD::VZEXT_VL;
11991
11992 // Do we need to do any pre-widening before converting?
11993 if (SrcEltSize == 1) {
11994 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11995 MVT XLenVT = Subtarget.getXLenVT();
11996 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11997 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11998 DAG.getUNDEF(IntVT), Zero, VL);
11999 SDValue One = DAG.getSignedConstant(
12000 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12001 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12002 DAG.getUNDEF(IntVT), One, VL);
12003 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12004 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12005 } else if (DstEltSize > (2 * SrcEltSize)) {
12006 // Widen before converting.
12007 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12008 DstVT.getVectorElementCount());
12009 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12010 }
12011
12012 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12013 } else {
12014 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12015 "Wrong input/output vector types");
12016
12017 // Convert f16 to f32 then convert f32 to i64.
12018 if (DstEltSize > (2 * SrcEltSize)) {
12019 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12020 MVT InterimFVT =
12021 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12022 Src =
12023 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12024 }
12025
12026 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12027 }
12028 } else { // Narrowing + Conversion
12029 if (SrcVT.isInteger()) {
12030 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12031 // First do a narrowing conversion to an FP type half the size of the
12032 // source, then round the result to a smaller FP type if needed.
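// For example (illustrative only): i64 -> f16 first converts to f32
// elements, then uses FP_ROUND_VL to reach f16.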
12033
12034 MVT InterimFVT = DstVT;
12035 if (SrcEltSize > (2 * DstEltSize)) {
12036 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12037 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12038 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12039 }
12040
12041 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12042
12043 if (InterimFVT != DstVT) {
12044 Src = Result;
12045 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12046 }
12047 } else {
12048 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12049 "Wrong input/output vector types");
12050 // First do a narrowing conversion to an integer half the size, then
12051 // truncate if needed.
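// For example (illustrative only): f64 -> i8 first converts to i32
// elements, then the truncate loop below narrows i32 -> i16 -> i8.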
12052
12053 if (DstEltSize == 1) {
12054 // First convert to the same size integer, then convert to mask using
12055 // setcc.
12056 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12057 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12058 DstVT.getVectorElementCount());
12059 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12060
12061 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12062 // otherwise the conversion was undefined.
12063 MVT XLenVT = Subtarget.getXLenVT();
12064 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12065 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12066 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12067 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12068 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12069 DAG.getUNDEF(DstVT), Mask, VL});
12070 } else {
12071 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12072 DstVT.getVectorElementCount());
12073
12074 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12075
12076 while (InterimIVT != DstVT) {
12077 SrcEltSize /= 2;
12078 Src = Result;
12079 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12080 DstVT.getVectorElementCount());
12081 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12082 Src, Mask, VL);
12083 }
12084 }
12085 }
12086 }
12087
12088 MVT VT = Op.getSimpleValueType();
12089 if (!VT.isFixedLengthVector())
12090 return Result;
12091 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12092}
12093
12094SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12095 SelectionDAG &DAG) const {
12096 SDLoc DL(Op);
12097 MVT VT = Op.getSimpleValueType();
12098 MVT XLenVT = Subtarget.getXLenVT();
12099
12100 SDValue Mask = Op.getOperand(0);
12101 SDValue TrueVal = Op.getOperand(1);
12102 SDValue FalseVal = Op.getOperand(2);
12103 SDValue VL = Op.getOperand(3);
12104
12105 // Use default legalization if a vector of EVL type would be legal.
12106 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12107 VT.getVectorElementCount());
12108 if (isTypeLegal(EVLVecVT))
12109 return SDValue();
12110
12111 MVT ContainerVT = VT;
12112 if (VT.isFixedLengthVector()) {
12113 ContainerVT = getContainerForFixedLengthVector(VT);
12114 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12115 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12116 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12117 }
12118
12119 // Promote to a vector of i8.
12120 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12121
12122 // Promote TrueVal and FalseVal using VLMax.
12123 // FIXME: Is there a better way to do this?
12124 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12125 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12126 DAG.getUNDEF(PromotedVT),
12127 DAG.getConstant(1, DL, XLenVT), VLMax);
12128 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12129 DAG.getUNDEF(PromotedVT),
12130 DAG.getConstant(0, DL, XLenVT), VLMax);
12131 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12132 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12133 // Any element past VL uses FalseVal, so use VLMax
12134 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12135 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12136
12137 // VP_MERGE the two promoted values.
12138 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12139 TrueVal, FalseVal, FalseVal, VL);
12140
12141 // Convert back to mask.
12142 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12143 SDValue Result = DAG.getNode(
12144 RISCVISD::SETCC_VL, DL, ContainerVT,
12145 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12146 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12147
12148 if (VT.isFixedLengthVector())
12149 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12150 return Result;
12151}
12152
12153SDValue
12154RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12155 SelectionDAG &DAG) const {
12156 SDLoc DL(Op);
12157
12158 SDValue Op1 = Op.getOperand(0);
12159 SDValue Op2 = Op.getOperand(1);
12160 SDValue Offset = Op.getOperand(2);
12161 SDValue Mask = Op.getOperand(3);
12162 SDValue EVL1 = Op.getOperand(4);
12163 SDValue EVL2 = Op.getOperand(5);
12164
12165 const MVT XLenVT = Subtarget.getXLenVT();
12166 MVT VT = Op.getSimpleValueType();
12167 MVT ContainerVT = VT;
12168 if (VT.isFixedLengthVector()) {
12169 ContainerVT = getContainerForFixedLengthVector(VT);
12170 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12171 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12172 MVT MaskVT = getMaskTypeFor(ContainerVT);
12173 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12174 }
12175
12176 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12177 if (IsMaskVector) {
12178 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12179
12180 // Expand input operands
12181 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12182 DAG.getUNDEF(ContainerVT),
12183 DAG.getConstant(1, DL, XLenVT), EVL1);
12184 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12185 DAG.getUNDEF(ContainerVT),
12186 DAG.getConstant(0, DL, XLenVT), EVL1);
12187 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12188 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12189
12190 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12191 DAG.getUNDEF(ContainerVT),
12192 DAG.getConstant(1, DL, XLenVT), EVL2);
12193 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12194 DAG.getUNDEF(ContainerVT),
12195 DAG.getConstant(0, DL, XLenVT), EVL2);
12196 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12197 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12198 }
12199
12200 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12201 SDValue DownOffset, UpOffset;
12202 if (ImmValue >= 0) {
12203 // The operand is a TargetConstant; we need to rebuild it as a regular
12204 // constant.
12205 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12206 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12207 } else {
12208 // The operand is a TargetConstant; we need to rebuild it as a regular
12209 // constant rather than negating the original operand.
12210 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12211 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12212 }
12213
12214 SDValue SlideDown =
12215 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12216 Op1, DownOffset, Mask, UpOffset);
12217 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12218 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12219
12220 if (IsMaskVector) {
12221 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12222 Result = DAG.getNode(
12223 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12224 {Result, DAG.getConstant(0, DL, ContainerVT),
12225 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12226 Mask, EVL2});
12227 }
12228
12229 if (!VT.isFixedLengthVector())
12230 return Result;
12231 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12232}
12233
12234SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12235 SelectionDAG &DAG) const {
12236 SDLoc DL(Op);
12237 SDValue Val = Op.getOperand(0);
12238 SDValue Mask = Op.getOperand(1);
12239 SDValue VL = Op.getOperand(2);
12240 MVT VT = Op.getSimpleValueType();
12241
12242 MVT ContainerVT = VT;
12243 if (VT.isFixedLengthVector()) {
12244 ContainerVT = getContainerForFixedLengthVector(VT);
12245 MVT MaskVT = getMaskTypeFor(ContainerVT);
12246 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12247 }
12248
12249 SDValue Result =
12250 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12251
12252 if (!VT.isFixedLengthVector())
12253 return Result;
12254 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12255}
12256
12257SDValue
12258RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12259 SelectionDAG &DAG) const {
12260 SDLoc DL(Op);
12261 MVT VT = Op.getSimpleValueType();
12262 MVT XLenVT = Subtarget.getXLenVT();
12263
12264 SDValue Op1 = Op.getOperand(0);
12265 SDValue Mask = Op.getOperand(1);
12266 SDValue EVL = Op.getOperand(2);
12267
12268 MVT ContainerVT = VT;
12269 if (VT.isFixedLengthVector()) {
12270 ContainerVT = getContainerForFixedLengthVector(VT);
12271 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12272 MVT MaskVT = getMaskTypeFor(ContainerVT);
12273 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12274 }
12275
12276 MVT GatherVT = ContainerVT;
12277 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12278 // Check if we are working with mask vectors
12279 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12280 if (IsMaskVector) {
12281 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12282
12283 // Expand input operand
12284 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12285 DAG.getUNDEF(IndicesVT),
12286 DAG.getConstant(1, DL, XLenVT), EVL);
12287 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12288 DAG.getUNDEF(IndicesVT),
12289 DAG.getConstant(0, DL, XLenVT), EVL);
12290 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12291 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12292 }
12293
12294 unsigned EltSize = GatherVT.getScalarSizeInBits();
12295 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12296 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12297 unsigned MaxVLMAX =
12298 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12299
12300 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12301 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12302 // to use vrgatherei16.vv.
12303 // TODO: It's also possible to use vrgatherei16.vv for other types to
12304 // decrease register width for the index calculation.
12305 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12306 if (MaxVLMAX > 256 && EltSize == 8) {
12307 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12308 // Split the vector in half and reverse each half using a full register
12309 // reverse.
12310 // Swap the halves and concatenate them.
12311 // Slide the concatenated result by (VLMax - VL).
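// Worked example (illustrative only): with VLMAX = 8 and EVL = 6, an input
// {a, b, c, d, e, f, ?, ?} splits into {a, b, c, d} and {e, f, ?, ?}; the
// reversed halves are {d, c, b, a} and {?, ?, f, e}, the swapped concat is
// {?, ?, f, e, d, c, b, a}, and sliding down by VLMAX - EVL = 2 yields
// {f, e, d, c, b, a, ?, ?}.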
12312 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12313 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12314 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12315
12316 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12317 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12318
12319 // Reassemble the low and high pieces reversed.
12320 // NOTE: this Result is unmasked (because we do not need masks for
12321 // shuffles). If in the future this has to change, we can use a SELECT_VL
12322 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12323 SDValue Result =
12324 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12325
12326 // Slide off any elements from past EVL that were reversed into the low
12327 // elements.
12328 unsigned MinElts = GatherVT.getVectorMinNumElements();
12329 SDValue VLMax =
12330 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12331 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12332
12333 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12334 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12335
12336 if (IsMaskVector) {
12337 // Truncate Result back to a mask vector
12338 Result =
12339 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12340 {Result, DAG.getConstant(0, DL, GatherVT),
12341 DAG.getCondCode(ISD::SETNE),
12342 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12343 }
12344
12345 if (!VT.isFixedLengthVector())
12346 return Result;
12347 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12348 }
12349
12350 // Just promote the int type to i16 which will double the LMUL.
12351 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12352 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12353 }
12354
12355 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12356 SDValue VecLen =
12357 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12358 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12359 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12360 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12361 DAG.getUNDEF(IndicesVT), Mask, EVL);
12362 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12363 DAG.getUNDEF(GatherVT), Mask, EVL);
12364
12365 if (IsMaskVector) {
12366 // Truncate Result back to a mask vector
12367 Result = DAG.getNode(
12368 RISCVISD::SETCC_VL, DL, ContainerVT,
12369 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12370 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12371 }
12372
12373 if (!VT.isFixedLengthVector())
12374 return Result;
12375 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12376}
12377
12378SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12379 SelectionDAG &DAG) const {
12380 MVT VT = Op.getSimpleValueType();
12381 if (VT.getVectorElementType() != MVT::i1)
12382 return lowerVPOp(Op, DAG);
12383
12384 // It is safe to drop mask parameter as masked-off elements are undef.
12385 SDValue Op1 = Op->getOperand(0);
12386 SDValue Op2 = Op->getOperand(1);
12387 SDValue VL = Op->getOperand(3);
12388
12389 MVT ContainerVT = VT;
12390 const bool IsFixed = VT.isFixedLengthVector();
12391 if (IsFixed) {
12392 ContainerVT = getContainerForFixedLengthVector(VT);
12393 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12394 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12395 }
12396
12397 SDLoc DL(Op);
12398 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12399 if (!IsFixed)
12400 return Val;
12401 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12402}
12403
12404SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12405 SelectionDAG &DAG) const {
12406 SDLoc DL(Op);
12407 MVT XLenVT = Subtarget.getXLenVT();
12408 MVT VT = Op.getSimpleValueType();
12409 MVT ContainerVT = VT;
12410 if (VT.isFixedLengthVector())
12411 ContainerVT = getContainerForFixedLengthVector(VT);
12412
12413 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12414
12415 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12416 // Check if the mask is known to be all ones
12417 SDValue Mask = VPNode->getMask();
12418 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12419
12420 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12421 : Intrinsic::riscv_vlse_mask,
12422 DL, XLenVT);
12423 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12424 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12425 VPNode->getStride()};
12426 if (!IsUnmasked) {
12427 if (VT.isFixedLengthVector()) {
12428 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12429 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12430 }
12431 Ops.push_back(Mask);
12432 }
12433 Ops.push_back(VPNode->getVectorLength());
12434 if (!IsUnmasked) {
12435 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12436 Ops.push_back(Policy);
12437 }
12438
12439 SDValue Result =
12440 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12441 VPNode->getMemoryVT(), VPNode->getMemOperand());
12442 SDValue Chain = Result.getValue(1);
12443
12444 if (VT.isFixedLengthVector())
12445 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12446
12447 return DAG.getMergeValues({Result, Chain}, DL);
12448}
12449
12450SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12451 SelectionDAG &DAG) const {
12452 SDLoc DL(Op);
12453 MVT XLenVT = Subtarget.getXLenVT();
12454
12455 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12456 SDValue StoreVal = VPNode->getValue();
12457 MVT VT = StoreVal.getSimpleValueType();
12458 MVT ContainerVT = VT;
12459 if (VT.isFixedLengthVector()) {
12460 ContainerVT = getContainerForFixedLengthVector(VT);
12461 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12462 }
12463
12464 // Check if the mask is known to be all ones
12465 SDValue Mask = VPNode->getMask();
12466 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12467
12468 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12469 : Intrinsic::riscv_vsse_mask,
12470 DL, XLenVT);
12471 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12472 VPNode->getBasePtr(), VPNode->getStride()};
12473 if (!IsUnmasked) {
12474 if (VT.isFixedLengthVector()) {
12475 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12476 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12477 }
12478 Ops.push_back(Mask);
12479 }
12480 Ops.push_back(VPNode->getVectorLength());
12481
12482 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12483 Ops, VPNode->getMemoryVT(),
12484 VPNode->getMemOperand());
12485}
12486
12487// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12488// matched to a RVV indexed load. The RVV indexed load instructions only
12489// support the "unsigned unscaled" addressing mode; indices are implicitly
12490// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12491// signed or scaled indexing is extended to the XLEN value type and scaled
12492// accordingly.
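// For example (illustrative only): sign-extended i16 indices with a scale of
// 4 end up widened to XLenVT and multiplied by 4, so that they act as plain
// unsigned byte offsets for vluxei.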
12493SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12494 SelectionDAG &DAG) const {
12495 SDLoc DL(Op);
12496 MVT VT = Op.getSimpleValueType();
12497
12498 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12499 EVT MemVT = MemSD->getMemoryVT();
12500 MachineMemOperand *MMO = MemSD->getMemOperand();
12501 SDValue Chain = MemSD->getChain();
12502 SDValue BasePtr = MemSD->getBasePtr();
12503
12504 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12505 SDValue Index, Mask, PassThru, VL;
12506
12507 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12508 Index = VPGN->getIndex();
12509 Mask = VPGN->getMask();
12510 PassThru = DAG.getUNDEF(VT);
12511 VL = VPGN->getVectorLength();
12512 // VP doesn't support extending loads.
12513 LoadExtType = ISD::NON_EXTLOAD;
12514 } else {
12515 // Else it must be a MGATHER.
12516 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12517 Index = MGN->getIndex();
12518 Mask = MGN->getMask();
12519 PassThru = MGN->getPassThru();
12520 LoadExtType = MGN->getExtensionType();
12521 }
12522
12523 MVT IndexVT = Index.getSimpleValueType();
12524 MVT XLenVT = Subtarget.getXLenVT();
12525
12527 "Unexpected VTs!");
12528 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12529 // Targets have to explicitly opt-in for extending vector loads.
12530 assert(LoadExtType == ISD::NON_EXTLOAD &&
12531 "Unexpected extending MGATHER/VP_GATHER");
12532
12533 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12534 // the selection of the masked intrinsics doesn't do this for us.
12535 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12536
12537 MVT ContainerVT = VT;
12538 if (VT.isFixedLengthVector()) {
12539 ContainerVT = getContainerForFixedLengthVector(VT);
12540 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12541 ContainerVT.getVectorElementCount());
12542
12543 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12544
12545 if (!IsUnmasked) {
12546 MVT MaskVT = getMaskTypeFor(ContainerVT);
12547 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12548 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12549 }
12550 }
12551
12552 if (!VL)
12553 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12554
12555 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12556 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12557 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12558 }
12559
12560 unsigned IntID =
12561 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12562 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12563 if (IsUnmasked)
12564 Ops.push_back(DAG.getUNDEF(ContainerVT));
12565 else
12566 Ops.push_back(PassThru);
12567 Ops.push_back(BasePtr);
12568 Ops.push_back(Index);
12569 if (!IsUnmasked)
12570 Ops.push_back(Mask);
12571 Ops.push_back(VL);
12572 if (!IsUnmasked)
12573 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12574 
12575 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12576 SDValue Result =
12577 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12578 Chain = Result.getValue(1);
12579
12580 if (VT.isFixedLengthVector())
12581 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12582
12583 return DAG.getMergeValues({Result, Chain}, DL);
12584}
12585
12586// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12588 // matched to an RVV indexed store. The RVV indexed store instructions only
12588// support the "unsigned unscaled" addressing mode; indices are implicitly
12589// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12590// signed or scaled indexing is extended to the XLEN value type and scaled
12591// accordingly.
12592SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12593 SelectionDAG &DAG) const {
12594 SDLoc DL(Op);
12595 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12596 EVT MemVT = MemSD->getMemoryVT();
12597 MachineMemOperand *MMO = MemSD->getMemOperand();
12598 SDValue Chain = MemSD->getChain();
12599 SDValue BasePtr = MemSD->getBasePtr();
12600
12601 [[maybe_unused]] bool IsTruncatingStore = false;
12602 SDValue Index, Mask, Val, VL;
12603
12604 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12605 Index = VPSN->getIndex();
12606 Mask = VPSN->getMask();
12607 Val = VPSN->getValue();
12608 VL = VPSN->getVectorLength();
12609 // VP doesn't support truncating stores.
12610 IsTruncatingStore = false;
12611 } else {
12612 // Else it must be a MSCATTER.
12613 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12614 Index = MSN->getIndex();
12615 Mask = MSN->getMask();
12616 Val = MSN->getValue();
12617 IsTruncatingStore = MSN->isTruncatingStore();
12618 }
12619
12620 MVT VT = Val.getSimpleValueType();
12621 MVT IndexVT = Index.getSimpleValueType();
12622 MVT XLenVT = Subtarget.getXLenVT();
12623
12625 "Unexpected VTs!");
12626 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12627 // Targets have to explicitly opt-in for extending vector loads and
12628 // truncating vector stores.
12629 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12630
12631 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12632 // the selection of the masked intrinsics doesn't do this for us.
12633 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12634
12635 MVT ContainerVT = VT;
12636 if (VT.isFixedLengthVector()) {
12637 ContainerVT = getContainerForFixedLengthVector(VT);
12638 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12639 ContainerVT.getVectorElementCount());
12640
12641 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12642 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12643
12644 if (!IsUnmasked) {
12645 MVT MaskVT = getMaskTypeFor(ContainerVT);
12646 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12647 }
12648 }
12649
12650 if (!VL)
12651 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12652
12653 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12654 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12655 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12656 }
12657
12658 unsigned IntID =
12659 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12660 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12661 Ops.push_back(Val);
12662 Ops.push_back(BasePtr);
12663 Ops.push_back(Index);
12664 if (!IsUnmasked)
12665 Ops.push_back(Mask);
12666 Ops.push_back(VL);
12667
12668 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12669 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12670}
12671
12672SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12673 SelectionDAG &DAG) const {
12674 const MVT XLenVT = Subtarget.getXLenVT();
12675 SDLoc DL(Op);
12676 SDValue Chain = Op->getOperand(0);
12677 SDValue SysRegNo = DAG.getTargetConstant(
12678 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12679 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12680 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12681
12682 // Encoding used for rounding mode in RISC-V differs from that used in
12683 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
12684 // into a table consisting of a sequence of 4-bit fields, each representing
12685 // the corresponding FLT_ROUNDS mode.
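// Worked example (illustration): if FRM holds RDN (2), the shift computed
// below is 2 * 4 = 8, so (Table >> 8) & 7 extracts the 4-bit field for RDN,
// which holds the FLT_ROUNDS value for rounding toward negative infinity (3).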
12686 static const int Table =
12687 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12688 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12689 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12690 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12691 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12692 
12693 SDValue Shift =
12694 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12695 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12696 DAG.getConstant(Table, DL, XLenVT), Shift);
12697 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12698 DAG.getConstant(7, DL, XLenVT));
12699
12700 return DAG.getMergeValues({Masked, Chain}, DL);
12701}
12702
12703SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12704 SelectionDAG &DAG) const {
12705 const MVT XLenVT = Subtarget.getXLenVT();
12706 SDLoc DL(Op);
12707 SDValue Chain = Op->getOperand(0);
12708 SDValue RMValue = Op->getOperand(1);
12709 SDValue SysRegNo = DAG.getTargetConstant(
12710 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12711
12712 // Encoding used for rounding mode in RISC-V differs from that used in
12713 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index
12714 // into a table consisting of a sequence of 4-bit fields, each representing
12715 // the corresponding RISC-V mode.
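// Worked example (illustration, reverse of lowerGET_ROUNDING): an incoming
// FLT_ROUNDS value of 3 (downward) selects the 4-bit field at bit position
// 3 * 4 = 12, which holds RISCVFPRndMode::RDN, the value then written to FRM.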
12716 static const unsigned Table =
12717 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12718 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12719 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12720 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12721 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12722 
12723 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12724
12725 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12726 DAG.getConstant(2, DL, XLenVT));
12727 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12728 DAG.getConstant(Table, DL, XLenVT), Shift);
12729 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12730 DAG.getConstant(0x7, DL, XLenVT));
12731 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12732 RMValue);
12733}
12734
12735SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12736 SelectionDAG &DAG) const {
12737 MachineFunction &MF = DAG.getMachineFunction();
12738 
12739 bool isRISCV64 = Subtarget.is64Bit();
12740 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12741
12742 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12743 return DAG.getFrameIndex(FI, PtrVT);
12744}
12745
12746// Returns the opcode of the target-specific SDNode that implements the 32-bit
12747// form of the given Opcode.
12748static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12749 switch (Opcode) {
12750 default:
12751 llvm_unreachable("Unexpected opcode");
12752 case ISD::SHL:
12753 return RISCVISD::SLLW;
12754 case ISD::SRA:
12755 return RISCVISD::SRAW;
12756 case ISD::SRL:
12757 return RISCVISD::SRLW;
12758 case ISD::SDIV:
12759 return RISCVISD::DIVW;
12760 case ISD::UDIV:
12761 return RISCVISD::DIVUW;
12762 case ISD::UREM:
12763 return RISCVISD::REMUW;
12764 case ISD::ROTL:
12765 return RISCVISD::ROLW;
12766 case ISD::ROTR:
12767 return RISCVISD::RORW;
12768 }
12769}
12770
12771// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12772// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12773// otherwise be promoted to i64, making it difficult to select the
12774 // SLLW/DIVUW/.../*W later, because the fact that the operation was originally
12775 // of type i8/i16/i32 is lost.
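// For example, on RV64 an i32 UREM is rewritten here as
//   (trunc i32 (RISCVISD::REMUW (any_extend i64 LHS), (any_extend i64 RHS)))
// so that instruction selection can still pick the *W instruction.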
12776 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12777 unsigned ExtOpc = ISD::ANY_EXTEND) {
12778 SDLoc DL(N);
12779 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12780 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12781 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12782 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12783 // ReplaceNodeResults requires we maintain the same type for the return value.
12784 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12785}
12786
12787 // Converts the given 32-bit operation to an i64 operation with sign-extension
12788 // semantics to reduce the number of sign-extension instructions.
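// For example, an i32 ADD becomes
//   (trunc i32 (sext_inreg (add i64 (any_extend LHS), (any_extend RHS)), i32))
// which selects to ADDW, whose result is already sign-extended to 64 bits.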
12789 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12790 SDLoc DL(N);
12791 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12792 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12793 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12794 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12795 DAG.getValueType(MVT::i32));
12796 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12797}
12798
12799 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12800 SmallVectorImpl<SDValue> &Results,
12801 SelectionDAG &DAG) const {
12802 SDLoc DL(N);
12803 switch (N->getOpcode()) {
12804 default:
12805 llvm_unreachable("Don't know how to custom type legalize this operation!");
12806 case ISD::STRICT_FP_TO_SINT:
12807 case ISD::STRICT_FP_TO_UINT:
12808 case ISD::FP_TO_SINT:
12809 case ISD::FP_TO_UINT: {
12810 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12811 "Unexpected custom legalisation");
12812 bool IsStrict = N->isStrictFPOpcode();
12813 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12814 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12815 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12816 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12817 TargetLowering::TypeSoftenFloat) {
12818 if (!isTypeLegal(Op0.getValueType()))
12819 return;
12820 if (IsStrict) {
12821 SDValue Chain = N->getOperand(0);
12822 // In absence of Zfh, promote f16 to f32, then convert.
12823 if (Op0.getValueType() == MVT::f16 &&
12824 !Subtarget.hasStdExtZfhOrZhinx()) {
12825 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12826 {Chain, Op0});
12827 Chain = Op0.getValue(1);
12828 }
12829 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12830 : RISCVISD::STRICT_FCVT_WU_RV64;
12831 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12832 SDValue Res = DAG.getNode(
12833 Opc, DL, VTs, Chain, Op0,
12834 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12835 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12836 Results.push_back(Res.getValue(1));
12837 return;
12838 }
12839 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
12840 // convert.
12841 if ((Op0.getValueType() == MVT::f16 &&
12842 !Subtarget.hasStdExtZfhOrZhinx()) ||
12843 Op0.getValueType() == MVT::bf16)
12844 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12845
12846 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12847 SDValue Res =
12848 DAG.getNode(Opc, DL, MVT::i64, Op0,
12849 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12850 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12851 return;
12852 }
12853 // If the FP type needs to be softened, emit a library call using the 'si'
12854 // version. If we left it to default legalization we'd end up with 'di'. If
12855 // the FP type doesn't need to be softened just let generic type
12856 // legalization promote the result type.
12857 RTLIB::Libcall LC;
12858 if (IsSigned)
12859 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12860 else
12861 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12862 MakeLibCallOptions CallOptions;
12863 EVT OpVT = Op0.getValueType();
12864 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12865 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12866 SDValue Result;
12867 std::tie(Result, Chain) =
12868 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12869 Results.push_back(Result);
12870 if (IsStrict)
12871 Results.push_back(Chain);
12872 break;
12873 }
12874 case ISD::LROUND: {
12875 SDValue Op0 = N->getOperand(0);
12876 EVT Op0VT = Op0.getValueType();
12877 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12878 TargetLowering::TypeSoftenFloat) {
12879 if (!isTypeLegal(Op0VT))
12880 return;
12881
12882 // In absence of Zfh, promote f16 to f32, then convert.
12883 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12884 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12885
12886 SDValue Res =
12887 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12888 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12889 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12890 return;
12891 }
12892 // If the FP type needs to be softened, emit a library call to lround. We'll
12893 // need to truncate the result. We assume any value that doesn't fit in i32
12894 // is allowed to return an unspecified value.
12895 RTLIB::Libcall LC =
12896 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12897 MakeLibCallOptions CallOptions;
12898 EVT OpVT = Op0.getValueType();
12899 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12900 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12901 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12902 Results.push_back(Result);
12903 break;
12904 }
12905 case ISD::READCYCLECOUNTER:
12906 case ISD::READSTEADYCOUNTER: {
12907 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12908 "has custom type legalization on riscv32");
12909
12910 SDValue LoCounter, HiCounter;
12911 MVT XLenVT = Subtarget.getXLenVT();
12912 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12913 LoCounter = DAG.getTargetConstant(
12914 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12915 HiCounter = DAG.getTargetConstant(
12916 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12917 } else {
12918 LoCounter = DAG.getTargetConstant(
12919 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12920 HiCounter = DAG.getTargetConstant(
12921 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12922 }
12923 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12924 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12925 N->getOperand(0), LoCounter, HiCounter);
12926
12927 Results.push_back(
12928 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12929 Results.push_back(RCW.getValue(2));
12930 break;
12931 }
12932 case ISD::LOAD: {
12933 if (!ISD::isNON_EXTLoad(N))
12934 return;
12935
12936 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12937 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12938 LoadSDNode *Ld = cast<LoadSDNode>(N);
12939
12940 SDLoc dl(N);
12941 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12942 Ld->getBasePtr(), Ld->getMemoryVT(),
12943 Ld->getMemOperand());
12944 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12945 Results.push_back(Res.getValue(1));
12946 return;
12947 }
12948 case ISD::MUL: {
12949 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12950 unsigned XLen = Subtarget.getXLen();
12951 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12952 if (Size > XLen) {
12953 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12954 SDValue LHS = N->getOperand(0);
12955 SDValue RHS = N->getOperand(1);
12956 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12957
12958 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12959 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12960 // We need exactly one side to be unsigned.
12961 if (LHSIsU == RHSIsU)
12962 return;
12963
12964 auto MakeMULPair = [&](SDValue S, SDValue U) {
12965 MVT XLenVT = Subtarget.getXLenVT();
12966 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12967 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12968 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12969 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12970 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12971 };
12972
12973 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12974 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12975
12976 // The other operand should be signed, but still prefer MULH when
12977 // possible.
12978 if (RHSIsU && LHSIsS && !RHSIsS)
12979 Results.push_back(MakeMULPair(LHS, RHS));
12980 else if (LHSIsU && RHSIsS && !LHSIsS)
12981 Results.push_back(MakeMULPair(RHS, LHS));
12982
12983 return;
12984 }
12985 [[fallthrough]];
12986 }
12987 case ISD::ADD:
12988 case ISD::SUB:
12989 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12990 "Unexpected custom legalisation");
12991 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12992 break;
12993 case ISD::SHL:
12994 case ISD::SRA:
12995 case ISD::SRL:
12996 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12997 "Unexpected custom legalisation");
12998 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12999 // If we can use a BSET instruction, allow default promotion to apply.
13000 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13001 isOneConstant(N->getOperand(0)))
13002 break;
13003 Results.push_back(customLegalizeToWOp(N, DAG));
13004 break;
13005 }
13006
13007 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13008 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13009 // shift amount.
13010 if (N->getOpcode() == ISD::SHL) {
13011 SDLoc DL(N);
13012 SDValue NewOp0 =
13013 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13014 SDValue NewOp1 =
13015 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13016 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13017 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13018 DAG.getValueType(MVT::i32));
13019 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13020 }
13021
13022 break;
13023 case ISD::ROTL:
13024 case ISD::ROTR:
13025 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13026 "Unexpected custom legalisation");
13027 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13028 Subtarget.hasVendorXTHeadBb()) &&
13029 "Unexpected custom legalization");
13030 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13031 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13032 return;
13033 Results.push_back(customLegalizeToWOp(N, DAG));
13034 break;
13035 case ISD::CTTZ:
13036 case ISD::CTTZ_ZERO_UNDEF:
13037 case ISD::CTLZ:
13038 case ISD::CTLZ_ZERO_UNDEF: {
13039 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13040 "Unexpected custom legalisation");
13041
13042 SDValue NewOp0 =
13043 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13044 bool IsCTZ =
13045 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13046 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13047 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13048 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13049 return;
13050 }
13051 case ISD::SDIV:
13052 case ISD::UDIV:
13053 case ISD::UREM: {
13054 MVT VT = N->getSimpleValueType(0);
13055 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13056 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13057 "Unexpected custom legalisation");
13058 // Don't promote division/remainder by constant since we should expand those
13059 // to multiply by magic constant.
13060 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13061 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13062 !isIntDivCheap(N->getValueType(0), Attr))
13063 return;
13064
13065 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13066 // the upper 32 bits. For other types we need to sign or zero extend
13067 // based on the opcode.
13068 unsigned ExtOpc = ISD::ANY_EXTEND;
13069 if (VT != MVT::i32)
13070 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13071 : ISD::ZERO_EXTEND;
13072 
13073 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13074 break;
13075 }
13076 case ISD::SADDO: {
13077 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13078 "Unexpected custom legalisation");
13079
13080 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13081 // use the default legalization.
13082 if (!isa<ConstantSDNode>(N->getOperand(1)))
13083 return;
13084
13085 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13086 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13087 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13088 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13089 DAG.getValueType(MVT::i32));
13090
13091 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13092
13093 // For an addition, the result should be less than one of the operands (LHS)
13094 // if and only if the other operand (RHS) is negative, otherwise there will
13095 // be overflow.
13096 // For a subtraction, the result should be less than one of the operands
13097 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13098 // otherwise there will be overflow.
13099 EVT OType = N->getValueType(1);
13100 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13101 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13102
13103 SDValue Overflow =
13104 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13105 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13106 Results.push_back(Overflow);
13107 return;
13108 }
13109 case ISD::UADDO:
13110 case ISD::USUBO: {
13111 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13112 "Unexpected custom legalisation");
13113 bool IsAdd = N->getOpcode() == ISD::UADDO;
13114 // Create an ADDW or SUBW.
13115 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13116 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13117 SDValue Res =
13118 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13119 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13120 DAG.getValueType(MVT::i32));
13121
13122 SDValue Overflow;
13123 if (IsAdd && isOneConstant(RHS)) {
13124 // Special case uaddo X, 1 overflowed if the addition result is 0.
13125 // The general case (X + C) < C is not necessarily beneficial. Although we
13126 // reduce the live range of X, we may introduce the materialization of
13127 // constant C, especially when the setcc result is used by a branch, since
13128 // we have no compare-with-constant branch instructions.
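// For example, when the overflow result of (uaddo X, 1) feeds a branch,
// checking "ADDW result == 0" can select to beqz, whereas the generic
// (X + 1) < 1 form would need the constant in a register because RISC-V has
// no compare-with-immediate branch. (Illustrative note.)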
13129 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13130 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13131 } else if (IsAdd && isAllOnesConstant(RHS)) {
13132 // Special case uaddo X, -1 overflowed if X != 0.
13133 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13134 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13135 } else {
13136 // Sign extend the LHS and perform an unsigned compare with the ADDW
13137 // result. Since the inputs are sign extended from i32, this is equivalent
13138 // to comparing the lower 32 bits.
13139 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13140 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13141 IsAdd ? ISD::SETULT : ISD::SETUGT);
13142 }
13143
13144 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13145 Results.push_back(Overflow);
13146 return;
13147 }
13148 case ISD::UADDSAT:
13149 case ISD::USUBSAT: {
13150 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13151 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13152 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13153 // promotion for UADDO/USUBO.
13154 Results.push_back(expandAddSubSat(N, DAG));
13155 return;
13156 }
13157 case ISD::SADDSAT:
13158 case ISD::SSUBSAT: {
13159 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13160 "Unexpected custom legalisation");
13161 Results.push_back(expandAddSubSat(N, DAG));
13162 return;
13163 }
13164 case ISD::ABS: {
13165 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13166 "Unexpected custom legalisation");
13167
13168 if (Subtarget.hasStdExtZbb()) {
13169 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13170 // This allows us to remember that the result is sign extended. Expanding
13171 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13172 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13173 N->getOperand(0));
13174 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13175 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13176 return;
13177 }
13178
13179 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
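// Worked example for X = -5: Y = -5 >> 31 = -1 (all ones), xor(-5, -1) = 4,
// and 4 - (-1) = 5 = |X|. For non-negative X, Y = 0 and the result is X.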
13180 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13181
13182 // Freeze the source so we can increase its use count.
13183 Src = DAG.getFreeze(Src);
13184
13185 // Copy sign bit to all bits using the sraiw pattern.
13186 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13187 DAG.getValueType(MVT::i32));
13188 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13189 DAG.getConstant(31, DL, MVT::i64));
13190
13191 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13192 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13193
13194 // NOTE: The result is only required to be anyextended, but sext is
13195 // consistent with type legalization of sub.
13196 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13197 DAG.getValueType(MVT::i32));
13198 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13199 return;
13200 }
13201 case ISD::BITCAST: {
13202 EVT VT = N->getValueType(0);
13203 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13204 SDValue Op0 = N->getOperand(0);
13205 EVT Op0VT = Op0.getValueType();
13206 MVT XLenVT = Subtarget.getXLenVT();
13207 if (VT == MVT::i16 &&
13208 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13209 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13210 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13211 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13212 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13213 Subtarget.hasStdExtFOrZfinx()) {
13214 SDValue FPConv =
13215 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13216 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13217 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13218 Subtarget.hasStdExtDOrZdinx()) {
13219 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13220 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13221 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13222 NewReg.getValue(0), NewReg.getValue(1));
13223 Results.push_back(RetReg);
13224 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13225 isTypeLegal(Op0VT)) {
13226 // Custom-legalize bitcasts from fixed-length vector types to illegal
13227 // scalar types in order to improve codegen. Bitcast the vector to a
13228 // one-element vector type whose element type is the same as the result
13229 // type, and extract the first element.
13230 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13231 if (isTypeLegal(BVT)) {
13232 SDValue BVec = DAG.getBitcast(BVT, Op0);
13233 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13234 DAG.getVectorIdxConstant(0, DL)));
13235 }
13236 }
13237 break;
13238 }
13239 case RISCVISD::BREV8:
13240 case RISCVISD::ORC_B: {
13241 MVT VT = N->getSimpleValueType(0);
13242 MVT XLenVT = Subtarget.getXLenVT();
13243 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13244 "Unexpected custom legalisation");
13245 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13246 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13247 "Unexpected extension");
13248 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13249 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13250 // ReplaceNodeResults requires we maintain the same type for the return
13251 // value.
13252 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13253 break;
13254 }
13255 case ISD::EXTRACT_VECTOR_ELT: {
13256 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13257 // type is illegal (currently only vXi64 RV32).
13258 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13259 // transferred to the destination register. We issue two of these from the
13260 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13261 // first element.
13262 SDValue Vec = N->getOperand(0);
13263 SDValue Idx = N->getOperand(1);
13264
13265 // The vector type hasn't been legalized yet so we can't issue target
13266 // specific nodes if it needs legalization.
13267 // FIXME: We would manually legalize if it's important.
13268 if (!isTypeLegal(Vec.getValueType()))
13269 return;
13270
13271 MVT VecVT = Vec.getSimpleValueType();
13272
13273 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13274 VecVT.getVectorElementType() == MVT::i64 &&
13275 "Unexpected EXTRACT_VECTOR_ELT legalization");
13276
13277 // If this is a fixed vector, we need to convert it to a scalable vector.
13278 MVT ContainerVT = VecVT;
13279 if (VecVT.isFixedLengthVector()) {
13280 ContainerVT = getContainerForFixedLengthVector(VecVT);
13281 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13282 }
13283
13284 MVT XLenVT = Subtarget.getXLenVT();
13285
13286 // Use a VL of 1 to avoid processing more elements than we need.
13287 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13288
13289 // Unless the index is known to be 0, we must slide the vector down to get
13290 // the desired element into index 0.
13291 if (!isNullConstant(Idx)) {
13292 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13293 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13294 }
13295
13296 // Extract the lower XLEN bits of the correct vector element.
13297 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13298
13299 // To extract the upper XLEN bits of the vector element, shift the first
13300 // element right by 32 bits and re-extract the lower XLEN bits.
13301 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13302 DAG.getUNDEF(ContainerVT),
13303 DAG.getConstant(32, DL, XLenVT), VL);
13304 SDValue LShr32 =
13305 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13306 DAG.getUNDEF(ContainerVT), Mask, VL);
13307
13308 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13309
13310 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13311 break;
13312 }
13313 case ISD::INTRINSIC_WO_CHAIN: {
13314 unsigned IntNo = N->getConstantOperandVal(0);
13315 switch (IntNo) {
13316 default:
13318 "Don't know how to custom type legalize this intrinsic!");
13319 case Intrinsic::experimental_get_vector_length: {
13320 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13321 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13322 return;
13323 }
13324 case Intrinsic::experimental_cttz_elts: {
13325 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13326 Results.push_back(
13327 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13328 return;
13329 }
13330 case Intrinsic::riscv_orc_b:
13331 case Intrinsic::riscv_brev8:
13332 case Intrinsic::riscv_sha256sig0:
13333 case Intrinsic::riscv_sha256sig1:
13334 case Intrinsic::riscv_sha256sum0:
13335 case Intrinsic::riscv_sha256sum1:
13336 case Intrinsic::riscv_sm3p0:
13337 case Intrinsic::riscv_sm3p1: {
13338 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13339 return;
13340 unsigned Opc;
13341 switch (IntNo) {
13342 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13343 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13344 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13345 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13346 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13347 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13348 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13349 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13350 }
13351
13352 SDValue NewOp =
13353 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13354 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13355 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13356 return;
13357 }
13358 case Intrinsic::riscv_sm4ks:
13359 case Intrinsic::riscv_sm4ed: {
13360 unsigned Opc =
13361 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13362 SDValue NewOp0 =
13363 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13364 SDValue NewOp1 =
13365 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13366 SDValue Res =
13367 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13368 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13369 return;
13370 }
13371 case Intrinsic::riscv_mopr: {
13372 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13373 return;
13374 SDValue NewOp =
13375 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13376 SDValue Res = DAG.getNode(
13377 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13378 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13379 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13380 return;
13381 }
13382 case Intrinsic::riscv_moprr: {
13383 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13384 return;
13385 SDValue NewOp0 =
13386 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13387 SDValue NewOp1 =
13388 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13389 SDValue Res = DAG.getNode(
13390 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13391 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13392 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13393 return;
13394 }
13395 case Intrinsic::riscv_clmul: {
13396 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13397 return;
13398
13399 SDValue NewOp0 =
13400 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13401 SDValue NewOp1 =
13402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13403 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13404 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13405 return;
13406 }
13407 case Intrinsic::riscv_clmulh:
13408 case Intrinsic::riscv_clmulr: {
13409 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13410 return;
13411
13412 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13413 // to the full 128-bit clmul result of multiplying two xlen values.
13414 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13415 // upper 32 bits.
13416 //
13417 // The alternative is to mask the inputs to 32 bits and use clmul, but
13418 // that requires two shifts to mask each input without zext.w.
13419 // FIXME: If the inputs are known zero extended or could be freely
13420 // zero extended, the mask form would be better.
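// For example, for clmulh: clmul(a << 32, b << 32) == clmul(a, b) << 64, so
// the 64-bit CLMULH result is the low 64 bits of clmul(a, b), and shifting
// that right by 32 leaves bits [32..63], exactly the 32-bit clmulh of a and b.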
13421 SDValue NewOp0 =
13422 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13423 SDValue NewOp1 =
13424 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13425 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13426 DAG.getConstant(32, DL, MVT::i64));
13427 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13428 DAG.getConstant(32, DL, MVT::i64));
13429 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13430 : RISCVISD::CLMULR;
13431 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13432 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13433 DAG.getConstant(32, DL, MVT::i64));
13434 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13435 return;
13436 }
13437 case Intrinsic::riscv_vmv_x_s: {
13438 EVT VT = N->getValueType(0);
13439 MVT XLenVT = Subtarget.getXLenVT();
13440 if (VT.bitsLT(XLenVT)) {
13441 // Simple case just extract using vmv.x.s and truncate.
13442 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13443 Subtarget.getXLenVT(), N->getOperand(1));
13444 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13445 return;
13446 }
13447
13448 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13449 "Unexpected custom legalization");
13450
13451 // We need to do the move in two steps.
13452 SDValue Vec = N->getOperand(1);
13453 MVT VecVT = Vec.getSimpleValueType();
13454
13455 // First extract the lower XLEN bits of the element.
13456 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13457
13458 // To extract the upper XLEN bits of the vector element, shift the first
13459 // element right by 32 bits and re-extract the lower XLEN bits.
13460 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13461
13462 SDValue ThirtyTwoV =
13463 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13464 DAG.getConstant(32, DL, XLenVT), VL);
13465 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13466 DAG.getUNDEF(VecVT), Mask, VL);
13467 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13468
13469 Results.push_back(
13470 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13471 break;
13472 }
13473 }
13474 break;
13475 }
13476 case ISD::VECREDUCE_ADD:
13477 case ISD::VECREDUCE_AND:
13478 case ISD::VECREDUCE_OR:
13479 case ISD::VECREDUCE_XOR:
13480 case ISD::VECREDUCE_SMAX:
13481 case ISD::VECREDUCE_UMAX:
13482 case ISD::VECREDUCE_SMIN:
13483 case ISD::VECREDUCE_UMIN:
13484 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13485 Results.push_back(V);
13486 break;
13487 case ISD::VP_REDUCE_ADD:
13488 case ISD::VP_REDUCE_AND:
13489 case ISD::VP_REDUCE_OR:
13490 case ISD::VP_REDUCE_XOR:
13491 case ISD::VP_REDUCE_SMAX:
13492 case ISD::VP_REDUCE_UMAX:
13493 case ISD::VP_REDUCE_SMIN:
13494 case ISD::VP_REDUCE_UMIN:
13495 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13496 Results.push_back(V);
13497 break;
13498 case ISD::GET_ROUNDING: {
13499 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13500 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13501 Results.push_back(Res.getValue(0));
13502 Results.push_back(Res.getValue(1));
13503 break;
13504 }
13505 }
13506}
13507
13508/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13509/// which corresponds to it.
13510static unsigned getVecReduceOpcode(unsigned Opc) {
13511 switch (Opc) {
13512 default:
13513 llvm_unreachable("Unhandled binary to transform reduction");
13514 case ISD::ADD:
13515 return ISD::VECREDUCE_ADD;
13516 case ISD::UMAX:
13517 return ISD::VECREDUCE_UMAX;
13518 case ISD::SMAX:
13519 return ISD::VECREDUCE_SMAX;
13520 case ISD::UMIN:
13521 return ISD::VECREDUCE_UMIN;
13522 case ISD::SMIN:
13523 return ISD::VECREDUCE_SMIN;
13524 case ISD::AND:
13525 return ISD::VECREDUCE_AND;
13526 case ISD::OR:
13527 return ISD::VECREDUCE_OR;
13528 case ISD::XOR:
13529 return ISD::VECREDUCE_XOR;
13530 case ISD::FADD:
13531 // Note: This is the associative form of the generic reduction opcode.
13532 return ISD::VECREDUCE_FADD;
13533 }
13534}
13535
13536/// Perform two related transforms whose purpose is to incrementally recognize
13537/// an explode_vector followed by scalar reduction as a vector reduction node.
13538/// This exists to recover from a deficiency in SLP which can't handle
13539/// forests with multiple roots sharing common nodes. In some cases, one
13540/// of the trees will be vectorized, and the other will remain (unprofitably)
13541/// scalarized.
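/// For example, (add (add (extractelt v, 0), (extractelt v, 1)),
/// (extractelt v, 2)) is recognized in two steps: the inner add becomes a
/// VECREDUCE_ADD over the first two lanes, and the outer add then widens that
/// reduction to cover the first three lanes. (Illustration.)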
13542static SDValue
13543 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13544 const RISCVSubtarget &Subtarget) {
13545
13546 // This transform needs to run before all integer types have been legalized
13547 // to i64 (so that the vector element type matches the add type), and while
13548 // it's safe to introduce odd sized vector types.
13549 if (DAG.NewNodesMustHaveLegalTypes)
13550 return SDValue();
13551
13552 // Without V, this transform isn't useful. We could form the (illegal)
13553 // operations and let them be scalarized again, but there's really no point.
13554 if (!Subtarget.hasVInstructions())
13555 return SDValue();
13556
13557 const SDLoc DL(N);
13558 const EVT VT = N->getValueType(0);
13559 const unsigned Opc = N->getOpcode();
13560
13561 // For FADD, we only handle the case with reassociation allowed. We
13562 // could handle strict reduction order, but at the moment, there's no
13563 // known reason to, and the complexity isn't worth it.
13564 // TODO: Handle fminnum and fmaxnum here
13565 if (!VT.isInteger() &&
13566 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13567 return SDValue();
13568
13569 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13570 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13571 "Inconsistent mappings");
13572 SDValue LHS = N->getOperand(0);
13573 SDValue RHS = N->getOperand(1);
13574
13575 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13576 return SDValue();
13577
13578 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13579 std::swap(LHS, RHS);
13580
13581 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13582 !isa<ConstantSDNode>(RHS.getOperand(1)))
13583 return SDValue();
13584
13585 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13586 SDValue SrcVec = RHS.getOperand(0);
13587 EVT SrcVecVT = SrcVec.getValueType();
13588 assert(SrcVecVT.getVectorElementType() == VT);
13589 if (SrcVecVT.isScalableVector())
13590 return SDValue();
13591
13592 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13593 return SDValue();
13594
13595 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13596 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13597 // root of our reduction tree. TODO: We could extend this to any two
13598 // adjacent aligned constant indices if desired.
13599 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13600 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13601 uint64_t LHSIdx =
13602 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13603 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13604 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13605 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13606 DAG.getVectorIdxConstant(0, DL));
13607 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13608 }
13609 }
13610
13611 // Match (binop (reduce (extract_subvector V, 0),
13612 // (extract_vector_elt V, sizeof(SubVec))))
13613 // into a reduction of one more element from the original vector V.
13614 if (LHS.getOpcode() != ReduceOpc)
13615 return SDValue();
13616
13617 SDValue ReduceVec = LHS.getOperand(0);
13618 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13619 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13620 isNullConstant(ReduceVec.getOperand(1)) &&
13621 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13622 // For illegal types (e.g. 3xi32), most will be combined again into a
13623 // wider (hopefully legal) type. If this is a terminal state, we are
13624 // relying on type legalization here to produce something reasonable
13625 // and this lowering quality could probably be improved. (TODO)
13626 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13627 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13628 DAG.getVectorIdxConstant(0, DL));
13629 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13630 ReduceVec->getFlags() & N->getFlags());
13631 }
13632
13633 return SDValue();
13634}
13635
13636
13637// Try to fold (<bop> x, (reduction.<bop> vec, start))
13638 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13639 const RISCVSubtarget &Subtarget) {
13640 auto BinOpToRVVReduce = [](unsigned Opc) {
13641 switch (Opc) {
13642 default:
13643 llvm_unreachable("Unhandled binary to transform reduction");
13644 case ISD::ADD:
13645 return RISCVISD::VECREDUCE_ADD_VL;
13646 case ISD::UMAX:
13647 return RISCVISD::VECREDUCE_UMAX_VL;
13648 case ISD::SMAX:
13649 return RISCVISD::VECREDUCE_SMAX_VL;
13650 case ISD::UMIN:
13651 return RISCVISD::VECREDUCE_UMIN_VL;
13652 case ISD::SMIN:
13653 return RISCVISD::VECREDUCE_SMIN_VL;
13654 case ISD::AND:
13655 return RISCVISD::VECREDUCE_AND_VL;
13656 case ISD::OR:
13657 return RISCVISD::VECREDUCE_OR_VL;
13658 case ISD::XOR:
13659 return RISCVISD::VECREDUCE_XOR_VL;
13660 case ISD::FADD:
13661 return RISCVISD::VECREDUCE_FADD_VL;
13662 case ISD::FMAXNUM:
13663 return RISCVISD::VECREDUCE_FMAX_VL;
13664 case ISD::FMINNUM:
13665 return RISCVISD::VECREDUCE_FMIN_VL;
13666 }
13667 };
13668
13669 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13670 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13671 isNullConstant(V.getOperand(1)) &&
13672 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13673 };
13674
13675 unsigned Opc = N->getOpcode();
13676 unsigned ReduceIdx;
13677 if (IsReduction(N->getOperand(0), Opc))
13678 ReduceIdx = 0;
13679 else if (IsReduction(N->getOperand(1), Opc))
13680 ReduceIdx = 1;
13681 else
13682 return SDValue();
13683
13684 // Skip if FADD disallows reassociation but the combiner needs it.
13685 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13686 return SDValue();
13687
13688 SDValue Extract = N->getOperand(ReduceIdx);
13689 SDValue Reduce = Extract.getOperand(0);
13690 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13691 return SDValue();
13692
13693 SDValue ScalarV = Reduce.getOperand(2);
13694 EVT ScalarVT = ScalarV.getValueType();
13695 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13696 ScalarV.getOperand(0)->isUndef() &&
13697 isNullConstant(ScalarV.getOperand(2)))
13698 ScalarV = ScalarV.getOperand(1);
13699
13700 // Make sure that ScalarV is a splat with VL=1.
13701 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13702 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13703 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13704 return SDValue();
13705
13706 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13707 return SDValue();
13708
13709 // Check the scalar of ScalarV is neutral element
13710 // TODO: Deal with value other than neutral element.
13711 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13712 0))
13713 return SDValue();
13714
13715 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13716 // FIXME: We might be able to improve this if operand 0 is undef.
13717 if (!isNonZeroAVL(Reduce.getOperand(5)))
13718 return SDValue();
13719
13720 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13721
13722 SDLoc DL(N);
13723 SDValue NewScalarV =
13724 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13725 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13726
13727 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13728 if (ScalarVT != ScalarV.getValueType())
13729 NewScalarV =
13730 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13731 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13732
13733 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13734 NewScalarV, Reduce.getOperand(3),
13735 Reduce.getOperand(4), Reduce.getOperand(5)};
13736 SDValue NewReduce =
13737 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13738 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13739 Extract.getOperand(1));
13740}
13741
13742// Optimize (add (shl x, c0), (shl y, c1)) ->
13743// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
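// For example, with Zba, (add (shl x, 5), (shl y, 6)) becomes
// (shl (sh1add y, x), 5): Diff = 1 selects SH1ADD, and the common shift
// amount of 5 is re-applied with a single SLLI.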
13744 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13745 const RISCVSubtarget &Subtarget) {
13746 // Perform this optimization only in the zba extension.
13747 if (!Subtarget.hasStdExtZba())
13748 return SDValue();
13749
13750 // Skip for vector types and larger types.
13751 EVT VT = N->getValueType(0);
13752 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13753 return SDValue();
13754
13755 // The two operand nodes must be SHL and have no other use.
13756 SDValue N0 = N->getOperand(0);
13757 SDValue N1 = N->getOperand(1);
13758 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13759 !N0->hasOneUse() || !N1->hasOneUse())
13760 return SDValue();
13761
13762 // Check c0 and c1.
13763 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13764 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13765 if (!N0C || !N1C)
13766 return SDValue();
13767 int64_t C0 = N0C->getSExtValue();
13768 int64_t C1 = N1C->getSExtValue();
13769 if (C0 <= 0 || C1 <= 0)
13770 return SDValue();
13771
13772 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13773 int64_t Bits = std::min(C0, C1);
13774 int64_t Diff = std::abs(C0 - C1);
13775 if (Diff != 1 && Diff != 2 && Diff != 3)
13776 return SDValue();
13777
13778 // Build nodes.
13779 SDLoc DL(N);
13780 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13781 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13782 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13783 DAG.getConstant(Diff, DL, VT), NS);
13784 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13785}
13786
13787// Combine a constant select operand into its use:
13788//
13789// (and (select cond, -1, c), x)
13790// -> (select cond, x, (and x, c)) [AllOnes=1]
13791// (or (select cond, 0, c), x)
13792// -> (select cond, x, (or x, c)) [AllOnes=0]
13793// (xor (select cond, 0, c), x)
13794// -> (select cond, x, (xor x, c)) [AllOnes=0]
13795// (add (select cond, 0, c), x)
13796// -> (select cond, x, (add x, c)) [AllOnes=0]
13797// (sub x, (select cond, 0, c))
13798// -> (select cond, x, (sub x, c)) [AllOnes=0]
13799 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13800 SelectionDAG &DAG, bool AllOnes,
13801 const RISCVSubtarget &Subtarget) {
13802 EVT VT = N->getValueType(0);
13803
13804 // Skip vectors.
13805 if (VT.isVector())
13806 return SDValue();
13807
13808 if (!Subtarget.hasConditionalMoveFusion()) {
13809 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13810 if ((!Subtarget.hasStdExtZicond() &&
13811 !Subtarget.hasVendorXVentanaCondOps()) ||
13812 N->getOpcode() != ISD::AND)
13813 return SDValue();
13814
13815 // Maybe harmful when condition code has multiple use.
13816 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13817 return SDValue();
13818
13819 // Maybe harmful when VT is wider than XLen.
13820 if (VT.getSizeInBits() > Subtarget.getXLen())
13821 return SDValue();
13822 }
13823
13824 if ((Slct.getOpcode() != ISD::SELECT &&
13825 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13826 !Slct.hasOneUse())
13827 return SDValue();
13828
13829 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13830 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13831 };
13832
13833 bool SwapSelectOps;
13834 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13835 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13836 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13837 SDValue NonConstantVal;
13838 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13839 SwapSelectOps = false;
13840 NonConstantVal = FalseVal;
13841 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13842 SwapSelectOps = true;
13843 NonConstantVal = TrueVal;
13844 } else
13845 return SDValue();
13846
13847 // Slct is now known to be the desired identity constant when CC is true.
13848 TrueVal = OtherOp;
13849 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13850 // Unless SwapSelectOps says the condition should be false.
13851 if (SwapSelectOps)
13852 std::swap(TrueVal, FalseVal);
13853
13854 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13855 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13856 {Slct.getOperand(0), Slct.getOperand(1),
13857 Slct.getOperand(2), TrueVal, FalseVal});
13858
13859 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13860 {Slct.getOperand(0), TrueVal, FalseVal});
13861}
13862
13863// Attempt combineSelectAndUse on each operand of a commutative operator N.
13864 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13865 bool AllOnes,
13866 const RISCVSubtarget &Subtarget) {
13867 SDValue N0 = N->getOperand(0);
13868 SDValue N1 = N->getOperand(1);
13869 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13870 return Result;
13871 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13872 return Result;
13873 return SDValue();
13874}
13875
13876// Transform (add (mul x, c0), c1) ->
13877// (add (mul (add x, c1/c0), c0), c1%c0).
13878// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13879// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13880// to an infinite loop in DAGCombine if transformed.
13881// Or transform (add (mul x, c0), c1) ->
13882// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13883// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13884// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13885// lead to an infinite loop in DAGCombine if transformed.
13886// Or transform (add (mul x, c0), c1) ->
13887// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13888// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13889// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13890// lead to an infinite loop in DAGCombine if transformed.
13891// Or transform (add (mul x, c0), c1) ->
13892// (mul (add x, c1/c0), c0).
13893// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
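// Worked example: (add (mul x, 100), 4099). 4099 is not simm12, but
// 4099 / 100 = 40 and 4099 % 100 = 99 both are, and 100 * 40 = 4000 is not
// simm12, so the first form applies: (add (mul (add x, 40), 100), 99).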
13894 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13895 const RISCVSubtarget &Subtarget) {
13896 // Skip for vector types and larger types.
13897 EVT VT = N->getValueType(0);
13898 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13899 return SDValue();
13900 // The first operand node must be a MUL and has no other use.
13901 SDValue N0 = N->getOperand(0);
13902 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13903 return SDValue();
13904 // Check if c0 and c1 match above conditions.
13905 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13906 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13907 if (!N0C || !N1C)
13908 return SDValue();
13909 // If N0C has multiple uses it's possible one of the cases in
13910 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13911 // in an infinite loop.
13912 if (!N0C->hasOneUse())
13913 return SDValue();
13914 int64_t C0 = N0C->getSExtValue();
13915 int64_t C1 = N1C->getSExtValue();
13916 int64_t CA, CB;
13917 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13918 return SDValue();
13919 // Search for proper CA (non-zero) and CB that both are simm12.
13920 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13921 !isInt<12>(C0 * (C1 / C0))) {
13922 CA = C1 / C0;
13923 CB = C1 % C0;
13924 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13925 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13926 CA = C1 / C0 + 1;
13927 CB = C1 % C0 - C0;
13928 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13929 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13930 CA = C1 / C0 - 1;
13931 CB = C1 % C0 + C0;
13932 } else
13933 return SDValue();
13934 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13935 SDLoc DL(N);
13936 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13937 DAG.getSignedConstant(CA, DL, VT));
13938 SDValue New1 =
13939 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
13940 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
13941}
13942
13943// add (zext, zext) -> zext (add (zext, zext))
13944// sub (zext, zext) -> sext (sub (zext, zext))
13945// mul (zext, zext) -> zext (mul (zext, zext))
13946// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13947// udiv (zext, zext) -> zext (udiv (zext, zext))
13948// srem (zext, zext) -> zext (srem (zext, zext))
13949// urem (zext, zext) -> zext (urem (zext, zext))
13950//
13951 // where the sum of the extend widths matches, and the range of the bin op
13952// fits inside the width of the narrower bin op. (For profitability on rvv, we
13953// use a power of two for both inner and outer extend.)
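// For example, (add (zext nxv4i8 a), (zext nxv4i8 b)) producing nxv4i32
// becomes (zext (add (zext a to nxv4i16), (zext b to nxv4i16)) to nxv4i32):
// an 8-bit + 8-bit sum always fits in 16 bits, and the narrower add can run
// at a smaller LMUL. (Illustration.)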
13954 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13955 
13956 EVT VT = N->getValueType(0);
13957 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13958 return SDValue();
13959
13960 SDValue N0 = N->getOperand(0);
13961 SDValue N1 = N->getOperand(1);
13962 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13963 return SDValue();
13964 if (!N0.hasOneUse() || !N1.hasOneUse())
13965 return SDValue();
13966
13967 SDValue Src0 = N0.getOperand(0);
13968 SDValue Src1 = N1.getOperand(0);
13969 EVT SrcVT = Src0.getValueType();
13970 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13971 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13972 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13973 return SDValue();
13974
13975 LLVMContext &C = *DAG.getContext();
13976 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13977 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13978
13979 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13980 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13981
13982 // Src0 and Src1 are zero extended, so they're always positive if signed.
13983 //
13984 // sub can produce a negative from two positive operands, so it needs sign
13985 // extended. Other nodes produce a positive from two positive operands, so
13986 // zero extend instead.
13987 unsigned OuterExtend =
13988 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13989
13990 return DAG.getNode(
13991 OuterExtend, SDLoc(N), VT,
13992 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13993}
13994
13995// Try to turn (add (xor bool, 1) -1) into (neg bool).
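// For bool == 1 this gives (xor 1, 1) + (-1) == -1 == (neg 1); for bool == 0
// it gives (xor 0, 1) + (-1) == 0 == (neg 0), so the fold is value-preserving.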
13996 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13997 SDValue N0 = N->getOperand(0);
13998 SDValue N1 = N->getOperand(1);
13999 EVT VT = N->getValueType(0);
14000 SDLoc DL(N);
14001
14002 // RHS should be -1.
14003 if (!isAllOnesConstant(N1))
14004 return SDValue();
14005
14006 // Look for (xor X, 1).
14007 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14008 return SDValue();
14009
14010 // First xor input should be 0 or 1.
14011 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14012 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14013 return SDValue();
14014
14015 // Emit a negate of the setcc.
14016 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14017 N0.getOperand(0));
14018}
14019
14020 static SDValue performADDCombine(SDNode *N,
14021 TargetLowering::DAGCombinerInfo &DCI,
14022 const RISCVSubtarget &Subtarget) {
14023 SelectionDAG &DAG = DCI.DAG;
14024 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14025 return V;
14026 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14027 return V;
14028 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14029 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14030 return V;
14031 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14032 return V;
14033 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14034 return V;
14035 if (SDValue V = combineBinOpOfZExt(N, DAG))
14036 return V;
14037
14038 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14039 // (select lhs, rhs, cc, x, (add x, y))
14040 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14041}
14042
14043 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
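// For example, (sub 5, (setcc x, y, eq)) becomes (add (setcc x, y, ne), 4),
// since eq == 1 - ne and therefore 5 - (1 - ne) == ne + 4.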
14044 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14045 SDValue N0 = N->getOperand(0);
14046 SDValue N1 = N->getOperand(1);
14047 EVT VT = N->getValueType(0);
14048 SDLoc DL(N);
14049
14050 // Require a constant LHS.
14051 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14052 if (!N0C)
14053 return SDValue();
14054
14055 // All our optimizations involve subtracting 1 from the immediate and forming
14056 // an ADDI. Make sure the new immediate is valid for an ADDI.
14057 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14058 if (!ImmValMinus1.isSignedIntN(12))
14059 return SDValue();
14060
14061 SDValue NewLHS;
14062 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14063 // (sub constant, (setcc x, y, eq/neq)) ->
14064 // (add (setcc x, y, neq/eq), constant - 1)
14065 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14066 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14067 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14068 return SDValue();
14069 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14070 NewLHS =
14071 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14072 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14073 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14074 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14075 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14076 NewLHS = N1.getOperand(0);
14077 } else
14078 return SDValue();
14079
14080 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14081 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14082}
14083
14084// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14085// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14086// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14087// valid with Y=3, while 0b0000_1000_0000_0100 is not.
14088 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14089 const RISCVSubtarget &Subtarget) {
14090 if (!Subtarget.hasStdExtZbb())
14091 return SDValue();
14092
14093 EVT VT = N->getValueType(0);
14094
14095 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14096 return SDValue();
14097
14098 SDValue N0 = N->getOperand(0);
14099 SDValue N1 = N->getOperand(1);
14100
14101 if (N0->getOpcode() != ISD::SHL)
14102 return SDValue();
14103
14104 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14105 if (!ShAmtCLeft)
14106 return SDValue();
14107 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14108
14109 if (ShiftedAmount >= 8)
14110 return SDValue();
14111
14112 SDValue LeftShiftOperand = N0->getOperand(0);
14113 SDValue RightShiftOperand = N1;
14114
14115 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14116 if (N1->getOpcode() != ISD::SRL)
14117 return SDValue();
14118 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14119 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14120 return SDValue();
14121 RightShiftOperand = N1.getOperand(0);
14122 }
14123
14124 // At least one shift should have a single use.
14125 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14126 return SDValue();
14127
14128 if (LeftShiftOperand != RightShiftOperand)
14129 return SDValue();
14130
14131 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14132 Mask <<= ShiftedAmount;
14133 // Check that X has indeed the right shape (only the Y-th bit can be set in
14134 // every byte).
14135 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14136 return SDValue();
14137
14138 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14139}
14140
14141 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14142 const RISCVSubtarget &Subtarget) {
14143 if (SDValue V = combineSubOfBoolean(N, DAG))
14144 return V;
14145
14146 EVT VT = N->getValueType(0);
14147 SDValue N0 = N->getOperand(0);
14148 SDValue N1 = N->getOperand(1);
14149 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14150 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14151 isNullConstant(N1.getOperand(1))) {
14152 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14153 if (CCVal == ISD::SETLT) {
14154 SDLoc DL(N);
14155 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14156 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14157 DAG.getConstant(ShAmt, DL, VT));
14158 }
14159 }
14160
14161 if (SDValue V = combineBinOpOfZExt(N, DAG))
14162 return V;
14163 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14164 return V;
14165
14166 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14167 // (select lhs, rhs, cc, x, (sub x, y))
14168 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14169}
14170
14171// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14172// Legalizing setcc can introduce xors like this. Doing this transform reduces
14173// the number of xors and may allow the xor to fold into a branch condition.
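// For example, (and (xor a, 1), (xor b, 1)) with a and b in {0, 1} becomes
// (xor (or a, b), 1), reducing the number of xors from two to one.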
14174 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14175 SDValue N0 = N->getOperand(0);
14176 SDValue N1 = N->getOperand(1);
14177 bool IsAnd = N->getOpcode() == ISD::AND;
14178
14179 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14180 return SDValue();
14181
14182 if (!N0.hasOneUse() || !N1.hasOneUse())
14183 return SDValue();
14184
14185 SDValue N01 = N0.getOperand(1);
14186 SDValue N11 = N1.getOperand(1);
14187
14188 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14189 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14190 // operation is And, allow one of the Xors to use -1.
14191 if (isOneConstant(N01)) {
14192 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14193 return SDValue();
14194 } else if (isOneConstant(N11)) {
14195 // The case of both N01 and N11 being 1 was handled above. Handle N11==1 and N01==-1.
14196 if (!(IsAnd && isAllOnesConstant(N01)))
14197 return SDValue();
14198 } else
14199 return SDValue();
14200
14201 EVT VT = N->getValueType(0);
14202
14203 SDValue N00 = N0.getOperand(0);
14204 SDValue N10 = N1.getOperand(0);
14205
14206 // The LHS of the xors needs to be 0/1.
14207 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14208 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14209 return SDValue();
14210
14211 // Invert the opcode and insert a new xor.
14212 SDLoc DL(N);
14213 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14214 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14215 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14216}
14217
14218// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14219// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14220// value to an unsigned value. This will be lowered to vmax and series of
14221 // vnclipu instructions later. This can be extended to truncated types
14222// other than i8 by replacing 256 and 255 with the equivalent constants for the
14223// type.
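// For an i16 source truncated to i8, the value is first clamped to [0, 255]:
// e.g. -5 becomes 0, 300 becomes 255, and 42 is left unchanged.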
14224 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14225 EVT VT = N->getValueType(0);
14226 SDValue N0 = N->getOperand(0);
14227 EVT SrcVT = N0.getValueType();
14228
14229 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14230 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14231 return SDValue();
14232
14233 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14234 return SDValue();
14235
14236 SDValue Cond = N0.getOperand(0);
14237 SDValue True = N0.getOperand(1);
14238 SDValue False = N0.getOperand(2);
14239
14240 if (Cond.getOpcode() != ISD::SETCC)
14241 return SDValue();
14242
14243 // FIXME: Support the version of this pattern with the select operands
14244 // swapped.
14245 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14246 if (CCVal != ISD::SETULT)
14247 return SDValue();
14248
14249 SDValue CondLHS = Cond.getOperand(0);
14250 SDValue CondRHS = Cond.getOperand(1);
14251
14252 if (CondLHS != True)
14253 return SDValue();
14254
14255 unsigned ScalarBits = VT.getScalarSizeInBits();
14256
14257 // FIXME: Support other constants.
14258 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14259 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14260 return SDValue();
14261
14262 if (False.getOpcode() != ISD::SIGN_EXTEND)
14263 return SDValue();
14264
14265 False = False.getOperand(0);
14266
14267 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14268 return SDValue();
14269
14270 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14271 if (!FalseRHSC || !FalseRHSC->isZero())
14272 return SDValue();
14273
14274 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14275 if (CCVal2 != ISD::SETGT)
14276 return SDValue();
14277
14278 // Emit the signed to unsigned saturation pattern.
14279 SDLoc DL(N);
14280 SDValue Max =
14281 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14282 SDValue Min =
14283 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14284 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14285 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14286}
14287
14288 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14289 const RISCVSubtarget &Subtarget) {
14290 SDValue N0 = N->getOperand(0);
14291 EVT VT = N->getValueType(0);
14292
14293 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14294 // extending X. This is safe since we only need the LSB after the shift and
14295 // shift amounts larger than 31 would produce poison. If we wait until
14296 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14297 // to use a BEXT instruction.
14298 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14299 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14300 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14301 SDLoc DL(N0);
14302 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14303 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14304 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14305 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14306 }
14307
14308 return combineTruncSelectToSMaxUSat(N, DAG);
14309}
14310
14311 // Combines two comparison operations and a logic operation into one selection
14312 // operation (min, max) and a logic operation. Returns the newly constructed node
14313 // if the conditions for the optimization are satisfied.
14314 static SDValue performANDCombine(SDNode *N,
14315 TargetLowering::DAGCombinerInfo &DCI,
14316 const RISCVSubtarget &Subtarget) {
14317 SelectionDAG &DAG = DCI.DAG;
14318
14319 SDValue N0 = N->getOperand(0);
14320 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14321 // extending X. This is safe since we only need the LSB after the shift and
14322 // shift amounts larger than 31 would produce poison. If we wait until
14323 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14324 // to use a BEXT instruction.
14325 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14326 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14327 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14328 N0.hasOneUse()) {
14329 SDLoc DL(N);
14330 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14331 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14332 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14333 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14334 DAG.getConstant(1, DL, MVT::i64));
14335 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14336 }
14337
14338 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14339 return V;
14340 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14341 return V;
14342
14343 if (DCI.isAfterLegalizeDAG())
14344 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14345 return V;
14346
14347 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14348 // (select lhs, rhs, cc, x, (and x, y))
14349 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14350}
14351
14352// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14353// FIXME: Generalize to other binary operators with same operand.
14354 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14355 SelectionDAG &DAG) {
14356 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14357
14358 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14359 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14360 !N0.hasOneUse() || !N1.hasOneUse())
14361 return SDValue();
14362
14363 // Should have the same condition.
14364 SDValue Cond = N0.getOperand(1);
14365 if (Cond != N1.getOperand(1))
14366 return SDValue();
14367
14368 SDValue TrueV = N0.getOperand(0);
14369 SDValue FalseV = N1.getOperand(0);
14370
14371 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14372 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14373 !isOneConstant(TrueV.getOperand(1)) ||
14374 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14375 return SDValue();
14376
14377 EVT VT = N->getValueType(0);
14378 SDLoc DL(N);
14379
14380 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14381 Cond);
14382 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14383 Cond);
14384 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14385 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14386}
14387
14388 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14389 const RISCVSubtarget &Subtarget) {
14390 SelectionDAG &DAG = DCI.DAG;
14391
14392 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14393 return V;
14394 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14395 return V;
14396
14397 if (DCI.isAfterLegalizeDAG())
14398 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14399 return V;
14400
14401 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
14402 // We may be able to pull a common operation out of the true and false value.
14403 SDValue N0 = N->getOperand(0);
14404 SDValue N1 = N->getOperand(1);
14405 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14406 return V;
14407 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14408 return V;
14409
14410 // fold (or (select cond, 0, y), x) ->
14411 // (select cond, x, (or x, y))
14412 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14413}
14414
14415 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14416 const RISCVSubtarget &Subtarget) {
14417 SDValue N0 = N->getOperand(0);
14418 SDValue N1 = N->getOperand(1);
14419
14420 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14421 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14422 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14423 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14424 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14425 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14426 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14427 SDLoc DL(N);
14428 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14429 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14430 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14431 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14432 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14433 }
14434
14435 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14436 // NOTE: Assumes ROL being legal means ROLW is legal.
14437 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14438 if (N0.getOpcode() == RISCVISD::SLLW &&
14439 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14440 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14441 SDLoc DL(N);
14442 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14443 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14444 }
14445
14446 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14447 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14448 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14449 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14450 if (ConstN00 && CC == ISD::SETLT) {
14451 EVT VT = N0.getValueType();
14452 SDLoc DL(N0);
14453 const APInt &Imm = ConstN00->getAPIntValue();
14454 if ((Imm + 1).isSignedIntN(12))
14455 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14456 DAG.getConstant(Imm + 1, DL, VT), CC);
14457 }
14458 }
14459
14460 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14461 return V;
14462 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14463 return V;
14464
14465 // fold (xor (select cond, 0, y), x) ->
14466 // (select cond, x, (xor x, y))
14467 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14468}
14469
14470// Try to expand a scalar multiply to a faster sequence.
14471 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14472 TargetLowering::DAGCombinerInfo &DCI,
14473 const RISCVSubtarget &Subtarget) {
14474
14475 EVT VT = N->getValueType(0);
14476
14477 // LI + MUL is usually smaller than the alternative sequence.
14478 if (DAG.getMachineFunction().getFunction().hasMinSize())
14479 return SDValue();
14480
14481 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14482 return SDValue();
14483
14484 if (VT != Subtarget.getXLenVT())
14485 return SDValue();
14486
14487 const bool HasShlAdd =
14488 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14489
14490 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14491 if (!CNode)
14492 return SDValue();
14493 uint64_t MulAmt = CNode->getZExtValue();
14494
14495 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14496 // We're adding additional uses of X here, and in principle, we should be freezing
14497 // X before doing so. However, adding freeze here causes real regressions, and no
14498 // other target properly freezes X in these cases either.
14499 SDValue X = N->getOperand(0);
14500
14501 if (HasShlAdd) {
14502 for (uint64_t Divisor : {3, 5, 9}) {
14503 if (MulAmt % Divisor != 0)
14504 continue;
14505 uint64_t MulAmt2 = MulAmt / Divisor;
14506 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14507 if (isPowerOf2_64(MulAmt2)) {
14508 SDLoc DL(N);
14509 SDValue X = N->getOperand(0);
14510 // Put the shift first if we can fold a zext into the
14511 // shift forming a slli.uw.
14512 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14513 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14514 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14515 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14516 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14517 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14518 Shl);
14519 }
14520 // Otherwise, put the shl second so that it can fold with following
14521 // instructions (e.g. sext or add).
14522 SDValue Mul359 =
14523 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14524 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14525 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14526 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14527 }
14528
14529 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14530 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14531 SDLoc DL(N);
14532 SDValue Mul359 =
14533 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14534 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14535 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14536 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14537 Mul359);
14538 }
14539 }
14540
14541 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
14542 // shXadd. First check if this is a sum of two powers of 2 because that's
14543 // easy. Then count how many zeros are up to the first bit.
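// For example, MulAmt = 34 = 32 + 2: ShiftAmt = 5 and ScaleShift = 1, giving
// (sh1add X, (shl X, 5)) = 2*X + 32*X = 34*X.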
14544 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14545 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14546 if (ScaleShift >= 1 && ScaleShift < 4) {
14547 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14548 SDLoc DL(N);
14549 SDValue Shift1 =
14550 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14551 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14552 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14553 }
14554 }
14555
14556 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14557 // This is the two instruction form, there are also three instruction
14558 // variants we could implement. e.g.
14559 // (2^(1,2,3) * 3,5,9 + 1) << C2
14560 // 2^(C1>3) * 3,5,9 +/- 1
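// For example, MulAmt = 11: MulAmt - 1 = 10 = 5 << 1, so Mul359 =
// (sh2add X, X) = 5*X and the result is (sh1add Mul359, X) = 10*X + X = 11*X.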
14561 for (uint64_t Divisor : {3, 5, 9}) {
14562 uint64_t C = MulAmt - 1;
14563 if (C <= Divisor)
14564 continue;
14565 unsigned TZ = llvm::countr_zero(C);
14566 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14567 SDLoc DL(N);
14568 SDValue Mul359 =
14569 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14570 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14571 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14572 DAG.getConstant(TZ, DL, VT), X);
14573 }
14574 }
14575
14576 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
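// For example, MulAmt = 35: MulAmt - 1 = 34 = 32 + 2, so the result is
// (add (shl X, 5), (sh1add X, X)) = 32*X + 3*X = 35*X.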
14577 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14578 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14579 if (ScaleShift >= 1 && ScaleShift < 4) {
14580 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14581 SDLoc DL(N);
14582 SDValue Shift1 =
14583 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14584 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14585 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14586 DAG.getConstant(ScaleShift, DL, VT), X));
14587 }
14588 }
14589
14590 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
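// For example, MulAmt = 29: 29 + 3 = 32, so the result is
// (sub (shl X, 5), (sh1add X, X)) = 32*X - 3*X = 29*X.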
14591 for (uint64_t Offset : {3, 5, 9}) {
14592 if (isPowerOf2_64(MulAmt + Offset)) {
14593 SDLoc DL(N);
14594 SDValue Shift1 =
14595 DAG.getNode(ISD::SHL, DL, VT, X,
14596 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14597 SDValue Mul359 =
14598 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14599 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14600 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14601 }
14602 }
14603 }
14604
14605 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
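// For example, MulAmt = 24 = 32 - 8 (when none of the shXadd patterns above
// matched) becomes (sub (shl X, 5), (shl X, 3)) = 32*X - 8*X = 24*X.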
14606 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14607 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14608 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14609 SDLoc DL(N);
14610 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14611 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14612 SDValue Shift2 =
14613 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14614 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14615 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14616 }
14617
14618 if (HasShlAdd) {
14619 for (uint64_t Divisor : {3, 5, 9}) {
14620 if (MulAmt % Divisor != 0)
14621 continue;
14622 uint64_t MulAmt2 = MulAmt / Divisor;
14623 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14624 // of 25 which happen to be quite common.
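// For example, MulAmt = 200 = 5 * 5 * 8: Mul359A = (sh2add X, X) = 5*X,
// Mul359B = (sh2add Mul359A, Mul359A) = 25*X, and (shl Mul359B, 3) = 200*X.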
14625 for (uint64_t Divisor2 : {3, 5, 9}) {
14626 if (MulAmt2 % Divisor2 != 0)
14627 continue;
14628 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14629 if (isPowerOf2_64(MulAmt3)) {
14630 SDLoc DL(N);
14631 SDValue Mul359A =
14632 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14633 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14634 SDValue Mul359B = DAG.getNode(
14635 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14636 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14637 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14638 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14639 }
14640 }
14641 }
14642 }
14643
14644 return SDValue();
14645}
14646
14647// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14648// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14649// Same for other equivalent types with other equivalent constants.
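// For example, for v4i32 with X viewed as v8i16: (lshr X, 15) moves the sign
// bit of each i16 half to bit 0 and bit 16 of its i32 lane, the 0x10001 mask
// keeps exactly those bits, and multiplying by 0xffff smears each of them
// across its half, which is (sra (v8i16 X), 15) seen through a bitcast.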
14650 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14651 EVT VT = N->getValueType(0);
14652 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14653
14654 // Do this for legal vectors unless they are i1 or i8 vectors.
14655 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14656 return SDValue();
14657
14658 if (N->getOperand(0).getOpcode() != ISD::AND ||
14659 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14660 return SDValue();
14661
14662 SDValue And = N->getOperand(0);
14663 SDValue Srl = And.getOperand(0);
14664
14665 APInt V1, V2, V3;
14666 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14667 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14668 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14669 return SDValue();
14670
14671 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14672 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14673 V3 != (HalfSize - 1))
14674 return SDValue();
14675
14676 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14677 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14678 VT.getVectorElementCount() * 2);
14679 SDLoc DL(N);
14680 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14681 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14682 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14683 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14684}
14685
14686 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14687 TargetLowering::DAGCombinerInfo &DCI,
14688 const RISCVSubtarget &Subtarget) {
14689 EVT VT = N->getValueType(0);
14690 if (!VT.isVector())
14691 return expandMul(N, DAG, DCI, Subtarget);
14692
14693 SDLoc DL(N);
14694 SDValue N0 = N->getOperand(0);
14695 SDValue N1 = N->getOperand(1);
14696 SDValue MulOper;
14697 unsigned AddSubOpc;
14698
14699 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14700 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14701 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14702 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14703 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14704 AddSubOpc = V->getOpcode();
14705 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14706 SDValue Opnd = V->getOperand(1);
14707 MulOper = V->getOperand(0);
14708 if (AddSubOpc == ISD::SUB)
14709 std::swap(Opnd, MulOper);
14710 if (isOneOrOneSplat(Opnd))
14711 return true;
14712 }
14713 return false;
14714 };
14715
14716 if (IsAddSubWith1(N0)) {
14717 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14718 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14719 }
14720
14721 if (IsAddSubWith1(N1)) {
14722 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14723 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14724 }
14725
14726 if (SDValue V = combineBinOpOfZExt(N, DAG))
14727 return V;
14728
14729 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14730 return V;
14731
14732 return SDValue();
14733}
14734
14735/// According to the property that indexed load/store instructions zero-extend
14736/// their indices, try to narrow the type of index operand.
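// For example, a v4i64 index built as (shl (zext v4i8 X to v4i64), 2) only
// needs 8 + 2 = 10 bits, so it can be rebuilt as (shl (zext X to v4i16), 2);
// the indexed memory operation zero-extends the narrower index for free.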
14737static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14738 if (isIndexTypeSigned(IndexType))
14739 return false;
14740
14741 if (!N->hasOneUse())
14742 return false;
14743
14744 EVT VT = N.getValueType();
14745 SDLoc DL(N);
14746
14747 // In general, what we're doing here is seeing if we can sink a truncate to
14748 // a smaller element type into the expression tree building our index.
14749 // TODO: We can generalize this and handle a bunch more cases if useful.
14750
14751 // Narrow a buildvector to the narrowest element type. This requires less
14752 // work and less register pressure at high LMUL, and creates smaller constants
14753 // which may be cheaper to materialize.
14754 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14755 KnownBits Known = DAG.computeKnownBits(N);
14756 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14757 LLVMContext &C = *DAG.getContext();
14758 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14759 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14760 N = DAG.getNode(ISD::TRUNCATE, DL,
14761 VT.changeVectorElementType(ResultVT), N);
14762 return true;
14763 }
14764 }
14765
14766 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14767 if (N.getOpcode() != ISD::SHL)
14768 return false;
14769
14770 SDValue N0 = N.getOperand(0);
14771 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14772 N0.getOpcode() != RISCVISD::VZEXT_VL)
14773 return false;
14774 if (!N0->hasOneUse())
14775 return false;
14776
14777 APInt ShAmt;
14778 SDValue N1 = N.getOperand(1);
14779 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14780 return false;
14781
14782 SDValue Src = N0.getOperand(0);
14783 EVT SrcVT = Src.getValueType();
14784 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14785 unsigned ShAmtV = ShAmt.getZExtValue();
14786 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14787 NewElen = std::max(NewElen, 8U);
14788
14789 // Skip if NewElen is not narrower than the original extended type.
14790 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14791 return false;
14792
14793 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14794 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14795
14796 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14797 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14798 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14799 return true;
14800}
14801
14802// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14803// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14804// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14805// can become a sext.w instead of a shift pair.
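// For example, (seteq (and X, 0xffffffff), 0xfffff000) becomes
// (seteq (sext_inreg X, i32), 0xfffffffffffff000); the sign-extended constant
// is a single LUI, whereas the zero-extended one is harder to materialize.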
14806 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14807 const RISCVSubtarget &Subtarget) {
14808 SDValue N0 = N->getOperand(0);
14809 SDValue N1 = N->getOperand(1);
14810 EVT VT = N->getValueType(0);
14811 EVT OpVT = N0.getValueType();
14812
14813 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14814 return SDValue();
14815
14816 // RHS needs to be a constant.
14817 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14818 if (!N1C)
14819 return SDValue();
14820
14821 // LHS needs to be (and X, 0xffffffff).
14822 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14823 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14824 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14825 return SDValue();
14826
14827 // Looking for an equality compare.
14828 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14829 if (!isIntEqualitySetCC(Cond))
14830 return SDValue();
14831
14832 // Don't do this if the sign bit is provably zero, it will be turned back into
14833 // an AND.
14834 APInt SignMask = APInt::getOneBitSet(64, 31);
14835 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14836 return SDValue();
14837
14838 const APInt &C1 = N1C->getAPIntValue();
14839
14840 SDLoc dl(N);
14841 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14842 // to be equal.
14843 if (C1.getActiveBits() > 32)
14844 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14845
14846 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14847 N0.getOperand(0), DAG.getValueType(MVT::i32));
14848 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14849 dl, OpVT), Cond);
14850}
14851
14852static SDValue
14853 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14854 const RISCVSubtarget &Subtarget) {
14855 SDValue Src = N->getOperand(0);
14856 EVT VT = N->getValueType(0);
14857 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14858 unsigned Opc = Src.getOpcode();
14859
14860 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14861 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14862 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14863 Subtarget.hasStdExtZfhmin())
14864 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14865 Src.getOperand(0));
14866
14867 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14868 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14869 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14870 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14871 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14872 Src.getOperand(1));
14873
14874 return SDValue();
14875}
14876
14877namespace {
14878// Forward declaration of the structure holding the necessary information to
14879// apply a combine.
14880struct CombineResult;
14881
14882enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14883/// Helper class for folding sign/zero extensions.
14884/// In particular, this class is used for the following combines:
14885/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14886/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14887/// mul | mul_vl -> vwmul(u) | vwmul_su
14888/// shl | shl_vl -> vwsll
14889/// fadd -> vfwadd | vfwadd_w
14890/// fsub -> vfwsub | vfwsub_w
14891/// fmul -> vfwmul
14892/// An object of this class represents an operand of the operation we want to
14893/// combine.
14894/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14895/// NodeExtensionHelper for `a` and one for `b`.
14896///
14897/// This class abstracts away how the extension is materialized and
14898 /// how its number of users affects the combines.
14899///
14900/// In particular:
14901/// - VWADD_W is conceptually == add(op0, sext(op1))
14902/// - VWADDU_W == add(op0, zext(op1))
14903/// - VWSUB_W == sub(op0, sext(op1))
14904/// - VWSUBU_W == sub(op0, zext(op1))
14905/// - VFWADD_W == fadd(op0, fpext(op1))
14906/// - VFWSUB_W == fsub(op0, fpext(op1))
14907/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14908/// zext|sext(smaller_value).
14909struct NodeExtensionHelper {
14910 /// Records if this operand is like being zero extended.
14911 bool SupportsZExt;
14912 /// Records if this operand is like being sign extended.
14913 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14914 /// instance, a splat constant (e.g., 3), would support being both sign and
14915 /// zero extended.
14916 bool SupportsSExt;
14917 /// Records if this operand is like being floating-Point extended.
14918 bool SupportsFPExt;
14919 /// This boolean captures whether we care if this operand would still be
14920 /// around after the folding happens.
14921 bool EnforceOneUse;
14922 /// Original value that this NodeExtensionHelper represents.
14923 SDValue OrigOperand;
14924
14925 /// Get the value feeding the extension or the value itself.
14926 /// E.g., for zext(a), this would return a.
14927 SDValue getSource() const {
14928 switch (OrigOperand.getOpcode()) {
14929 case ISD::ZERO_EXTEND:
14930 case ISD::SIGN_EXTEND:
14931 case RISCVISD::VSEXT_VL:
14932 case RISCVISD::VZEXT_VL:
14933 case RISCVISD::FP_EXTEND_VL:
14934 return OrigOperand.getOperand(0);
14935 default:
14936 return OrigOperand;
14937 }
14938 }
14939
14940 /// Check if this instance represents a splat.
14941 bool isSplat() const {
14942 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14943 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14944 }
14945
14946 /// Get the extended opcode.
14947 unsigned getExtOpc(ExtKind SupportsExt) const {
14948 switch (SupportsExt) {
14949 case ExtKind::SExt:
14950 return RISCVISD::VSEXT_VL;
14951 case ExtKind::ZExt:
14952 return RISCVISD::VZEXT_VL;
14953 case ExtKind::FPExt:
14954 return RISCVISD::FP_EXTEND_VL;
14955 }
14956 llvm_unreachable("Unknown ExtKind enum");
14957 }
14958
14959 /// Get or create a value that can feed \p Root with the given extension \p
14960 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
14961 /// operand. \see ::getSource().
14962 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14963 const RISCVSubtarget &Subtarget,
14964 std::optional<ExtKind> SupportsExt) const {
14965 if (!SupportsExt.has_value())
14966 return OrigOperand;
14967
14968 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14969
14970 SDValue Source = getSource();
14971 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
14972 if (Source.getValueType() == NarrowVT)
14973 return Source;
14974
14975 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
14976 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
14977 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
14978 Root->getOpcode() == RISCVISD::VFMADD_VL);
14979 return Source;
14980 }
14981
14982 unsigned ExtOpc = getExtOpc(*SupportsExt);
14983
14984 // If we need an extension, we should be changing the type.
14985 SDLoc DL(OrigOperand);
14986 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
14987 switch (OrigOperand.getOpcode()) {
14988 case ISD::ZERO_EXTEND:
14989 case ISD::SIGN_EXTEND:
14990 case RISCVISD::VSEXT_VL:
14991 case RISCVISD::VZEXT_VL:
14992 case RISCVISD::FP_EXTEND_VL:
14993 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
14994 case ISD::SPLAT_VECTOR:
14995 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14996 case RISCVISD::VMV_V_X_VL:
14997 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14998 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14999 case RISCVISD::VFMV_V_F_VL:
15000 Source = Source.getOperand(1);
15001 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15002 Source = Source.getOperand(0);
15003 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15004 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15005 DAG.getUNDEF(NarrowVT), Source, VL);
15006 default:
15007 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15008 // and that operand should already have the right NarrowVT so no
15009 // extension should be required at this point.
15010 llvm_unreachable("Unsupported opcode");
15011 }
15012 }
15013
15014 /// Helper function to get the narrow type for \p Root.
15015 /// The narrow type is the type of \p Root where we divided the size of each
15016 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15017 /// \pre Both the narrow type and the original type should be legal.
15018 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15019 MVT VT = Root->getSimpleValueType(0);
15020
15021 // Determine the narrow size.
15022 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15023
15024 MVT EltVT = SupportsExt == ExtKind::FPExt
15025 ? MVT::getFloatingPointVT(NarrowSize)
15026 : MVT::getIntegerVT(NarrowSize);
15027
15028 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15029 "Trying to extend something we can't represent");
15030 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15031 return NarrowVT;
15032 }
15033
15034 /// Get the opcode to materialize:
15035 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15036 static unsigned getSExtOpcode(unsigned Opcode) {
15037 switch (Opcode) {
15038 case ISD::ADD:
15039 case RISCVISD::ADD_VL:
15040 case RISCVISD::VWADD_W_VL:
15041 case RISCVISD::VWADDU_W_VL:
15042 case ISD::OR:
15043 return RISCVISD::VWADD_VL;
15044 case ISD::SUB:
15045 case RISCVISD::SUB_VL:
15046 case RISCVISD::VWSUB_W_VL:
15047 case RISCVISD::VWSUBU_W_VL:
15048 return RISCVISD::VWSUB_VL;
15049 case ISD::MUL:
15050 case RISCVISD::MUL_VL:
15051 return RISCVISD::VWMUL_VL;
15052 default:
15053 llvm_unreachable("Unexpected opcode");
15054 }
15055 }
15056
15057 /// Get the opcode to materialize:
15058 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15059 static unsigned getZExtOpcode(unsigned Opcode) {
15060 switch (Opcode) {
15061 case ISD::ADD:
15062 case RISCVISD::ADD_VL:
15063 case RISCVISD::VWADDU_W_VL:
15064 case RISCVISD::VWADD_W_VL:
15065 case ISD::OR:
15066 return RISCVISD::VWADDU_VL;
15067 case ISD::SUB:
15068 case RISCVISD::SUB_VL:
15069 case RISCVISD::VWSUBU_W_VL:
15070 case RISCVISD::VWSUB_W_VL:
15071 return RISCVISD::VWSUBU_VL;
15072 case ISD::MUL:
15073 case RISCVISD::MUL_VL:
15074 return RISCVISD::VWMULU_VL;
15075 case ISD::SHL:
15076 case RISCVISD::SHL_VL:
15077 return RISCVISD::VWSLL_VL;
15078 default:
15079 llvm_unreachable("Unexpected opcode");
15080 }
15081 }
15082
15083 /// Get the opcode to materialize:
15084 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15085 static unsigned getFPExtOpcode(unsigned Opcode) {
15086 switch (Opcode) {
15087 case RISCVISD::FADD_VL:
15088 case RISCVISD::VFWADD_W_VL:
15089 return RISCVISD::VFWADD_VL;
15090 case RISCVISD::FSUB_VL:
15091 case RISCVISD::VFWSUB_W_VL:
15092 return RISCVISD::VFWSUB_VL;
15093 case RISCVISD::FMUL_VL:
15094 return RISCVISD::VFWMUL_VL;
15095 case RISCVISD::VFMADD_VL:
15096 return RISCVISD::VFWMADD_VL;
15097 case RISCVISD::VFMSUB_VL:
15098 return RISCVISD::VFWMSUB_VL;
15099 case RISCVISD::VFNMADD_VL:
15100 return RISCVISD::VFWNMADD_VL;
15101 case RISCVISD::VFNMSUB_VL:
15102 return RISCVISD::VFWNMSUB_VL;
15103 default:
15104 llvm_unreachable("Unexpected opcode");
15105 }
15106 }
15107
15108 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15109 /// newOpcode(a, b).
15110 static unsigned getSUOpcode(unsigned Opcode) {
15111 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15112 "SU is only supported for MUL");
15113 return RISCVISD::VWMULSU_VL;
15114 }
15115
15116 /// Get the opcode to materialize
15117 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15118 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15119 switch (Opcode) {
15120 case ISD::ADD:
15121 case RISCVISD::ADD_VL:
15122 case ISD::OR:
15123 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15124 : RISCVISD::VWADDU_W_VL;
15125 case ISD::SUB:
15126 case RISCVISD::SUB_VL:
15127 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15128 : RISCVISD::VWSUBU_W_VL;
15129 case RISCVISD::FADD_VL:
15130 return RISCVISD::VFWADD_W_VL;
15131 case RISCVISD::FSUB_VL:
15132 return RISCVISD::VFWSUB_W_VL;
15133 default:
15134 llvm_unreachable("Unexpected opcode");
15135 }
15136 }
15137
15138 using CombineToTry = std::function<std::optional<CombineResult>(
15139 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15140 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15141 const RISCVSubtarget &)>;
15142
15143 /// Check if this node needs to be fully folded or extended for all users.
15144 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15145
15146 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15147 const RISCVSubtarget &Subtarget) {
15148 unsigned Opc = OrigOperand.getOpcode();
15149 MVT VT = OrigOperand.getSimpleValueType();
15150
15151 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15152 "Unexpected Opcode");
15153
15154 // The pasthru must be undef for tail agnostic.
15155 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15156 return;
15157
15158 // Get the scalar value.
15159 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15160 : OrigOperand.getOperand(1);
15161
15162 // See if we have enough sign bits or zero bits in the scalar to use a
15163 // widening opcode by splatting to smaller element size.
15164 unsigned EltBits = VT.getScalarSizeInBits();
15165 unsigned ScalarBits = Op.getValueSizeInBits();
15166 // If we're not getting all bits from the element, we need special handling.
15167 if (ScalarBits < EltBits) {
15168 // This should only occur on RV32.
15169 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15170 !Subtarget.is64Bit() && "Unexpected splat");
15171 // vmv.v.x sign extends narrow inputs.
15172 SupportsSExt = true;
15173
15174 // If the input is positive, then sign extend is also zero extend.
15175 if (DAG.SignBitIsZero(Op))
15176 SupportsZExt = true;
15177
15178 EnforceOneUse = false;
15179 return;
15180 }
15181
15182 unsigned NarrowSize = EltBits / 2;
15183 // If the narrow type cannot be expressed with a legal VMV,
15184 // this is not a valid candidate.
15185 if (NarrowSize < 8)
15186 return;
15187
15188 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15189 SupportsSExt = true;
15190
15191 if (DAG.MaskedValueIsZero(Op,
15192 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15193 SupportsZExt = true;
15194
15195 EnforceOneUse = false;
15196 }
15197
15198 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15199 const RISCVSubtarget &Subtarget) {
15200 // Any f16 extension will neeed zvfh
15201 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15202 return false;
15203 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15204 // zvfbfwma
15205 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15206 Root->getOpcode() != RISCVISD::VFMADD_VL))
15207 return false;
15208 return true;
15209 }
15210
15211 /// Helper method to set the various fields of this struct based on the
15212 /// type of \p Root.
15213 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15214 const RISCVSubtarget &Subtarget) {
15215 SupportsZExt = false;
15216 SupportsSExt = false;
15217 SupportsFPExt = false;
15218 EnforceOneUse = true;
15219 unsigned Opc = OrigOperand.getOpcode();
15220 // For the nodes we handle below, we end up using their inputs directly: see
15221 // getSource(). However since they either don't have a passthru or we check
15222 // that their passthru is undef, we can safely ignore their mask and VL.
15223 switch (Opc) {
15224 case ISD::ZERO_EXTEND:
15225 case ISD::SIGN_EXTEND: {
15226 MVT VT = OrigOperand.getSimpleValueType();
15227 if (!VT.isVector())
15228 break;
15229
15230 SDValue NarrowElt = OrigOperand.getOperand(0);
15231 MVT NarrowVT = NarrowElt.getSimpleValueType();
15232 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15233 if (NarrowVT.getVectorElementType() == MVT::i1)
15234 break;
15235
15236 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15237 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15238 break;
15239 }
15240 case RISCVISD::VZEXT_VL:
15241 SupportsZExt = true;
15242 break;
15243 case RISCVISD::VSEXT_VL:
15244 SupportsSExt = true;
15245 break;
15246 case RISCVISD::FP_EXTEND_VL: {
15247 MVT NarrowEltVT =
15248 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15249 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15250 break;
15251 SupportsFPExt = true;
15252 break;
15253 }
15254 case ISD::SPLAT_VECTOR:
15255 case RISCVISD::VMV_V_X_VL:
15256 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15257 break;
15258 case RISCVISD::VFMV_V_F_VL: {
15259 MVT VT = OrigOperand.getSimpleValueType();
15260
15261 if (!OrigOperand.getOperand(0).isUndef())
15262 break;
15263
15264 SDValue Op = OrigOperand.getOperand(1);
15265 if (Op.getOpcode() != ISD::FP_EXTEND)
15266 break;
15267
15268 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15269 Subtarget))
15270 break;
15271
15272 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15273 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15274 if (NarrowSize != ScalarBits)
15275 break;
15276
15277 SupportsFPExt = true;
15278 break;
15279 }
15280 default:
15281 break;
15282 }
15283 }
15284
15285 /// Check if \p Root supports any extension folding combines.
15286 static bool isSupportedRoot(const SDNode *Root,
15287 const RISCVSubtarget &Subtarget) {
15288 switch (Root->getOpcode()) {
15289 case ISD::ADD:
15290 case ISD::SUB:
15291 case ISD::MUL: {
15292 return Root->getValueType(0).isScalableVector();
15293 }
15294 case ISD::OR: {
15295 return Root->getValueType(0).isScalableVector() &&
15296 Root->getFlags().hasDisjoint();
15297 }
15298 // Vector Widening Integer Add/Sub/Mul Instructions
15299 case RISCVISD::ADD_VL:
15300 case RISCVISD::MUL_VL:
15301 case RISCVISD::VWADD_W_VL:
15302 case RISCVISD::VWADDU_W_VL:
15303 case RISCVISD::SUB_VL:
15304 case RISCVISD::VWSUB_W_VL:
15305 case RISCVISD::VWSUBU_W_VL:
15306 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15307 case RISCVISD::FADD_VL:
15308 case RISCVISD::FSUB_VL:
15309 case RISCVISD::FMUL_VL:
15310 case RISCVISD::VFWADD_W_VL:
15311 case RISCVISD::VFWSUB_W_VL:
15312 return true;
15313 case ISD::SHL:
15314 return Root->getValueType(0).isScalableVector() &&
15315 Subtarget.hasStdExtZvbb();
15316 case RISCVISD::SHL_VL:
15317 return Subtarget.hasStdExtZvbb();
15318 case RISCVISD::VFMADD_VL:
15319 case RISCVISD::VFNMADD_VL:
15320 case RISCVISD::VFMSUB_VL:
15321 case RISCVISD::VFNMSUB_VL:
15322 return true;
15323 default:
15324 return false;
15325 }
15326 }
15327
15328 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15329 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15330 const RISCVSubtarget &Subtarget) {
15331 assert(isSupportedRoot(Root, Subtarget) &&
15332 "Trying to build a helper with an "
15333 "unsupported root");
15334 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15336 OrigOperand = Root->getOperand(OperandIdx);
15337
15338 unsigned Opc = Root->getOpcode();
15339 switch (Opc) {
15340 // We consider
15341 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15342 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15343 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15344 case RISCVISD::VWADD_W_VL:
15345 case RISCVISD::VWADDU_W_VL:
15346 case RISCVISD::VWSUB_W_VL:
15347 case RISCVISD::VWSUBU_W_VL:
15348 case RISCVISD::VFWADD_W_VL:
15349 case RISCVISD::VFWSUB_W_VL:
15350 if (OperandIdx == 1) {
15351 SupportsZExt =
15352 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
15353 SupportsSExt =
15354 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
15355 SupportsFPExt =
15356 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
15357 // There's no existing extension here, so we don't have to worry about
15358 // making sure it gets removed.
15359 EnforceOneUse = false;
15360 break;
15361 }
15362 [[fallthrough]];
15363 default:
15364 fillUpExtensionSupport(Root, DAG, Subtarget);
15365 break;
15366 }
15367 }
15368
15369 /// Helper function to get the Mask and VL from \p Root.
15370 static std::pair<SDValue, SDValue>
15371 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15372 const RISCVSubtarget &Subtarget) {
15373 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15374 switch (Root->getOpcode()) {
15375 case ISD::ADD:
15376 case ISD::SUB:
15377 case ISD::MUL:
15378 case ISD::OR:
15379 case ISD::SHL: {
15380 SDLoc DL(Root);
15381 MVT VT = Root->getSimpleValueType(0);
15382 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15383 }
15384 default:
15385 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15386 }
15387 }
15388
15389 /// Helper function to check if \p N is commutative with respect to the
15390 /// foldings that are supported by this class.
15391 static bool isCommutative(const SDNode *N) {
15392 switch (N->getOpcode()) {
15393 case ISD::ADD:
15394 case ISD::MUL:
15395 case ISD::OR:
15396 case RISCVISD::ADD_VL:
15397 case RISCVISD::MUL_VL:
15398 case RISCVISD::VWADD_W_VL:
15399 case RISCVISD::VWADDU_W_VL:
15400 case RISCVISD::FADD_VL:
15401 case RISCVISD::FMUL_VL:
15402 case RISCVISD::VFWADD_W_VL:
15403 case RISCVISD::VFMADD_VL:
15404 case RISCVISD::VFNMADD_VL:
15405 case RISCVISD::VFMSUB_VL:
15406 case RISCVISD::VFNMSUB_VL:
15407 return true;
15408 case ISD::SUB:
15409 case RISCVISD::SUB_VL:
15410 case RISCVISD::VWSUB_W_VL:
15411 case RISCVISD::VWSUBU_W_VL:
15412 case RISCVISD::FSUB_VL:
15413 case RISCVISD::VFWSUB_W_VL:
15414 case ISD::SHL:
15415 case RISCVISD::SHL_VL:
15416 return false;
15417 default:
15418 llvm_unreachable("Unexpected opcode");
15419 }
15420 }
15421
15422 /// Get a list of combine to try for folding extensions in \p Root.
15423 /// Note that each returned CombineToTry function doesn't actually modify
15424 /// anything. Instead they produce an optional CombineResult that if not None,
15425 /// need to be materialized for the combine to be applied.
15426 /// \see CombineResult::materialize.
15427 /// If the related CombineToTry function returns std::nullopt, that means the
15428 /// combine didn't match.
15429 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15430};
15431
15432/// Helper structure that holds all the necessary information to materialize a
15433/// combine that does some extension folding.
15434struct CombineResult {
15435 /// Opcode to be generated when materializing the combine.
15436 unsigned TargetOpcode;
15437 // No value means no extension is needed.
15438 std::optional<ExtKind> LHSExt;
15439 std::optional<ExtKind> RHSExt;
15440 /// Root of the combine.
15441 SDNode *Root;
15442 /// LHS of the TargetOpcode.
15443 NodeExtensionHelper LHS;
15444 /// RHS of the TargetOpcode.
15445 NodeExtensionHelper RHS;
15446
15447 CombineResult(unsigned TargetOpcode, SDNode *Root,
15448 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15449 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15450 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15451 LHS(LHS), RHS(RHS) {}
15452
15453 /// Return a value that uses TargetOpcode and that can be used to replace
15454 /// Root.
15455 /// The actual replacement is *not* done in that method.
15456 SDValue materialize(SelectionDAG &DAG,
15457 const RISCVSubtarget &Subtarget) const {
15458 SDValue Mask, VL, Passthru;
15459 std::tie(Mask, VL) =
15460 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15461 switch (Root->getOpcode()) {
15462 default:
15463 Passthru = Root->getOperand(2);
15464 break;
15465 case ISD::ADD:
15466 case ISD::SUB:
15467 case ISD::MUL:
15468 case ISD::OR:
15469 case ISD::SHL:
15470 Passthru = DAG.getUNDEF(Root->getValueType(0));
15471 break;
15472 }
15473 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15474 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15475 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15476 Passthru, Mask, VL);
15477 }
15478};
15479
15480/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15481/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15482/// are zext) and LHS and RHS can be folded into Root.
15483/// AllowExtMask define which form `ext` can take in this pattern.
15484///
15485/// \note If the pattern can match with both zext and sext, the returned
15486/// CombineResult will feature the zext result.
15487///
15488/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15489/// can be used to apply the pattern.
15490static std::optional<CombineResult>
15491canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15492 const NodeExtensionHelper &RHS,
15493 uint8_t AllowExtMask, SelectionDAG &DAG,
15494 const RISCVSubtarget &Subtarget) {
15495 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15496 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15497 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15498 /*RHSExt=*/{ExtKind::ZExt});
15499 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15500 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15501 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15502 /*RHSExt=*/{ExtKind::SExt});
15503 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15504 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15505 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15506 /*RHSExt=*/{ExtKind::FPExt});
15507 return std::nullopt;
15508}
15509
15510/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15511/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15512/// are zext) and LHS and RHS can be folded into Root.
15513///
15514/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15515/// can be used to apply the pattern.
15516static std::optional<CombineResult>
15517canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15518 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15519 const RISCVSubtarget &Subtarget) {
15520 return canFoldToVWWithSameExtensionImpl(
15521 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15522 Subtarget);
15523}
15524
15525/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15526///
15527/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15528/// can be used to apply the pattern.
15529static std::optional<CombineResult>
15530canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15531 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15532 const RISCVSubtarget &Subtarget) {
15533 if (RHS.SupportsFPExt)
15534 return CombineResult(
15535 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15536 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15537
15538 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15539 // sext/zext?
15540 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15541 // purposes.
15542 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15543 return CombineResult(
15544 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15545 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15546 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15547 return CombineResult(
15548 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15549 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15550 return std::nullopt;
15551}
15552
15553/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15554///
15555/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15556/// can be used to apply the pattern.
15557static std::optional<CombineResult>
15558canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15559 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15560 const RISCVSubtarget &Subtarget) {
15561 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15562 Subtarget);
15563}
15564
15565/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15566///
15567/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15568/// can be used to apply the pattern.
15569static std::optional<CombineResult>
15570canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15571 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15572 const RISCVSubtarget &Subtarget) {
15573 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15574 Subtarget);
15575}
15576
15577/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15578///
15579/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15580/// can be used to apply the pattern.
15581static std::optional<CombineResult>
15582canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15583 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15584 const RISCVSubtarget &Subtarget) {
15585 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15586 Subtarget);
15587}
15588
15589/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15590///
15591/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15592/// can be used to apply the pattern.
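/// For example (illustrative): a root MUL_VL (sext a), (zext b) is rewritten
/// to the opcode returned by getSUOpcode, i.e. a vwmulsu.vv with the
/// sign-extended operand first.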
15593static std::optional<CombineResult>
15594canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15595 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15596 const RISCVSubtarget &Subtarget) {
15597
15598 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15599 return std::nullopt;
15600 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15601 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15602 /*RHSExt=*/{ExtKind::ZExt});
15603}
15604
15604
15605SmallVector<NodeExtensionHelper::CombineToTry>
15606NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15607 SmallVector<CombineToTry> Strategies;
15608 switch (Root->getOpcode()) {
15609 case ISD::ADD:
15610 case ISD::SUB:
15611 case ISD::OR:
15612 case RISCVISD::ADD_VL:
15613 case RISCVISD::SUB_VL:
15614 case RISCVISD::FADD_VL:
15615 case RISCVISD::FSUB_VL:
15616    // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15617    Strategies.push_back(canFoldToVWWithSameExtension);
15618    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15619 Strategies.push_back(canFoldToVW_W);
15620 break;
15621 case RISCVISD::FMUL_VL:
15622  case RISCVISD::VFMADD_VL:
15623  case RISCVISD::VFMSUB_VL:
15624  case RISCVISD::VFNMADD_VL:
15625  case RISCVISD::VFNMSUB_VL:
15626    Strategies.push_back(canFoldToVWWithSameExtension);
15627 break;
15628 case ISD::MUL:
15629 case RISCVISD::MUL_VL:
15630 // mul -> vwmul(u)
15631 Strategies.push_back(canFoldToVWWithSameExtension);
15632 // mul -> vwmulsu
15633 Strategies.push_back(canFoldToVW_SU);
15634 break;
15635 case ISD::SHL:
15636 case RISCVISD::SHL_VL:
15637 // shl -> vwsll
15638 Strategies.push_back(canFoldToVWWithZEXT);
15639 break;
15640  case RISCVISD::VWADD_W_VL:
15641  case RISCVISD::VWSUB_W_VL:
15642    // vwadd_w|vwsub_w -> vwadd|vwsub
15643 Strategies.push_back(canFoldToVWWithSEXT);
15644 break;
15645  case RISCVISD::VWADDU_W_VL:
15646  case RISCVISD::VWSUBU_W_VL:
15647    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15648 Strategies.push_back(canFoldToVWWithZEXT);
15649 break;
15650  case RISCVISD::VFWADD_W_VL:
15651  case RISCVISD::VFWSUB_W_VL:
15652    // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15653 Strategies.push_back(canFoldToVWWithFPEXT);
15654 break;
15655 default:
15656 llvm_unreachable("Unexpected opcode");
15657 }
15658 return Strategies;
15659}
15660} // End anonymous namespace.
15661
15662/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15663/// The supported combines are:
15664/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15665/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15666/// mul | mul_vl -> vwmul(u) | vwmul_su
15667/// shl | shl_vl -> vwsll
15668/// fadd_vl -> vfwadd | vfwadd_w
15669/// fsub_vl -> vfwsub | vfwsub_w
15670/// fmul_vl -> vfwmul
15671/// vwadd_w(u) -> vwadd(u)
15672/// vwsub_w(u) -> vwsub(u)
15673/// vfwadd_w -> vfwadd
15674/// vfwsub_w -> vfwsub
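/// For example (illustrative): (add_vl (sext_vl a), (sext_vl b)) becomes
/// (vwadd_vl a, b). If one of the extends has other users, those users are
/// added to the worklist below and must themselves be combinable; otherwise
/// the whole web is left untouched (the combine is all or nothing).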
15675static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15676                                     TargetLowering::DAGCombinerInfo &DCI,
15677                                     const RISCVSubtarget &Subtarget) {
15678 SelectionDAG &DAG = DCI.DAG;
15679 if (DCI.isBeforeLegalize())
15680 return SDValue();
15681
15682 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15683 return SDValue();
15684
15685 SmallVector<SDNode *> Worklist;
15686 SmallSet<SDNode *, 8> Inserted;
15687 Worklist.push_back(N);
15688 Inserted.insert(N);
15689 SmallVector<CombineResult> CombinesToApply;
15690
15691 while (!Worklist.empty()) {
15692 SDNode *Root = Worklist.pop_back_val();
15693
15694 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15695 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15696 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15697 &Inserted](const NodeExtensionHelper &Op) {
15698 if (Op.needToPromoteOtherUsers()) {
15699 for (SDUse &Use : Op.OrigOperand->uses()) {
15700 SDNode *TheUser = Use.getUser();
15701 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15702 return false;
15703 // We only support the first 2 operands of FMA.
15704 if (Use.getOperandNo() >= 2)
15705 return false;
15706 if (Inserted.insert(TheUser).second)
15707 Worklist.push_back(TheUser);
15708 }
15709 }
15710 return true;
15711 };
15712
15713    // Control the compile time by limiting the number of nodes we look at in
15714 // total.
15715 if (Inserted.size() > ExtensionMaxWebSize)
15716 return SDValue();
15717
15718    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15719        NodeExtensionHelper::getSupportedFoldings(Root);
15720
15721 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15722 bool Matched = false;
15723 for (int Attempt = 0;
15724 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15725 ++Attempt) {
15726
15727 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15728 FoldingStrategies) {
15729 std::optional<CombineResult> Res =
15730 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15731 if (Res) {
15732 Matched = true;
15733 CombinesToApply.push_back(*Res);
15734 // All the inputs that are extended need to be folded, otherwise
15735          // we would be leaving the old input (since it may still be used),
15736 // and the new one.
15737 if (Res->LHSExt.has_value())
15738 if (!AppendUsersIfNeeded(LHS))
15739 return SDValue();
15740 if (Res->RHSExt.has_value())
15741 if (!AppendUsersIfNeeded(RHS))
15742 return SDValue();
15743 break;
15744 }
15745 }
15746 std::swap(LHS, RHS);
15747 }
15748    // Right now we take an all-or-nothing approach.
15749 if (!Matched)
15750 return SDValue();
15751 }
15752 // Store the value for the replacement of the input node separately.
15753 SDValue InputRootReplacement;
15754 // We do the RAUW after we materialize all the combines, because some replaced
15755 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15756 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15757 // yet-to-be-visited CombinesToApply roots.
15758  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15759  ValuesToReplace.reserve(CombinesToApply.size());
15760 for (CombineResult Res : CombinesToApply) {
15761 SDValue NewValue = Res.materialize(DAG, Subtarget);
15762 if (!InputRootReplacement) {
15763 assert(Res.Root == N &&
15764 "First element is expected to be the current node");
15765 InputRootReplacement = NewValue;
15766 } else {
15767 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15768 }
15769 }
15770 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15771 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15772 DCI.AddToWorklist(OldNewValues.second.getNode());
15773 }
15774 return InputRootReplacement;
15775}
15776
15777// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15778// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15779// y will be the Passthru and cond will be the Mask.
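// For example (illustrative): in vwadd.wv y, (vmerge cond, x, 0), the lanes
// where cond is false add 0 and so produce y, which is exactly what the
// masked vwadd.wv with passthru y and mask cond computes.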
15780static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15781  unsigned Opc = N->getOpcode();
15782  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15783         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15784
15785 SDValue Y = N->getOperand(0);
15786 SDValue MergeOp = N->getOperand(1);
15787 unsigned MergeOpc = MergeOp.getOpcode();
15788
15789 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15790 return SDValue();
15791
15792 SDValue X = MergeOp->getOperand(1);
15793
15794 if (!MergeOp.hasOneUse())
15795 return SDValue();
15796
15797 // Passthru should be undef
15798 SDValue Passthru = N->getOperand(2);
15799 if (!Passthru.isUndef())
15800 return SDValue();
15801
15802 // Mask should be all ones
15803 SDValue Mask = N->getOperand(3);
15804 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15805 return SDValue();
15806
15807 // False value of MergeOp should be all zeros
15808 SDValue Z = MergeOp->getOperand(2);
15809
15810 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15811 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15812 Z = Z.getOperand(1);
15813
15814 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15815 return SDValue();
15816
15817 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15818 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15819 N->getFlags());
15820}
15821
15822static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15823                                          TargetLowering::DAGCombinerInfo &DCI,
15824                                          const RISCVSubtarget &Subtarget) {
15825 [[maybe_unused]] unsigned Opc = N->getOpcode();
15826  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15827         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15828
15829 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15830 return V;
15831
15832 return combineVWADDSUBWSelect(N, DCI.DAG);
15833}
15834
15835// Helper function for performMemPairCombine.
15836// Try to combine the memory loads/stores LSNode1 and LSNode2
15837// into a single memory pair operation.
15838static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15839                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
15840 uint64_t Imm) {
15841  SmallPtrSet<const SDNode *, 32> Visited;
15842  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15843
15844 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15845 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15846 return SDValue();
15847
15848  MachineFunction &MF = DAG.getMachineFunction();
15849  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15850
15851 // The new operation has twice the width.
15852 MVT XLenVT = Subtarget.getXLenVT();
15853 EVT MemVT = LSNode1->getMemoryVT();
15854 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15855 MachineMemOperand *MMO = LSNode1->getMemOperand();
15856  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15857      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15858
15859 if (LSNode1->getOpcode() == ISD::LOAD) {
15860 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15861 unsigned Opcode;
15862 if (MemVT == MVT::i32)
15863 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15864 else
15865 Opcode = RISCVISD::TH_LDD;
15866
15867 SDValue Res = DAG.getMemIntrinsicNode(
15868 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15869 {LSNode1->getChain(), BasePtr,
15870 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15871 NewMemVT, NewMMO);
15872
15873 SDValue Node1 =
15874 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15875 SDValue Node2 =
15876 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15877
15878 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15879 return Node1;
15880 } else {
15881 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15882
15883 SDValue Res = DAG.getMemIntrinsicNode(
15884 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15885 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15886 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15887 NewMemVT, NewMMO);
15888
15889 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15890 return Res;
15891 }
15892}
15893
15894// Try to combine two adjacent loads/stores to a single pair instruction from
15895// the XTHeadMemPair vendor extension.
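// For example (illustrative): two simple i64 loads from p+16 and p+24 on the
// same chain can be merged into one TH_LDD node at p+16 (16 satisfies the
// shifted 2-bit index constraint checked below); stores are merged into
// TH_SDD the same way.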
15896static SDValue performMemPairCombine(SDNode *N,
15897                                     TargetLowering::DAGCombinerInfo &DCI) {
15898  SelectionDAG &DAG = DCI.DAG;
15899  MachineFunction &MF = DAG.getMachineFunction();
15900  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15901
15902 // Target does not support load/store pair.
15903 if (!Subtarget.hasVendorXTHeadMemPair())
15904 return SDValue();
15905
15906 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15907 EVT MemVT = LSNode1->getMemoryVT();
15908 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15909
15910 // No volatile, indexed or atomic loads/stores.
15911 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15912 return SDValue();
15913
15914 // Function to get a base + constant representation from a memory value.
15915 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15916 if (Ptr->getOpcode() == ISD::ADD)
15917 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15918 return {Ptr->getOperand(0), C1->getZExtValue()};
15919 return {Ptr, 0};
15920 };
15921
15922 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15923
15924 SDValue Chain = N->getOperand(0);
15925 for (SDUse &Use : Chain->uses()) {
15926 if (Use.getUser() != N && Use.getResNo() == 0 &&
15927 Use.getUser()->getOpcode() == N->getOpcode()) {
15928 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
15929
15930 // No volatile, indexed or atomic loads/stores.
15931 if (!LSNode2->isSimple() || LSNode2->isIndexed())
15932 continue;
15933
15934 // Check if LSNode1 and LSNode2 have the same type and extension.
15935 if (LSNode1->getOpcode() == ISD::LOAD)
15936 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
15937 cast<LoadSDNode>(LSNode1)->getExtensionType())
15938 continue;
15939
15940 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
15941 continue;
15942
15943 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
15944
15945      // Check if the base pointer is the same for both instructions.
15946 if (Base1 != Base2)
15947 continue;
15948
15949      // Check if the offsets match the XTHeadMemPair encoding constraints.
15950 bool Valid = false;
15951 if (MemVT == MVT::i32) {
15952 // Check for adjacent i32 values and a 2-bit index.
15953 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
15954 Valid = true;
15955 } else if (MemVT == MVT::i64) {
15956 // Check for adjacent i64 values and a 2-bit index.
15957 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
15958 Valid = true;
15959 }
15960
15961 if (!Valid)
15962 continue;
15963
15964 // Try to combine.
15965 if (SDValue Res =
15966 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
15967 return Res;
15968 }
15969 }
15970
15971 return SDValue();
15972}
15973
15974// Fold
15975// (fp_to_int (froundeven X)) -> fcvt X, rne
15976// (fp_to_int (ftrunc X)) -> fcvt X, rtz
15977// (fp_to_int (ffloor X)) -> fcvt X, rdn
15978// (fp_to_int (fceil X)) -> fcvt X, rup
15979// (fp_to_int (fround X)) -> fcvt X, rmm
15980// (fp_to_int (frint X)) -> fcvt X
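// For example (illustrative): on RV64, (fp_to_sint i64 (ffloor f64 X)) becomes
// an FCVT_X node with the RDN static rounding mode, i.e. a single fcvt.l.d
// with rdn instead of rounding first and converting second.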
15981static SDValue performFP_TO_INTCombine(SDNode *N,
15982                                       TargetLowering::DAGCombinerInfo &DCI,
15983                                       const RISCVSubtarget &Subtarget) {
15984 SelectionDAG &DAG = DCI.DAG;
15985 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15986 MVT XLenVT = Subtarget.getXLenVT();
15987
15988 SDValue Src = N->getOperand(0);
15989
15990 // Don't do this for strict-fp Src.
15991 if (Src->isStrictFPOpcode())
15992 return SDValue();
15993
15994 // Ensure the FP type is legal.
15995 if (!TLI.isTypeLegal(Src.getValueType()))
15996 return SDValue();
15997
15998 // Don't do this for f16 with Zfhmin and not Zfh.
15999 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16000 return SDValue();
16001
16002 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16003 // If the result is invalid, we didn't find a foldable instruction.
16004 if (FRM == RISCVFPRndMode::Invalid)
16005 return SDValue();
16006
16007 SDLoc DL(N);
16008 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16009 EVT VT = N->getValueType(0);
16010
16011 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16012 MVT SrcVT = Src.getSimpleValueType();
16013 MVT SrcContainerVT = SrcVT;
16014 MVT ContainerVT = VT.getSimpleVT();
16015 SDValue XVal = Src.getOperand(0);
16016
16017 // For widening and narrowing conversions we just combine it into a
16018 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16019 // end up getting lowered to their appropriate pseudo instructions based on
16020 // their operand types
16021 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16022 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16023 return SDValue();
16024
16025 // Make fixed-length vectors scalable first
16026 if (SrcVT.isFixedLengthVector()) {
16027 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16028 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16029 ContainerVT =
16030 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16031 }
16032
16033 auto [Mask, VL] =
16034 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16035
16036 SDValue FpToInt;
16037 if (FRM == RISCVFPRndMode::RTZ) {
16038 // Use the dedicated trunc static rounding mode if we're truncating so we
16039 // don't need to generate calls to fsrmi/fsrm
16040 unsigned Opc =
16041          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16042      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16043 } else {
16044 unsigned Opc =
16045          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16046      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16047 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16048 }
16049
16050 // If converted from fixed-length to scalable, convert back
16051 if (VT.isFixedLengthVector())
16052 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16053
16054 return FpToInt;
16055 }
16056
16057 // Only handle XLen or i32 types. Other types narrower than XLen will
16058 // eventually be legalized to XLenVT.
16059 if (VT != MVT::i32 && VT != XLenVT)
16060 return SDValue();
16061
16062 unsigned Opc;
16063 if (VT == XLenVT)
16064 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16065 else
16066    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16067
16068 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16069 DAG.getTargetConstant(FRM, DL, XLenVT));
16070 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16071}
16072
16073// Fold
16074// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16075// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16076// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16077// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16078// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16079// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16080static SDValue performFP_TO_INT_SATCombine(SDNode *N,
16081                                           TargetLowering::DAGCombinerInfo &DCI,
16082                                           const RISCVSubtarget &Subtarget) {
16083 SelectionDAG &DAG = DCI.DAG;
16084 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16085 MVT XLenVT = Subtarget.getXLenVT();
16086
16087 // Only handle XLen types. Other types narrower than XLen will eventually be
16088 // legalized to XLenVT.
16089 EVT DstVT = N->getValueType(0);
16090 if (DstVT != XLenVT)
16091 return SDValue();
16092
16093 SDValue Src = N->getOperand(0);
16094
16095 // Don't do this for strict-fp Src.
16096 if (Src->isStrictFPOpcode())
16097 return SDValue();
16098
16099 // Ensure the FP type is also legal.
16100 if (!TLI.isTypeLegal(Src.getValueType()))
16101 return SDValue();
16102
16103 // Don't do this for f16 with Zfhmin and not Zfh.
16104 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16105 return SDValue();
16106
16107 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16108
16109 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16110 if (FRM == RISCVFPRndMode::Invalid)
16111 return SDValue();
16112
16113 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16114
16115 unsigned Opc;
16116 if (SatVT == DstVT)
16117 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16118 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16119    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16120  else
16121 return SDValue();
16122 // FIXME: Support other SatVTs by clamping before or after the conversion.
16123
16124 Src = Src.getOperand(0);
16125
16126 SDLoc DL(N);
16127 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16128 DAG.getTargetConstant(FRM, DL, XLenVT));
16129
16130 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16131 // extend.
16132 if (Opc == RISCVISD::FCVT_WU_RV64)
16133 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16134
16135 // RISC-V FP-to-int conversions saturate to the destination register size, but
16136 // don't produce 0 for nan.
16137 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16138 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16139}
16140
16141// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16142// smaller than XLenVT.
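// For example (illustrative): for an i16 value, bswap swaps the two bytes and
// bitreverse then reverses all 16 bits, so the composition reverses the bits
// within each byte, which is exactly what brev8 computes.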
16143static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
16144                                        const RISCVSubtarget &Subtarget) {
16145 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16146
16147 SDValue Src = N->getOperand(0);
16148 if (Src.getOpcode() != ISD::BSWAP)
16149 return SDValue();
16150
16151 EVT VT = N->getValueType(0);
16152 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16153 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16154 return SDValue();
16155
16156 SDLoc DL(N);
16157 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16158}
16159
16160// Convert from one FMA opcode to another based on whether we are negating the
16161// multiply result and/or the accumulator.
16162// NOTE: Only supports RVV operations with VL.
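// For example (illustrative): negateFMAOpcode(VFMADD_VL, /*NegMul=*/true,
// /*NegAcc=*/false) returns VFNMSUB_VL, and negating both the product and the
// accumulator of VFMADD_VL (a*b+c -> -(a*b)-c) returns VFNMADD_VL.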
16163static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16164 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16165 if (NegMul) {
16166 // clang-format off
16167 switch (Opcode) {
16168 default: llvm_unreachable("Unexpected opcode");
16169 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16170 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16171 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16172 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16173    case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16174    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16175    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16176    case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16177    }
16178 // clang-format on
16179 }
16180
16181 // Negating the accumulator changes ADD<->SUB.
16182 if (NegAcc) {
16183 // clang-format off
16184 switch (Opcode) {
16185 default: llvm_unreachable("Unexpected opcode");
16186 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16187 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16188 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16189 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16190    case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16191    case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16192    case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16193    case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16194    }
16195 // clang-format on
16196 }
16197
16198 return Opcode;
16199}
16200
16201static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
16202  // Fold FNEG_VL into FMA opcodes.
16203 // The first operand of strict-fp is chain.
16204 bool IsStrict =
16205 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16206 unsigned Offset = IsStrict ? 1 : 0;
16207 SDValue A = N->getOperand(0 + Offset);
16208 SDValue B = N->getOperand(1 + Offset);
16209 SDValue C = N->getOperand(2 + Offset);
16210 SDValue Mask = N->getOperand(3 + Offset);
16211 SDValue VL = N->getOperand(4 + Offset);
16212
16213 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16214 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16215 V.getOperand(2) == VL) {
16216 // Return the negated input.
16217 V = V.getOperand(0);
16218 return true;
16219 }
16220
16221 return false;
16222 };
16223
16224 bool NegA = invertIfNegative(A);
16225 bool NegB = invertIfNegative(B);
16226 bool NegC = invertIfNegative(C);
16227
16228 // If no operands are negated, we're done.
16229 if (!NegA && !NegB && !NegC)
16230 return SDValue();
16231
16232 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16233 if (IsStrict)
16234 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16235 {N->getOperand(0), A, B, C, Mask, VL});
16236 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16237 VL);
16238}
16239
16240static SDValue performVFMADD_VLCombine(SDNode *N,
16241                                       TargetLowering::DAGCombinerInfo &DCI,
16242                                       const RISCVSubtarget &Subtarget) {
16243 SelectionDAG &DAG = DCI.DAG;
16244
16245  if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
16246    return V;
16247
16248 // FIXME: Ignore strict opcodes for now.
16249 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16250 return SDValue();
16251
16252 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16253}
16254
16255static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
16256                                 const RISCVSubtarget &Subtarget) {
16257 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16258
16259 EVT VT = N->getValueType(0);
16260
16261 if (VT != Subtarget.getXLenVT())
16262 return SDValue();
16263
16264 if (!isa<ConstantSDNode>(N->getOperand(1)))
16265 return SDValue();
16266 uint64_t ShAmt = N->getConstantOperandVal(1);
16267
16268 SDValue N0 = N->getOperand(0);
16269
16270 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16271 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
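  // For example (illustrative): with XLen=64 and iX=i32,
  // (sra (sext_inreg (shl X, 3), i32), 2) is rewritten to
  // (sra (shl X, 35), 34), which selects to a plain slli followed by srai.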
16272 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16273 unsigned ExtSize =
16274 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16275 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16276 N0.getOperand(0).hasOneUse() &&
16277 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16278 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16279 if (LShAmt < ExtSize) {
16280 unsigned Size = VT.getSizeInBits();
16281 SDLoc ShlDL(N0.getOperand(0));
16282 SDValue Shl =
16283 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16284 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16285 SDLoc DL(N);
16286 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16287 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16288 }
16289 }
16290 }
16291
16292 if (ShAmt > 32 || VT != MVT::i64)
16293 return SDValue();
16294
16295 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16296 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16297 //
16298 // Also try these folds where an add or sub is in the middle.
16299 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16300 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
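  // For example (illustrative): with ShAmt = 24 (i.e. C = 8),
  // (sra (shl X, 32), 24) becomes (shl (sext_inreg X, i32), 8); when ShAmt is
  // exactly 32 the sext_inreg alone suffices (see the early return below).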
16301 SDValue Shl;
16302 ConstantSDNode *AddC = nullptr;
16303
16304 // We might have an ADD or SUB between the SRA and SHL.
16305 bool IsAdd = N0.getOpcode() == ISD::ADD;
16306 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16307 // Other operand needs to be a constant we can modify.
16308 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16309 if (!AddC)
16310 return SDValue();
16311
16312 // AddC needs to have at least 32 trailing zeros.
16313 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16314 return SDValue();
16315
16316 // All users should be a shift by constant less than or equal to 32. This
16317 // ensures we'll do this optimization for each of them to produce an
16318 // add/sub+sext_inreg they can all share.
16319 for (SDNode *U : N0->users()) {
16320 if (U->getOpcode() != ISD::SRA ||
16321 !isa<ConstantSDNode>(U->getOperand(1)) ||
16322 U->getConstantOperandVal(1) > 32)
16323 return SDValue();
16324 }
16325
16326 Shl = N0.getOperand(IsAdd ? 0 : 1);
16327 } else {
16328 // Not an ADD or SUB.
16329 Shl = N0;
16330 }
16331
16332 // Look for a shift left by 32.
16333 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16334 Shl.getConstantOperandVal(1) != 32)
16335 return SDValue();
16336
16337  // If we didn't look through an add/sub, then the shl should have one use.
16338 // If we did look through an add/sub, the sext_inreg we create is free so
16339 // we're only creating 2 new instructions. It's enough to only remove the
16340 // original sra+add/sub.
16341 if (!AddC && !Shl.hasOneUse())
16342 return SDValue();
16343
16344 SDLoc DL(N);
16345 SDValue In = Shl.getOperand(0);
16346
16347 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16348 // constant.
16349 if (AddC) {
16350 SDValue ShiftedAddC =
16351 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16352 if (IsAdd)
16353 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16354 else
16355 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16356 }
16357
16358 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16359 DAG.getValueType(MVT::i32));
16360 if (ShAmt == 32)
16361 return SExt;
16362
16363 return DAG.getNode(
16364 ISD::SHL, DL, MVT::i64, SExt,
16365 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16366}
16367
16368// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
16369// the result is used as the condition of a br_cc or select_cc we can invert,
16370// inverting the setcc is free, and Z is 0/1. Caller will invert the
16371// br_cc/select_cc.
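// For example (illustrative): a branch on (and (seteq X, Y), (xor Z, 1)),
// with Z known to be 0/1, becomes a branch on (or (setne X, Y), Z) with the
// br_cc condition inverted by the caller, removing the xor entirely.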
16372static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16373  bool IsAnd = Cond.getOpcode() == ISD::AND;
16374 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16375 return SDValue();
16376
16377 if (!Cond.hasOneUse())
16378 return SDValue();
16379
16380 SDValue Setcc = Cond.getOperand(0);
16381 SDValue Xor = Cond.getOperand(1);
16382 // Canonicalize setcc to LHS.
16383 if (Setcc.getOpcode() != ISD::SETCC)
16384 std::swap(Setcc, Xor);
16385 // LHS should be a setcc and RHS should be an xor.
16386 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16387 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16388 return SDValue();
16389
16390 // If the condition is an And, SimplifyDemandedBits may have changed
16391 // (xor Z, 1) to (not Z).
16392 SDValue Xor1 = Xor.getOperand(1);
16393 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16394 return SDValue();
16395
16396 EVT VT = Cond.getValueType();
16397 SDValue Xor0 = Xor.getOperand(0);
16398
16399 // The LHS of the xor needs to be 0/1.
16400  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16401  if (!DAG.MaskedValueIsZero(Xor0, Mask))
16402 return SDValue();
16403
16404 // We can only invert integer setccs.
16405 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16406 if (!SetCCOpVT.isScalarInteger())
16407 return SDValue();
16408
16409 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16410 if (ISD::isIntEqualitySetCC(CCVal)) {
16411 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16412 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16413 Setcc.getOperand(1), CCVal);
16414 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16415 // Invert (setlt 0, X) by converting to (setlt X, 1).
16416 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16417 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16418 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16419    // Invert (setlt X, 1) by converting to (setlt 0, X).
16420 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16421 DAG.getConstant(0, SDLoc(Setcc), VT),
16422 Setcc.getOperand(0), CCVal);
16423 } else
16424 return SDValue();
16425
16426 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16427 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16428}
16429
16430// Perform common combines for BR_CC and SELECT_CC conditions.
16431static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16432 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16433 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16434
16435  // Since an arithmetic right shift always preserves the sign bit, the
16436  // shift can be omitted here.
16437 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16438 // setge (sra X, N), 0 -> setge X, 0
16439 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16440 LHS.getOpcode() == ISD::SRA) {
16441 LHS = LHS.getOperand(0);
16442 return true;
16443 }
16444
16445 if (!ISD::isIntEqualitySetCC(CCVal))
16446 return false;
16447
16448 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16449 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16450 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16451 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16452 // If we're looking for eq 0 instead of ne 0, we need to invert the
16453 // condition.
16454 bool Invert = CCVal == ISD::SETEQ;
16455 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16456 if (Invert)
16457 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16458
16459 RHS = LHS.getOperand(1);
16460 LHS = LHS.getOperand(0);
16461 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16462
16463 CC = DAG.getCondCode(CCVal);
16464 return true;
16465 }
16466
16467 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16468 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16469 RHS = LHS.getOperand(1);
16470 LHS = LHS.getOperand(0);
16471 return true;
16472 }
16473
16474 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16475 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16476 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16477 SDValue LHS0 = LHS.getOperand(0);
16478 if (LHS0.getOpcode() == ISD::AND &&
16479 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16480 uint64_t Mask = LHS0.getConstantOperandVal(1);
16481 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16482 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16483 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16484 CC = DAG.getCondCode(CCVal);
16485
16486 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16487 LHS = LHS0.getOperand(0);
16488 if (ShAmt != 0)
16489 LHS =
16490 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16491 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16492 return true;
16493 }
16494 }
16495 }
16496
16497  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16498 // This can occur when legalizing some floating point comparisons.
16499 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16500 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16501 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16502 CC = DAG.getCondCode(CCVal);
16503 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16504 return true;
16505 }
16506
16507 if (isNullConstant(RHS)) {
16508 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16509 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16510 CC = DAG.getCondCode(CCVal);
16511 LHS = NewCond;
16512 return true;
16513 }
16514 }
16515
16516 return false;
16517}
16518
16519// Fold
16520// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16521// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16522// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16523// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
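// For example (illustrative): (select c, (add a, b), a) becomes
// (add a, (select c, b, 0)), so only the second operand is selected (or
// conditionally zeroed) instead of selecting between two full results.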
16524static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16525                                   SDValue TrueVal, SDValue FalseVal,
16526 bool Swapped) {
16527 bool Commutative = true;
16528 unsigned Opc = TrueVal.getOpcode();
16529 switch (Opc) {
16530 default:
16531 return SDValue();
16532 case ISD::SHL:
16533 case ISD::SRA:
16534 case ISD::SRL:
16535 case ISD::SUB:
16536 Commutative = false;
16537 break;
16538 case ISD::ADD:
16539 case ISD::OR:
16540 case ISD::XOR:
16541 break;
16542 }
16543
16544 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16545 return SDValue();
16546
16547 unsigned OpToFold;
16548 if (FalseVal == TrueVal.getOperand(0))
16549 OpToFold = 0;
16550 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16551 OpToFold = 1;
16552 else
16553 return SDValue();
16554
16555 EVT VT = N->getValueType(0);
16556 SDLoc DL(N);
16557 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16558 EVT OtherOpVT = OtherOp.getValueType();
16559 SDValue IdentityOperand =
16560 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16561 if (!Commutative)
16562 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16563 assert(IdentityOperand && "No identity operand!");
16564
16565 if (Swapped)
16566 std::swap(OtherOp, IdentityOperand);
16567 SDValue NewSel =
16568 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16569 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16570}
16571
16572// This tries to get rid of the `select` and `icmp` that are being used to
16573// handle targets that do not support `cttz(0)`/`ctlz(0)`.
16574static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16575  SDValue Cond = N->getOperand(0);
16576
16577 // This represents either CTTZ or CTLZ instruction.
16578 SDValue CountZeroes;
16579
16580 SDValue ValOnZero;
16581
16582 if (Cond.getOpcode() != ISD::SETCC)
16583 return SDValue();
16584
16585 if (!isNullConstant(Cond->getOperand(1)))
16586 return SDValue();
16587
16588 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16589 if (CCVal == ISD::CondCode::SETEQ) {
16590 CountZeroes = N->getOperand(2);
16591 ValOnZero = N->getOperand(1);
16592 } else if (CCVal == ISD::CondCode::SETNE) {
16593 CountZeroes = N->getOperand(1);
16594 ValOnZero = N->getOperand(2);
16595 } else {
16596 return SDValue();
16597 }
16598
16599 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16600 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16601 CountZeroes = CountZeroes.getOperand(0);
16602
16603 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16604 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16605 CountZeroes.getOpcode() != ISD::CTLZ &&
16606 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16607 return SDValue();
16608
16609 if (!isNullConstant(ValOnZero))
16610 return SDValue();
16611
16612 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16613 if (Cond->getOperand(0) != CountZeroesArgument)
16614 return SDValue();
16615
16616 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16617 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16618 CountZeroes.getValueType(), CountZeroesArgument);
16619 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16620 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16621 CountZeroes.getValueType(), CountZeroesArgument);
16622 }
16623
16624 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16625 SDValue BitWidthMinusOne =
16626 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16627
16628 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16629 CountZeroes, BitWidthMinusOne);
16630 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16631}
16632
16633static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16634                                const RISCVSubtarget &Subtarget) {
16635 SDValue Cond = N->getOperand(0);
16636 SDValue True = N->getOperand(1);
16637 SDValue False = N->getOperand(2);
16638 SDLoc DL(N);
16639 EVT VT = N->getValueType(0);
16640 EVT CondVT = Cond.getValueType();
16641
16642 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16643 return SDValue();
16644
16645 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
16646 // BEXTI, where C is power of 2.
16647 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16648 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16649 SDValue LHS = Cond.getOperand(0);
16650 SDValue RHS = Cond.getOperand(1);
16651 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16652 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16653 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16654 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16655 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16656 return DAG.getSelect(DL, VT,
16657 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16658 False, True);
16659 }
16660 }
16661 return SDValue();
16662}
16663
16664static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16665                                    const RISCVSubtarget &Subtarget) {
16666 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16667 return Folded;
16668
16669 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16670 return V;
16671
16672 if (Subtarget.hasConditionalMoveFusion())
16673 return SDValue();
16674
16675 SDValue TrueVal = N->getOperand(1);
16676 SDValue FalseVal = N->getOperand(2);
16677 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16678 return V;
16679 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16680}
16681
16682/// If we have a build_vector where each lane is binop X, C, where C
16683/// is a constant (but not necessarily the same constant on all lanes),
16684/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16685/// We assume that materializing a constant build vector will be no more
16686/// expensive than performing O(n) binops.
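/// For example (illustrative):
///   build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4)
/// becomes
///   add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)
/// replacing the scalar adds with one vector add and a constant vector.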
16687static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16688                                          const RISCVSubtarget &Subtarget,
16689 const RISCVTargetLowering &TLI) {
16690 SDLoc DL(N);
16691 EVT VT = N->getValueType(0);
16692
16693 assert(!VT.isScalableVector() && "unexpected build vector");
16694
16695 if (VT.getVectorNumElements() == 1)
16696 return SDValue();
16697
16698 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16699 if (!TLI.isBinOp(Opcode))
16700 return SDValue();
16701
16702 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16703 return SDValue();
16704
16705 // This BUILD_VECTOR involves an implicit truncation, and sinking
16706 // truncates through binops is non-trivial.
16707 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16708 return SDValue();
16709
16710 SmallVector<SDValue> LHSOps;
16711 SmallVector<SDValue> RHSOps;
16712 for (SDValue Op : N->ops()) {
16713 if (Op.isUndef()) {
16714 // We can't form a divide or remainder from undef.
16715 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16716 return SDValue();
16717
16718 LHSOps.push_back(Op);
16719 RHSOps.push_back(Op);
16720 continue;
16721 }
16722
16723    // TODO: We can handle operations which have a neutral rhs value
16724 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16725 // of profit in a more explicit manner.
16726 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16727 return SDValue();
16728
16729 LHSOps.push_back(Op.getOperand(0));
16730 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16731 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16732 return SDValue();
16733 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16734 // have different LHS and RHS types.
16735 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16736 return SDValue();
16737
16738 RHSOps.push_back(Op.getOperand(1));
16739 }
16740
16741 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16742 DAG.getBuildVector(VT, DL, RHSOps));
16743}
16744
16745static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
16746                                               const RISCVSubtarget &Subtarget,
16747 const RISCVTargetLowering &TLI) {
16748 SDValue InVec = N->getOperand(0);
16749 SDValue InVal = N->getOperand(1);
16750 SDValue EltNo = N->getOperand(2);
16751 SDLoc DL(N);
16752
16753 EVT VT = InVec.getValueType();
16754 if (VT.isScalableVector())
16755 return SDValue();
16756
16757 if (!InVec.hasOneUse())
16758 return SDValue();
16759
16760 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16761 // move the insert_vector_elts into the arms of the binop. Note that
16762 // the new RHS must be a constant.
16763 const unsigned InVecOpcode = InVec->getOpcode();
16764 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16765 InVal.hasOneUse()) {
16766 SDValue InVecLHS = InVec->getOperand(0);
16767 SDValue InVecRHS = InVec->getOperand(1);
16768 SDValue InValLHS = InVal->getOperand(0);
16769 SDValue InValRHS = InVal->getOperand(1);
16770
16771    if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
16772      return SDValue();
16773 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16774 return SDValue();
16775 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16776 // have different LHS and RHS types.
16777 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16778 return SDValue();
16779    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16780                              InVecLHS, InValLHS, EltNo);
16781    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16782                              InVecRHS, InValRHS, EltNo);
16783 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16784 }
16785
16786 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16787 // move the insert_vector_elt to the source operand of the concat_vector.
16788 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16789 return SDValue();
16790
16791 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16792 if (!IndexC)
16793 return SDValue();
16794 unsigned Elt = IndexC->getZExtValue();
16795
16796 EVT ConcatVT = InVec.getOperand(0).getValueType();
16797 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16798 return SDValue();
16799 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16800 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16801
16802 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16803 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16804 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16805 ConcatOp, InVal, NewIdx);
16806
16807 SmallVector<SDValue> ConcatOps;
16808 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16809 ConcatOps[ConcatOpIdx] = ConcatOp;
16810 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16811}
16812
16813// If we're concatenating a series of vector loads like
16814// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16815// Then we can turn this into a strided load by widening the vector elements
16816// vlse32 p, stride=n
16817static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
16818                                            const RISCVSubtarget &Subtarget,
16819 const RISCVTargetLowering &TLI) {
16820 SDLoc DL(N);
16821 EVT VT = N->getValueType(0);
16822
16823 // Only perform this combine on legal MVTs.
16824 if (!TLI.isTypeLegal(VT))
16825 return SDValue();
16826
16827 // TODO: Potentially extend this to scalable vectors
16828 if (VT.isScalableVector())
16829 return SDValue();
16830
16831 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16832 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16833 !SDValue(BaseLd, 0).hasOneUse())
16834 return SDValue();
16835
16836 EVT BaseLdVT = BaseLd->getValueType(0);
16837
16838 // Go through the loads and check that they're strided
16839  SmallVector<LoadSDNode *> Lds;
16840  Lds.push_back(BaseLd);
16841 Align Align = BaseLd->getAlign();
16842 for (SDValue Op : N->ops().drop_front()) {
16843 auto *Ld = dyn_cast<LoadSDNode>(Op);
16844 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16845 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16846 Ld->getValueType(0) != BaseLdVT)
16847 return SDValue();
16848
16849 Lds.push_back(Ld);
16850
16851 // The common alignment is the most restrictive (smallest) of all the loads
16852 Align = std::min(Align, Ld->getAlign());
16853 }
16854
16855 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16856 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16857 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16858 // If the load ptrs can be decomposed into a common (Base + Index) with a
16859 // common constant stride, then return the constant stride.
16860 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16861 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16862 if (BIO1.equalBaseIndex(BIO2, DAG))
16863 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16864
16865 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16866 SDValue P1 = Ld1->getBasePtr();
16867 SDValue P2 = Ld2->getBasePtr();
16868 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16869 return {{P2.getOperand(1), false}};
16870 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16871 return {{P1.getOperand(1), true}};
16872
16873 return std::nullopt;
16874 };
16875
16876 // Get the distance between the first and second loads
16877 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16878 if (!BaseDiff)
16879 return SDValue();
16880
16881 // Check all the loads are the same distance apart
16882 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16883 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16884 return SDValue();
16885
16886 // TODO: At this point, we've successfully matched a generalized gather
16887 // load. Maybe we should emit that, and then move the specialized
16888 // matchers above and below into a DAG combine?
16889
16890  // Get the widened scalar type, e.g. v4i8 -> i32
16891 unsigned WideScalarBitWidth =
16892 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16893 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16894
16895  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
16896 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16897 if (!TLI.isTypeLegal(WideVecVT))
16898 return SDValue();
16899
16900 // Check that the operation is legal
16901 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16902 return SDValue();
16903
16904 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16905 SDValue Stride =
16906 std::holds_alternative<SDValue>(StrideVariant)
16907 ? std::get<SDValue>(StrideVariant)
16908 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
16909 Lds[0]->getOffset().getValueType());
16910 if (MustNegateStride)
16911 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16912
16913 SDValue AllOneMask =
16914 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16915 DAG.getConstant(1, DL, MVT::i1));
16916
16917 uint64_t MemSize;
16918 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16919 ConstStride && ConstStride->getSExtValue() >= 0)
16920 // total size = (elsize * n) + (stride - elsize) * (n-1)
16921 // = elsize + stride * (n-1)
16922 MemSize = WideScalarVT.getSizeInBits() +
16923 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16924 else
16925 // If Stride isn't constant, then we can't know how much it will load
16926    MemSize = MemoryLocation::UnknownSize;
16927
16928  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
16929      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
16930 Align);
16931
16932 SDValue StridedLoad = DAG.getStridedLoadVP(
16933 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
16934 AllOneMask,
16935 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
16936
16937 for (SDValue Ld : N->ops())
16938 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
16939
16940 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
16941}
16942
16943static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
16944                               const RISCVSubtarget &Subtarget) {
16945
16946 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16947
16948 if (N->getValueType(0).isFixedLengthVector())
16949 return SDValue();
16950
16951 SDValue Addend = N->getOperand(0);
16952 SDValue MulOp = N->getOperand(1);
16953
16954 if (N->getOpcode() == RISCVISD::ADD_VL) {
16955 SDValue AddPassthruOp = N->getOperand(2);
16956 if (!AddPassthruOp.isUndef())
16957 return SDValue();
16958 }
16959
16960 auto IsVWMulOpc = [](unsigned Opc) {
16961 switch (Opc) {
16962 case RISCVISD::VWMUL_VL:
16963    case RISCVISD::VWMULU_VL:
16964    case RISCVISD::VWMULSU_VL:
16965      return true;
16966 default:
16967 return false;
16968 }
16969 };
16970
16971 if (!IsVWMulOpc(MulOp.getOpcode()))
16972 std::swap(Addend, MulOp);
16973
16974 if (!IsVWMulOpc(MulOp.getOpcode()))
16975 return SDValue();
16976
16977 SDValue MulPassthruOp = MulOp.getOperand(2);
16978
16979 if (!MulPassthruOp.isUndef())
16980 return SDValue();
16981
16982 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
16983 const RISCVSubtarget &Subtarget) {
16984 if (N->getOpcode() == ISD::ADD) {
16985 SDLoc DL(N);
16986 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
16987 Subtarget);
16988 }
16989 return std::make_pair(N->getOperand(3), N->getOperand(4));
16990 }(N, DAG, Subtarget);
16991
16992 SDValue MulMask = MulOp.getOperand(3);
16993 SDValue MulVL = MulOp.getOperand(4);
16994
16995 if (AddMask != MulMask || AddVL != MulVL)
16996 return SDValue();
16997
16998 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
16999 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17000 "Unexpected opcode after VWMACC_VL");
17001 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17002 "Unexpected opcode after VWMACC_VL!");
17003 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17004 "Unexpected opcode after VWMUL_VL!");
17005 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17006 "Unexpected opcode after VWMUL_VL!");
17007
17008 SDLoc DL(N);
17009 EVT VT = N->getValueType(0);
17010 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17011 AddVL};
17012 return DAG.getNode(Opc, DL, VT, Ops);
17013}
17014
17015static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17016                                           ISD::MemIndexType &IndexType,
17017                                           TargetLowering::DAGCombinerInfo &DCI) {
17018  if (!DCI.isBeforeLegalize())
17019 return false;
17020
17021 SelectionDAG &DAG = DCI.DAG;
17022 const MVT XLenVT =
17023 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17024
17025 const EVT IndexVT = Index.getValueType();
17026
17027 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17028 // mode, so anything else must be manually legalized.
17029 if (!isIndexTypeSigned(IndexType))
17030 return false;
17031
17032 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17033 // Any index legalization should first promote to XLenVT, so we don't lose
17034 // bits when scaling. This may create an illegal index type so we let
17035 // LLVM's legalization take care of the splitting.
17036 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17037 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17038 IndexVT.changeVectorElementType(XLenVT), Index);
17039 }
17040 IndexType = ISD::UNSIGNED_SCALED;
17041 return true;
17042}
17043
17044/// Match the index vector of a scatter or gather node as the shuffle mask
17045/// which performs the rearrangement if possible. Will only match if
17046/// all lanes are touched, and thus replacing the scatter or gather with
17047/// a unit strided access and shuffle is legal.
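/// For example (illustrative): a v4i32 gather with an all-ones mask and
/// constant byte offsets <4, 0, 12, 8> touches every lane, so it matches with
/// ShuffleMask = {1, 0, 3, 2} and can become a unit-strided load plus shuffle.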
17048static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17049 SmallVector<int> &ShuffleMask) {
17050 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17051 return false;
17052 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17053 return false;
17054
17055 const unsigned ElementSize = VT.getScalarStoreSize();
17056 const unsigned NumElems = VT.getVectorNumElements();
17057
17058 // Create the shuffle mask and check all bits active
17059 assert(ShuffleMask.empty());
17060 BitVector ActiveLanes(NumElems);
17061 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17062 // TODO: We've found an active bit of UB, and could be
17063 // more aggressive here if desired.
17064 if (Index->getOperand(i)->isUndef())
17065 return false;
17066 uint64_t C = Index->getConstantOperandVal(i);
17067 if (C % ElementSize != 0)
17068 return false;
17069 C = C / ElementSize;
17070 if (C >= NumElems)
17071 return false;
17072 ShuffleMask.push_back(C);
17073 ActiveLanes.set(C);
17074 }
17075 return ActiveLanes.all();
17076}
17077
17078/// Match the index of a gather or scatter operation as an operation
17079/// with twice the element width and half the number of elements. This is
17080/// generally profitable (if legal) because these operations are linear
17081/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
17082/// come out ahead.
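/// For example (illustrative): a v4i32 gather with constant byte offsets
/// <0, 4, 16, 20> reads two adjacent i32 pairs, so it can be treated as a
/// v2i64 gather with offsets <0, 16>, halving the number of indexed elements.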
17083static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17084 Align BaseAlign, const RISCVSubtarget &ST) {
17085 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17086 return false;
17087 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17088 return false;
17089
17090  // Attempt a doubling. If we can use an element type 4x or 8x in
17091  // size, this will happen via multiple iterations of the transform.
17092 const unsigned NumElems = VT.getVectorNumElements();
17093 if (NumElems % 2 != 0)
17094 return false;
17095
17096 const unsigned ElementSize = VT.getScalarStoreSize();
17097 const unsigned WiderElementSize = ElementSize * 2;
17098 if (WiderElementSize > ST.getELen()/8)
17099 return false;
17100
17101 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17102 return false;
17103
17104 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17105 // TODO: We've found an active bit of UB, and could be
17106 // more aggressive here if desired.
17107 if (Index->getOperand(i)->isUndef())
17108 return false;
17109 // TODO: This offset check is too strict if we support fully
17110 // misaligned memory operations.
17111 uint64_t C = Index->getConstantOperandVal(i);
17112 if (i % 2 == 0) {
17113 if (C % WiderElementSize != 0)
17114 return false;
17115 continue;
17116 }
17117 uint64_t Last = Index->getConstantOperandVal(i-1);
17118 if (C != Last + ElementSize)
17119 return false;
17120 }
17121 return true;
17122}
17123
17124// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17125// This is beneficial when X and Y share the same low-precision vector value
17126// type. Since the truncate would be lowered into
17127// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17128// restriction, such pattern would be expanded into a series of "vsetvli"
17129// and "vnsrl" instructions later to reach this point.
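// For example (illustrative): for X and Y of type v4i8,
// trunc (sra (sext X to v4i32), (zext Y to v4i32)) is rewritten to
// sra (X, smin (Y, 7)), avoiding the widen/shift/narrow chain entirely.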
17130static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17131  SDValue Mask = N->getOperand(1);
17132 SDValue VL = N->getOperand(2);
17133
17134 bool IsVLMAX = isAllOnesConstant(VL) ||
17135 (isa<RegisterSDNode>(VL) &&
17136 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17137 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17138 Mask.getOperand(0) != VL)
17139 return SDValue();
17140
17141 auto IsTruncNode = [&](SDValue V) {
17142 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17143 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17144 };
17145
17146 SDValue Op = N->getOperand(0);
17147
17148 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17149 // to distinguish such pattern.
17150 while (IsTruncNode(Op)) {
17151 if (!Op.hasOneUse())
17152 return SDValue();
17153 Op = Op.getOperand(0);
17154 }
17155
17156 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17157 return SDValue();
17158
17159 SDValue N0 = Op.getOperand(0);
17160 SDValue N1 = Op.getOperand(1);
17161 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17162 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17163 return SDValue();
17164
17165 SDValue N00 = N0.getOperand(0);
17166 SDValue N10 = N1.getOperand(0);
17167 if (!N00.getValueType().isVector() ||
17168 N00.getValueType() != N10.getValueType() ||
17169 N->getValueType(0) != N10.getValueType())
17170 return SDValue();
17171
17172 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17173 SDValue SMin =
17174 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17175 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17176 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17177}
17178
17179// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17180// maximum value for the truncated type.
17181// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17182// is the signed maximum value for the truncated type and C2 is the signed
17183// minimum value.
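// For example (illustrative), when truncating from i16 to i8 elements:
//   (truncate_vector_vl (umin X, 255))              -> unsigned saturation
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> signed saturation
// both of which can be selected as a vnclipu/vnclip with a shift amount of
// zero.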
17184static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17185                                    const RISCVSubtarget &Subtarget) {
17186 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17187
17188 MVT VT = N->getSimpleValueType(0);
17189
17190 SDValue Mask = N->getOperand(1);
17191 SDValue VL = N->getOperand(2);
17192
17193 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17194 APInt &SplatVal) {
17195 if (V.getOpcode() != Opc &&
17196 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17197 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17198 return SDValue();
17199
17200 SDValue Op = V.getOperand(1);
17201
17202 // Peek through conversion between fixed and scalable vectors.
17203 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17204 isNullConstant(Op.getOperand(2)) &&
17205 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17206 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17207 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17208 isNullConstant(Op.getOperand(1).getOperand(1)))
17209 Op = Op.getOperand(1).getOperand(0);
17210
17211 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17212 return V.getOperand(0);
17213
17214 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17215 Op.getOperand(2) == VL) {
17216 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17217 SplatVal =
17218 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17219 return V.getOperand(0);
17220 }
17221 }
17222
17223 return SDValue();
17224 };
17225
17226 SDLoc DL(N);
17227
17228 auto DetectUSatPattern = [&](SDValue V) {
17229 APInt LoC, HiC;
17230
17231 // Simple case, V is a UMIN.
17232 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17233 if (HiC.isMask(VT.getScalarSizeInBits()))
17234 return UMinOp;
17235
17236 // If we have an SMAX that removes negative numbers first, then we can match
17237 // SMIN instead of UMIN.
17238 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17239 if (SDValue SMaxOp =
17240 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17241 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17242 return SMinOp;
17243
17244 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17245 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17246 // first.
17247 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17248 if (SDValue SMinOp =
17249 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17250 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17251 HiC.uge(LoC))
17252 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17253 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17254 Mask, VL);
17255
17256 return SDValue();
17257 };
17258
17259 auto DetectSSatPattern = [&](SDValue V) {
17260 unsigned NumDstBits = VT.getScalarSizeInBits();
17261 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17262 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17263 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17264
17265 APInt HiC, LoC;
17266 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17267 if (SDValue SMaxOp =
17268 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17269 if (HiC == SignedMax && LoC == SignedMin)
17270 return SMaxOp;
17271
17272 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17273 if (SDValue SMinOp =
17274 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17275 if (HiC == SignedMax && LoC == SignedMin)
17276 return SMinOp;
17277
17278 return SDValue();
17279 };
17280
17281 SDValue Src = N->getOperand(0);
17282
17283 // Look through multiple layers of truncates.
17284 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17285 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17286 Src.hasOneUse())
17287 Src = Src.getOperand(0);
17288
17289 SDValue Val;
17290 unsigned ClipOpc;
17291  if ((Val = DetectUSatPattern(Src)))
17292    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17293  else if ((Val = DetectSSatPattern(Src)))
17294    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17295  else
17296 return SDValue();
17297
17298 MVT ValVT = Val.getSimpleValueType();
17299
17300 do {
17301 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17302 ValVT = ValVT.changeVectorElementType(ValEltVT);
17303 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17304 } while (ValVT != VT);
17305
17306 return Val;
17307}
17308
17309// Convert
17310// (iX ctpop (bitcast (vXi1 A)))
17311// ->
17312// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17313// FIXME: It's complicated to match all the variations of this after type
17314// legalization so we only handle the pre-type legalization pattern, but that
17315// requires the fixed vector type to be legal.
17316static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17317                                         const RISCVSubtarget &Subtarget) {
17318 EVT VT = N->getValueType(0);
17319 if (!VT.isScalarInteger())
17320 return SDValue();
17321
17322 SDValue Src = N->getOperand(0);
17323
17324 // Peek through zero_extend. It doesn't change the count.
17325 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17326 Src = Src.getOperand(0);
17327
17328 if (Src.getOpcode() != ISD::BITCAST)
17329 return SDValue();
17330
17331 Src = Src.getOperand(0);
17332 EVT SrcEVT = Src.getValueType();
17333 if (!SrcEVT.isSimple())
17334 return SDValue();
17335
17336 MVT SrcMVT = SrcEVT.getSimpleVT();
17337 // Make sure the input is an i1 vector.
17338 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17339 return SDValue();
17340
17341 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17342 return SDValue();
17343
17344 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17345 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17346
17347 SDLoc DL(N);
17348 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17349
17350 MVT XLenVT = Subtarget.getXLenVT();
17351 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17352 return DAG.getZExtOrTrunc(Pop, DL, VT);
17353}
17354
17355SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17356                                               DAGCombinerInfo &DCI) const {
17357 SelectionDAG &DAG = DCI.DAG;
17358 const MVT XLenVT = Subtarget.getXLenVT();
17359 SDLoc DL(N);
17360
17361 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17362 // bits are demanded. N will be added to the Worklist if it was not deleted.
17363 // Caller should return SDValue(N, 0) if this returns true.
17364 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17365 SDValue Op = N->getOperand(OpNo);
17366 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17367 if (!SimplifyDemandedBits(Op, Mask, DCI))
17368 return false;
17369
17370 if (N->getOpcode() != ISD::DELETED_NODE)
17371 DCI.AddToWorklist(N);
17372 return true;
17373 };
17374
17375 switch (N->getOpcode()) {
17376 default:
17377 break;
17378 case RISCVISD::SplitF64: {
17379 SDValue Op0 = N->getOperand(0);
17380 // If the input to SplitF64 is just BuildPairF64 then the operation is
17381 // redundant. Instead, use BuildPairF64's operands directly.
17382 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17383 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17384
17385 if (Op0->isUndef()) {
17386 SDValue Lo = DAG.getUNDEF(MVT::i32);
17387 SDValue Hi = DAG.getUNDEF(MVT::i32);
17388 return DCI.CombineTo(N, Lo, Hi);
17389 }
17390
17391 // It's cheaper to materialise two 32-bit integers than to load a double
17392 // from the constant pool and transfer it to integer registers through the
17393 // stack.
17394 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17395 APInt V = C->getValueAPF().bitcastToAPInt();
17396 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17397 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17398 return DCI.CombineTo(N, Lo, Hi);
17399 }
17400
17401 // This is a target-specific version of a DAGCombine performed in
17402 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17403 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17404 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17405 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17406 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17407 break;
17408 SDValue NewSplitF64 =
17409 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17410 Op0.getOperand(0));
17411 SDValue Lo = NewSplitF64.getValue(0);
17412 SDValue Hi = NewSplitF64.getValue(1);
17413 APInt SignBit = APInt::getSignMask(32);
17414 if (Op0.getOpcode() == ISD::FNEG) {
17415 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17416 DAG.getConstant(SignBit, DL, MVT::i32));
17417 return DCI.CombineTo(N, Lo, NewHi);
17418 }
17419 assert(Op0.getOpcode() == ISD::FABS);
17420 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17421 DAG.getConstant(~SignBit, DL, MVT::i32));
17422 return DCI.CombineTo(N, Lo, NewHi);
17423 }
17424 case RISCVISD::SLLW:
17425 case RISCVISD::SRAW:
17426 case RISCVISD::SRLW:
17427 case RISCVISD::RORW:
17428 case RISCVISD::ROLW: {
17429 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17430 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17431 SimplifyDemandedLowBitsHelper(1, 5))
17432 return SDValue(N, 0);
17433
17434 break;
17435 }
17436 case RISCVISD::CLZW:
17437 case RISCVISD::CTZW: {
17438 // Only the lower 32 bits of the first operand are read
17439 if (SimplifyDemandedLowBitsHelper(0, 32))
17440 return SDValue(N, 0);
17441 break;
17442 }
17443  case RISCVISD::FMV_W_X_RV64: {
17444    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17445 // conversion is unnecessary and can be replaced with the
17446 // FMV_X_ANYEXTW_RV64 operand.
17447 SDValue Op0 = N->getOperand(0);
17448    if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17449      return Op0.getOperand(0);
17450 break;
17451 }
17452  case RISCVISD::FMV_X_ANYEXTH:
17453  case RISCVISD::FMV_X_ANYEXTW_RV64: {
17454    SDLoc DL(N);
17455 SDValue Op0 = N->getOperand(0);
17456 MVT VT = N->getSimpleValueType(0);
17457
17458 // Constant fold.
17459 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17460 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17461 return DAG.getConstant(Val, DL, VT);
17462 }
17463
17464 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17465 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17466 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17467 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17468 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17469 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17470 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17471 assert(Op0.getOperand(0).getValueType() == VT &&
17472 "Unexpected value type!");
17473 return Op0.getOperand(0);
17474 }
17475
17476 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17477 cast<LoadSDNode>(Op0)->isSimple()) {
17478      MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17479      auto *LN0 = cast<LoadSDNode>(Op0);
17480 SDValue Load =
17481 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17482 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17483 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17484 return Load;
17485 }
17486
17487 // This is a target-specific version of a DAGCombine performed in
17488 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17489 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17490 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17491 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17492 !Op0.getNode()->hasOneUse())
17493 break;
17494 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17495 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17496 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17497 if (Op0.getOpcode() == ISD::FNEG)
17498 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17499 DAG.getConstant(SignBit, DL, VT));
17500
17501 assert(Op0.getOpcode() == ISD::FABS);
17502 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17503 DAG.getConstant(~SignBit, DL, VT));
17504 }
17505 case ISD::ABS: {
17506 EVT VT = N->getValueType(0);
17507 SDValue N0 = N->getOperand(0);
17508 // abs (sext) -> zext (abs)
17509 // abs (zext) -> zext (handled elsewhere)
17510 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17511 SDValue Src = N0.getOperand(0);
17512 SDLoc DL(N);
17513 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17514 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17515 }
17516 break;
17517 }
17518 case ISD::ADD: {
17519 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17520 return V;
17521 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17522 return V;
17523 return performADDCombine(N, DCI, Subtarget);
17524 }
17525 case ISD::SUB: {
17526 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17527 return V;
17528 return performSUBCombine(N, DAG, Subtarget);
17529 }
17530 case ISD::AND:
17531 return performANDCombine(N, DCI, Subtarget);
17532 case ISD::OR: {
17533 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17534 return V;
17535 return performORCombine(N, DCI, Subtarget);
17536 }
17537 case ISD::XOR:
17538 return performXORCombine(N, DAG, Subtarget);
17539 case ISD::MUL:
17540 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17541 return V;
17542 return performMULCombine(N, DAG, DCI, Subtarget);
17543 case ISD::SDIV:
17544 case ISD::UDIV:
17545 case ISD::SREM:
17546 case ISD::UREM:
17547 if (SDValue V = combineBinOpOfZExt(N, DAG))
17548 return V;
17549 break;
17550 case ISD::FMUL: {
17551 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17552 SDValue N0 = N->getOperand(0);
17553 SDValue N1 = N->getOperand(1);
17554 if (N0->getOpcode() != ISD::FCOPYSIGN)
17555 std::swap(N0, N1);
17556 if (N0->getOpcode() != ISD::FCOPYSIGN)
17557 return SDValue();
17558 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17559 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17560 return SDValue();
17561 EVT VT = N->getValueType(0);
17562 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17563 return SDValue();
17564 SDValue Sign = N0->getOperand(1);
17565 if (Sign.getValueType() != VT)
17566 return SDValue();
17567 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17568 }
17569 case ISD::FADD:
17570 case ISD::UMAX:
17571 case ISD::UMIN:
17572 case ISD::SMAX:
17573 case ISD::SMIN:
17574 case ISD::FMAXNUM:
17575 case ISD::FMINNUM: {
17576 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17577 return V;
17578 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17579 return V;
17580 return SDValue();
17581 }
17582 case ISD::SETCC:
17583 return performSETCCCombine(N, DAG, Subtarget);
17584  case ISD::SIGN_EXTEND_INREG:
17585    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17586 case ISD::ZERO_EXTEND:
17587 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17588 // type legalization. This is safe because fp_to_uint produces poison if
17589 // it overflows.
17590 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17591 SDValue Src = N->getOperand(0);
17592 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17593 isTypeLegal(Src.getOperand(0).getValueType()))
17594 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17595 Src.getOperand(0));
17596 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17597 isTypeLegal(Src.getOperand(1).getValueType())) {
17598 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17599 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17600 Src.getOperand(0), Src.getOperand(1));
17601 DCI.CombineTo(N, Res);
17602 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17603 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17604 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17605 }
17606 }
17607 return SDValue();
17608  case RISCVISD::TRUNCATE_VECTOR_VL:
17609    if (SDValue V = combineTruncOfSraSext(N, DAG))
17610 return V;
17611 return combineTruncToVnclip(N, DAG, Subtarget);
17612 case ISD::TRUNCATE:
17613 return performTRUNCATECombine(N, DAG, Subtarget);
17614 case ISD::SELECT:
17615 return performSELECTCombine(N, DAG, Subtarget);
17616  case RISCVISD::CZERO_EQZ:
17617  case RISCVISD::CZERO_NEZ: {
17618 SDValue Val = N->getOperand(0);
17619 SDValue Cond = N->getOperand(1);
17620
17621 unsigned Opc = N->getOpcode();
17622
17623 // czero_eqz x, x -> x
17624 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17625 return Val;
17626
17627 unsigned InvOpc =
17629
17630 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17631 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17632 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17633 SDValue NewCond = Cond.getOperand(0);
17634 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17635 if (DAG.MaskedValueIsZero(NewCond, Mask))
17636 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17637 }
17638 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17639 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17640 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17641 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17642 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17643 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17644 if (ISD::isIntEqualitySetCC(CCVal))
17645 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17646 N->getValueType(0), Val, Cond.getOperand(0));
17647 }
17648 return SDValue();
17649 }
17650 case RISCVISD::SELECT_CC: {
17651 // Transform
17652 SDValue LHS = N->getOperand(0);
17653 SDValue RHS = N->getOperand(1);
17654 SDValue CC = N->getOperand(2);
17655 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17656 SDValue TrueV = N->getOperand(3);
17657 SDValue FalseV = N->getOperand(4);
17658 SDLoc DL(N);
17659 EVT VT = N->getValueType(0);
17660
17661 // If the True and False values are the same, we don't need a select_cc.
17662 if (TrueV == FalseV)
17663 return TrueV;
17664
17665 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17666 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
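    // For example (illustrative) on RV64: (select (x < 0), 3, 1) becomes
    // (x >> 63) & (3 - 1) + 1, i.e. srai + andi + addi, avoiding a branch.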
17667 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17668 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17669 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17670 if (CCVal == ISD::CondCode::SETGE)
17671 std::swap(TrueV, FalseV);
17672
17673 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17674 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17675      // Only handle simm12; if the constant is outside this range, it can be
17676      // treated as a register operand instead.
17677 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17678 isInt<12>(TrueSImm - FalseSImm)) {
17679 SDValue SRA =
17680 DAG.getNode(ISD::SRA, DL, VT, LHS,
17681 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17682 SDValue AND =
17683 DAG.getNode(ISD::AND, DL, VT, SRA,
17684 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17685 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17686 }
17687
17688 if (CCVal == ISD::CondCode::SETGE)
17689 std::swap(TrueV, FalseV);
17690 }
17691
17692 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17693 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17694 {LHS, RHS, CC, TrueV, FalseV});
17695
17696 if (!Subtarget.hasConditionalMoveFusion()) {
17697 // (select c, -1, y) -> -c | y
17698 if (isAllOnesConstant(TrueV)) {
17699 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17700 SDValue Neg = DAG.getNegative(C, DL, VT);
17701 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17702 }
17703 // (select c, y, -1) -> -!c | y
17704 if (isAllOnesConstant(FalseV)) {
17705 SDValue C =
17706 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17707 SDValue Neg = DAG.getNegative(C, DL, VT);
17708 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17709 }
17710
17711 // (select c, 0, y) -> -!c & y
17712 if (isNullConstant(TrueV)) {
17713 SDValue C =
17714 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17715 SDValue Neg = DAG.getNegative(C, DL, VT);
17716 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17717 }
17718 // (select c, y, 0) -> -c & y
17719 if (isNullConstant(FalseV)) {
17720 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17721 SDValue Neg = DAG.getNegative(C, DL, VT);
17722 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17723 }
17724 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17725 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
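      // For example (illustrative): (x != 0 ? x : 1) becomes
      //   seqz a1, a0 ; add a0, a0, a1
      // i.e. x plus one exactly when x is zero.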
17726 if (((isOneConstant(FalseV) && LHS == TrueV &&
17727 CCVal == ISD::CondCode::SETNE) ||
17728 (isOneConstant(TrueV) && LHS == FalseV &&
17729 CCVal == ISD::CondCode::SETEQ)) &&
17730          isNullConstant(RHS)) {
17731        // freeze it to be safe.
17732        LHS = DAG.getFreeze(LHS);
17733        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17734        return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17735 }
17736 }
17737
17738 // If both true/false are an xor with 1, pull through the select.
17739 // This can occur after op legalization if both operands are setccs that
17740 // require an xor to invert.
17741 // FIXME: Generalize to other binary ops with identical operand?
17742 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17743 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17744 isOneConstant(TrueV.getOperand(1)) &&
17745 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17746 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17747 TrueV.getOperand(0), FalseV.getOperand(0));
17748 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17749 }
17750
17751 return SDValue();
17752 }
17753 case RISCVISD::BR_CC: {
17754 SDValue LHS = N->getOperand(1);
17755 SDValue RHS = N->getOperand(2);
17756 SDValue CC = N->getOperand(3);
17757 SDLoc DL(N);
17758
17759 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17760 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17761 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17762
17763 return SDValue();
17764 }
17765 case ISD::BITREVERSE:
17766 return performBITREVERSECombine(N, DAG, Subtarget);
17767 case ISD::FP_TO_SINT:
17768 case ISD::FP_TO_UINT:
17769 return performFP_TO_INTCombine(N, DCI, Subtarget);
17770  case ISD::FP_TO_SINT_SAT:
17771  case ISD::FP_TO_UINT_SAT:
17772    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17773 case ISD::FCOPYSIGN: {
17774 EVT VT = N->getValueType(0);
17775 if (!VT.isVector())
17776 break;
17777 // There is a form of VFSGNJ which injects the negated sign of its second
17778 // operand. Try and bubble any FNEG up after the extend/round to produce
17779    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
17780    // TRUNC=1.
17781 SDValue In2 = N->getOperand(1);
17782 // Avoid cases where the extend/round has multiple uses, as duplicating
17783 // those is typically more expensive than removing a fneg.
17784 if (!In2.hasOneUse())
17785 break;
17786 if (In2.getOpcode() != ISD::FP_EXTEND &&
17787 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17788 break;
17789 In2 = In2.getOperand(0);
17790 if (In2.getOpcode() != ISD::FNEG)
17791 break;
17792 SDLoc DL(N);
17793 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17794 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17795 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17796 }
17797 case ISD::MGATHER: {
17798 const auto *MGN = cast<MaskedGatherSDNode>(N);
17799 const EVT VT = N->getValueType(0);
17800 SDValue Index = MGN->getIndex();
17801 SDValue ScaleOp = MGN->getScale();
17802 ISD::MemIndexType IndexType = MGN->getIndexType();
17803 assert(!MGN->isIndexScaled() &&
17804 "Scaled gather/scatter should not be formed");
17805
17806 SDLoc DL(N);
17807 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17808 return DAG.getMaskedGather(
17809 N->getVTList(), MGN->getMemoryVT(), DL,
17810 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17811 MGN->getBasePtr(), Index, ScaleOp},
17812 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17813
17814 if (narrowIndex(Index, IndexType, DAG))
17815 return DAG.getMaskedGather(
17816 N->getVTList(), MGN->getMemoryVT(), DL,
17817 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17818 MGN->getBasePtr(), Index, ScaleOp},
17819 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17820
17821 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17822 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17823 // The sequence will be XLenVT, not the type of Index. Tell
17824 // isSimpleVIDSequence this so we avoid overflow.
17825 if (std::optional<VIDSequence> SimpleVID =
17826 isSimpleVIDSequence(Index, Subtarget.getXLen());
17827 SimpleVID && SimpleVID->StepDenominator == 1) {
17828 const int64_t StepNumerator = SimpleVID->StepNumerator;
17829 const int64_t Addend = SimpleVID->Addend;
17830
17831 // Note: We don't need to check alignment here since (by assumption
17832        // from the existence of the gather), our offsets must be sufficiently
17833 // aligned.
17834
17835 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17836 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17837 assert(IndexType == ISD::UNSIGNED_SCALED);
17838 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17839 DAG.getSignedConstant(Addend, DL, PtrVT));
17840
17841 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17842                                          VT.getVectorElementCount());
17843        SDValue StridedLoad = DAG.getStridedLoadVP(
17844 VT, DL, MGN->getChain(), BasePtr,
17845 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
17846 EVL, MGN->getMemOperand());
17847 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17848 StridedLoad, MGN->getPassThru(), EVL);
17849 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17850 DL);
17851 }
17852 }
17853
17854 SmallVector<int> ShuffleMask;
17855 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17856 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17857 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17858 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17859 MGN->getMask(), DAG.getUNDEF(VT),
17860 MGN->getMemoryVT(), MGN->getMemOperand(),
17861                                       ISD::UNINDEXED, ISD::NON_EXTLOAD);
17862      SDValue Shuffle =
17863 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17864 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17865 }
17866
17867 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17868 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17869 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
17870 SmallVector<SDValue> NewIndices;
17871 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17872 NewIndices.push_back(Index.getOperand(i));
17873 EVT IndexVT = Index.getValueType()
17874 .getHalfNumVectorElementsVT(*DAG.getContext());
17875 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17876
17877 unsigned ElementSize = VT.getScalarStoreSize();
17878 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17879 auto EltCnt = VT.getVectorElementCount();
17880 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17881 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17882 EltCnt.divideCoefficientBy(2));
17883 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17884 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17885 EltCnt.divideCoefficientBy(2));
17886 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17887
17888 SDValue Gather =
17889 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17890 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17891 Index, ScaleOp},
17892 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17893 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17894 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17895 }
17896 break;
17897 }
17898  case ISD::MSCATTER: {
17899 const auto *MSN = cast<MaskedScatterSDNode>(N);
17900 SDValue Index = MSN->getIndex();
17901 SDValue ScaleOp = MSN->getScale();
17902 ISD::MemIndexType IndexType = MSN->getIndexType();
17903 assert(!MSN->isIndexScaled() &&
17904 "Scaled gather/scatter should not be formed");
17905
17906 SDLoc DL(N);
17907 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17908 return DAG.getMaskedScatter(
17909 N->getVTList(), MSN->getMemoryVT(), DL,
17910 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17911 Index, ScaleOp},
17912 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17913
17914 if (narrowIndex(Index, IndexType, DAG))
17915 return DAG.getMaskedScatter(
17916 N->getVTList(), MSN->getMemoryVT(), DL,
17917 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17918 Index, ScaleOp},
17919 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17920
17921 EVT VT = MSN->getValue()->getValueType(0);
17922 SmallVector<int> ShuffleMask;
17923 if (!MSN->isTruncatingStore() &&
17924 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17925 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17926 DAG.getUNDEF(VT), ShuffleMask);
17927 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17928 DAG.getUNDEF(XLenVT), MSN->getMask(),
17929 MSN->getMemoryVT(), MSN->getMemOperand(),
17930 ISD::UNINDEXED, false);
17931 }
17932 break;
17933 }
17934 case ISD::VP_GATHER: {
17935 const auto *VPGN = cast<VPGatherSDNode>(N);
17936 SDValue Index = VPGN->getIndex();
17937 SDValue ScaleOp = VPGN->getScale();
17938 ISD::MemIndexType IndexType = VPGN->getIndexType();
17939 assert(!VPGN->isIndexScaled() &&
17940 "Scaled gather/scatter should not be formed");
17941
17942 SDLoc DL(N);
17943 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17944 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17945 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17946 ScaleOp, VPGN->getMask(),
17947 VPGN->getVectorLength()},
17948 VPGN->getMemOperand(), IndexType);
17949
17950 if (narrowIndex(Index, IndexType, DAG))
17951 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17952 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17953 ScaleOp, VPGN->getMask(),
17954 VPGN->getVectorLength()},
17955 VPGN->getMemOperand(), IndexType);
17956
17957 break;
17958 }
17959 case ISD::VP_SCATTER: {
17960 const auto *VPSN = cast<VPScatterSDNode>(N);
17961 SDValue Index = VPSN->getIndex();
17962 SDValue ScaleOp = VPSN->getScale();
17963 ISD::MemIndexType IndexType = VPSN->getIndexType();
17964 assert(!VPSN->isIndexScaled() &&
17965 "Scaled gather/scatter should not be formed");
17966
17967 SDLoc DL(N);
17968 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17969 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17970 {VPSN->getChain(), VPSN->getValue(),
17971 VPSN->getBasePtr(), Index, ScaleOp,
17972 VPSN->getMask(), VPSN->getVectorLength()},
17973 VPSN->getMemOperand(), IndexType);
17974
17975 if (narrowIndex(Index, IndexType, DAG))
17976 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17977 {VPSN->getChain(), VPSN->getValue(),
17978 VPSN->getBasePtr(), Index, ScaleOp,
17979 VPSN->getMask(), VPSN->getVectorLength()},
17980 VPSN->getMemOperand(), IndexType);
17981 break;
17982 }
17983 case RISCVISD::SHL_VL:
17984 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17985 return V;
17986 [[fallthrough]];
17987 case RISCVISD::SRA_VL:
17988 case RISCVISD::SRL_VL: {
17989 SDValue ShAmt = N->getOperand(1);
17990    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
17991      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17992 SDLoc DL(N);
17993 SDValue VL = N->getOperand(4);
17994 EVT VT = N->getValueType(0);
17995 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17996 ShAmt.getOperand(1), VL);
17997 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
17998 N->getOperand(2), N->getOperand(3), N->getOperand(4));
17999 }
18000 break;
18001 }
18002 case ISD::SRA:
18003 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18004 return V;
18005 [[fallthrough]];
18006 case ISD::SRL:
18007 case ISD::SHL: {
18008 if (N->getOpcode() == ISD::SHL) {
18009 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18010 return V;
18011 }
18012 SDValue ShAmt = N->getOperand(1);
18013    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18014      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18015 SDLoc DL(N);
18016 EVT VT = N->getValueType(0);
18017 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18018 ShAmt.getOperand(1),
18019 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18020 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18021 }
18022 break;
18023 }
18024 case RISCVISD::ADD_VL:
18025 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18026 return V;
18027 return combineToVWMACC(N, DAG, Subtarget);
18028  case RISCVISD::VWADD_W_VL:
18029  case RISCVISD::VWADDU_W_VL:
18030  case RISCVISD::VWSUB_W_VL:
18031  case RISCVISD::VWSUBU_W_VL:
18032    return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18033 case RISCVISD::SUB_VL:
18034 case RISCVISD::MUL_VL:
18035 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18036  case RISCVISD::VFMADD_VL:
18037  case RISCVISD::VFNMADD_VL:
18038  case RISCVISD::VFMSUB_VL:
18039  case RISCVISD::VFNMSUB_VL:
18040  case RISCVISD::STRICT_VFMADD_VL:
18041  case RISCVISD::STRICT_VFNMADD_VL:
18042  case RISCVISD::STRICT_VFMSUB_VL:
18043  case RISCVISD::STRICT_VFNMSUB_VL:
18044    return performVFMADD_VLCombine(N, DCI, Subtarget);
18045 case RISCVISD::FADD_VL:
18046 case RISCVISD::FSUB_VL:
18047 case RISCVISD::FMUL_VL:
18048  case RISCVISD::VFWADD_W_VL:
18049  case RISCVISD::VFWSUB_W_VL:
18050    return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18051 case ISD::LOAD:
18052 case ISD::STORE: {
18053 if (DCI.isAfterLegalizeDAG())
18054 if (SDValue V = performMemPairCombine(N, DCI))
18055 return V;
18056
18057 if (N->getOpcode() != ISD::STORE)
18058 break;
18059
18060 auto *Store = cast<StoreSDNode>(N);
18061 SDValue Chain = Store->getChain();
18062 EVT MemVT = Store->getMemoryVT();
18063 SDValue Val = Store->getValue();
18064 SDLoc DL(N);
18065
18066 bool IsScalarizable =
18067 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18068 Store->isSimple() &&
18069 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18070 isPowerOf2_64(MemVT.getSizeInBits()) &&
18071 MemVT.getSizeInBits() <= Subtarget.getXLen();
18072
18073 // If sufficiently aligned we can scalarize stores of constant vectors of
18074 // any power-of-two size up to XLen bits, provided that they aren't too
18075 // expensive to materialize.
18076 // vsetivli zero, 2, e8, m1, ta, ma
18077 // vmv.v.i v8, 4
18078 // vse64.v v8, (a0)
18079 // ->
18080 // li a1, 1028
18081 // sh a1, 0(a0)
18082 if (DCI.isBeforeLegalize() && IsScalarizable &&
18083        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18084      // Get the constant vector bits
18085 APInt NewC(Val.getValueSizeInBits(), 0);
18086 uint64_t EltSize = Val.getScalarValueSizeInBits();
18087 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18088 if (Val.getOperand(i).isUndef())
18089 continue;
18090 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18091 i * EltSize);
18092 }
18093 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18094
18095 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18096 true) <= 2 &&
18097          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18098                                         NewVT, *Store->getMemOperand())) {
18099 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18100 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18101 Store->getPointerInfo(), Store->getOriginalAlign(),
18102 Store->getMemOperand()->getFlags());
18103 }
18104 }
18105
18106 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18107 // vsetivli zero, 2, e16, m1, ta, ma
18108 // vle16.v v8, (a0)
18109 // vse16.v v8, (a1)
18110 if (auto *L = dyn_cast<LoadSDNode>(Val);
18111 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18112 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18113 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18114 L->getMemoryVT() == MemVT) {
18115 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18116      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18117                                         NewVT, *Store->getMemOperand()) &&
18118          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18119                                         NewVT, *L->getMemOperand())) {
18120 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18121 L->getPointerInfo(), L->getOriginalAlign(),
18122 L->getMemOperand()->getFlags());
18123 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18124 Store->getPointerInfo(), Store->getOriginalAlign(),
18125 Store->getMemOperand()->getFlags());
18126 }
18127 }
18128
18129 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18130 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18131 // any illegal types.
18132 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18133 (DCI.isAfterLegalizeDAG() &&
18134         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18135         isNullConstant(Val.getOperand(1)))) {
18136 SDValue Src = Val.getOperand(0);
18137 MVT VecVT = Src.getSimpleValueType();
18138 // VecVT should be scalable and memory VT should match the element type.
18139 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18140 MemVT == VecVT.getVectorElementType()) {
18141 SDLoc DL(N);
18142 MVT MaskVT = getMaskTypeFor(VecVT);
18143 return DAG.getStoreVP(
18144 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18145 DAG.getConstant(1, DL, MaskVT),
18146 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18147 Store->getMemOperand(), Store->getAddressingMode(),
18148 Store->isTruncatingStore(), /*IsCompress*/ false);
18149 }
18150 }
18151
18152 break;
18153 }
18154 case ISD::SPLAT_VECTOR: {
18155 EVT VT = N->getValueType(0);
18156 // Only perform this combine on legal MVT types.
18157 if (!isTypeLegal(VT))
18158 break;
18159 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18160 DAG, Subtarget))
18161 return Gather;
18162 break;
18163 }
18164 case ISD::BUILD_VECTOR:
18165 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18166 return V;
18167 break;
18168  case ISD::CONCAT_VECTORS:
18169    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18170 return V;
18171 break;
18172  case ISD::INSERT_VECTOR_ELT:
18173    if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18174 return V;
18175 break;
18176 case RISCVISD::VFMV_V_F_VL: {
18177 const MVT VT = N->getSimpleValueType(0);
18178 SDValue Passthru = N->getOperand(0);
18179 SDValue Scalar = N->getOperand(1);
18180 SDValue VL = N->getOperand(2);
18181
18182 // If VL is 1, we can use vfmv.s.f.
18183 if (isOneConstant(VL))
18184 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18185 break;
18186 }
18187 case RISCVISD::VMV_V_X_VL: {
18188 const MVT VT = N->getSimpleValueType(0);
18189 SDValue Passthru = N->getOperand(0);
18190 SDValue Scalar = N->getOperand(1);
18191 SDValue VL = N->getOperand(2);
18192
18193 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18194 // scalar input.
18195 unsigned ScalarSize = Scalar.getValueSizeInBits();
18196 unsigned EltWidth = VT.getScalarSizeInBits();
18197 if (ScalarSize > EltWidth && Passthru.isUndef())
18198 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18199 return SDValue(N, 0);
18200
18201 // If VL is 1 and the scalar value won't benefit from immediate, we can
18202 // use vmv.s.x.
18203 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18204 if (isOneConstant(VL) &&
18205 (!Const || Const->isZero() ||
18206 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18207 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18208
18209 break;
18210 }
18211 case RISCVISD::VFMV_S_F_VL: {
18212 SDValue Src = N->getOperand(1);
18213 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18214 // into an undef vector.
18215 // TODO: Could use a vslide or vmv.v.v for non-undef.
18216 if (N->getOperand(0).isUndef() &&
18217 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18218 isNullConstant(Src.getOperand(1)) &&
18219 Src.getOperand(0).getValueType().isScalableVector()) {
18220 EVT VT = N->getValueType(0);
18221 EVT SrcVT = Src.getOperand(0).getValueType();
18222      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18223      // Widths match, just return the original vector.
18224 if (SrcVT == VT)
18225 return Src.getOperand(0);
18226 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18227 }
18228 [[fallthrough]];
18229 }
18230 case RISCVISD::VMV_S_X_VL: {
18231 const MVT VT = N->getSimpleValueType(0);
18232 SDValue Passthru = N->getOperand(0);
18233 SDValue Scalar = N->getOperand(1);
18234 SDValue VL = N->getOperand(2);
18235
18236 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18237 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18238 return Scalar.getOperand(0);
18239
18240 // Use M1 or smaller to avoid over constraining register allocation
18241 const MVT M1VT = getLMUL1VT(VT);
18242 if (M1VT.bitsLT(VT)) {
18243 SDValue M1Passthru =
18244 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18245 DAG.getVectorIdxConstant(0, DL));
18246 SDValue Result =
18247 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18248 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18249 DAG.getVectorIdxConstant(0, DL));
18250 return Result;
18251 }
18252
18253 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18254 // higher would involve overly constraining the register allocator for
18255 // no purpose.
18256 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18257 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18258 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18259 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18260
18261 break;
18262 }
18263 case RISCVISD::VMV_X_S: {
18264 SDValue Vec = N->getOperand(0);
18265 MVT VecVT = N->getOperand(0).getSimpleValueType();
18266 const MVT M1VT = getLMUL1VT(VecVT);
18267 if (M1VT.bitsLT(VecVT)) {
18268 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18269 DAG.getVectorIdxConstant(0, DL));
18270 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18271 }
18272 break;
18273 }
18274  case ISD::INTRINSIC_VOID:
18275  case ISD::INTRINSIC_W_CHAIN:
18276  case ISD::INTRINSIC_WO_CHAIN: {
18277    unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18278 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18279 switch (IntNo) {
18280 // By default we do not combine any intrinsic.
18281 default:
18282 return SDValue();
18283 case Intrinsic::riscv_vcpop:
18284 case Intrinsic::riscv_vcpop_mask:
18285 case Intrinsic::riscv_vfirst:
18286 case Intrinsic::riscv_vfirst_mask: {
18287 SDValue VL = N->getOperand(2);
18288 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18289 IntNo == Intrinsic::riscv_vfirst_mask)
18290 VL = N->getOperand(3);
18291 if (!isNullConstant(VL))
18292 return SDValue();
18293 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18294 SDLoc DL(N);
18295 EVT VT = N->getValueType(0);
18296 if (IntNo == Intrinsic::riscv_vfirst ||
18297 IntNo == Intrinsic::riscv_vfirst_mask)
18298 return DAG.getAllOnesConstant(DL, VT);
18299 return DAG.getConstant(0, DL, VT);
18300 }
18301 }
18302 }
18303 case ISD::BITCAST: {
18305 SDValue N0 = N->getOperand(0);
18306 EVT VT = N->getValueType(0);
18307 EVT SrcVT = N0.getValueType();
18308 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18309 unsigned NF = VT.getRISCVVectorTupleNumFields();
18310 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18311 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18312 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18313
18314 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18315
18316 SDValue Result = DAG.getUNDEF(VT);
18317 for (unsigned i = 0; i < NF; ++i)
18318 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18319 DAG.getVectorIdxConstant(i, DL));
18320 return Result;
18321 }
18322 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18323 // type, widen both sides to avoid a trip through memory.
18324 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18325 VT.isScalarInteger()) {
18326 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18327 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18328 Ops[0] = N0;
18329 SDLoc DL(N);
18330 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18331 N0 = DAG.getBitcast(MVT::i8, N0);
18332 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18333 }
18334
18335 return SDValue();
18336 }
18337 case ISD::CTPOP:
18338 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18339 return V;
18340 break;
18341 }
18342
18343 return SDValue();
18344}
18345
18346bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18347    EVT XVT, unsigned KeptBits) const {
18348  // For vectors, we don't have a preference.
18349 if (XVT.isVector())
18350 return false;
18351
18352 if (XVT != MVT::i32 && XVT != MVT::i64)
18353 return false;
18354
18355 // We can use sext.w for RV64 or an srai 31 on RV32.
18356 if (KeptBits == 32 || KeptBits == 64)
18357 return true;
18358
18359 // With Zbb we can use sext.h/sext.b.
18360 return Subtarget.hasStdExtZbb() &&
18361 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18362 KeptBits == 16);
18363}
18364
18365bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18366    const SDNode *N, CombineLevel Level) const {
18367 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18368 N->getOpcode() == ISD::SRL) &&
18369 "Expected shift op");
18370
18371 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18372 // materialised in fewer instructions than `(OP _, c1)`:
18373 //
18374 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18375 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
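  // For example (illustrative): (shl (add x, 23), 1) -> (add (shl x, 1), 46);
  // 46 still fits in an ADDI immediate, so the shifted constant is free and
  // the fold is allowed.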
18376 SDValue N0 = N->getOperand(0);
18377 EVT Ty = N0.getValueType();
18378
18379  // LD/ST will optimize the constant offset extraction themselves, so when
18380  // AddNode is only used by loads/stores, the folding optimization performed
18381  // above can still be completed there.
18382 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18383 for (SDNode *Use : X->users()) {
18384      // This use is the one we're on right now. Skip it.
18385 if (Use == User || Use->getOpcode() == ISD::SELECT)
18386 continue;
18387 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18388 return false;
18389 }
18390 return true;
18391 };
18392
18393 if (Ty.isScalarInteger() &&
18394 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18395 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18396 return isUsedByLdSt(N0.getNode(), N);
18397
18398 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18399 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18400 if (C1 && C2) {
18401 const APInt &C1Int = C1->getAPIntValue();
18402 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18403
18404 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18405 // and the combine should happen, to potentially allow further combines
18406 // later.
18407 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18408 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18409 return true;
18410
18411 // We can materialise `c1` in an add immediate, so it's "free", and the
18412 // combine should be prevented.
18413 if (C1Int.getSignificantBits() <= 64 &&
18414          isLegalAddImmediate(C1Int.getSExtValue()))
18415        return false;
18416
18417 // Neither constant will fit into an immediate, so find materialisation
18418 // costs.
18419 int C1Cost =
18420 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18421 /*CompressionCost*/ true);
18422 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18423 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18424 /*CompressionCost*/ true);
18425
18426 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18427 // combine should be prevented.
18428 if (C1Cost < ShiftedC1Cost)
18429 return false;
18430 }
18431 }
18432
18433 if (!N0->hasOneUse())
18434 return false;
18435
18436 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18437 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18438 !N0->getOperand(0)->hasOneUse())
18439 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18440
18441 return true;
18442}
18443
18444bool RISCVTargetLowering::targetShrinkDemandedConstant(
18445    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18446 TargetLoweringOpt &TLO) const {
18447 // Delay this optimization as late as possible.
18448 if (!TLO.LegalOps)
18449 return false;
18450
18451 EVT VT = Op.getValueType();
18452 if (VT.isVector())
18453 return false;
18454
18455 unsigned Opcode = Op.getOpcode();
18456 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18457 return false;
18458
18459 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18460 if (!C)
18461 return false;
18462
18463 const APInt &Mask = C->getAPIntValue();
18464
18465 // Clear all non-demanded bits initially.
18466 APInt ShrunkMask = Mask & DemandedBits;
18467
18468 // Try to make a smaller immediate by setting undemanded bits.
18469
18470 APInt ExpandedMask = Mask | ~DemandedBits;
18471
18472 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18473 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18474 };
18475 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18476 if (NewMask == Mask)
18477 return true;
18478 SDLoc DL(Op);
18479 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18480 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18481 Op.getOperand(0), NewC);
18482 return TLO.CombineTo(Op, NewOp);
18483 };
18484
18485 // If the shrunk mask fits in sign extended 12 bits, let the target
18486 // independent code apply it.
18487 if (ShrunkMask.isSignedIntN(12))
18488 return false;
18489
18490 // And has a few special cases for zext.
18491 if (Opcode == ISD::AND) {
18492 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18493 // otherwise use SLLI + SRLI.
18494 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18495 if (IsLegalMask(NewMask))
18496 return UseMask(NewMask);
18497
18498 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18499 if (VT == MVT::i64) {
18500 APInt NewMask = APInt(64, 0xffffffff);
18501 if (IsLegalMask(NewMask))
18502 return UseMask(NewMask);
18503 }
18504 }
18505
18506 // For the remaining optimizations, we need to be able to make a negative
18507 // number through a combination of mask and undemanded bits.
18508 if (!ExpandedMask.isNegative())
18509 return false;
18510
18511  // Find the fewest number of bits needed to represent the negative number.
18512 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18513
18514 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18515 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18516 // If we can't create a simm12, we shouldn't change opaque constants.
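  // For example (illustrative): for (and X, 0xffe0) where only bits 15..5 are
  // demanded, setting the undemanded high bits gives the mask -32
  // (0xffff...ffe0), turning an LUI+ADDI+AND sequence into a single ANDI.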
18517 APInt NewMask = ShrunkMask;
18518 if (MinSignedBits <= 12)
18519 NewMask.setBitsFrom(11);
18520 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18521 NewMask.setBitsFrom(31);
18522 else
18523 return false;
18524
18525 // Check that our new mask is a subset of the demanded mask.
18526 assert(IsLegalMask(NewMask));
18527 return UseMask(NewMask);
18528}
18529
18530static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18531 static const uint64_t GREVMasks[] = {
18532 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18533 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
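  // For example (illustrative): with ShAmt == 7 all three low stages are
  // applied, so computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80 (brev8 of
  // a single byte), while with IsGORC == true the set bit is smeared across
  // its byte, giving 0xFF (orc.b).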
18534
18535 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18536 unsigned Shift = 1 << Stage;
18537 if (ShAmt & Shift) {
18538 uint64_t Mask = GREVMasks[Stage];
18539 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18540 if (IsGORC)
18541 Res |= x;
18542 x = Res;
18543 }
18544 }
18545
18546 return x;
18547}
18548
18549void RISCVTargetLowering::computeKnownBitsForTargetNode(SDValue Op,
18550                                                        KnownBits &Known,
18551 const APInt &DemandedElts,
18552 const SelectionDAG &DAG,
18553 unsigned Depth) const {
18554 unsigned BitWidth = Known.getBitWidth();
18555 unsigned Opc = Op.getOpcode();
18556 assert((Opc >= ISD::BUILTIN_OP_END ||
18557 Opc == ISD::INTRINSIC_WO_CHAIN ||
18558 Opc == ISD::INTRINSIC_W_CHAIN ||
18559 Opc == ISD::INTRINSIC_VOID) &&
18560 "Should use MaskedValueIsZero if you don't know whether Op"
18561 " is a target node!");
18562
18563 Known.resetAll();
18564 switch (Opc) {
18565 default: break;
18566 case RISCVISD::SELECT_CC: {
18567 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18568 // If we don't know any bits, early out.
18569 if (Known.isUnknown())
18570 break;
18571 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18572
18573 // Only known if known in both the LHS and RHS.
18574 Known = Known.intersectWith(Known2);
18575 break;
18576 }
18577  case RISCVISD::CZERO_EQZ:
18578  case RISCVISD::CZERO_NEZ:
18579    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18580 // Result is either all zero or operand 0. We can propagate zeros, but not
18581 // ones.
18582 Known.One.clearAllBits();
18583 break;
18584 case RISCVISD::REMUW: {
18585 KnownBits Known2;
18586 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18587 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18588 // We only care about the lower 32 bits.
18589 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18590 // Restore the original width by sign extending.
18591 Known = Known.sext(BitWidth);
18592 break;
18593 }
18594 case RISCVISD::DIVUW: {
18595 KnownBits Known2;
18596 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18597 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18598 // We only care about the lower 32 bits.
18599 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18600 // Restore the original width by sign extending.
18601 Known = Known.sext(BitWidth);
18602 break;
18603 }
18604 case RISCVISD::SLLW: {
18605 KnownBits Known2;
18606 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18607 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18608 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18609 // Restore the original width by sign extending.
18610 Known = Known.sext(BitWidth);
18611 break;
18612 }
18613 case RISCVISD::CTZW: {
18614 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18615 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18616 unsigned LowBits = llvm::bit_width(PossibleTZ);
18617 Known.Zero.setBitsFrom(LowBits);
18618 break;
18619 }
18620 case RISCVISD::CLZW: {
18621 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18622 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18623 unsigned LowBits = llvm::bit_width(PossibleLZ);
18624 Known.Zero.setBitsFrom(LowBits);
18625 break;
18626 }
18627 case RISCVISD::BREV8:
18628 case RISCVISD::ORC_B: {
18629 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18630 // control value of 7 is equivalent to brev8 and orc.b.
18631 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18632 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18633 // To compute zeros, we need to invert the value and invert it back after.
18634 Known.Zero =
18635 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18636 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18637 break;
18638 }
18639 case RISCVISD::READ_VLENB: {
18640 // We can use the minimum and maximum VLEN values to bound VLENB. We
18641 // know VLEN must be a power of two.
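    // For example (illustrative): with a guaranteed VLEN range of 128 to 512
    // bits, VLENB is 16 to 64 bytes, so bits [3:0] and bits 7 and above are
    // known to be zero.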
18642 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18643 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18644 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18645 Known.Zero.setLowBits(Log2_32(MinVLenB));
18646 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18647 if (MaxVLenB == MinVLenB)
18648 Known.One.setBit(Log2_32(MinVLenB));
18649 break;
18650 }
18651 case RISCVISD::FCLASS: {
18652 // fclass will only set one of the low 10 bits.
18653 Known.Zero.setBitsFrom(10);
18654 break;
18655 }
18656  case ISD::INTRINSIC_W_CHAIN:
18657  case ISD::INTRINSIC_WO_CHAIN: {
18658    unsigned IntNo =
18659 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18660 switch (IntNo) {
18661 default:
18662 // We can't do anything for most intrinsics.
18663 break;
18664 case Intrinsic::riscv_vsetvli:
18665 case Intrinsic::riscv_vsetvlimax: {
18666 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18667 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18668 RISCVII::VLMUL VLMUL =
18669 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18670 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18671 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18672 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18673 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
18674
18675 // The result of vsetvli must not be larger than AVL.
18676 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18677 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18678
18679 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18680 if (BitWidth > KnownZeroFirstBit)
18681 Known.Zero.setBitsFrom(KnownZeroFirstBit);
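// e.g. with a real maximum VLEN of 512, SEW=8 and LMUL=8, MaxVL is 512, so
// bits 10 and above of the returned VL are known zero.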
18682 break;
18683 }
18684 }
18685 break;
18686 }
18687 }
18688}
18689
18690unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18691 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18692 unsigned Depth) const {
18693 switch (Op.getOpcode()) {
18694 default:
18695 break;
18696 case RISCVISD::SELECT_CC: {
18697 unsigned Tmp =
18698 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18699 if (Tmp == 1) return 1; // Early out.
18700 unsigned Tmp2 =
18701 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18702 return std::min(Tmp, Tmp2);
18703 }
18704 case RISCVISD::CZERO_EQZ:
18705 case RISCVISD::CZERO_NEZ:
18706 // Output is either all zero or operand 0. We can propagate sign bit count
18707 // from operand 0.
18708 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18709 case RISCVISD::ABSW: {
18710 // We expand this at isel to negw+max. The result will have 33 sign bits
18711 // if the input has at least 33 sign bits.
18712 unsigned Tmp =
18713 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18714 if (Tmp < 33) return 1;
18715 return 33;
18716 }
18717 case RISCVISD::SLLW:
18718 case RISCVISD::SRAW:
18719 case RISCVISD::SRLW:
18720 case RISCVISD::DIVW:
18721 case RISCVISD::DIVUW:
18722 case RISCVISD::REMUW:
18723 case RISCVISD::ROLW:
18724 case RISCVISD::RORW:
18725 case RISCVISD::FCVT_W_RV64:
18726 case RISCVISD::FCVT_WU_RV64:
18727 case RISCVISD::STRICT_FCVT_W_RV64:
18728 case RISCVISD::STRICT_FCVT_WU_RV64:
18729 // TODO: As the result is sign-extended, this is conservatively correct. A
18730 // more precise answer could be calculated for SRAW depending on known
18731 // bits in the shift amount.
18732 return 33;
18733 case RISCVISD::VMV_X_S: {
18734 // The number of sign bits of the scalar result is computed by obtaining the
18735 // element type of the input vector operand, subtracting its width from the
18736 // XLEN, and then adding one (sign bit within the element type). If the
18737 // element type is wider than XLen, the least-significant XLEN bits are
18738 // taken.
18739 unsigned XLen = Subtarget.getXLen();
18740 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
18741 if (EltBits <= XLen)
18742 return XLen - EltBits + 1;
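// e.g. reading an element from a vector of i8 on RV64 gives 64 - 8 + 1 = 57
// known sign bits.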
18743 break;
18744 }
18745 case ISD::INTRINSIC_W_CHAIN: {
18746 unsigned IntNo = Op.getConstantOperandVal(1);
18747 switch (IntNo) {
18748 default:
18749 break;
18750 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
18751 case Intrinsic::riscv_masked_atomicrmw_add_i64:
18752 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
18753 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
18754 case Intrinsic::riscv_masked_atomicrmw_max_i64:
18755 case Intrinsic::riscv_masked_atomicrmw_min_i64:
18756 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
18757 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
18758 case Intrinsic::riscv_masked_cmpxchg_i64:
18759 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
18760 // narrow atomic operation. These are implemented using atomic
18761 // operations at the minimum supported atomicrmw/cmpxchg width whose
18762 // result is then sign extended to XLEN. With +A, the minimum width is
18763 // 32 bits for both RV64 and RV32.
18764 assert(Subtarget.getXLen() == 64);
18766 assert(Subtarget.hasStdExtA());
18767 return 33;
18768 }
18769 break;
18770 }
18771 }
18772
18773 return 1;
18774}
18775
18776bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
18777 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18778 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
18779
18780 // TODO: Add more target nodes.
18781 switch (Op.getOpcode()) {
18782 case RISCVISD::SELECT_CC:
18783 // Integer select_cc cannot create poison.
18784 // TODO: What are the FP poison semantics?
18785 // TODO: This instruction blocks poison from the unselected operand, can
18786 // we do anything with that?
18787 return !Op.getValueType().isInteger();
18788 }
18789 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
18790 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
18791}
18792
18793const Constant *
18794RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
18795 assert(Ld && "Unexpected null LoadSDNode");
18796 if (!ISD::isNormalLoad(Ld))
18797 return nullptr;
18798
18799 SDValue Ptr = Ld->getBasePtr();
18800
18801 // Only constant pools with no offset are supported.
18802 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
18803 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
18804 if (!CNode || CNode->isMachineConstantPoolEntry() ||
18805 CNode->getOffset() != 0)
18806 return nullptr;
18807
18808 return CNode;
18809 };
18810
18811 // Simple case, LLA.
18812 if (Ptr.getOpcode() == RISCVISD::LLA) {
18813 auto *CNode = GetSupportedConstantPool(Ptr);
18814 if (!CNode || CNode->getTargetFlags() != 0)
18815 return nullptr;
18816
18817 return CNode->getConstVal();
18818 }
18819
18820 // Look for a HI and ADD_LO pair.
18821 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
18822 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
18823 return nullptr;
18824
18825 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
18826 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
18827
18828 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
18829 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
18830 return nullptr;
18831
18832 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
18833 return nullptr;
18834
18835 return CNodeLo->getConstVal();
18836}
18837
18838static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
18839 MachineBasicBlock *BB) {
18840 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
18841
18842 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
18843 // Should the count have wrapped while it was being read, we need to try
18844 // again.
18845 // For example:
18846 // ```
18847 // read:
18848 // csrrs x3, counterh # load high word of counter
18849 // csrrs x2, counter # load low word of counter
18850 // csrrs x4, counterh # load high word of counter
18851 // bne x3, x4, read # check if high word reads match, otherwise try again
18852 // ```
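// Comparing the two reads of the high word is sufficient: if they match, no
// carry from the low word can have occurred in between, so {high, low} forms
// a consistent 64-bit sample.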
18853
18854 MachineFunction &MF = *BB->getParent();
18855 const BasicBlock *LLVMBB = BB->getBasicBlock();
18856 MachineFunction::iterator It = ++BB->getIterator();
18857
18858 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
18859 MF.insert(It, LoopMBB);
18860
18861 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
18862 MF.insert(It, DoneMBB);
18863
18864 // Transfer the remainder of BB and its successor edges to DoneMBB.
18865 DoneMBB->splice(DoneMBB->begin(), BB,
18866 std::next(MachineBasicBlock::iterator(MI)), BB->end());
18867 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
18868
18869 BB->addSuccessor(LoopMBB);
18870
18871 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18872 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18873 Register LoReg = MI.getOperand(0).getReg();
18874 Register HiReg = MI.getOperand(1).getReg();
18875 int64_t LoCounter = MI.getOperand(2).getImm();
18876 int64_t HiCounter = MI.getOperand(3).getImm();
18877 DebugLoc DL = MI.getDebugLoc();
18878
18879 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
18880 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
18881 .addImm(HiCounter)
18882 .addReg(RISCV::X0);
18883 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
18884 .addImm(LoCounter)
18885 .addReg(RISCV::X0);
18886 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
18887 .addImm(HiCounter)
18888 .addReg(RISCV::X0);
18889
18890 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
18891 .addReg(HiReg)
18892 .addReg(ReadAgainReg)
18893 .addMBB(LoopMBB);
18894
18895 LoopMBB->addSuccessor(LoopMBB);
18896 LoopMBB->addSuccessor(DoneMBB);
18897
18898 MI.eraseFromParent();
18899
18900 return DoneMBB;
18901}
18902
18903static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
18904 MachineBasicBlock *BB,
18905 const RISCVSubtarget &Subtarget) {
18906 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18907
18908 MachineFunction &MF = *BB->getParent();
18909 DebugLoc DL = MI.getDebugLoc();
18910 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
18911 const RISCVRegisterInfo *RI = Subtarget.getRegisterInfo();
18912 Register LoReg = MI.getOperand(0).getReg();
18913 Register HiReg = MI.getOperand(1).getReg();
18914 Register SrcReg = MI.getOperand(2).getReg();
18915
18916 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18917 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
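// The split goes through memory: spill the FPR64 source to the dedicated
// stack slot, then reload the low and high 32-bit halves with LW from
// offsets 0 and 4 (RISC-V is little-endian, so the low word comes first).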
18918
18919 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
18920 RI, Register());
18921 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18922 MachineMemOperand *MMOLo =
18923 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
18924 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18925 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
18926 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
18927 .addFrameIndex(FI)
18928 .addImm(0)
18929 .addMemOperand(MMOLo);
18930 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
18931 .addFrameIndex(FI)
18932 .addImm(4)
18933 .addMemOperand(MMOHi);
18934 MI.eraseFromParent(); // The pseudo instruction is gone now.
18935 return BB;
18936}
18937
18938static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
18939 MachineBasicBlock *BB,
18940 const RISCVSubtarget &Subtarget) {
18941 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
18942 "Unexpected instruction");
18943
18944 MachineFunction &MF = *BB->getParent();
18945 DebugLoc DL = MI.getDebugLoc();
18946 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
18947 const RISCVRegisterInfo *RI = Subtarget.getRegisterInfo();
18948 Register DstReg = MI.getOperand(0).getReg();
18949 Register LoReg = MI.getOperand(1).getReg();
18950 Register HiReg = MI.getOperand(2).getReg();
18951
18952 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
18953 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
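// This is the inverse of SplitF64Pseudo: store the two GPR halves with SW at
// offsets 0 and 4 of the stack slot, then reload the combined value as a
// single FPR64.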
18954
18955 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18956 MachineMemOperand *MMOLo =
18957 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
18958 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18959 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
18960 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18961 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
18962 .addFrameIndex(FI)
18963 .addImm(0)
18964 .addMemOperand(MMOLo);
18965 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18966 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
18967 .addFrameIndex(FI)
18968 .addImm(4)
18969 .addMemOperand(MMOHi);
18970 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
18971 MI.eraseFromParent(); // The pseudo instruction is gone now.
18972 return BB;
18973}
18974
18975static bool isSelectPseudo(MachineInstr &MI) {
18976 switch (MI.getOpcode()) {
18977 default:
18978 return false;
18979 case RISCV::Select_GPR_Using_CC_GPR:
18980 case RISCV::Select_GPR_Using_CC_Imm:
18981 case RISCV::Select_FPR16_Using_CC_GPR:
18982 case RISCV::Select_FPR16INX_Using_CC_GPR:
18983 case RISCV::Select_FPR32_Using_CC_GPR:
18984 case RISCV::Select_FPR32INX_Using_CC_GPR:
18985 case RISCV::Select_FPR64_Using_CC_GPR:
18986 case RISCV::Select_FPR64INX_Using_CC_GPR:
18987 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18988 return true;
18989 }
18990}
18991
18992static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
18993 unsigned RelOpcode, unsigned EqOpcode,
18994 const RISCVSubtarget &Subtarget) {
18995 DebugLoc DL = MI.getDebugLoc();
18996 Register DstReg = MI.getOperand(0).getReg();
18997 Register Src1Reg = MI.getOperand(1).getReg();
18998 Register Src2Reg = MI.getOperand(2).getReg();
18999 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19000 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19002
19003 // Save the current FFLAGS.
19004 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
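// FLT/FLE are signaling comparisons and raise the invalid flag even for
// quiet NaN operands, which a quiet compare must not do; FFLAGS is therefore
// restored below, and the trailing FEQ (a quiet comparison) re-raises
// invalid only for signaling NaNs.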
19005
19006 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19007 .addReg(Src1Reg)
19008 .addReg(Src2Reg);
19009 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19010 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19011
19012 // Restore the FFLAGS.
19013 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19014 .addReg(SavedFFlags, RegState::Kill);
19015
19016 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19017 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19018 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19019 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19020 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19021 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
19022
19023 // Erase the pseudoinstruction.
19024 MI.eraseFromParent();
19025 return BB;
19026}
19027
19028static MachineBasicBlock *
19029EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19030 MachineBasicBlock *ThisMBB,
19031 const RISCVSubtarget &Subtarget) {
19032 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
19033 // Without this, custom-inserter would have generated:
19034 //
19035 // A
19036 // | \
19037 // | B
19038 // | /
19039 // C
19040 // | \
19041 // | D
19042 // | /
19043 // E
19044 //
19045 // A: X = ...; Y = ...
19046 // B: empty
19047 // C: Z = PHI [X, A], [Y, B]
19048 // D: empty
19049 // E: PHI [X, C], [Z, D]
19050 //
19051 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19052 //
19053 // A
19054 // | \
19055 // | C
19056 // | /|
19057 // |/ |
19058 // | |
19059 // | D
19060 // | /
19061 // E
19062 //
19063 // A: X = ...; Y = ...
19064 // D: empty
19065 // E: PHI [X, A], [X, C], [Y, D]
19066
19067 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19068 const DebugLoc &DL = First.getDebugLoc();
19069 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19070 MachineFunction *F = ThisMBB->getParent();
19071 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19072 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19073 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19074 MachineFunction::iterator It = ++ThisMBB->getIterator();
19075 F->insert(It, FirstMBB);
19076 F->insert(It, SecondMBB);
19077 F->insert(It, SinkMBB);
19078
19079 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19080 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19081 std::next(MachineBasicBlock::iterator(First)),
19082 ThisMBB->end());
19083 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19084
19085 // Fallthrough block for ThisMBB.
19086 ThisMBB->addSuccessor(FirstMBB);
19087 // Fallthrough block for FirstMBB.
19088 FirstMBB->addSuccessor(SecondMBB);
19089 ThisMBB->addSuccessor(SinkMBB);
19090 FirstMBB->addSuccessor(SinkMBB);
19091 // This is fallthrough.
19092 SecondMBB->addSuccessor(SinkMBB);
19093
19094 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19095 Register FLHS = First.getOperand(1).getReg();
19096 Register FRHS = First.getOperand(2).getReg();
19097 // Insert appropriate branch.
19098 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19099 .addReg(FLHS)
19100 .addReg(FRHS)
19101 .addMBB(SinkMBB);
19102
19103 Register SLHS = Second.getOperand(1).getReg();
19104 Register SRHS = Second.getOperand(2).getReg();
19105 Register Op1Reg4 = First.getOperand(4).getReg();
19106 Register Op1Reg5 = First.getOperand(5).getReg();
19107
19108 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19109 // Insert appropriate branch.
19110 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19111 .addReg(SLHS)
19112 .addReg(SRHS)
19113 .addMBB(SinkMBB);
19114
19115 Register DestReg = Second.getOperand(0).getReg();
19116 Register Op2Reg4 = Second.getOperand(4).getReg();
19117 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19118 .addReg(Op2Reg4)
19119 .addMBB(ThisMBB)
19120 .addReg(Op1Reg4)
19121 .addMBB(FirstMBB)
19122 .addReg(Op1Reg5)
19123 .addMBB(SecondMBB);
19124
19125 // Now remove the Select_FPRX_s.
19126 First.eraseFromParent();
19127 Second.eraseFromParent();
19128 return SinkMBB;
19129}
19130
19131static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19132 MachineBasicBlock *BB,
19133 const RISCVSubtarget &Subtarget) {
19134 // To "insert" Select_* instructions, we actually have to insert the triangle
19135 // control-flow pattern. The incoming instructions know the destination vreg
19136 // to set, the condition code register to branch on, the true/false values to
19137 // select between, and the condcode to use to select the appropriate branch.
19138 //
19139 // We produce the following control flow:
19140 // HeadMBB
19141 // | \
19142 // | IfFalseMBB
19143 // | /
19144 // TailMBB
19145 //
19146 // When we find a sequence of selects we attempt to optimize their emission
19147 // by sharing the control flow. Currently we only handle cases where we have
19148 // multiple selects with the exact same condition (same LHS, RHS and CC).
19149 // The selects may be interleaved with other instructions if the other
19150 // instructions meet some requirements we deem safe:
19151 // - They are debug instructions, which may be interleaved freely; or
19152 // - They are not pseudo instructions, do not have side-effects, do not
19153 // access memory, and their inputs do not depend on the results of the
19154 // select pseudo-instructions.
19155 // The TrueV/FalseV operands of the selects cannot depend on the result of
19156 // previous selects in the sequence.
19157 // These conditions could be further relaxed. See the X86 target for a
19158 // related approach and more information.
19159 //
19160 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19161 // is checked here and handled by a separate function -
19162 // EmitLoweredCascadedSelect.
19163
19164 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19165 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19166 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19167 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19168 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19169 Next->getOperand(5).isKill())
19170 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19171
19172 Register LHS = MI.getOperand(1).getReg();
19173 Register RHS;
19174 if (MI.getOperand(2).isReg())
19175 RHS = MI.getOperand(2).getReg();
19176 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19177
19178 SmallVector<MachineInstr *, 4> SelectDebugValues;
19179 SmallSet<Register, 4> SelectDests;
19180 SelectDests.insert(MI.getOperand(0).getReg());
19181
19182 MachineInstr *LastSelectPseudo = &MI;
19183 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19184 SequenceMBBI != E; ++SequenceMBBI) {
19185 if (SequenceMBBI->isDebugInstr())
19186 continue;
19187 if (isSelectPseudo(*SequenceMBBI)) {
19188 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19189 !SequenceMBBI->getOperand(2).isReg() ||
19190 SequenceMBBI->getOperand(2).getReg() != RHS ||
19191 SequenceMBBI->getOperand(3).getImm() != CC ||
19192 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19193 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19194 break;
19195 LastSelectPseudo = &*SequenceMBBI;
19196 SequenceMBBI->collectDebugValues(SelectDebugValues);
19197 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19198 continue;
19199 }
19200 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19201 SequenceMBBI->mayLoadOrStore() ||
19202 SequenceMBBI->usesCustomInsertionHook())
19203 break;
19204 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19205 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19206 }))
19207 break;
19208 }
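// At this point LastSelectPseudo is the final select of the run that can
// share a single branch; everything after it is spliced into TailMBB below,
// while the scanned non-select instructions stay in HeadMBB.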
19209
19210 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19211 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19212 DebugLoc DL = MI.getDebugLoc();
19213 MachineFunction::iterator I = ++BB->getIterator();
19214
19215 MachineBasicBlock *HeadMBB = BB;
19216 MachineFunction *F = BB->getParent();
19217 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19218 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19219
19220 F->insert(I, IfFalseMBB);
19221 F->insert(I, TailMBB);
19222
19223 // Set the call frame size on entry to the new basic blocks.
19224 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19225 IfFalseMBB->setCallFrameSize(CallFrameSize);
19226 TailMBB->setCallFrameSize(CallFrameSize);
19227
19228 // Transfer debug instructions associated with the selects to TailMBB.
19229 for (MachineInstr *DebugInstr : SelectDebugValues) {
19230 TailMBB->push_back(DebugInstr->removeFromParent());
19231 }
19232
19233 // Move all instructions after the sequence to TailMBB.
19234 TailMBB->splice(TailMBB->end(), HeadMBB,
19235 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19236 // Update machine-CFG edges by transferring all successors of the current
19237 // block to the new block which will contain the Phi nodes for the selects.
19238 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19239 // Set the successors for HeadMBB.
19240 HeadMBB->addSuccessor(IfFalseMBB);
19241 HeadMBB->addSuccessor(TailMBB);
19242
19243 // Insert appropriate branch.
19244 if (MI.getOperand(2).isImm())
19245 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19246 .addReg(LHS)
19247 .addImm(MI.getOperand(2).getImm())
19248 .addMBB(TailMBB);
19249 else
19250 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19251 .addReg(LHS)
19252 .addReg(RHS)
19253 .addMBB(TailMBB);
19254
19255 // IfFalseMBB just falls through to TailMBB.
19256 IfFalseMBB->addSuccessor(TailMBB);
19257
19258 // Create PHIs for all of the select pseudo-instructions.
19259 auto SelectMBBI = MI.getIterator();
19260 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19261 auto InsertionPoint = TailMBB->begin();
19262 while (SelectMBBI != SelectEnd) {
19263 auto Next = std::next(SelectMBBI);
19264 if (isSelectPseudo(*SelectMBBI)) {
19265 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19266 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19267 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19268 .addReg(SelectMBBI->getOperand(4).getReg())
19269 .addMBB(HeadMBB)
19270 .addReg(SelectMBBI->getOperand(5).getReg())
19271 .addMBB(IfFalseMBB);
19272 SelectMBBI->eraseFromParent();
19273 }
19274 SelectMBBI = Next;
19275 }
19276
19277 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19278 return TailMBB;
19279}
19280
19281// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19282static const RISCV::RISCVMaskedPseudoInfo *
19283lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19284 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
19285 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19286 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19287 const RISCV::RISCVMaskedPseudoInfo *Masked =
19288 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19289 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19290 return Masked;
19291}
19292
19293static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19294 MachineBasicBlock *BB,
19295 unsigned CVTXOpc) {
19296 DebugLoc DL = MI.getDebugLoc();
19297
19298 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19299
19300 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19301 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19302
19303 // Save the old value of FFLAGS.
19304 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19305
19306 assert(MI.getNumOperands() == 7);
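// The "no exception" rounding is emitted as a masked VFCVT_X_F followed by a
// masked VFCVT_F_X using the dynamic rounding mode, with FFLAGS saved above
// and restored afterwards so the inexact flag raised by the conversions is
// not observable.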
19307
19308 // Emit a VFCVT_X_F
19309 const TargetRegisterInfo *TRI =
19311 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19312 Register Tmp = MRI.createVirtualRegister(RC);
19313 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19314 .add(MI.getOperand(1))
19315 .add(MI.getOperand(2))
19316 .add(MI.getOperand(3))
19317 .add(MachineOperand::CreateImm(7)) // frm = DYN
19318 .add(MI.getOperand(4))
19319 .add(MI.getOperand(5))
19320 .add(MI.getOperand(6))
19321 .add(MachineOperand::CreateReg(RISCV::FRM,
19322 /*IsDef*/ false,
19323 /*IsImp*/ true));
19324
19325 // Emit a VFCVT_F_X
19326 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19327 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19328 // There is no E8 variant for VFCVT_F_X.
19329 assert(Log2SEW >= 4);
19330 unsigned CVTFOpc =
19331 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19332 ->MaskedPseudo;
19333
19334 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19335 .add(MI.getOperand(0))
19336 .add(MI.getOperand(1))
19337 .addReg(Tmp)
19338 .add(MI.getOperand(3))
19339 .add(MachineOperand::CreateImm(7)) // frm = DYN
19340 .add(MI.getOperand(4))
19341 .add(MI.getOperand(5))
19342 .add(MI.getOperand(6))
19343 .add(MachineOperand::CreateReg(RISCV::FRM,
19344 /*IsDef*/ false,
19345 /*IsImp*/ true));
19346
19347 // Restore FFLAGS.
19348 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19349 .addReg(SavedFFLAGS, RegState::Kill);
19350
19351 // Erase the pseudoinstruction.
19352 MI.eraseFromParent();
19353 return BB;
19354}
19355
19356static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19357 const RISCVSubtarget &Subtarget) {
19358 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19359 const TargetRegisterClass *RC;
19360 switch (MI.getOpcode()) {
19361 default:
19362 llvm_unreachable("Unexpected opcode");
19363 case RISCV::PseudoFROUND_H:
19364 CmpOpc = RISCV::FLT_H;
19365 F2IOpc = RISCV::FCVT_W_H;
19366 I2FOpc = RISCV::FCVT_H_W;
19367 FSGNJOpc = RISCV::FSGNJ_H;
19368 FSGNJXOpc = RISCV::FSGNJX_H;
19369 RC = &RISCV::FPR16RegClass;
19370 break;
19371 case RISCV::PseudoFROUND_H_INX:
19372 CmpOpc = RISCV::FLT_H_INX;
19373 F2IOpc = RISCV::FCVT_W_H_INX;
19374 I2FOpc = RISCV::FCVT_H_W_INX;
19375 FSGNJOpc = RISCV::FSGNJ_H_INX;
19376 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19377 RC = &RISCV::GPRF16RegClass;
19378 break;
19379 case RISCV::PseudoFROUND_S:
19380 CmpOpc = RISCV::FLT_S;
19381 F2IOpc = RISCV::FCVT_W_S;
19382 I2FOpc = RISCV::FCVT_S_W;
19383 FSGNJOpc = RISCV::FSGNJ_S;
19384 FSGNJXOpc = RISCV::FSGNJX_S;
19385 RC = &RISCV::FPR32RegClass;
19386 break;
19387 case RISCV::PseudoFROUND_S_INX:
19388 CmpOpc = RISCV::FLT_S_INX;
19389 F2IOpc = RISCV::FCVT_W_S_INX;
19390 I2FOpc = RISCV::FCVT_S_W_INX;
19391 FSGNJOpc = RISCV::FSGNJ_S_INX;
19392 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19393 RC = &RISCV::GPRF32RegClass;
19394 break;
19395 case RISCV::PseudoFROUND_D:
19396 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19397 CmpOpc = RISCV::FLT_D;
19398 F2IOpc = RISCV::FCVT_L_D;
19399 I2FOpc = RISCV::FCVT_D_L;
19400 FSGNJOpc = RISCV::FSGNJ_D;
19401 FSGNJXOpc = RISCV::FSGNJX_D;
19402 RC = &RISCV::FPR64RegClass;
19403 break;
19404 case RISCV::PseudoFROUND_D_INX:
19405 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19406 CmpOpc = RISCV::FLT_D_INX;
19407 F2IOpc = RISCV::FCVT_L_D_INX;
19408 I2FOpc = RISCV::FCVT_D_L_INX;
19409 FSGNJOpc = RISCV::FSGNJ_D_INX;
19410 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19411 RC = &RISCV::GPRRegClass;
19412 break;
19413 }
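// The pseudo is expanded as a compare and branch: if |Src| is not below the
// threshold operand (NaNs and values that are already integral take this
// path), Src is returned unchanged; otherwise Src is converted to an integer
// and back with the requested rounding mode, and the original sign is
// restored with FSGNJ so that -0.0 is preserved.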
19414
19415 const BasicBlock *BB = MBB->getBasicBlock();
19416 DebugLoc DL = MI.getDebugLoc();
19417 MachineFunction::iterator I = ++MBB->getIterator();
19418
19419 MachineFunction *F = MBB->getParent();
19420 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19421 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19422
19423 F->insert(I, CvtMBB);
19424 F->insert(I, DoneMBB);
19425 // Move all instructions after the sequence to DoneMBB.
19426 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19427 MBB->end());
19428 // Update machine-CFG edges by transferring all successors of the current
19429 // block to the new block which will contain the Phi nodes for the selects.
19430 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
19431 // Set the successors for MBB.
19432 MBB->addSuccessor(CvtMBB);
19433 MBB->addSuccessor(DoneMBB);
19434
19435 Register DstReg = MI.getOperand(0).getReg();
19436 Register SrcReg = MI.getOperand(1).getReg();
19437 Register MaxReg = MI.getOperand(2).getReg();
19438 int64_t FRM = MI.getOperand(3).getImm();
19439
19440 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19442
19443 Register FabsReg = MRI.createVirtualRegister(RC);
19444 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19445
19446 // Compare the FP value to the max value.
19447 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19448 auto MIB =
19449 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19452
19453 // Insert branch.
19454 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19455 .addReg(CmpReg)
19456 .addReg(RISCV::X0)
19457 .addMBB(DoneMBB);
19458
19459 CvtMBB->addSuccessor(DoneMBB);
19460
19461 // Convert to integer.
19462 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19463 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19466
19467 // Convert back to FP.
19468 Register I2FReg = MRI.createVirtualRegister(RC);
19469 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19472
19473 // Restore the sign bit.
19474 Register CvtReg = MRI.createVirtualRegister(RC);
19475 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19476
19477 // Merge the results.
19478 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19479 .addReg(SrcReg)
19480 .addMBB(MBB)
19481 .addReg(CvtReg)
19482 .addMBB(CvtMBB);
19483
19484 MI.eraseFromParent();
19485 return DoneMBB;
19486}
19487
19488MachineBasicBlock *
19489RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19490 MachineBasicBlock *BB) const {
19491 switch (MI.getOpcode()) {
19492 default:
19493 llvm_unreachable("Unexpected instr type to insert");
19494 case RISCV::ReadCounterWide:
19495 assert(!Subtarget.is64Bit() &&
19496 "ReadCounterWide is only to be used on riscv32");
19497 return emitReadCounterWidePseudo(MI, BB);
19498 case RISCV::Select_GPR_Using_CC_GPR:
19499 case RISCV::Select_GPR_Using_CC_Imm:
19500 case RISCV::Select_FPR16_Using_CC_GPR:
19501 case RISCV::Select_FPR16INX_Using_CC_GPR:
19502 case RISCV::Select_FPR32_Using_CC_GPR:
19503 case RISCV::Select_FPR32INX_Using_CC_GPR:
19504 case RISCV::Select_FPR64_Using_CC_GPR:
19505 case RISCV::Select_FPR64INX_Using_CC_GPR:
19506 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19507 return emitSelectPseudo(MI, BB, Subtarget);
19508 case RISCV::BuildPairF64Pseudo:
19509 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19510 case RISCV::SplitF64Pseudo:
19511 return emitSplitF64Pseudo(MI, BB, Subtarget);
19512 case RISCV::PseudoQuietFLE_H:
19513 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19514 case RISCV::PseudoQuietFLE_H_INX:
19515 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19516 case RISCV::PseudoQuietFLT_H:
19517 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19518 case RISCV::PseudoQuietFLT_H_INX:
19519 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19520 case RISCV::PseudoQuietFLE_S:
19521 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19522 case RISCV::PseudoQuietFLE_S_INX:
19523 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19524 case RISCV::PseudoQuietFLT_S:
19525 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19526 case RISCV::PseudoQuietFLT_S_INX:
19527 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19528 case RISCV::PseudoQuietFLE_D:
19529 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19530 case RISCV::PseudoQuietFLE_D_INX:
19531 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19532 case RISCV::PseudoQuietFLE_D_IN32X:
19533 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19534 Subtarget);
19535 case RISCV::PseudoQuietFLT_D:
19536 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19537 case RISCV::PseudoQuietFLT_D_INX:
19538 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19539 case RISCV::PseudoQuietFLT_D_IN32X:
19540 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19541 Subtarget);
19542
19543 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19544 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19545 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19546 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19547 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19548 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19549 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19550 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19551 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19552 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19553 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19554 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19555 case RISCV::PseudoFROUND_H:
19556 case RISCV::PseudoFROUND_H_INX:
19557 case RISCV::PseudoFROUND_S:
19558 case RISCV::PseudoFROUND_S_INX:
19559 case RISCV::PseudoFROUND_D:
19560 case RISCV::PseudoFROUND_D_INX:
19561 case RISCV::PseudoFROUND_D_IN32X:
19562 return emitFROUND(MI, BB, Subtarget);
19563 case TargetOpcode::STATEPOINT:
19564 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19565 // while the jal call instruction (to which the statepoint is lowered at the
19566 // end) has an implicit def. This def is early-clobber as it will be set at
19567 // the moment of the call and earlier than any use is read.
19568 // Add this implicit dead def here as a workaround.
19569 MI.addOperand(*MI.getMF(),
19570 MachineOperand::CreateReg(
19571 RISCV::X1, /*isDef*/ true,
19572 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19573 /*isUndef*/ false, /*isEarlyClobber*/ true));
19574 [[fallthrough]];
19575 case TargetOpcode::STACKMAP:
19576 case TargetOpcode::PATCHPOINT:
19577 if (!Subtarget.is64Bit())
19578 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19579 "supported on 64-bit targets");
19580 return emitPatchPoint(MI, BB);
19581 }
19582}
19583
19584void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19585 SDNode *Node) const {
19586 // Add FRM dependency to any instructions with dynamic rounding mode.
19587 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19588 if (Idx < 0) {
19589 // Vector pseudos have FRM index indicated by TSFlags.
19590 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19591 if (Idx < 0)
19592 return;
19593 }
19594 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19595 return;
19596 // If the instruction already reads FRM, don't add another read.
19597 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19598 return;
19599 MI.addOperand(
19600 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19601}
19602
19603void RISCVTargetLowering::analyzeInputArgs(
19604 MachineFunction &MF, CCState &CCInfo,
19605 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19606 RISCVCCAssignFn Fn) const {
19607 unsigned NumArgs = Ins.size();
19608 FunctionType *FType = MF.getFunction().getFunctionType();
19609
19610 for (unsigned i = 0; i != NumArgs; ++i) {
19611 MVT ArgVT = Ins[i].VT;
19612 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19613
19614 Type *ArgTy = nullptr;
19615 if (IsRet)
19616 ArgTy = FType->getReturnType();
19617 else if (Ins[i].isOrigArg())
19618 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19619
19620 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19621 /*IsFixed=*/true, IsRet, ArgTy)) {
19622 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19623 << ArgVT << '\n');
19624 llvm_unreachable(nullptr);
19625 }
19626 }
19627}
19628
19629void RISCVTargetLowering::analyzeOutputArgs(
19630 MachineFunction &MF, CCState &CCInfo,
19631 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19632 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19633 unsigned NumArgs = Outs.size();
19634
19635 for (unsigned i = 0; i != NumArgs; i++) {
19636 MVT ArgVT = Outs[i].VT;
19637 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19638 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19639
19640 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19641 Outs[i].IsFixed, IsRet, OrigTy)) {
19642 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19643 << ArgVT << "\n");
19644 llvm_unreachable(nullptr);
19645 }
19646 }
19647}
19648
19649// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19650// values.
19651static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19652 const CCValAssign &VA, const SDLoc &DL,
19653 const RISCVSubtarget &Subtarget) {
19654 if (VA.needsCustom()) {
19655 if (VA.getLocVT().isInteger() &&
19656 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19657 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19658 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19659 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19660 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19661 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19662 llvm_unreachable("Unexpected Custom handling.");
19663 }
19664
19665 switch (VA.getLocInfo()) {
19666 default:
19667 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19668 case CCValAssign::Full:
19669 break;
19670 case CCValAssign::BCvt:
19671 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19672 break;
19673 }
19674 return Val;
19675}
19676
19677// The caller is responsible for loading the full value if the argument is
19678// passed with CCValAssign::Indirect.
19679static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19680 const CCValAssign &VA, const SDLoc &DL,
19681 const ISD::InputArg &In,
19682 const RISCVTargetLowering &TLI) {
19683 MachineFunction &MF = DAG.getMachineFunction();
19684 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19685 EVT LocVT = VA.getLocVT();
19686 SDValue Val;
19687 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19688 Register VReg = RegInfo.createVirtualRegister(RC);
19689 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19690 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19691
19692 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19693 if (In.isOrigArg()) {
19694 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19695 if (OrigArg->getType()->isIntegerTy()) {
19696 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19697 // An input zero extended from i31 can also be considered sign extended.
19698 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19699 (BitWidth < 32 && In.Flags.isZExt())) {
19700 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19701 RVFI->addSExt32Register(VReg);
19702 }
19703 }
19704 }
19705
19706 if (VA.getLocInfo() == CCValAssign::Indirect)
19707 return Val;
19708
19709 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19710}
19711
19712static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19713 const CCValAssign &VA, const SDLoc &DL,
19714 const RISCVSubtarget &Subtarget) {
19715 EVT LocVT = VA.getLocVT();
19716
19717 if (VA.needsCustom()) {
19718 if (LocVT.isInteger() &&
19719 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19720 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19721 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19722 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19723 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19724 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19725 llvm_unreachable("Unexpected Custom handling.");
19726 }
19727
19728 switch (VA.getLocInfo()) {
19729 default:
19730 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19731 case CCValAssign::Full:
19732 break;
19733 case CCValAssign::BCvt:
19734 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19735 break;
19736 }
19737 return Val;
19738}
19739
19740// The caller is responsible for loading the full value if the argument is
19741// passed with CCValAssign::Indirect.
19742static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19743 const CCValAssign &VA, const SDLoc &DL) {
19744 MachineFunction &MF = DAG.getMachineFunction();
19745 MachineFrameInfo &MFI = MF.getFrameInfo();
19746 EVT LocVT = VA.getLocVT();
19747 EVT ValVT = VA.getValVT();
19749 if (VA.getLocInfo() == CCValAssign::Indirect) {
19750 // When the value is a scalable vector, we save the pointer which points to
19751 // the scalable vector value in the stack. The ValVT will be the pointer
19752 // type, instead of the scalable vector type.
19753 ValVT = LocVT;
19754 }
19755 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19756 /*IsImmutable=*/true);
19757 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19758 SDValue Val;
19759
19761 switch (VA.getLocInfo()) {
19762 default:
19763 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19764 case CCValAssign::Full:
19765 case CCValAssign::Indirect:
19766 case CCValAssign::BCvt:
19767 break;
19768 }
19769 Val = DAG.getExtLoad(
19770 ExtType, DL, LocVT, Chain, FIN,
19771 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
19772 return Val;
19773}
19774
19775static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19776 const CCValAssign &VA,
19777 const CCValAssign &HiVA,
19778 const SDLoc &DL) {
19779 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19780 "Unexpected VA");
19781 MachineFunction &MF = DAG.getMachineFunction();
19782 MachineFrameInfo &MFI = MF.getFrameInfo();
19783 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19784
19785 assert(VA.isRegLoc() && "Expected register VA assignment");
19786
19787 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19788 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19789 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19790 SDValue Hi;
19791 if (HiVA.isMemLoc()) {
19792 // Second half of f64 is passed on the stack.
19793 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19794 /*IsImmutable=*/true);
19795 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19796 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19797 MachinePointerInfo::getFixedStack(MF, FI));
19798 } else {
19799 // Second half of f64 is passed in another GPR.
19800 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19801 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19802 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19803 }
19804 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19805}
19806
19807// Transform physical registers into virtual registers.
19808SDValue RISCVTargetLowering::LowerFormalArguments(
19809 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19810 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19811 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19812
19813 MachineFunction &MF = DAG.getMachineFunction();
19814
19815 switch (CallConv) {
19816 default:
19817 report_fatal_error("Unsupported calling convention");
19818 case CallingConv::C:
19819 case CallingConv::Fast:
19820 case CallingConv::SPIR_KERNEL:
19821 case CallingConv::GRAAL:
19822 case CallingConv::RISCV_VectorCall:
19823 break;
19824 case CallingConv::GHC:
19825 if (Subtarget.hasStdExtE())
19826 report_fatal_error("GHC calling convention is not supported on RVE!");
19827 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19828 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19829 "(Zdinx/D) instruction set extensions");
19830 }
19831
19832 const Function &Func = MF.getFunction();
19833 if (Func.hasFnAttribute("interrupt")) {
19834 if (!Func.arg_empty())
19835 report_fatal_error(
19836 "Functions with the interrupt attribute cannot have arguments!");
19837
19838 StringRef Kind =
19839 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19840
19841 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19842 report_fatal_error(
19843 "Function interrupt attribute argument not supported!");
19844 }
19845
19846 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19847 MVT XLenVT = Subtarget.getXLenVT();
19848 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19849 // Used with varargs to accumulate store chains.
19850 std::vector<SDValue> OutChains;
19851
19852 // Assign locations to all of the incoming arguments.
19853 SmallVector<CCValAssign, 16> ArgLocs;
19854 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19855
19856 if (CallConv == CallingConv::GHC)
19857 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
19858 else
19859 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19860 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
19861 : CC_RISCV);
19862
19863 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19864 CCValAssign &VA = ArgLocs[i];
19865 SDValue ArgValue;
19866 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19867 // case.
19868 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19869 assert(VA.needsCustom());
19870 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19871 } else if (VA.isRegLoc())
19872 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19873 else
19874 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19875
19876 if (VA.getLocInfo() == CCValAssign::Indirect) {
19877 // If the original argument was split and passed by reference (e.g. i128
19878 // on RV32), we need to load all parts of it here (using the same
19879 // address). Vectors may be partly split to registers and partly to the
19880 // stack, in which case the base address is partly offset and subsequent
19881 // stores are relative to that.
19882 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19883 MachinePointerInfo()));
19884 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19885 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19886 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19887 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19888 CCValAssign &PartVA = ArgLocs[i + 1];
19889 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19890 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19891 if (PartVA.getValVT().isScalableVector())
19892 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19893 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19894 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19895 MachinePointerInfo()));
19896 ++i;
19897 ++InsIdx;
19898 }
19899 continue;
19900 }
19901 InVals.push_back(ArgValue);
19902 }
19903
19904 if (any_of(ArgLocs,
19905 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19906 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19907
19908 if (IsVarArg) {
19909 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19910 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19911 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19912 MachineFrameInfo &MFI = MF.getFrameInfo();
19913 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19914 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19915
19916 // Size of the vararg save area. For now, the varargs save area is either
19917 // zero or large enough to hold a0-a7.
19918 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19919 int FI;
19920
19921 // If all registers are allocated, then all varargs must be passed on the
19922 // stack and we don't need to save any argregs.
19923 if (VarArgsSaveSize == 0) {
19924 int VaArgOffset = CCInfo.getStackSize();
19925 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19926 } else {
19927 int VaArgOffset = -VarArgsSaveSize;
19928 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19929
19930 // If saving an odd number of registers then create an extra stack slot to
19931 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19932 // offsets to even-numbered registers remain 2*XLEN-aligned.
19933 if (Idx % 2) {
19935 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19936 VarArgsSaveSize += XLenInBytes;
19937 }
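// e.g. on ILP32, if the named arguments occupy a0-a2, then a3-a7 (20 bytes)
// are saved and the extra slot above pads the save area to 24 bytes so it
// stays 2*XLEN-aligned.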
19938
19939 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19940
19941 // Copy the integer registers that may have been used for passing varargs
19942 // to the vararg save area.
19943 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19944 const Register Reg = RegInfo.createVirtualRegister(RC);
19945 RegInfo.addLiveIn(ArgRegs[I], Reg);
19946 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19947 SDValue Store = DAG.getStore(
19948 Chain, DL, ArgValue, FIN,
19949 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19950 OutChains.push_back(Store);
19951 FIN =
19952 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19953 }
19954 }
19955
19956 // Record the frame index of the first variable argument
19957 // which is a value necessary to VASTART.
19958 RVFI->setVarArgsFrameIndex(FI);
19959 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19960 }
19961
19962 // All stores are grouped in one node to allow the matching between
19963 // the size of Ins and InVals. This only happens for vararg functions.
19964 if (!OutChains.empty()) {
19965 OutChains.push_back(Chain);
19966 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19967 }
19968
19969 return Chain;
19970}
19971
19972/// isEligibleForTailCallOptimization - Check whether the call is eligible
19973/// for tail call optimization.
19974/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19975bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19976 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19977 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19978
19979 auto CalleeCC = CLI.CallConv;
19980 auto &Outs = CLI.Outs;
19981 auto &Caller = MF.getFunction();
19982 auto CallerCC = Caller.getCallingConv();
19983
19984 // Exception-handling functions need a special set of instructions to
19985 // indicate a return to the hardware. Tail-calling another function would
19986 // probably break this.
19987 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19988 // should be expanded as new function attributes are introduced.
19989 if (Caller.hasFnAttribute("interrupt"))
19990 return false;
19991
19992 // Do not tail call opt if the stack is used to pass parameters.
19993 if (CCInfo.getStackSize() != 0)
19994 return false;
19995
19996 // Do not tail call opt if any parameters need to be passed indirectly.
19997 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19998 // passed indirectly. So the address of the value will be passed in a
19999 // register, or if not available, then the address is put on the stack. In
20000 // order to pass indirectly, space on the stack often needs to be allocated
20001 // in order to store the value. In this case the CCInfo.getStackSize()
20002 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
20003 // are passed CCValAssign::Indirect.
20004 for (auto &VA : ArgLocs)
20005 if (VA.getLocInfo() == CCValAssign::Indirect)
20006 return false;
20007
20008 // Do not tail call opt if either caller or callee uses struct return
20009 // semantics.
20010 auto IsCallerStructRet = Caller.hasStructRetAttr();
20011 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20012 if (IsCallerStructRet || IsCalleeStructRet)
20013 return false;
20014
20015 // The callee has to preserve all registers the caller needs to preserve.
20016 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20017 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20018 if (CalleeCC != CallerCC) {
20019 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20020 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20021 return false;
20022 }
20023
20024 // Byval parameters hand the function a pointer directly into the stack area
20025 // we want to reuse during a tail call. Working around this *is* possible
20026 // but less efficient and uglier in LowerCall.
20027 for (auto &Arg : Outs)
20028 if (Arg.Flags.isByVal())
20029 return false;
20030
20031 return true;
20032}
20033
20034static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20035 return DAG.getDataLayout().getPrefTypeAlign(
20036 VT.getTypeForEVT(*DAG.getContext()));
20037}
20038
20039// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20040// and output parameter nodes.
20041SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20042 SmallVectorImpl<SDValue> &InVals) const {
20043 SelectionDAG &DAG = CLI.DAG;
20044 SDLoc &DL = CLI.DL;
20046 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20048 SDValue Chain = CLI.Chain;
20049 SDValue Callee = CLI.Callee;
20050 bool &IsTailCall = CLI.IsTailCall;
20051 CallingConv::ID CallConv = CLI.CallConv;
20052 bool IsVarArg = CLI.IsVarArg;
20053 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20054 MVT XLenVT = Subtarget.getXLenVT();
20055
20057
20058 // Analyze the operands of the call, assigning locations to each operand.
20060 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20061
20062 if (CallConv == CallingConv::GHC) {
20063 if (Subtarget.hasStdExtE())
20064 report_fatal_error("GHC calling convention is not supported on RVE!");
20065 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20066 } else
20067 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20069 : CC_RISCV);
20070
20071 // Check if it's really possible to do a tail call.
20072 if (IsTailCall)
20073 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20074
20075 if (IsTailCall)
20076 ++NumTailCalls;
20077 else if (CLI.CB && CLI.CB->isMustTailCall())
20078 report_fatal_error("failed to perform tail call elimination on a call "
20079 "site marked musttail");
20080
20081 // Get a count of how many bytes are to be pushed on the stack.
20082 unsigned NumBytes = ArgCCInfo.getStackSize();
20083
20084 // Create local copies for byval args
20085 SmallVector<SDValue, 8> ByValArgs;
20086 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20087 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20088 if (!Flags.isByVal())
20089 continue;
20090
20091 SDValue Arg = OutVals[i];
20092 unsigned Size = Flags.getByValSize();
20093 Align Alignment = Flags.getNonZeroByValAlign();
20094
20095 int FI =
20096 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20097 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20098 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20099
20100 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20101 /*IsVolatile=*/false,
20102 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20104 ByValArgs.push_back(FIPtr);
20105 }
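// These memcpys are chained directly on Chain rather than through
// MemOpChains, so the local copies complete before the call sequence begins.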
20106
20107 if (!IsTailCall)
20108 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20109
20110 // Copy argument values to their designated locations.
20111 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20112 SmallVector<SDValue, 8> MemOpChains;
20113 SDValue StackPtr;
20114 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20115 ++i, ++OutIdx) {
20116 CCValAssign &VA = ArgLocs[i];
20117 SDValue ArgValue = OutVals[OutIdx];
20118 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20119
20120 // Handle passing f64 on RV32D with a soft float ABI as a special case.
20121 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20122 assert(VA.isRegLoc() && "Expected register VA assignment");
20123 assert(VA.needsCustom());
20124 SDValue SplitF64 = DAG.getNode(
20125 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20126 SDValue Lo = SplitF64.getValue(0);
20127 SDValue Hi = SplitF64.getValue(1);
20128
20129 Register RegLo = VA.getLocReg();
20130 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20131
20132 // Get the CCValAssign for the Hi part.
20133 CCValAssign &HiVA = ArgLocs[++i];
20134
20135 if (HiVA.isMemLoc()) {
20136 // Second half of f64 is passed on the stack.
20137 if (!StackPtr.getNode())
20138 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20139 SDValue Address =
20140 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20141 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20142 // Emit the store.
20143 MemOpChains.push_back(DAG.getStore(
20144 Chain, DL, Hi, Address,
20146 } else {
20147 // Second half of f64 is passed in another GPR.
20148 Register RegHigh = HiVA.getLocReg();
20149 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20150 }
20151 continue;
20152 }
20153
20154 // Promote the value if needed.
20155 // For now, only handle fully promoted and indirect arguments.
20156 if (VA.getLocInfo() == CCValAssign::Indirect) {
20157 // Store the argument in a stack slot and pass its address.
20158 Align StackAlign =
20159 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20160 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20161 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20162 // If the original argument was split (e.g. i128), we need
20163 // to store the required parts of it here (and pass just one address).
20164 // Vectors may be partly split to registers and partly to the stack, in
20165 // which case the base address is partly offset and subsequent stores are
20166 // relative to that.
20167 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20168 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20169 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20170 // Calculate the total size to store. We don't have access to what we're
20171 // actually storing other than performing the loop and collecting the
20172 // info.
20173 SmallVector<std::pair<SDValue, SDValue>> Parts;
20174 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20175 SDValue PartValue = OutVals[OutIdx + 1];
20176 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20177 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20178 EVT PartVT = PartValue.getValueType();
20179 if (PartVT.isScalableVector())
20180 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20181 StoredSize += PartVT.getStoreSize();
20182 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20183 Parts.push_back(std::make_pair(PartValue, Offset));
20184 ++i;
20185 ++OutIdx;
20186 }
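// e.g. an i128 argument on RV32 arrives here as four 32-bit parts sharing
// one OrigArgIndex; they are stored into a single stack temporary and only
// that temporary's address is passed to the callee.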
20187 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20188 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20189 MemOpChains.push_back(
20190 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20192 for (const auto &Part : Parts) {
20193 SDValue PartValue = Part.first;
20194 SDValue PartOffset = Part.second;
20195 SDValue Address =
20196 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20197 MemOpChains.push_back(
20198 DAG.getStore(Chain, DL, PartValue, Address,
20200 }
20201 ArgValue = SpillSlot;
20202 } else {
20203 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20204 }
20205
20206 // Use local copy if it is a byval arg.
20207 if (Flags.isByVal())
20208 ArgValue = ByValArgs[j++];
20209
20210 if (VA.isRegLoc()) {
20211 // Queue up the argument copies and emit them at the end.
20212 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20213 } else {
20214 assert(VA.isMemLoc() && "Argument not register or memory");
20215 assert(!IsTailCall && "Tail call not allowed if stack is used "
20216 "for passing parameters");
20217
20218 // Work out the address of the stack slot.
20219 if (!StackPtr.getNode())
20220 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20221 SDValue Address =
20222 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20223 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20224
20225 // Emit the store.
20226 MemOpChains.push_back(
20227 DAG.getStore(Chain, DL, ArgValue, Address,
20229 }
20230 }
20231
20232 // Join the stores, which are independent of one another.
20233 if (!MemOpChains.empty())
20234 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20235
20236 SDValue Glue;
20237
20238 // Build a sequence of copy-to-reg nodes, chained and glued together.
20239 for (auto &Reg : RegsToPass) {
20240 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20241 Glue = Chain.getValue(1);
20242 }
20243
20244 // Validate that none of the argument registers have been marked as
20245 // reserved; if any have been, report an error. Do the same for the return
20246 // address register if this is not a tail call.
20247 validateCCReservedRegs(RegsToPass, MF);
20248 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20250 MF.getFunction(),
20251 "Return address register required, but has been reserved."});
20252
20253 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20254 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20255 // split it, and so the direct call can be matched by PseudoCALL.
20256 bool CalleeIsLargeExternalSymbol = false;
20258 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20259 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20260 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20261 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20262 CalleeIsLargeExternalSymbol = true;
20263 }
20264 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20265 const GlobalValue *GV = S->getGlobal();
20266 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20267 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20268 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20269 }
20270
20271 // The first call operand is the chain and the second is the target address.
20273 Ops.push_back(Chain);
20274 Ops.push_back(Callee);
20275
20276 // Add argument registers to the end of the list so that they are
20277 // known live into the call.
20278 for (auto &Reg : RegsToPass)
20279 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20280
20281 if (!IsTailCall) {
20282 // Add a register mask operand representing the call-preserved registers.
20283 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20284 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20285 assert(Mask && "Missing call preserved mask for calling convention");
20286 Ops.push_back(DAG.getRegisterMask(Mask));
20287 }
20288
20289 // Glue the call to the argument copies, if any.
20290 if (Glue.getNode())
20291 Ops.push_back(Glue);
20292
20293 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20294 "Unexpected CFI type for a direct call");
20295
20296 // Emit the call.
20297 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20298
20299 // Use a software-guarded branch for large code model non-indirect calls.
20300 // A tail call to an external symbol will have a null CLI.CB, so we need
20301 // another way to determine the callsite type.
20302 bool NeedSWGuarded = false;
20304 Subtarget.hasStdExtZicfilp() &&
20305 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20306 NeedSWGuarded = true;
20307
20308 if (IsTailCall) {
20310 unsigned CallOpc =
20311 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20312 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20313 if (CLI.CFIType)
20314 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20315 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20316 return Ret;
20317 }
20318
20319 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20320 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20321 if (CLI.CFIType)
20322 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20323 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20324 Glue = Chain.getValue(1);
20325
20326 // Mark the end of the call, which is glued to the call itself.
20327 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20328 Glue = Chain.getValue(1);
20329
20330 // Assign locations to each value returned by this call.
20332 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20333 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20334
20335 // Copy all of the result registers out of their specified physreg.
20336 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20337 auto &VA = RVLocs[i];
20338 // Copy the value out
20339 SDValue RetValue =
20340 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20341 // Glue the RetValue to the end of the call sequence
20342 Chain = RetValue.getValue(1);
20343 Glue = RetValue.getValue(2);
20344
20345 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20346 assert(VA.needsCustom());
20347 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20348 MVT::i32, Glue);
20349 Chain = RetValue2.getValue(1);
20350 Glue = RetValue2.getValue(2);
20351 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20352 RetValue2);
20353 } else
20354 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20355
20356 InVals.push_back(RetValue);
20357 }
20358
20359 return Chain;
20360}
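// ---- Editor's illustrative sketch (not part of the original source) ----
// The indirect-argument path above stores every extra part of a split
// argument into a single stack temporary at its PartOffset and passes only
// the slot's address; for scalable vector parts the byte offset is scaled
// by vscale. A minimal model of that address computation, with a
// hypothetical helper name:
static uint64_t indirectPartAddressSketch(uint64_t SpillSlotAddr,
                                          uint64_t PartOffset, bool IsScalable,
                                          uint64_t VScale) {
  // Offset of one part relative to the spill slot, mirroring the ISD::VSCALE
  // scaling applied above for scalable parts.
  uint64_t ByteOffset = IsScalable ? PartOffset * VScale : PartOffset;
  return SpillSlotAddr + ByteOffset;
}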
20361
20363 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20364 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20366 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20367
20368 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20369 MVT VT = Outs[i].VT;
20370 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20371 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20372 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20373 return false;
20374 }
20375 return true;
20376}
20377
20378SDValue
20380 bool IsVarArg,
20382 const SmallVectorImpl<SDValue> &OutVals,
20383 const SDLoc &DL, SelectionDAG &DAG) const {
20385 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20386
20387 // Stores the assignment of the return value to a location.
20389
20390 // Info about the registers and stack slot.
20391 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20392 *DAG.getContext());
20393
20394 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20395 nullptr, CC_RISCV);
20396
20397 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20398 report_fatal_error("GHC functions return void only");
20399
20400 SDValue Glue;
20401 SmallVector<SDValue, 4> RetOps(1, Chain);
20402
20403 // Copy the result values into the output registers.
20404 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20405 SDValue Val = OutVals[OutIdx];
20406 CCValAssign &VA = RVLocs[i];
20407 assert(VA.isRegLoc() && "Can only return in registers!");
20408
20409 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20410 // Handle returning f64 on RV32D with a soft float ABI.
20411 assert(VA.isRegLoc() && "Expected return via registers");
20412 assert(VA.needsCustom());
20413 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20414 DAG.getVTList(MVT::i32, MVT::i32), Val);
20415 SDValue Lo = SplitF64.getValue(0);
20416 SDValue Hi = SplitF64.getValue(1);
20417 Register RegLo = VA.getLocReg();
20418 Register RegHi = RVLocs[++i].getLocReg();
20419
20420 if (STI.isRegisterReservedByUser(RegLo) ||
20421 STI.isRegisterReservedByUser(RegHi))
20423 MF.getFunction(),
20424 "Return value register required, but has been reserved."});
20425
20426 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20427 Glue = Chain.getValue(1);
20428 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20429 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20430 Glue = Chain.getValue(1);
20431 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20432 } else {
20433 // Handle a 'normal' return.
20434 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20435 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20436
20437 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20439 MF.getFunction(),
20440 "Return value register required, but has been reserved."});
20441
20442 // Guarantee that all emitted copies are stuck together.
20443 Glue = Chain.getValue(1);
20444 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20445 }
20446 }
20447
20448 RetOps[0] = Chain; // Update chain.
20449
20450 // Add the glue node if we have it.
20451 if (Glue.getNode()) {
20452 RetOps.push_back(Glue);
20453 }
20454
20455 if (any_of(RVLocs,
20456 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20457 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20458
20459 unsigned RetOpc = RISCVISD::RET_GLUE;
20460 // Interrupt service routines use different return instructions.
20461 const Function &Func = DAG.getMachineFunction().getFunction();
20462 if (Func.hasFnAttribute("interrupt")) {
20463 if (!Func.getReturnType()->isVoidTy())
20465 "Functions with the interrupt attribute must have void return type!");
20466
20468 StringRef Kind =
20469 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20470
20471 if (Kind == "supervisor")
20472 RetOpc = RISCVISD::SRET_GLUE;
20473 else
20474 RetOpc = RISCVISD::MRET_GLUE;
20475 }
20476
20477 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20478}
20479
20480void RISCVTargetLowering::validateCCReservedRegs(
20481 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20482 MachineFunction &MF) const {
20483 const Function &F = MF.getFunction();
20484 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20485
20486 if (llvm::any_of(Regs, [&STI](auto Reg) {
20487 return STI.isRegisterReservedByUser(Reg.first);
20488 }))
20489 F.getContext().diagnose(DiagnosticInfoUnsupported{
20490 F, "Argument register required, but has been reserved."});
20491}
20492
20493// Check if the result of the node is only used as a return value, as
20494// otherwise we can't perform a tail-call.
20496 if (N->getNumValues() != 1)
20497 return false;
20498 if (!N->hasNUsesOfValue(1, 0))
20499 return false;
20500
20501 SDNode *Copy = *N->user_begin();
20502
20503 if (Copy->getOpcode() == ISD::BITCAST) {
20504 return isUsedByReturnOnly(Copy, Chain);
20505 }
20506
20507 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20508 // with soft float ABIs.
20509 if (Copy->getOpcode() != ISD::CopyToReg) {
20510 return false;
20511 }
20512
20513 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20514 // isn't safe to perform a tail call.
20515 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20516 return false;
20517
20518 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20519 bool HasRet = false;
20520 for (SDNode *Node : Copy->users()) {
20521 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20522 return false;
20523 HasRet = true;
20524 }
20525 if (!HasRet)
20526 return false;
20527
20528 Chain = Copy->getOperand(0);
20529 return true;
20530}
20531
20533 return CI->isTailCall();
20534}
20535
20536const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20537#define NODE_NAME_CASE(NODE) \
20538 case RISCVISD::NODE: \
20539 return "RISCVISD::" #NODE;
20540 // clang-format off
20541 switch ((RISCVISD::NodeType)Opcode) {
20543 break;
20544 NODE_NAME_CASE(RET_GLUE)
20545 NODE_NAME_CASE(SRET_GLUE)
20546 NODE_NAME_CASE(MRET_GLUE)
20547 NODE_NAME_CASE(CALL)
20548 NODE_NAME_CASE(TAIL)
20549 NODE_NAME_CASE(SELECT_CC)
20550 NODE_NAME_CASE(BR_CC)
20551 NODE_NAME_CASE(BuildGPRPair)
20552 NODE_NAME_CASE(SplitGPRPair)
20553 NODE_NAME_CASE(BuildPairF64)
20554 NODE_NAME_CASE(SplitF64)
20555 NODE_NAME_CASE(ADD_LO)
20556 NODE_NAME_CASE(HI)
20557 NODE_NAME_CASE(LLA)
20558 NODE_NAME_CASE(ADD_TPREL)
20559 NODE_NAME_CASE(MULHSU)
20560 NODE_NAME_CASE(SHL_ADD)
20561 NODE_NAME_CASE(SLLW)
20562 NODE_NAME_CASE(SRAW)
20563 NODE_NAME_CASE(SRLW)
20564 NODE_NAME_CASE(DIVW)
20565 NODE_NAME_CASE(DIVUW)
20566 NODE_NAME_CASE(REMUW)
20567 NODE_NAME_CASE(ROLW)
20568 NODE_NAME_CASE(RORW)
20569 NODE_NAME_CASE(CLZW)
20570 NODE_NAME_CASE(CTZW)
20571 NODE_NAME_CASE(ABSW)
20572 NODE_NAME_CASE(FMV_H_X)
20573 NODE_NAME_CASE(FMV_X_ANYEXTH)
20574 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20575 NODE_NAME_CASE(FMV_W_X_RV64)
20576 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20577 NODE_NAME_CASE(FCVT_X)
20578 NODE_NAME_CASE(FCVT_XU)
20579 NODE_NAME_CASE(FCVT_W_RV64)
20580 NODE_NAME_CASE(FCVT_WU_RV64)
20581 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20582 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20583 NODE_NAME_CASE(FROUND)
20584 NODE_NAME_CASE(FCLASS)
20585 NODE_NAME_CASE(FSGNJX)
20586 NODE_NAME_CASE(FMAX)
20587 NODE_NAME_CASE(FMIN)
20588 NODE_NAME_CASE(FLI)
20589 NODE_NAME_CASE(READ_COUNTER_WIDE)
20590 NODE_NAME_CASE(BREV8)
20591 NODE_NAME_CASE(ORC_B)
20592 NODE_NAME_CASE(ZIP)
20593 NODE_NAME_CASE(UNZIP)
20594 NODE_NAME_CASE(CLMUL)
20595 NODE_NAME_CASE(CLMULH)
20596 NODE_NAME_CASE(CLMULR)
20597 NODE_NAME_CASE(MOPR)
20598 NODE_NAME_CASE(MOPRR)
20599 NODE_NAME_CASE(SHA256SIG0)
20600 NODE_NAME_CASE(SHA256SIG1)
20601 NODE_NAME_CASE(SHA256SUM0)
20602 NODE_NAME_CASE(SHA256SUM1)
20603 NODE_NAME_CASE(SM4KS)
20604 NODE_NAME_CASE(SM4ED)
20605 NODE_NAME_CASE(SM3P0)
20606 NODE_NAME_CASE(SM3P1)
20607 NODE_NAME_CASE(TH_LWD)
20608 NODE_NAME_CASE(TH_LWUD)
20609 NODE_NAME_CASE(TH_LDD)
20610 NODE_NAME_CASE(TH_SWD)
20611 NODE_NAME_CASE(TH_SDD)
20612 NODE_NAME_CASE(VMV_V_V_VL)
20613 NODE_NAME_CASE(VMV_V_X_VL)
20614 NODE_NAME_CASE(VFMV_V_F_VL)
20615 NODE_NAME_CASE(VMV_X_S)
20616 NODE_NAME_CASE(VMV_S_X_VL)
20617 NODE_NAME_CASE(VFMV_S_F_VL)
20618 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20619 NODE_NAME_CASE(READ_VLENB)
20620 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20621 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20622 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20623 NODE_NAME_CASE(VSLIDEUP_VL)
20624 NODE_NAME_CASE(VSLIDE1UP_VL)
20625 NODE_NAME_CASE(VSLIDEDOWN_VL)
20626 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20627 NODE_NAME_CASE(VFSLIDE1UP_VL)
20628 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20629 NODE_NAME_CASE(VID_VL)
20630 NODE_NAME_CASE(VFNCVT_ROD_VL)
20631 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20632 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20633 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20634 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20635 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20636 NODE_NAME_CASE(VECREDUCE_AND_VL)
20637 NODE_NAME_CASE(VECREDUCE_OR_VL)
20638 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20639 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20640 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20641 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20642 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20643 NODE_NAME_CASE(ADD_VL)
20644 NODE_NAME_CASE(AND_VL)
20645 NODE_NAME_CASE(MUL_VL)
20646 NODE_NAME_CASE(OR_VL)
20647 NODE_NAME_CASE(SDIV_VL)
20648 NODE_NAME_CASE(SHL_VL)
20649 NODE_NAME_CASE(SREM_VL)
20650 NODE_NAME_CASE(SRA_VL)
20651 NODE_NAME_CASE(SRL_VL)
20652 NODE_NAME_CASE(ROTL_VL)
20653 NODE_NAME_CASE(ROTR_VL)
20654 NODE_NAME_CASE(SUB_VL)
20655 NODE_NAME_CASE(UDIV_VL)
20656 NODE_NAME_CASE(UREM_VL)
20657 NODE_NAME_CASE(XOR_VL)
20658 NODE_NAME_CASE(AVGFLOORS_VL)
20659 NODE_NAME_CASE(AVGFLOORU_VL)
20660 NODE_NAME_CASE(AVGCEILS_VL)
20661 NODE_NAME_CASE(AVGCEILU_VL)
20662 NODE_NAME_CASE(SADDSAT_VL)
20663 NODE_NAME_CASE(UADDSAT_VL)
20664 NODE_NAME_CASE(SSUBSAT_VL)
20665 NODE_NAME_CASE(USUBSAT_VL)
20666 NODE_NAME_CASE(FADD_VL)
20667 NODE_NAME_CASE(FSUB_VL)
20668 NODE_NAME_CASE(FMUL_VL)
20669 NODE_NAME_CASE(FDIV_VL)
20670 NODE_NAME_CASE(FNEG_VL)
20671 NODE_NAME_CASE(FABS_VL)
20672 NODE_NAME_CASE(FSQRT_VL)
20673 NODE_NAME_CASE(FCLASS_VL)
20674 NODE_NAME_CASE(VFMADD_VL)
20675 NODE_NAME_CASE(VFNMADD_VL)
20676 NODE_NAME_CASE(VFMSUB_VL)
20677 NODE_NAME_CASE(VFNMSUB_VL)
20678 NODE_NAME_CASE(VFWMADD_VL)
20679 NODE_NAME_CASE(VFWNMADD_VL)
20680 NODE_NAME_CASE(VFWMSUB_VL)
20681 NODE_NAME_CASE(VFWNMSUB_VL)
20682 NODE_NAME_CASE(FCOPYSIGN_VL)
20683 NODE_NAME_CASE(SMIN_VL)
20684 NODE_NAME_CASE(SMAX_VL)
20685 NODE_NAME_CASE(UMIN_VL)
20686 NODE_NAME_CASE(UMAX_VL)
20687 NODE_NAME_CASE(BITREVERSE_VL)
20688 NODE_NAME_CASE(BSWAP_VL)
20689 NODE_NAME_CASE(CTLZ_VL)
20690 NODE_NAME_CASE(CTTZ_VL)
20691 NODE_NAME_CASE(CTPOP_VL)
20692 NODE_NAME_CASE(VFMIN_VL)
20693 NODE_NAME_CASE(VFMAX_VL)
20694 NODE_NAME_CASE(MULHS_VL)
20695 NODE_NAME_CASE(MULHU_VL)
20696 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20697 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20698 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20699 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20700 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20701 NODE_NAME_CASE(SINT_TO_FP_VL)
20702 NODE_NAME_CASE(UINT_TO_FP_VL)
20703 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20704 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20705 NODE_NAME_CASE(FP_EXTEND_VL)
20706 NODE_NAME_CASE(FP_ROUND_VL)
20707 NODE_NAME_CASE(STRICT_FADD_VL)
20708 NODE_NAME_CASE(STRICT_FSUB_VL)
20709 NODE_NAME_CASE(STRICT_FMUL_VL)
20710 NODE_NAME_CASE(STRICT_FDIV_VL)
20711 NODE_NAME_CASE(STRICT_FSQRT_VL)
20712 NODE_NAME_CASE(STRICT_VFMADD_VL)
20713 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20714 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20715 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20716 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20717 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20718 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20719 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20720 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20721 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20722 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20723 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20724 NODE_NAME_CASE(STRICT_FSETCC_VL)
20725 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20726 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20727 NODE_NAME_CASE(VWMUL_VL)
20728 NODE_NAME_CASE(VWMULU_VL)
20729 NODE_NAME_CASE(VWMULSU_VL)
20730 NODE_NAME_CASE(VWADD_VL)
20731 NODE_NAME_CASE(VWADDU_VL)
20732 NODE_NAME_CASE(VWSUB_VL)
20733 NODE_NAME_CASE(VWSUBU_VL)
20734 NODE_NAME_CASE(VWADD_W_VL)
20735 NODE_NAME_CASE(VWADDU_W_VL)
20736 NODE_NAME_CASE(VWSUB_W_VL)
20737 NODE_NAME_CASE(VWSUBU_W_VL)
20738 NODE_NAME_CASE(VWSLL_VL)
20739 NODE_NAME_CASE(VFWMUL_VL)
20740 NODE_NAME_CASE(VFWADD_VL)
20741 NODE_NAME_CASE(VFWSUB_VL)
20742 NODE_NAME_CASE(VFWADD_W_VL)
20743 NODE_NAME_CASE(VFWSUB_W_VL)
20744 NODE_NAME_CASE(VWMACC_VL)
20745 NODE_NAME_CASE(VWMACCU_VL)
20746 NODE_NAME_CASE(VWMACCSU_VL)
20747 NODE_NAME_CASE(SETCC_VL)
20748 NODE_NAME_CASE(VMERGE_VL)
20749 NODE_NAME_CASE(VMAND_VL)
20750 NODE_NAME_CASE(VMOR_VL)
20751 NODE_NAME_CASE(VMXOR_VL)
20752 NODE_NAME_CASE(VMCLR_VL)
20753 NODE_NAME_CASE(VMSET_VL)
20754 NODE_NAME_CASE(VRGATHER_VX_VL)
20755 NODE_NAME_CASE(VRGATHER_VV_VL)
20756 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20757 NODE_NAME_CASE(VSEXT_VL)
20758 NODE_NAME_CASE(VZEXT_VL)
20759 NODE_NAME_CASE(VCPOP_VL)
20760 NODE_NAME_CASE(VFIRST_VL)
20761 NODE_NAME_CASE(READ_CSR)
20762 NODE_NAME_CASE(WRITE_CSR)
20763 NODE_NAME_CASE(SWAP_CSR)
20764 NODE_NAME_CASE(CZERO_EQZ)
20765 NODE_NAME_CASE(CZERO_NEZ)
20766 NODE_NAME_CASE(SW_GUARDED_BRIND)
20767 NODE_NAME_CASE(SW_GUARDED_CALL)
20768 NODE_NAME_CASE(SW_GUARDED_TAIL)
20769 NODE_NAME_CASE(TUPLE_INSERT)
20770 NODE_NAME_CASE(TUPLE_EXTRACT)
20771 NODE_NAME_CASE(SF_VC_XV_SE)
20772 NODE_NAME_CASE(SF_VC_IV_SE)
20773 NODE_NAME_CASE(SF_VC_VV_SE)
20774 NODE_NAME_CASE(SF_VC_FV_SE)
20775 NODE_NAME_CASE(SF_VC_XVV_SE)
20776 NODE_NAME_CASE(SF_VC_IVV_SE)
20777 NODE_NAME_CASE(SF_VC_VVV_SE)
20778 NODE_NAME_CASE(SF_VC_FVV_SE)
20779 NODE_NAME_CASE(SF_VC_XVW_SE)
20780 NODE_NAME_CASE(SF_VC_IVW_SE)
20781 NODE_NAME_CASE(SF_VC_VVW_SE)
20782 NODE_NAME_CASE(SF_VC_FVW_SE)
20783 NODE_NAME_CASE(SF_VC_V_X_SE)
20784 NODE_NAME_CASE(SF_VC_V_I_SE)
20785 NODE_NAME_CASE(SF_VC_V_XV_SE)
20786 NODE_NAME_CASE(SF_VC_V_IV_SE)
20787 NODE_NAME_CASE(SF_VC_V_VV_SE)
20788 NODE_NAME_CASE(SF_VC_V_FV_SE)
20789 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20790 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20791 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20792 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20793 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20794 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20795 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20796 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20797 }
20798 // clang-format on
20799 return nullptr;
20800#undef NODE_NAME_CASE
20801}
20802
20803/// getConstraintType - Given a constraint letter, return the type of
20804/// constraint it is for this target.
20807 if (Constraint.size() == 1) {
20808 switch (Constraint[0]) {
20809 default:
20810 break;
20811 case 'f':
20812 case 'R':
20813 return C_RegisterClass;
20814 case 'I':
20815 case 'J':
20816 case 'K':
20817 return C_Immediate;
20818 case 'A':
20819 return C_Memory;
20820 case 's':
20821 case 'S': // A symbolic address
20822 return C_Other;
20823 }
20824 } else {
20825 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
20826 return C_RegisterClass;
20827 if (Constraint == "cr" || Constraint == "cf")
20828 return C_RegisterClass;
20829 }
20830 return TargetLowering::getConstraintType(Constraint);
20831}
20832
20833std::pair<unsigned, const TargetRegisterClass *>
20835 StringRef Constraint,
20836 MVT VT) const {
20837 // First, see if this is a constraint that directly corresponds to a RISC-V
20838 // register class.
20839 if (Constraint.size() == 1) {
20840 switch (Constraint[0]) {
20841 case 'r':
20842 // TODO: Support fixed vectors up to XLen for P extension?
20843 if (VT.isVector())
20844 break;
20845 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20846 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20847 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20848 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20849 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20850 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20851 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20852 case 'f':
20853 if (VT == MVT::f16) {
20854 if (Subtarget.hasStdExtZfhmin())
20855 return std::make_pair(0U, &RISCV::FPR16RegClass);
20856 if (Subtarget.hasStdExtZhinxmin())
20857 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20858 } else if (VT == MVT::f32) {
20859 if (Subtarget.hasStdExtF())
20860 return std::make_pair(0U, &RISCV::FPR32RegClass);
20861 if (Subtarget.hasStdExtZfinx())
20862 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20863 } else if (VT == MVT::f64) {
20864 if (Subtarget.hasStdExtD())
20865 return std::make_pair(0U, &RISCV::FPR64RegClass);
20866 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20867 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20868 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20869 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20870 }
20871 break;
20872 case 'R':
20873 if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
20874 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20875 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20876 default:
20877 break;
20878 }
20879 } else if (Constraint == "vr") {
20880 for (const auto *RC :
20881 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
20882 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
20883 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
20884 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
20885 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
20886 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
20887 &RISCV::VRN2M4RegClass}) {
20888 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20889 return std::make_pair(0U, RC);
20890 }
20891 } else if (Constraint == "vd") {
20892 for (const auto *RC :
20893 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
20894 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
20895 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
20896 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
20897 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
20898 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
20899 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
20900 &RISCV::VRN2M4NoV0RegClass}) {
20901 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20902 return std::make_pair(0U, RC);
20903 }
20904 } else if (Constraint == "vm") {
20905 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20906 return std::make_pair(0U, &RISCV::VMV0RegClass);
20907 } else if (Constraint == "cr") {
20908 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20909 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20910 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20911 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20912 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20913 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20914 if (!VT.isVector())
20915 return std::make_pair(0U, &RISCV::GPRCRegClass);
20916 } else if (Constraint == "cf") {
20917 if (VT == MVT::f16) {
20918 if (Subtarget.hasStdExtZfhmin())
20919 return std::make_pair(0U, &RISCV::FPR16CRegClass);
20920 if (Subtarget.hasStdExtZhinxmin())
20921 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20922 } else if (VT == MVT::f32) {
20923 if (Subtarget.hasStdExtF())
20924 return std::make_pair(0U, &RISCV::FPR32CRegClass);
20925 if (Subtarget.hasStdExtZfinx())
20926 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20927 } else if (VT == MVT::f64) {
20928 if (Subtarget.hasStdExtD())
20929 return std::make_pair(0U, &RISCV::FPR64CRegClass);
20930 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20931 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20932 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20933 return std::make_pair(0U, &RISCV::GPRCRegClass);
20934 }
20935 }
20936
20937 // Clang will correctly decode the usage of register name aliases into their
20938 // official names. However, other frontends like `rustc` do not. This allows
20939 // users of these frontends to use the ABI names for registers in LLVM-style
20940 // register constraints.
20941 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20942 .Case("{zero}", RISCV::X0)
20943 .Case("{ra}", RISCV::X1)
20944 .Case("{sp}", RISCV::X2)
20945 .Case("{gp}", RISCV::X3)
20946 .Case("{tp}", RISCV::X4)
20947 .Case("{t0}", RISCV::X5)
20948 .Case("{t1}", RISCV::X6)
20949 .Case("{t2}", RISCV::X7)
20950 .Cases("{s0}", "{fp}", RISCV::X8)
20951 .Case("{s1}", RISCV::X9)
20952 .Case("{a0}", RISCV::X10)
20953 .Case("{a1}", RISCV::X11)
20954 .Case("{a2}", RISCV::X12)
20955 .Case("{a3}", RISCV::X13)
20956 .Case("{a4}", RISCV::X14)
20957 .Case("{a5}", RISCV::X15)
20958 .Case("{a6}", RISCV::X16)
20959 .Case("{a7}", RISCV::X17)
20960 .Case("{s2}", RISCV::X18)
20961 .Case("{s3}", RISCV::X19)
20962 .Case("{s4}", RISCV::X20)
20963 .Case("{s5}", RISCV::X21)
20964 .Case("{s6}", RISCV::X22)
20965 .Case("{s7}", RISCV::X23)
20966 .Case("{s8}", RISCV::X24)
20967 .Case("{s9}", RISCV::X25)
20968 .Case("{s10}", RISCV::X26)
20969 .Case("{s11}", RISCV::X27)
20970 .Case("{t3}", RISCV::X28)
20971 .Case("{t4}", RISCV::X29)
20972 .Case("{t5}", RISCV::X30)
20973 .Case("{t6}", RISCV::X31)
20974 .Default(RISCV::NoRegister);
20975 if (XRegFromAlias != RISCV::NoRegister)
20976 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20977
20978 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20979 // TableGen record rather than the AsmName to choose registers for InlineAsm
20980 // constraints, and since we want to match those names to the widest
20981 // floating-point register type available, manually select FP registers here.
20982 //
20983 // The second case is the ABI name of the register, so that frontends can also
20984 // use the ABI names in register constraint lists.
20985 if (Subtarget.hasStdExtF()) {
20986 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20987 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20988 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20989 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20990 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20991 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20992 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20993 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20994 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20995 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20996 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20997 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20998 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20999 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21000 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21001 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21002 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21003 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21004 .Cases("{f17}", "{fa7}", RISCV::F17_F)
21005 .Cases("{f18}", "{fs2}", RISCV::F18_F)
21006 .Cases("{f19}", "{fs3}", RISCV::F19_F)
21007 .Cases("{f20}", "{fs4}", RISCV::F20_F)
21008 .Cases("{f21}", "{fs5}", RISCV::F21_F)
21009 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21010 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21011 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21012 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21013 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21014 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21015 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21016 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21017 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21018 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21019 .Default(RISCV::NoRegister);
21020 if (FReg != RISCV::NoRegister) {
21021 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21022 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21023 unsigned RegNo = FReg - RISCV::F0_F;
21024 unsigned DReg = RISCV::F0_D + RegNo;
21025 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21026 }
21027 if (VT == MVT::f32 || VT == MVT::Other)
21028 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21029 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21030 unsigned RegNo = FReg - RISCV::F0_F;
21031 unsigned HReg = RISCV::F0_H + RegNo;
21032 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21033 }
21034 }
21035 }
21036
21037 if (Subtarget.hasVInstructions()) {
21038 Register VReg = StringSwitch<Register>(Constraint.lower())
21039 .Case("{v0}", RISCV::V0)
21040 .Case("{v1}", RISCV::V1)
21041 .Case("{v2}", RISCV::V2)
21042 .Case("{v3}", RISCV::V3)
21043 .Case("{v4}", RISCV::V4)
21044 .Case("{v5}", RISCV::V5)
21045 .Case("{v6}", RISCV::V6)
21046 .Case("{v7}", RISCV::V7)
21047 .Case("{v8}", RISCV::V8)
21048 .Case("{v9}", RISCV::V9)
21049 .Case("{v10}", RISCV::V10)
21050 .Case("{v11}", RISCV::V11)
21051 .Case("{v12}", RISCV::V12)
21052 .Case("{v13}", RISCV::V13)
21053 .Case("{v14}", RISCV::V14)
21054 .Case("{v15}", RISCV::V15)
21055 .Case("{v16}", RISCV::V16)
21056 .Case("{v17}", RISCV::V17)
21057 .Case("{v18}", RISCV::V18)
21058 .Case("{v19}", RISCV::V19)
21059 .Case("{v20}", RISCV::V20)
21060 .Case("{v21}", RISCV::V21)
21061 .Case("{v22}", RISCV::V22)
21062 .Case("{v23}", RISCV::V23)
21063 .Case("{v24}", RISCV::V24)
21064 .Case("{v25}", RISCV::V25)
21065 .Case("{v26}", RISCV::V26)
21066 .Case("{v27}", RISCV::V27)
21067 .Case("{v28}", RISCV::V28)
21068 .Case("{v29}", RISCV::V29)
21069 .Case("{v30}", RISCV::V30)
21070 .Case("{v31}", RISCV::V31)
21071 .Default(RISCV::NoRegister);
21072 if (VReg != RISCV::NoRegister) {
21073 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21074 return std::make_pair(VReg, &RISCV::VMRegClass);
21075 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21076 return std::make_pair(VReg, &RISCV::VRRegClass);
21077 for (const auto *RC :
21078 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21079 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21080 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21081 return std::make_pair(VReg, RC);
21082 }
21083 }
21084 }
21085 }
21086
21087 std::pair<Register, const TargetRegisterClass *> Res =
21089
21090 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21091 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21092 // Subtarget into account.
21093 if (Res.second == &RISCV::GPRF16RegClass ||
21094 Res.second == &RISCV::GPRF32RegClass ||
21095 Res.second == &RISCV::GPRPairRegClass)
21096 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21097
21098 return Res;
21099}
21100
21103 // Currently only support length 1 constraints.
21104 if (ConstraintCode.size() == 1) {
21105 switch (ConstraintCode[0]) {
21106 case 'A':
21108 default:
21109 break;
21110 }
21111 }
21112
21113 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21114}
21115
21117 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21118 SelectionDAG &DAG) const {
21119 // Currently only support length 1 constraints.
21120 if (Constraint.size() == 1) {
21121 switch (Constraint[0]) {
21122 case 'I':
21123 // Validate & create a 12-bit signed immediate operand.
21124 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21125 uint64_t CVal = C->getSExtValue();
21126 if (isInt<12>(CVal))
21127 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21128 Subtarget.getXLenVT()));
21129 }
21130 return;
21131 case 'J':
21132 // Validate & create an integer zero operand.
21133 if (isNullConstant(Op))
21134 Ops.push_back(
21135 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21136 return;
21137 case 'K':
21138 // Validate & create a 5-bit unsigned immediate operand.
21139 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21140 uint64_t CVal = C->getZExtValue();
21141 if (isUInt<5>(CVal))
21142 Ops.push_back(
21143 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21144 }
21145 return;
21146 case 'S':
21148 return;
21149 default:
21150 break;
21151 }
21152 }
21153 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21154}
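// ---- Editor's illustrative sketch (not part of the original source) ----
// The immediate constraints validated above, restated as plain predicates:
// 'I' accepts a 12-bit signed immediate, 'J' only the constant zero, and
// 'K' a 5-bit unsigned immediate. The helper names are hypothetical.
static bool fitsConstraintISketch(int64_t C) { return isInt<12>(C); }
static bool fitsConstraintJSketch(int64_t C) { return C == 0; }
static bool fitsConstraintKSketch(uint64_t C) { return isUInt<5>(C); }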
21155
21157 Instruction *Inst,
21158 AtomicOrdering Ord) const {
21159 if (Subtarget.hasStdExtZtso()) {
21160 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21161 return Builder.CreateFence(Ord);
21162 return nullptr;
21163 }
21164
21165 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21166 return Builder.CreateFence(Ord);
21167 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21168 return Builder.CreateFence(AtomicOrdering::Release);
21169 return nullptr;
21170}
21171
21173 Instruction *Inst,
21174 AtomicOrdering Ord) const {
21175 if (Subtarget.hasStdExtZtso()) {
21176 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21177 return Builder.CreateFence(Ord);
21178 return nullptr;
21179 }
21180
21181 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21182 return Builder.CreateFence(AtomicOrdering::Acquire);
21183 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21186 return nullptr;
21187}
21188
21191 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21192 // point operations can't be used in an lr/sc sequence without breaking the
21193 // forward-progress guarantee.
21194 if (AI->isFloatingPointOperation() ||
21200
21201 // Don't expand forced atomics; we want to use __sync libcalls instead.
21202 if (Subtarget.hasForcedAtomics())
21204
21205 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21206 if (AI->getOperation() == AtomicRMWInst::Nand) {
21207 if (Subtarget.hasStdExtZacas() &&
21208 (Size >= 32 || Subtarget.hasStdExtZabha()))
21210 if (Size < 32)
21212 }
21213
21214 if (Size < 32 && !Subtarget.hasStdExtZabha())
21216
21218}
21219
21220static Intrinsic::ID
21222 if (XLen == 32) {
21223 switch (BinOp) {
21224 default:
21225 llvm_unreachable("Unexpected AtomicRMW BinOp");
21227 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21228 case AtomicRMWInst::Add:
21229 return Intrinsic::riscv_masked_atomicrmw_add_i32;
21230 case AtomicRMWInst::Sub:
21231 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21233 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21234 case AtomicRMWInst::Max:
21235 return Intrinsic::riscv_masked_atomicrmw_max_i32;
21236 case AtomicRMWInst::Min:
21237 return Intrinsic::riscv_masked_atomicrmw_min_i32;
21239 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21241 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21242 }
21243 }
21244
21245 if (XLen == 64) {
21246 switch (BinOp) {
21247 default:
21248 llvm_unreachable("Unexpected AtomicRMW BinOp");
21250 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21251 case AtomicRMWInst::Add:
21252 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21253 case AtomicRMWInst::Sub:
21254 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21256 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21257 case AtomicRMWInst::Max:
21258 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21259 case AtomicRMWInst::Min:
21260 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21262 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21264 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21265 }
21266 }
21267
21268 llvm_unreachable("Unexpected XLen\n");
21269}
21270
21272 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21273 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21274 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21275 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21276 // mask, as this produces better code than the LR/SC loop emitted by
21277 // int_riscv_masked_atomicrmw_xchg.
21278 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21279 isa<ConstantInt>(AI->getValOperand())) {
21280 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21281 if (CVal->isZero())
21282 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21283 Builder.CreateNot(Mask, "Inv_Mask"),
21284 AI->getAlign(), Ord);
21285 if (CVal->isMinusOne())
21286 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21287 AI->getAlign(), Ord);
21288 }
21289
21290 unsigned XLen = Subtarget.getXLen();
21291 Value *Ordering =
21292 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21293 Type *Tys[] = {AlignedAddr->getType()};
21295 AI->getModule(),
21297
21298 if (XLen == 64) {
21299 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21300 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21301 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21302 }
21303
21304 Value *Result;
21305
21306 // Must pass the shift amount needed to sign extend the loaded value prior
21307 // to performing a signed comparison for min/max. ShiftAmt is the number of
21308 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21309 // is the number of bits to left+right shift the value in order to
21310 // sign-extend.
21311 if (AI->getOperation() == AtomicRMWInst::Min ||
21313 const DataLayout &DL = AI->getDataLayout();
21314 unsigned ValWidth =
21315 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21316 Value *SextShamt =
21317 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21318 Result = Builder.CreateCall(LrwOpScwLoop,
21319 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21320 } else {
21321 Result =
21322 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21323 }
21324
21325 if (XLen == 64)
21326 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21327 return Result;
21328}
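// ---- Editor's illustrative sketch (not part of the original source) ----
// How the extra "SextShamt" operand built above is meant to be used for
// masked min/max: shift the loaded field to the top of the register, then
// arithmetic-shift it back down so it is sign-extended before the signed
// comparison. Assumes XLen == 64; the helper name is hypothetical.
static int64_t sextLoadedFieldSketch(uint64_t Word, unsigned ValWidth,
                                     unsigned ShiftAmt) {
  unsigned SextShamt = 64 - ValWidth - ShiftAmt; // i.e. XLen-ValWidth-ShiftAmt
  return (int64_t)(Word << SextShamt) >> SextShamt;
}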
21329
21332 AtomicCmpXchgInst *CI) const {
21334 // Don't expand forced atomics; we want to use __sync libcalls instead.
21334 if (Subtarget.hasForcedAtomics())
21336
21338 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21339 (Size == 8 || Size == 16))
21342}
21343
21345 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21346 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21347 unsigned XLen = Subtarget.getXLen();
21348 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21349 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21350 if (XLen == 64) {
21351 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21352 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21353 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21354 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21355 }
21356 Type *Tys[] = {AlignedAddr->getType()};
21357 Value *Result = Builder.CreateIntrinsic(
21358 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21359 if (XLen == 64)
21360 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21361 return Result;
21362}
21363
21365 EVT DataVT) const {
21366 // We have indexed loads for all supported EEW types. Indices are always
21367 // zero extended.
21368 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21369 isTypeLegal(Extend.getValueType()) &&
21370 isTypeLegal(Extend.getOperand(0).getValueType()) &&
21371 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21372}
21373
21375 EVT VT) const {
21376 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21377 return false;
21378
21379 switch (FPVT.getSimpleVT().SimpleTy) {
21380 case MVT::f16:
21381 return Subtarget.hasStdExtZfhmin();
21382 case MVT::f32:
21383 return Subtarget.hasStdExtF();
21384 case MVT::f64:
21385 return Subtarget.hasStdExtD();
21386 default:
21387 return false;
21388 }
21389}
21390
21392 // If we are using the small code model, we can reduce the size of each
21393 // jump table entry to 4 bytes.
21394 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21397 }
21399}
21400
21402 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21403 unsigned uid, MCContext &Ctx) const {
21404 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21406 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21407}
21408
21410 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21411 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21412 // a power of two as well.
21413 // FIXME: This doesn't work for zve32, but that's already broken
21414 // elsewhere for the same reason.
21415 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21416 static_assert(RISCV::RVVBitsPerBlock == 64,
21417 "RVVBitsPerBlock changed, audit needed");
21418 return true;
21419}
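// ---- Editor's illustrative sketch (not part of the original source) ----
// vscale as defined in the comment above: VLEN / RVVBitsPerBlock, with
// RVVBitsPerBlock == 64. Since VLEN is a power of two >= 64, the result is
// always a power of two as well, e.g. VLEN = 256 gives vscale = 4.
static unsigned vscaleForVLenSketch(unsigned VLenBits) {
  return VLenBits / 64;
}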
21420
21422 SDValue &Offset,
21424 SelectionDAG &DAG) const {
21425 // Target does not support indexed loads.
21426 if (!Subtarget.hasVendorXTHeadMemIdx())
21427 return false;
21428
21429 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21430 return false;
21431
21432 Base = Op->getOperand(0);
21433 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21434 int64_t RHSC = RHS->getSExtValue();
21435 if (Op->getOpcode() == ISD::SUB)
21436 RHSC = -(uint64_t)RHSC;
21437
21438 // The constants that can be encoded in the THeadMemIdx instructions
21439 // are of the form (sign_extend(imm5) << imm2).
21440 bool isLegalIndexedOffset = false;
21441 for (unsigned i = 0; i < 4; i++)
21442 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21443 isLegalIndexedOffset = true;
21444 break;
21445 }
21446
21447 if (!isLegalIndexedOffset)
21448 return false;
21449
21450 Offset = Op->getOperand(1);
21451 return true;
21452 }
21453
21454 return false;
21455}
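// ---- Editor's illustrative sketch (not part of the original source) ----
// The offset legality test above accepts exactly the constants of the form
// sign_extend(imm5) << imm2, i.e. a 5-bit signed value shifted left by 0..3.
// For example 48 == 12 << 2 is encodable, while 17 is not. A hypothetical
// standalone check:
static bool isLegalTHeadMemIdxOffsetSketch(int64_t C) {
  for (unsigned Shift = 0; Shift < 4; ++Shift)
    if ((C % (1LL << Shift)) == 0 && isInt<5>(C >> Shift))
      return true;
  return false;
}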
21456
21458 SDValue &Offset,
21460 SelectionDAG &DAG) const {
21461 EVT VT;
21462 SDValue Ptr;
21463 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21464 VT = LD->getMemoryVT();
21465 Ptr = LD->getBasePtr();
21466 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21467 VT = ST->getMemoryVT();
21468 Ptr = ST->getBasePtr();
21469 } else
21470 return false;
21471
21472 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21473 return false;
21474
21475 AM = ISD::PRE_INC;
21476 return true;
21477}
21478
21480 SDValue &Base,
21481 SDValue &Offset,
21483 SelectionDAG &DAG) const {
21484 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21485 if (Op->getOpcode() != ISD::ADD)
21486 return false;
21487
21488 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21489 Base = LS->getBasePtr();
21490 else
21491 return false;
21492
21493 if (Base == Op->getOperand(0))
21494 Offset = Op->getOperand(1);
21495 else if (Base == Op->getOperand(1))
21496 Offset = Op->getOperand(0);
21497 else
21498 return false;
21499
21500 AM = ISD::POST_INC;
21501 return true;
21502 }
21503
21504 EVT VT;
21505 SDValue Ptr;
21506 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21507 VT = LD->getMemoryVT();
21508 Ptr = LD->getBasePtr();
21509 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21510 VT = ST->getMemoryVT();
21511 Ptr = ST->getBasePtr();
21512 } else
21513 return false;
21514
21515 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21516 return false;
21517 // Post-indexing updates the base, so it's not a valid transform
21518 // if that's not the same as the load's pointer.
21519 if (Ptr != Base)
21520 return false;
21521
21522 AM = ISD::POST_INC;
21523 return true;
21524}
21525
21527 EVT VT) const {
21528 EVT SVT = VT.getScalarType();
21529
21530 if (!SVT.isSimple())
21531 return false;
21532
21533 switch (SVT.getSimpleVT().SimpleTy) {
21534 case MVT::f16:
21535 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21536 : Subtarget.hasStdExtZfhOrZhinx();
21537 case MVT::f32:
21538 return Subtarget.hasStdExtFOrZfinx();
21539 case MVT::f64:
21540 return Subtarget.hasStdExtDOrZdinx();
21541 default:
21542 break;
21543 }
21544
21545 return false;
21546}
21547
21549 // Zacas will use amocas.w which does not require extension.
21550 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21551}
21552
21554 const Constant *PersonalityFn) const {
21555 return RISCV::X10;
21556}
21557
21559 const Constant *PersonalityFn) const {
21560 return RISCV::X11;
21561}
21562
21564 // Return false to suppress the unnecessary extensions if the LibCall
21565 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21566 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21567 Type.getSizeInBits() < Subtarget.getXLen()))
21568 return false;
21569
21570 return true;
21571}
21572
21574 bool IsSigned) const {
21575 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21576 return true;
21577
21578 return IsSigned;
21579}
21580
21582 SDValue C) const {
21583 // Check integral scalar types.
21584 if (!VT.isScalarInteger())
21585 return false;
21586
21587 // Omit the optimization if the subtarget has a hardware multiplier
21588 // (Zmmul or M) and the data size exceeds XLen.
21589 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21590 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21591 return false;
21592
21593 auto *ConstNode = cast<ConstantSDNode>(C);
21594 const APInt &Imm = ConstNode->getAPIntValue();
21595
21596 // Break the MUL to a SLLI and an ADD/SUB.
21597 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21598 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21599 return true;
21600
21601 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
21602 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21603 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21604 (Imm - 8).isPowerOf2()))
21605 return true;
21606
21607 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21608 // a pair of LUI/ADDI.
21609 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21610 ConstNode->hasOneUse()) {
21611 APInt ImmS = Imm.ashr(Imm.countr_zero());
21612 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21613 (1 - ImmS).isPowerOf2())
21614 return true;
21615 }
21616
21617 return false;
21618}
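// ---- Editor's illustrative sketch (not part of the original source) ----
// The shift-add rewrites that decomposeMulByConstant enables, spelled out
// for a few concrete multipliers. Only the arithmetic mirrors the comments
// above; the helper names are made up.
static uint64_t mulBy9Sketch(uint64_t X) { return (X << 3) + X; }  // Imm - 1 == 8
static uint64_t mulBy7Sketch(uint64_t X) { return (X << 3) - X; }  // Imm + 1 == 8
static uint64_t mulBy4098Sketch(uint64_t X) {
  // With Zba this is sh1add(X, slli(X, 12)), since Imm - 2 == 4096.
  return (X << 12) + (X << 1);
}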
21619
21621 SDValue ConstNode) const {
21622 // Let the DAGCombiner decide for vectors.
21623 EVT VT = AddNode.getValueType();
21624 if (VT.isVector())
21625 return true;
21626
21627 // Let the DAGCombiner decide for larger types.
21628 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21629 return true;
21630
21631 // It is worse if c1 is simm12 while c1*c2 is not.
21632 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21633 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21634 const APInt &C1 = C1Node->getAPIntValue();
21635 const APInt &C2 = C2Node->getAPIntValue();
21636 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21637 return false;
21638
21639 // Default to true and let the DAGCombiner decide.
21640 return true;
21641}
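// ---- Editor's illustrative sketch (not part of the original source) ----
// The profitability rule above in plain arithmetic: folding (X + C1) * C2
// into X * C2 + C1 * C2 is rejected when C1 fits in a signed 12-bit
// immediate but C1 * C2 does not, because the product would then have to be
// materialized separately. Hypothetical helper:
static bool allowAddMulFoldSketch(int64_t C1, int64_t C2) {
  return !(isInt<12>(C1) && !isInt<12>(C1 * C2));
}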
21642
21644 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21645 unsigned *Fast) const {
21646 if (!VT.isVector()) {
21647 if (Fast)
21648 *Fast = Subtarget.enableUnalignedScalarMem();
21649 return Subtarget.enableUnalignedScalarMem();
21650 }
21651
21652 // All vector implementations must support element alignment
21653 EVT ElemVT = VT.getVectorElementType();
21654 if (Alignment >= ElemVT.getStoreSize()) {
21655 if (Fast)
21656 *Fast = 1;
21657 return true;
21658 }
21659
21660 // Note: We lower an unmasked unaligned vector access to an equally sized
21661 // e8 element type access. Given this, we effectively support all unmasked
21662 // misaligned accesses. TODO: Work through the codegen implications of
21663 // allowing such accesses to be formed, and of considering them fast.
21664 if (Fast)
21665 *Fast = Subtarget.enableUnalignedVectorMem();
21666 return Subtarget.enableUnalignedVectorMem();
21667}
21668
21669
21671 const AttributeList &FuncAttributes) const {
21672 if (!Subtarget.hasVInstructions())
21673 return MVT::Other;
21674
21675 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21676 return MVT::Other;
21677
21678 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21679 // has an expansion threshold, and we want the number of hardware memory
21680 // operations to correspond roughly to that threshold. LMUL>1 operations
21681 // are typically expanded linearly internally, and thus correspond to more
21682 // than one actual memory operation. Note that store merging and load
21683 // combining will typically form larger LMUL operations from the LMUL1
21684 // operations emitted here, and that's okay because combining isn't
21685 // introducing new memory operations; it's just merging existing ones.
21686 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen() / 8;
21687 if (Op.size() < MinVLenInBytes)
21688 // TODO: Figure out short memops. For the moment, do the default thing
21689 // which ends up using scalar sequences.
21690 return MVT::Other;
21691
21692 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21693 // fixed vectors.
21694 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21695 return MVT::Other;
21696
21697 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21698 // a large scalar constant and instead use vmv.v.x/i to do the
21699 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21700 // maximize the chance we can encode the size in the vsetvli.
21701 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21702 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21703
21704 // Do we have sufficient alignment for our preferred VT? If not, revert
21705 // to largest size allowed by our alignment criteria.
21706 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21707 Align RequiredAlign(PreferredVT.getStoreSize());
21708 if (Op.isFixedDstAlign())
21709 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21710 if (Op.isMemcpy())
21711 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21712 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21713 }
21714 return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
21715}
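// ---- Editor's illustrative sketch (not part of the original source) ----
// The element-type preference applied above, ignoring the alignment fixup: a
// non-zero memset prefers i8 so the byte value can be splatted with
// vmv.v.x/vmv.v.i, while everything else prefers ELEN-sized elements to keep
// VL small. Hypothetical helper returning the element width in bits:
static unsigned preferredMemOpEltBitsSketch(bool IsMemset, bool IsZeroMemset,
                                            unsigned ELen) {
  return (IsMemset && !IsZeroMemset) ? 8 : ELen;
}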
21716
21718 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21719 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21720 bool IsABIRegCopy = CC.has_value();
21721 EVT ValueVT = Val.getValueType();
21722
21723 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21724 if ((ValueVT == PairVT ||
21725 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21726 ValueVT == MVT::f64)) &&
21727 NumParts == 1 && PartVT == MVT::Untyped) {
21728 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
21729 MVT XLenVT = Subtarget.getXLenVT();
21730 if (ValueVT == MVT::f64)
21731 Val = DAG.getBitcast(MVT::i64, Val);
21732 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
21733 // Always creating an MVT::Untyped part, so always use
21734 // RISCVISD::BuildGPRPair.
21735 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
21736 return true;
21737 }
21738
21739 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21740 PartVT == MVT::f32) {
21741 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21742 // nan, and cast to f32.
21743 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21744 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21745 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21746 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21747 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21748 Parts[0] = Val;
21749 return true;
21750 }
21751
21752 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
21753#ifndef NDEBUG
21754 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
21755 [[maybe_unused]] unsigned ValLMUL =
21757 ValNF * RISCV::RVVBitsPerBlock);
21758 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
21759 [[maybe_unused]] unsigned PartLMUL =
21761 PartNF * RISCV::RVVBitsPerBlock);
21762 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
21763 "RISC-V vector tuple type only accepts same register class type "
21764 "TUPLE_INSERT");
21765#endif
21766
21767 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
21768 Val, DAG.getVectorIdxConstant(0, DL));
21769 Parts[0] = Val;
21770 return true;
21771 }
21772
21773 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21774 LLVMContext &Context = *DAG.getContext();
21775 EVT ValueEltVT = ValueVT.getVectorElementType();
21776 EVT PartEltVT = PartVT.getVectorElementType();
21777 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21778 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21779 if (PartVTBitSize % ValueVTBitSize == 0) {
21780 assert(PartVTBitSize >= ValueVTBitSize);
21781 // If the element types are different, first widen within ValueVT's element
21782 // type and then bitcast to PartVT.
21783 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
21784 // we insert the <vscale x 1 x i8> into <vscale x 8 x i8> with
21785 // insert_subvector, and then bitcast the result to
21786 // <vscale x 4 x i16>.
21787 if (ValueEltVT != PartEltVT) {
21788 if (PartVTBitSize > ValueVTBitSize) {
21789 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21790 assert(Count != 0 && "The number of elements should not be zero.");
21791 EVT SameEltTypeVT =
21792 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21793 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21794 DAG.getUNDEF(SameEltTypeVT), Val,
21795 DAG.getVectorIdxConstant(0, DL));
21796 }
21797 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21798 } else {
21799 Val =
21800 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21801 Val, DAG.getVectorIdxConstant(0, DL));
21802 }
21803 Parts[0] = Val;
21804 return true;
21805 }
21806 }
21807
21808 return false;
21809}
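// ---- Editor's illustrative sketch (not part of the original source) ----
// The f16/bf16-in-f32 ABI copy above produces a NaN-boxed value: the low 16
// bits carry the half-precision payload and the upper 16 bits are all ones.
// The same bit pattern on plain integers, with a hypothetical helper name:
static uint32_t nanBoxHalfBitsSketch(uint16_t HalfBits) {
  return 0xFFFF0000u | HalfBits;
}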
21810
21812 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21813 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21814 bool IsABIRegCopy = CC.has_value();
21815
21816 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21817 if ((ValueVT == PairVT ||
21818 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21819 ValueVT == MVT::f64)) &&
21820 NumParts == 1 && PartVT == MVT::Untyped) {
21821 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
21822 MVT XLenVT = Subtarget.getXLenVT();
21823
21824 SDValue Val = Parts[0];
21825 // Always starting with an MVT::Untyped part, so always use
21826 // RISCVISD::SplitGPRPair
21827 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
21828 Val);
21829 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
21830 Val.getValue(1));
21831 if (ValueVT == MVT::f64)
21832 Val = DAG.getBitcast(ValueVT, Val);
21833 return Val;
21834 }
21835
21836 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21837 PartVT == MVT::f32) {
21838 SDValue Val = Parts[0];
21839
21840 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21841 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21842 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21843 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21844 return Val;
21845 }
21846
21847 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21848 LLVMContext &Context = *DAG.getContext();
21849 SDValue Val = Parts[0];
21850 EVT ValueEltVT = ValueVT.getVectorElementType();
21851 EVT PartEltVT = PartVT.getVectorElementType();
21852 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21853 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21854 if (PartVTBitSize % ValueVTBitSize == 0) {
21855 assert(PartVTBitSize >= ValueVTBitSize);
21856 EVT SameEltTypeVT = ValueVT;
21857 // If the element types are different, convert the value to the element type
21858 // of PartVT.
21859 // For example, suppose we want to copy a <vscale x 1 x i8> value out of
21860 // <vscale x 4 x i16>.
21861 // We first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>, and then
21862 // extract the <vscale x 1 x i8> subvector.
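// A rough sketch of the nodes this path creates for that example (here
// SameEltTypeVT would be <vscale x 8 x i8>; illustrative only):
//   %cast = bitcast %part:<vscale x 4 x i16> to <vscale x 8 x i8>
//   %val  = extract_subvector %cast, 0    ; yields <vscale x 1 x i8>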
21863 if (ValueEltVT != PartEltVT) {
21864 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21865 assert(Count != 0 && "The number of elements should not be zero.");
21866 SameEltTypeVT =
21867 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21868 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21869 }
21870 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21871 DAG.getVectorIdxConstant(0, DL));
21872 return Val;
21873 }
21874 }
21875 return SDValue();
21876}
21877
21879 // When aggressively optimizing for code size, we prefer to use a div
21880 // instruction, as it is usually smaller than the alternative sequence.
21881 // TODO: Add vector division?
21882 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21883 return OptSize && !VT.isVector();
21884}
21885
21887 // Scalarizing zero_extend and sign_extend can prevent them from matching a
21888 // widening instruction in some situations.
21889 unsigned Opc = N->getOpcode();
21890 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21891 return false;
21892 return true;
21893}
21894
21895static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21896 Module *M = IRB.GetInsertBlock()->getModule();
21897 Function *ThreadPointerFunc =
21898 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
21899 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21900 IRB.CreateCall(ThreadPointerFunc), Offset);
21901}
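// A minimal sketch of the IR useTpOffset produces (shown for the Fuchsia
// offset of -0x10; the exact form is illustrative, not authoritative):
//   %tp    = call ptr @llvm.thread.pointer()
//   %guard = getelementptr i8, ptr %tp, i32 -16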
21902
21904 // Fuchsia provides a fixed TLS slot for the stack cookie.
21905 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21906 if (Subtarget.isTargetFuchsia())
21907 return useTpOffset(IRB, -0x10);
21908
21909 // Android provides a fixed TLS slot for the stack cookie. See the definition
21910 // of TLS_SLOT_STACK_GUARD in
21911 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21912 if (Subtarget.isTargetAndroid())
21913 return useTpOffset(IRB, -0x18);
21914
21915 Module *M = IRB.GetInsertBlock()->getModule();
21916
21917 if (M->getStackProtectorGuard() == "tls") {
21918 // Users must specify the offset explicitly
21919 int Offset = M->getStackProtectorGuardOffset();
21920 return useTpOffset(IRB, Offset);
21921 }
21922
21924}
21925
21927 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21928 const DataLayout &DL) const {
21929 EVT VT = getValueType(DL, VTy);
21930 // Don't lower vlseg/vsseg for vector types that can't be split.
21931 if (!isTypeLegal(VT))
21932 return false;
21933
21935 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21936 Alignment))
21937 return false;
21938
21939 MVT ContainerVT = VT.getSimpleVT();
21940
21941 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21942 if (!Subtarget.useRVVForFixedLengthVectors())
21943 return false;
21944 // Sometimes the interleaved access pass picks up splats as interleaves of
21945 // one element. Don't lower these.
21946 if (FVTy->getNumElements() < 2)
21947 return false;
21948
21950 } else {
21951 // The intrinsics for scalable vectors are not overloaded on pointer type
21952 // and can only handle the default address space.
21953 if (AddrSpace)
21954 return false;
21955 }
21956
21957 // Need to make sure that EMUL * NFIELDS ≤ 8
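// For example, a factor-4 segment access on an LMUL=2 container gives
// EMUL * NFIELDS = 2 * 4 = 8 and is allowed, while a factor-3 access on an
// LMUL=4 container would need 12 vector registers and is rejected.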
21958 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21959 if (Fractional)
21960 return true;
21961 return Factor * LMUL <= 8;
21962}
21963
21965 Align Alignment) const {
21966 if (!Subtarget.hasVInstructions())
21967 return false;
21968
21969 // Only support fixed vectors if we know the minimum vector size.
21970 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21971 return false;
21972
21973 EVT ScalarType = DataType.getScalarType();
21974 if (!isLegalElementTypeForRVV(ScalarType))
21975 return false;
21976
21977 if (!Subtarget.enableUnalignedVectorMem() &&
21978 Alignment < ScalarType.getStoreSize())
21979 return false;
21980
21981 return true;
21982}
21983
21985 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21986 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21987 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21988 Intrinsic::riscv_seg8_load};
21989
21990/// Lower an interleaved load into a vlsegN intrinsic.
21991///
21992/// E.g. Lower an interleaved load (Factor = 2):
21993/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21994/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21995/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21996///
21997/// Into:
21998/// %ld2 = call { <4 x i32>, <4 x i32> } @llvm.riscv.seg2.load.v4i32.p0.i64(
21999/// ptr %ptr, i64 4)
22000/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
22001/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
22004 ArrayRef<unsigned> Indices, unsigned Factor) const {
22005 assert(Indices.size() == Shuffles.size());
22006
22007 IRBuilder<> Builder(LI);
22008
22009 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22010 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22012 LI->getDataLayout()))
22013 return false;
22014
22015 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22016
22017 // If the segment load is going to be performed one segment at a time anyway
22018 // and only one of the deinterleaved values is used, use a strided load
22019 // instead. This will be equally fast and create less vector register pressure.
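// A rough sketch of the rewrite for Factor = 2 when only the odd elements
// (Indices[0] == 1) of an <8 x i32> wide load are used (illustrative only;
// rv64 shown, so the stride type is i64):
//   %p  = getelementptr i8, ptr %ptr, i64 4            ; Offset = 1 * 4 bytes
//   %v1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(
//             ptr %p, i64 8, <4 x i1> splat (i1 true), i32 4)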
22020 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22021 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22022 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22023 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22024 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22025 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22026 Value *VL = Builder.getInt32(VTy->getNumElements());
22027
22028 CallInst *CI =
22029 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22030 {VTy, BasePtr->getType(), Stride->getType()},
22031 {BasePtr, Stride, Mask, VL});
22032 CI->addParamAttr(
22034 Shuffles[0]->replaceAllUsesWith(CI);
22035 return true;
22036 };
22037
22038 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22039
22040 CallInst *VlsegN = Builder.CreateIntrinsic(
22041 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22042 {LI->getPointerOperand(), VL});
22043
22044 for (unsigned i = 0; i < Shuffles.size(); i++) {
22045 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22046 Shuffles[i]->replaceAllUsesWith(SubVec);
22047 }
22048
22049 return true;
22050}
22051
22053 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22054 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22055 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22056 Intrinsic::riscv_seg8_store};
22057
22058/// Lower an interleaved store into a vssegN intrinsic.
22059///
22060/// E.g. Lower an interleaved store (Factor = 3):
22061/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22062/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22063/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22064///
22065/// Into:
22066/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
22067/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
22068/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
22069/// call void @llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22070/// %ptr, i64 4)
22071///
22072/// Note that the new shufflevectors will be removed and we'll only generate one
22073/// vsseg3 instruction in CodeGen.
22075 ShuffleVectorInst *SVI,
22076 unsigned Factor) const {
22077 IRBuilder<> Builder(SI);
22078 auto Mask = SVI->getShuffleMask();
22079 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22080 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22081 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22082 ShuffleVTy->getNumElements() / Factor);
22083 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22084 SI->getPointerAddressSpace(),
22085 SI->getDataLayout()))
22086 return false;
22087
22088 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22089
22090 unsigned Index;
22091 // If the segment store only has one active lane (i.e. the interleave is
22092 // just a spread shuffle), we can use a strided store instead. This will
22093 // be equally fast, and create less vector register pressure.
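// For example, with Factor = 2 and a spread mask such as
// <0, u, 1, u, 2, u, 3, u> (Index = 0, every element taken from operand 0),
// the wide store becomes a vp.strided.store of operand 0 with a stride of
// 2 * sizeof(element), starting at the original base pointer (rough sketch).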
22094 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22095 isSpreadMask(Mask, Factor, Index)) {
22096 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22097 Value *Data = SVI->getOperand(0);
22098 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22099 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22100 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22101 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22102 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22103 Value *VL = Builder.getInt32(VTy->getNumElements());
22104
22105 CallInst *CI = Builder.CreateIntrinsic(
22106 Intrinsic::experimental_vp_strided_store,
22107 {Data->getType(), BasePtr->getType(), Stride->getType()},
22108 {Data, BasePtr, Stride, Mask, VL});
22109 CI->addParamAttr(
22110 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22111
22112 return true;
22113 }
22114
22116 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22117 {VTy, SI->getPointerOperandType(), XLenTy});
22118
22120
22121 for (unsigned i = 0; i < Factor; i++) {
22122 Value *Shuffle = Builder.CreateShuffleVector(
22123 SVI->getOperand(0), SVI->getOperand(1),
22124 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22125 Ops.push_back(Shuffle);
22126 }
22127 // This VL should be OK (should be executable in one vsseg instruction,
22128 // potentially under larger LMULs) because we checked that the fixed vector
22129 // type fits in isLegalInterleavedAccessType
22130 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22131 Ops.append({SI->getPointerOperand(), VL});
22132
22133 Builder.CreateCall(VssegNFunc, Ops);
22134
22135 return true;
22136}
22137
22139 IntrinsicInst *DI, LoadInst *LI,
22140 SmallVectorImpl<Instruction *> &DeadInsts) const {
22141 assert(LI->isSimple());
22142 IRBuilder<> Builder(LI);
22143
22144 // Only deinterleave2 supported at present.
22145 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
22146 return false;
22147
22148 const unsigned Factor = 2;
22149
22150 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
22151 const DataLayout &DL = LI->getDataLayout();
22152
22153 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22154 LI->getPointerAddressSpace(), DL))
22155 return false;
22156
22157 Value *Return;
22158 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22159
22160 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22161 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22162 Return =
22163 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22164 {ResVTy, LI->getPointerOperandType(), XLenTy},
22165 {LI->getPointerOperand(), VL});
22166 } else {
22167 static const Intrinsic::ID IntrIds[] = {
22168 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22169 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22170 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22171 Intrinsic::riscv_vlseg8};
22172
22173 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22174 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22175 Type *VecTupTy = TargetExtType::get(
22176 LI->getContext(), "riscv.vector.tuple",
22178 NumElts * SEW / 8),
22179 Factor);
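// For example, with ResVTy = <vscale x 4 x i32> (SEW = 32, NumElts = 4) and
// Factor = 2, VecTupTy is target("riscv.vector.tuple", <vscale x 16 x i8>, 2).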
22180
22181 Value *VL = Constant::getAllOnesValue(XLenTy);
22182
22183 Value *Vlseg = Builder.CreateIntrinsic(
22184 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22185 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22186 ConstantInt::get(XLenTy, Log2_64(SEW))});
22187
22188 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22189 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22190 for (unsigned i = 0; i < Factor; ++i) {
22191 Value *VecExtract = Builder.CreateIntrinsic(
22192 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22193 {Vlseg, Builder.getInt32(i)});
22194 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22195 }
22196 }
22197
22198 DI->replaceAllUsesWith(Return);
22199
22200 return true;
22201}
22202
22205 SmallVectorImpl<Instruction *> &DeadInsts) const {
22206 assert(SI->isSimple());
22207 IRBuilder<> Builder(SI);
22208
22209 // Only interleave2 supported at present.
22210 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
22211 return false;
22212
22213 const unsigned Factor = 2;
22214
22215 VectorType *InVTy = cast<VectorType>(II->getArgOperand(0)->getType());
22216 const DataLayout &DL = SI->getDataLayout();
22217
22218 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22219 SI->getPointerAddressSpace(), DL))
22220 return false;
22221
22222 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22223
22224 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22225 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22226 Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
22227 {InVTy, SI->getPointerOperandType(), XLenTy},
22228 {II->getArgOperand(0), II->getArgOperand(1),
22229 SI->getPointerOperand(), VL});
22230 } else {
22231 static const Intrinsic::ID IntrIds[] = {
22232 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22233 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22234 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22235 Intrinsic::riscv_vsseg8};
22236
22237 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22238 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22239 Type *VecTupTy = TargetExtType::get(
22240 SI->getContext(), "riscv.vector.tuple",
22241 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22242 NumElts * SEW / 8),
22243 Factor);
22244
22246 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22247
22248 Value *VL = Constant::getAllOnesValue(XLenTy);
22249
22250 Value *StoredVal = PoisonValue::get(VecTupTy);
22251 for (unsigned i = 0; i < Factor; ++i)
22252 StoredVal = Builder.CreateIntrinsic(
22253 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22254 {StoredVal, II->getArgOperand(i), Builder.getInt32(i)});
22255
22256 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22257 ConstantInt::get(XLenTy, Log2_64(SEW))});
22258 }
22259
22260 return true;
22261}
22262
22266 const TargetInstrInfo *TII) const {
22267 assert(MBBI->isCall() && MBBI->getCFIType() &&
22268 "Invalid call instruction for a KCFI check");
22269 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22270 MBBI->getOpcode()));
22271
22272 MachineOperand &Target = MBBI->getOperand(0);
22273 Target.setIsRenamable(false);
22274
22275 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22276 .addReg(Target.getReg())
22277 .addImm(MBBI->getCFIType())
22278 .getInstr();
22279}
22280
22281#define GET_REGISTER_MATCHER
22282#include "RISCVGenAsmMatcher.inc"
22283
22286 const MachineFunction &MF) const {
22288 if (Reg == RISCV::NoRegister)
22290 if (Reg == RISCV::NoRegister)
22292 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22293 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22294 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22295 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22296 StringRef(RegName) + "\"."));
22297 return Reg;
22298}
22299
22302 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22303
22304 if (NontemporalInfo == nullptr)
22306
22307 // 1 (the default value) behaves as __RISCV_NTLH_ALL
22308 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22309 // 3 -> __RISCV_NTLH_ALL_PRIVATE
22310 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22311 // 5 -> __RISCV_NTLH_ALL
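// A rough sketch of the metadata this reads (metadata node numbers are
// illustrative): a load annotated with
//   !nontemporal !0, !riscv-nontemporal-domain !1   where !1 = !{i32 3}
// selects level 3 (__RISCV_NTLH_ALL_PRIVATE); after the -2 adjustment below
// that becomes 0b01, so only MONontemporalBit0 is set.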
22312 int NontemporalLevel = 5;
22313 const MDNode *RISCVNontemporalInfo =
22314 I.getMetadata("riscv-nontemporal-domain");
22315 if (RISCVNontemporalInfo != nullptr)
22316 NontemporalLevel =
22317 cast<ConstantInt>(
22318 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22319 ->getValue())
22320 ->getZExtValue();
22321
22322 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22323 "RISC-V target doesn't support this non-temporal domain.");
22324
22325 NontemporalLevel -= 2;
22327 if (NontemporalLevel & 0b1)
22328 Flags |= MONontemporalBit0;
22329 if (NontemporalLevel & 0b10)
22330 Flags |= MONontemporalBit1;
22331
22332 return Flags;
22333}
22334
22337
22338 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22340 TargetFlags |= (NodeFlags & MONontemporalBit0);
22341 TargetFlags |= (NodeFlags & MONontemporalBit1);
22342 return TargetFlags;
22343}
22344
22346 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22347 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22348}
22349
22351 if (VT.isScalableVector())
22352 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22353 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22354 return true;
22355 return Subtarget.hasStdExtZbb() &&
22356 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22357}
22358
22360 ISD::CondCode Cond) const {
22361 return isCtpopFast(VT) ? 0 : 1;
22362}
22363
22365 const Instruction *I) const {
22366 if (Subtarget.hasStdExtZalasr()) {
22367 if (Subtarget.hasStdExtZtso()) {
22368 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22369 // should be lowered to plain load/store. The easiest way to do this is
22370 // to say we should insert fences for them, and the fence insertion code
22371 // will just not insert any fences
22372 auto *LI = dyn_cast<LoadInst>(I);
22373 auto *SI = dyn_cast<StoreInst>(I);
22374 if ((LI &&
22375 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22376 (SI &&
22377 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22378 // This is a seq_cst load or store, which needs a .aq or .rl, so we
22379 // shouldn't try to insert fences.
22380 return false;
22381 }
22382 // Here, we are a TSO inst that isn't a seq_cst load/store
22383 return isa<LoadInst>(I) || isa<StoreInst>(I);
22384 }
22385 return false;
22386 }
22387 // Note that one specific case requires fence insertion for an
22388 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22389 // than this hook due to limitations in the interface here.
22390 return isa<LoadInst>(I) || isa<StoreInst>(I);
22391}
22392
22394
22395 // GISel support is in progress or complete for these opcodes.
22396 unsigned Op = Inst.getOpcode();
22397 if (Op == Instruction::Add || Op == Instruction::Sub ||
22398 Op == Instruction::And || Op == Instruction::Or ||
22399 Op == Instruction::Xor || Op == Instruction::InsertElement ||
22400 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22401 Op == Instruction::Freeze || Op == Instruction::Store)
22402 return false;
22403
22404 if (Inst.getType()->isScalableTy())
22405 return true;
22406
22407 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22408 if (Inst.getOperand(i)->getType()->isScalableTy() &&
22409 !isa<ReturnInst>(&Inst))
22410 return true;
22411
22412 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22413 if (AI->getAllocatedType()->isScalableTy())
22414 return true;
22415 }
22416
22417 return false;
22418}
22419
22420SDValue
22421RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22422 SelectionDAG &DAG,
22423 SmallVectorImpl<SDNode *> &Created) const {
22425 if (isIntDivCheap(N->getValueType(0), Attr))
22426 return SDValue(N, 0); // Lower SDIV as SDIV
22427
22428 // Only perform this transform if short forward branch opt is supported.
22429 if (!Subtarget.hasShortForwardBranchOpt())
22430 return SDValue();
22431 EVT VT = N->getValueType(0);
22432 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22433 return SDValue();
22434
22435 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
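// For example, a divisor of 2048 (k = 11) needs an addend of 2047, which
// still fits in the 12-bit signed immediate of addi/addiw, whereas 4096 would
// need 4095 and is rejected.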
22436 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22437 return SDValue();
22438 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22439}
22440
22441bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22442 EVT VT, const APInt &AndMask) const {
22443 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22444 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22446}
22447
22448unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22449 return Subtarget.getMinimumJumpTableEntries();
22450}
22451
22454 int JTI,
22455 SelectionDAG &DAG) const {
22456 if (Subtarget.hasStdExtZicfilp()) {
22457 // When Zicfilp is enabled, we need to use a software-guarded branch for the
22458 // jump table branch.
22459 SDValue Chain = Value;
22460 // Jump table debug info is only needed if CodeView is enabled.
22462 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22463 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22464 }
22465 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22466}
22467
22468// If an output pattern produces multiple instructions, tablegen may pick an
22469// arbitrary type from an instruction's destination register class to use for the
22470// VT of that MachineSDNode. This VT may be used to look up the representative
22471// register class. If the type isn't legal, the default implementation will
22472// not find a register class.
22473//
22474// Some integer types smaller than XLen are listed in the GPR register class to
22475// support isel patterns for GISel, but are not legal in SelectionDAG. The
22476// arbitrary type tablegen picks may be one of these smaller types.
22477//
22478// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22479// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22480std::pair<const TargetRegisterClass *, uint8_t>
22481RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22482 MVT VT) const {
22483 switch (VT.SimpleTy) {
22484 default:
22485 break;
22486 case MVT::i8:
22487 case MVT::i16:
22488 case MVT::i32:
22490 case MVT::bf16:
22491 case MVT::f16:
22493 }
22494
22496}
22497
22499
22500#define GET_RISCVVIntrinsicsTable_IMPL
22501#include "RISCVGenSearchableTables.inc"
22502
22503} // namespace llvm::RISCVVIntrinsicsTable
22504
22506
22507 // If the function specifically requests inline stack probes, emit them.
22508 if (MF.getFunction().hasFnAttribute("probe-stack"))
22509 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22510 "inline-asm";
22511
22512 return false;
22513}
22514
22516 Align StackAlign) const {
22517 // The default stack probe size is 4096 if the function has no
22518 // stack-probe-size attribute.
22519 const Function &Fn = MF.getFunction();
22520 unsigned StackProbeSize =
22521 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22522 // Round down to the stack alignment.
22523 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
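// For example, "stack-probe-size"="1000" with a 16-byte stack alignment rounds
// down to 992; a value smaller than the alignment rounds to 0, in which case
// the alignment itself is returned below.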
22524 return StackProbeSize ? StackProbeSize : StackAlign.value();
22525}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1329
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1106
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:63
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1902
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2554
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2547
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1857
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2060
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:2002
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition: IRBuilder.h:844
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1772
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2525
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1877
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
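As a rough illustration of the IRBuilderBase helpers listed above, here is a hedged sketch that brackets an atomic add with explicit fences; the orderings, the 4-byte alignment, and the helper name are assumptions for illustration, not any target's actual fencing policy.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit fence(release); atomicrmw add monotonic; fence(acquire) at the builder's
// current insertion point.
static Value *emitFencedAtomicAdd(IRBuilderBase &Builder, Value *Ptr, Value *Incr) {
  Builder.CreateFence(AtomicOrdering::Release);
  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(
      AtomicRMWInst::Add, Ptr, Incr, Align(4), AtomicOrdering::Monotonic);
  Builder.CreateFence(AtomicOrdering::Acquire);
  return RMW;
}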
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:68
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
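The MVT query methods above compose as in this hedged sketch; the chosen type, <vscale x 4 x i32>, and the header path are assumptions used only to show the accessors side by side.

#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

// Build a scalable vector MVT and read back its basic properties.
static void inspectScalableVT() {
  MVT VT = MVT::getScalableVectorVT(MVT::i32, 4);  // <vscale x 4 x i32>
  bool Scalable = VT.isScalableVector();           // true
  unsigned MinElts = VT.getVectorMinNumElements(); // 4
  MVT EltVT = VT.getVectorElementType();           // i32
  MVT HalfVT = VT.getHalfNumVectorElementsVT();    // <vscale x 2 x i32>
  (void)Scalable; (void)MinElts; (void)EltVT; (void)HalfVT;
}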
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
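The MachineInstrBuilder operand helpers above are normally chained off BuildMI, as in this hedged sketch; the instruction description, frame index, and zero offset are placeholders rather than an opcode this file actually emits.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Build "DstReg = <Desc> FI, 0" with an attached memory operand and return the
// underlying MachineInstr.
static MachineInstr *buildFrameLoad(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, const MCInstrDesc &Desc,
                                    Register DstReg, int FI,
                                    MachineMemOperand *MMO) {
  return BuildMI(MBB, I, DL, Desc, DstReg)
      .addFrameIndex(FI)   // base: the stack slot
      .addImm(0)           // immediate offset
      .addMemOperand(MMO)  // describe the memory access
      .getInstr();
}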
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:404
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
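MachineMemOperands like the ones described above are allocated through MachineFunction::getMachineMemOperand; a hedged sketch for a 32-bit, 4-byte-aligned stack load follows (the frame index, memory type, and alignment are illustrative).

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Describe a simple load from fixed stack slot FI.
static MachineMemOperand *makeSpillLoadMMO(MachineFunction &MF, int FI) {
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                 /*MemTy=*/LLT::scalar(32), Align(4));
}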
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
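Lowering decisions key off RISCVSubtarget queries like those above; the helper below is a hedged sketch (its name and the specific check are invented for illustration and do not reproduce this file's actual predicates).

#include "RISCVSubtarget.h"
using namespace llvm;

// Only consider scalable vectors when V is present and ELEN covers the element width.
static bool canUseScalableVectors(const RISCVSubtarget &Subtarget,
                                  unsigned NeededEltBits) {
  if (!Subtarget.hasVInstructions())
    return false;
  return NeededEltBits <= Subtarget.getELen();
}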
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:395
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:503
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:888
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:871
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:904
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
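The SelectionDAG node-construction helpers above are typically composed as in this hedged sketch, which builds an "is negative" test; the operation and types are illustrative and not a specific lowering from this file.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Return a setcc node computing (X < 0) in the target's preferred condition type.
static SDValue buildIsNegative(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT VT = X.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, VT);
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  return DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETLT);
}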
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
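The static ShuffleVectorInst mask classifiers above operate on plain ArrayRef<int> masks; a hedged sketch with two small hand-written masks:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// A reverse mask over 4 elements and a factor-2 de-interleave mask starting at index 0.
static void classifyMasks() {
  int Rev[4] = {3, 2, 1, 0};
  bool IsReverse = ShuffleVectorInst::isReverseMask(Rev, /*NumSrcElts=*/4); // true

  int DeInt[4] = {0, 2, 4, 6};
  unsigned Index;
  bool IsDeinterleave = ShuffleVectorInst::isDeInterleaveMaskOfFactor(
      DeInt, /*Factor=*/2, Index); // true, Index == 0
  (void)IsReverse; (void)IsDeinterleave;
}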
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
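The TargetLoweringBase hooks above are normally invoked from a target's TargetLowering constructor to describe which operations are natively supported. A minimal, hypothetical sketch; MyTargetLowering, configureLegality, and the chosen actions are illustrative, not this backend's actual configuration:
// Assumes this is a member of a TargetLowering subclass, so the inherited
// LegalizeAction enumerators (Legal, Expand, ...) are in scope.
void MyTargetLowering::configureLegality(const TargetRegisterInfo *TRI) {
  setOperationAction(ISD::ADD, MVT::i64, Legal);              // natively supported
  setOperationAction(ISD::SDIV, MVT::i64, Expand);            // let the legalizer expand it
  setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Expand); // no sign-extending i8 load
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);            // no truncating f64 store
  setBooleanContents(ZeroOrOneBooleanContent);                // i1 results are 0 or 1
  computeRegisterProperties(TRI); // must run after all addRegisterClass calls
}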
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
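A hedged sketch of how makeLibCall is typically combined with the RTLIB::get* helpers listed further below to lower an unsupported FP-to-integer conversion through a runtime-library routine; the surrounding function is illustrative:
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
static SDValue lowerFPToSIntViaLibcall(SDValue Op, SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  SDLoc DL(Op);
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
                                         Op.getValueType());
  TargetLowering::MakeLibCallOptions CallOptions;
  // makeLibCall returns {result value, output chain}.
  auto [Result, Chain] = TLI.makeLibCall(DAG, LC, Op.getValueType(),
                                         Op.getOperand(0), CallOptions, DL);
  (void)Chain; // the chain would matter for calls with side effects
  return Result;
}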
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:735
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:384
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1259
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1564
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1564
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1551
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1602
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1582
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1647
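A minimal sketch (illustrative, not this file's code) of how the ISD node predicates and condition-code helpers above are used when inspecting a SETCC node inside a DAG combine:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Return true for an integer equality/inequality compare against zero.
static bool isIntZeroCompare(SDValue Cond) {
  if (Cond.getOpcode() != ISD::SETCC)
    return false;
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  return ISD::isIntEqualitySetCC(CC) && isNullConstant(Cond.getOperand(1));
}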
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns an f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
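A hedged sketch of the RISCVMatInt helpers above: ask how a 64-bit immediate would be materialised and use the sequence length as a cost proxy. The helper name is illustrative; STI is assumed to describe a RISC-V subtarget.
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
static unsigned immSequenceLength(int64_t Imm, const MCSubtargetInfo &STI) {
  // generateInstSeq returns the LUI/ADDI/SLLI/... steps needed to build Imm.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  return Seq.size();
}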
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:555
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1532
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
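A minimal sketch of the MathExtras helpers listed above, rounding a size up to a power of two and taking its base-2 log; the helper is illustrative:
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;
static unsigned log2RoundedUp(uint64_t Bytes) {
  if (isPowerOf2_64(Bytes))
    return Log2_64(Bytes);             // already a power of two
  return Log2_64(PowerOf2Ceil(Bytes)); // round up first, then take log2
}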
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
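A minimal sketch of the range-based STLExtras helpers above (any_of, count_if) applied to a shuffle mask; the helpers here are illustrative:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
// True if any mask element indexes into the second source vector.
static bool usesSecondSource(ArrayRef<int> Mask, int NumSrcElts) {
  return any_of(Mask, [=](int M) { return M >= NumSrcElts; });
}
// Number of undef (-1) lanes in the mask.
static unsigned numUndefLanes(ArrayRef<int> Mask) {
  return count_if(Mask, [](int M) { return M < 0; });
}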
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:317
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
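A hedged sketch of the EVT queries above: build an integer vector type with the same (possibly scalable) element count as an existing vector type. The helper is illustrative.
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;
static EVT getEquivalentIntVectorVT(EVT VT, LLVMContext &Ctx) {
  assert(VT.isVector() && "expected a vector type");
  EVT IntEltVT = EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits());
  // Same element count (fixed or scalable), integer element type; this is
  // what changeVectorElementTypeToInteger() computes for common cases.
  return EVT::getVectorVT(Ctx, IntEltVT, VT.getVectorElementCount());
}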
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
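A minimal sketch of the KnownBits operations above: narrow two known-bit facts to a common width, re-widen with zero extension, and keep only what holds for both. The helper is illustrative.
#include "llvm/Support/KnownBits.h"
using namespace llvm;
static KnownBits mergeNarrowed(const KnownBits &A, const KnownBits &B,
                               unsigned NarrowBits, unsigned WideBits) {
  // trunc() drops high bits; zext() re-widens with known-zero high bits.
  KnownBits NA = A.trunc(NarrowBits).zext(WideBits);
  KnownBits NB = B.trunc(NarrowBits).zext(WideBits);
  // intersectWith() keeps only bits known in both inputs.
  return NA.intersectWith(NB);
}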
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
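A hedged sketch of MachinePointerInfo::getFixedStack and getWithOffset as listed above, describing an access to a known frame index so later alias analysis can reason about it; the function and its parameters are illustrative:
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;
static MachinePointerInfo describeSpillSlot(MachineFunction &MF, int FI,
                                            int64_t Offset) {
  // Equivalent to getFixedStack(MF, FI, Offset); getWithOffset shown explicitly.
  return MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(Offset);
}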
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)