1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
67static cl::opt<unsigned> NumRepeatedDivisors(
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
95 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
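 // The arrays above enumerate every scalable vector type RVV can provide
 // for each element type, from the smallest (possibly fractional) LMUL up
 // to LMUL=8.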
163
164 if (Subtarget.hasVInstructions()) {
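 // Map each legal scalable type to the vector register class matching its
 // LMUL: VR for LMUL <= 1, VRM2/VRM4/VRM8 for the grouped registers.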
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
253 if (!Subtarget.hasVendorXCValu())
257 if (!Subtarget.hasVendorXCValu())
261
262 if (RV64LegalI32 && Subtarget.is64Bit())
264
266
269 if (RV64LegalI32 && Subtarget.is64Bit())
271
273
275
276 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
277 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
278
279 if (Subtarget.is64Bit()) {
281
282 if (!RV64LegalI32) {
285 MVT::i32, Custom);
287 MVT::i32, Custom);
288 if (!Subtarget.hasStdExtZbb())
290 } else {
292 if (Subtarget.hasStdExtZbb()) {
295 }
296 }
298 }
299 if (!Subtarget.hasStdExtZmmul()) {
301 if (RV64LegalI32 && Subtarget.is64Bit())
303 } else if (Subtarget.is64Bit()) {
305 if (!RV64LegalI32)
307 else
309 } else {
311 }
312
313 if (!Subtarget.hasStdExtM()) {
315 XLenVT, Expand);
316 if (RV64LegalI32 && Subtarget.is64Bit())
318 Promote);
319 } else if (Subtarget.is64Bit()) {
320 if (!RV64LegalI32)
322 {MVT::i8, MVT::i16, MVT::i32}, Custom);
323 }
324
325 if (RV64LegalI32 && Subtarget.is64Bit()) {
329 Expand);
330 }
331
334 Expand);
335
337 Custom);
338
339 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
340 if (!RV64LegalI32 && Subtarget.is64Bit())
342 } else if (Subtarget.hasVendorXTHeadBb()) {
343 if (Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXCVbitmanip()) {
348 } else {
350 if (RV64LegalI32 && Subtarget.is64Bit())
352 }
353
354 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
355 // pattern match it directly in isel.
357 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
358 Subtarget.hasVendorXTHeadBb())
359 ? Legal
360 : Expand);
361 if (RV64LegalI32 && Subtarget.is64Bit())
363 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
364 Subtarget.hasVendorXTHeadBb())
365 ? Promote
366 : Expand);
367
368
369 if (Subtarget.hasVendorXCVbitmanip()) {
371 } else {
372 // Zbkb can use rev8+brev8 to implement bitreverse.
374 Subtarget.hasStdExtZbkb() ? Custom : Expand);
375 }
376
377 if (Subtarget.hasStdExtZbb()) {
379 Legal);
380 if (RV64LegalI32 && Subtarget.is64Bit())
382 Promote);
383
384 if (Subtarget.is64Bit()) {
385 if (RV64LegalI32)
387 else
389 }
390 } else if (!Subtarget.hasVendorXCVbitmanip()) {
392 if (RV64LegalI32 && Subtarget.is64Bit())
394 }
395
396 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
397 Subtarget.hasVendorXCVbitmanip()) {
398 // We need the custom lowering to make sure that the resulting sequence
399 // for the 32-bit case is efficient on 64-bit targets.
400 if (Subtarget.is64Bit()) {
401 if (RV64LegalI32) {
403 Subtarget.hasStdExtZbb() ? Legal : Promote);
404 if (!Subtarget.hasStdExtZbb())
406 } else
408 }
409 } else {
411 if (RV64LegalI32 && Subtarget.is64Bit())
413 }
414
415 if (!RV64LegalI32 && Subtarget.is64Bit() &&
416 !Subtarget.hasShortForwardBranchOpt())
418
419 // We can use PseudoCCSUB to implement ABS.
420 if (Subtarget.hasShortForwardBranchOpt())
422
423 if (!Subtarget.hasVendorXTHeadCondMov()) {
425 if (RV64LegalI32 && Subtarget.is64Bit())
427 }
428
429 static const unsigned FPLegalNodeTypes[] = {
436
437 static const ISD::CondCode FPCCToExpand[] = {
441
442 static const unsigned FPOpToExpand[] = {
444 ISD::FREM};
445
446 static const unsigned FPRndMode[] = {
449
450 if (Subtarget.hasStdExtZfhminOrZhinxmin())
452
453 static const unsigned ZfhminZfbfminPromoteOps[] = {
463
464 if (Subtarget.hasStdExtZfbfmin()) {
473 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
475 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
476 // DAGCombiner::visitFP_ROUND probably needs improvements first.
478 }
479
480 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
481 if (Subtarget.hasStdExtZfhOrZhinx()) {
482 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
483 setOperationAction(FPRndMode, MVT::f16,
484 Subtarget.hasStdExtZfa() ? Legal : Custom);
487 } else {
488 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
491 MVT::f16, Legal);
492 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
493 // DAGCombiner::visitFP_ROUND probably needs improvements first.
495 }
496
499 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
502
504 Subtarget.hasStdExtZfa() ? Legal : Promote);
509 MVT::f16, Promote);
510
511 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
512 // complete support for all operations in LegalizeDAG.
517 MVT::f16, Promote);
518
519 // We need to custom promote this.
520 if (Subtarget.is64Bit())
522
524 Subtarget.hasStdExtZfa() ? Legal : Custom);
525 }
526
527 if (Subtarget.hasStdExtFOrZfinx()) {
528 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
529 setOperationAction(FPRndMode, MVT::f32,
530 Subtarget.hasStdExtZfa() ? Legal : Custom);
531 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
535 setOperationAction(FPOpToExpand, MVT::f32, Expand);
536 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
537 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
538 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
539 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
543 Subtarget.isSoftFPABI() ? LibCall : Custom);
546
547 if (Subtarget.hasStdExtZfa()) {
550 } else {
552 }
553 }
554
555 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtDOrZdinx()) {
559 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
560
561 if (!Subtarget.is64Bit())
563
564 if (Subtarget.hasStdExtZfa()) {
565 setOperationAction(FPRndMode, MVT::f64, Legal);
568 } else {
569 if (Subtarget.is64Bit())
570 setOperationAction(FPRndMode, MVT::f64, Custom);
571
573 }
574
577 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
582 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
583 setOperationAction(FPOpToExpand, MVT::f64, Expand);
584 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
585 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
586 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
587 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
591 Subtarget.isSoftFPABI() ? LibCall : Custom);
594 }
595
596 if (Subtarget.is64Bit()) {
599 MVT::i32, Custom);
601 }
602
603 if (Subtarget.hasStdExtFOrZfinx()) {
605 Custom);
606
609 XLenVT, Legal);
610
611 if (RV64LegalI32 && Subtarget.is64Bit())
614 MVT::i32, Legal);
615
618 }
619
622 XLenVT, Custom);
623
625
626 if (Subtarget.is64Bit())
628
629 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
630 // Unfortunately this can't be determined just from the ISA naming string.
632 Subtarget.is64Bit() ? Legal : Custom);
634 Subtarget.is64Bit() ? Legal : Custom);
635
638 if (Subtarget.is64Bit())
640
641 if (Subtarget.hasStdExtZicbop()) {
643 }
644
645 if (Subtarget.hasStdExtA()) {
647 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649 else
651 } else if (Subtarget.hasForcedAtomics()) {
653 } else {
655 }
656
658
660
661 if (getTargetMachine().getTargetTriple().isOSLinux()) {
662 // Custom lowering of llvm.clear_cache.
664 }
665
666 if (Subtarget.hasVInstructions()) {
668
670 if (RV64LegalI32 && Subtarget.is64Bit())
672
673 // RVV intrinsics may have illegal operands.
674 // We also need to custom legalize vmv.x.s.
677 {MVT::i8, MVT::i16}, Custom);
678 if (Subtarget.is64Bit())
680 MVT::i32, Custom);
681 else
683 MVT::i64, Custom);
684
686 MVT::Other, Custom);
687
688 static const unsigned IntegerVPOps[] = {
689 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
690 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
691 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
692 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
693 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
694 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
695 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
696 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
697 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
698 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
699 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
700 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
701 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
702 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
703 ISD::EXPERIMENTAL_VP_SPLAT};
704
705 static const unsigned FloatingPointVPOps[] = {
706 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
707 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
708 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
709 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
710 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
711 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
712 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
713 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
714 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
715 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
716 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
717 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
718 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
719 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
720
721 static const unsigned IntegerVecReduceOps[] = {
725
726 static const unsigned FloatingPointVecReduceOps[] = {
729
730 if (!Subtarget.is64Bit()) {
731 // We must custom-lower certain vXi64 operations on RV32 due to the vector
732 // element type being illegal.
734 MVT::i64, Custom);
735
736 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
737
738 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
739 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
740 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
741 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
742 MVT::i64, Custom);
743 }
744
745 for (MVT VT : BoolVecVTs) {
746 if (!isTypeLegal(VT))
747 continue;
748
750
751 // Mask VTs are custom-expanded into a series of standard nodes
755 VT, Custom);
756
758 Custom);
759
762 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
763 Expand);
764
765 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
766 Custom);
767
768 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
769
772 Custom);
773
775 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
776 Custom);
777
778 // RVV has native int->float & float->int conversions where the
779 // element type sizes are within one power-of-two of each other. Any
780 // wider distances between type sizes have to be lowered as sequences
781 // which progressively narrow the gap in stages.
786 VT, Custom);
788 Custom);
789
790 // Expand all extending loads to types larger than this, and truncating
791 // stores from types larger than this.
793 setTruncStoreAction(VT, OtherVT, Expand);
795 OtherVT, Expand);
796 }
797
798 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
799 ISD::VP_TRUNCATE, ISD::VP_SETCC},
800 VT, Custom);
801
804
806
807 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
808 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
809
812 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
813 }
814
815 for (MVT VT : IntVecVTs) {
816 if (!isTypeLegal(VT))
817 continue;
818
821
822 // Vectors implement MULHS/MULHU.
824
825 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
826 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
828
830 Legal);
831
833
834 // Custom-lower extensions and truncations from/to mask types.
836 VT, Custom);
837
838 // RVV has native int->float & float->int conversions where the
839 // element type sizes are within one power-of-two of each other. Any
840 // wider distances between type sizes have to be lowered as sequences
841 // which progressively narrow the gap in stages.
846 VT, Custom);
848 Custom);
852 VT, Legal);
853
854 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
855 // nodes which truncate by one power of two at a time.
857
858 // Custom-lower insert/extract operations to simplify patterns.
860 Custom);
861
862 // Custom-lower reduction operations to set up the corresponding custom
863 // nodes' operands.
864 setOperationAction(IntegerVecReduceOps, VT, Custom);
865
866 setOperationAction(IntegerVPOps, VT, Custom);
867
869
871 VT, Custom);
872
874 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
875 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
876 VT, Custom);
877
880 VT, Custom);
881
884
886
888 setTruncStoreAction(VT, OtherVT, Expand);
890 OtherVT, Expand);
891 }
892
895
896 // Splice
898
899 if (Subtarget.hasStdExtZvkb()) {
901 setOperationAction(ISD::VP_BSWAP, VT, Custom);
902 } else {
903 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
905 }
906
907 if (Subtarget.hasStdExtZvbb()) {
909 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
910 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
911 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
912 VT, Custom);
913 } else {
914 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
916 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
917 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
918 VT, Expand);
919
920 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
921 // range of f32.
922 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
923 if (isTypeLegal(FloatVT)) {
925 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
926 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
927 VT, Custom);
928 }
929 }
930 }
931
932 // Expand various CCs to best match the RVV ISA, which natively supports UNE
933 // but no other unordered comparisons, and supports all ordered comparisons
934 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
935 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
936 // and we pattern-match those back to the "original", swapping operands once
937 // more. This way we catch both operations and both "vf" and "fv" forms with
938 // fewer patterns.
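 // For example, (setogt x, y) is first expanded to (setolt y, x); isel then
 // pattern-matches the swapped form back to a greater-than compare on the
 // original operands.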
939 static const ISD::CondCode VFPCCToExpand[] = {
943 };
944
945 // TODO: support more ops.
946 static const unsigned ZvfhminPromoteOps[] = {
954
955 // TODO: support more vp ops.
956 static const unsigned ZvfhminPromoteVPOps[] = {
957 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
958 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
959 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
960 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
961 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
962 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
963 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
964 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
965 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
966
967 // Sets common operation actions on RVV floating-point vector types.
968 const auto SetCommonVFPActions = [&](MVT VT) {
970 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
971 // sizes are within one power-of-two of each other. Therefore conversions
972 // between vXf16 and vXf64 must be lowered as sequences which convert via
973 // vXf32.
976 // Custom-lower insert/extract operations to simplify patterns.
978 Custom);
979 // Expand various condition codes (explained above).
980 setCondCodeAction(VFPCCToExpand, VT, Expand);
981
984
988 VT, Custom);
989
990 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
991
992 // Expand FP operations that need libcalls.
1004
1006
1008
1010 VT, Custom);
1011
1013 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1014 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1015 VT, Custom);
1016
1019
1022 VT, Custom);
1023
1026
1028
1029 setOperationAction(FloatingPointVPOps, VT, Custom);
1030
1032 Custom);
1035 VT, Legal);
1040 VT, Custom);
1041 };
1042
1043 // Sets common extload/truncstore actions on RVV floating-point vector
1044 // types.
1045 const auto SetCommonVFPExtLoadTruncStoreActions =
1046 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1047 for (auto SmallVT : SmallerVTs) {
1048 setTruncStoreAction(VT, SmallVT, Expand);
1049 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1050 }
1051 };
1052
1053 if (Subtarget.hasVInstructionsF16()) {
1054 for (MVT VT : F16VecVTs) {
1055 if (!isTypeLegal(VT))
1056 continue;
1057 SetCommonVFPActions(VT);
1058 }
1059 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1060 for (MVT VT : F16VecVTs) {
1061 if (!isTypeLegal(VT))
1062 continue;
1065 Custom);
1066 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1067 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1068 Custom);
1071 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1072 VT, Custom);
1075 VT, Custom);
1076 if (Subtarget.hasStdExtZfhmin())
1078 // load/store
1080
1081 // Custom split nxv32f16 since nxv32f32 is not legal.
1082 if (VT == MVT::nxv32f16) {
1083 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1084 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1085 continue;
1086 }
1087 // Add more promote ops.
1088 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1089 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1090 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1091 }
1092 }
1093
1094 // TODO: Could we merge some code with zvfhmin?
1095 if (Subtarget.hasVInstructionsBF16()) {
1096 for (MVT VT : BF16VecVTs) {
1097 if (!isTypeLegal(VT))
1098 continue;
1100 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1102 Custom);
1105 VT, Custom);
1107 if (Subtarget.hasStdExtZfbfmin())
1109 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1110 Custom);
1112 // TODO: Promote to fp32.
1113 }
1114 }
1115
1116 if (Subtarget.hasVInstructionsF32()) {
1117 for (MVT VT : F32VecVTs) {
1118 if (!isTypeLegal(VT))
1119 continue;
1120 SetCommonVFPActions(VT);
1121 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1122 }
1123 }
1124
1125 if (Subtarget.hasVInstructionsF64()) {
1126 for (MVT VT : F64VecVTs) {
1127 if (!isTypeLegal(VT))
1128 continue;
1129 SetCommonVFPActions(VT);
1130 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1131 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1132 }
1133 }
1134
1135 if (Subtarget.useRVVForFixedLengthVectors()) {
1137 if (!useRVVForFixedLengthVectorVT(VT))
1138 continue;
1139
1140 // By default everything must be expanded.
1141 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1144 setTruncStoreAction(VT, OtherVT, Expand);
1146 OtherVT, Expand);
1147 }
1148
1149 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1150 // expansion to a build_vector of 0s.
1152
1153 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1155 Custom);
1156
1158 Custom);
1159
1161 VT, Custom);
1162
1164
1166
1168
1170
1172
1174
1177 Custom);
1178
1180 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1181 Custom);
1182
1184 {
1193 },
1194 VT, Custom);
1196 Custom);
1197
1199
1200 // Operations below are different between masks and other vectors.
1201 if (VT.getVectorElementType() == MVT::i1) {
1202 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1203 ISD::OR, ISD::XOR},
1204 VT, Custom);
1205
1206 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1207 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1208 VT, Custom);
1209
1210 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1211 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1212 continue;
1213 }
1214
1215 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1216 // it before type legalization for i64 vectors on RV32. It will then be
1217 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1218 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1219 // improvements first.
1220 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1223 }
1224
1227
1228 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1229 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1230 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1231 ISD::VP_SCATTER},
1232 VT, Custom);
1233
1237 VT, Custom);
1238
1241
1243
1244 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1245 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1247
1251 VT, Custom);
1252
1254
1257
1258 // Custom-lower reduction operations to set up the corresponding custom
1259 // nodes' operands.
1263 VT, Custom);
1264
1265 setOperationAction(IntegerVPOps, VT, Custom);
1266
1267 if (Subtarget.hasStdExtZvkb())
1269
1270 if (Subtarget.hasStdExtZvbb()) {
1273 VT, Custom);
1274 } else {
1275 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1276 // range of f32.
1277 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1278 if (isTypeLegal(FloatVT))
1281 Custom);
1282 }
1283 }
1284
1286 // There are no extending loads or truncating stores.
1287 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1288 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1289 setTruncStoreAction(VT, InnerVT, Expand);
1290 }
1291
1292 if (!useRVVForFixedLengthVectorVT(VT))
1293 continue;
1294
1295 // By default everything must be expanded.
1296 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1298
1299 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1300 // expansion to a build_vector of 0s.
1302
1305 VT, Custom);
1306
1307 // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
1308 // vp_stride_load/store, vp_gather/scatter can be hoisted to here.
1310
1313 Custom);
1314
1315 if (VT.getVectorElementType() == MVT::f16 &&
1316 !Subtarget.hasVInstructionsF16()) {
1317 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1319 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1320 Custom);
1322 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1323 VT, Custom);
1325 if (Subtarget.hasStdExtZfhmin()) {
1326 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1328 } else {
1329 // We need to custom legalize f16 build vectors if Zfhmin isn't
1330 // available.
1332 }
1333 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1334 // Don't promote f16 vector operations to f32 if f32 vector type is
1335 // not legal.
1336 // TODO: could split the f16 vector into two vectors and do promotion.
1337 if (!isTypeLegal(F32VecVT))
1338 continue;
1339 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1340 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1341 continue;
1342 }
1343
1344 if (VT.getVectorElementType() == MVT::bf16) {
1345 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1346 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1349 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1350 Custom);
1351 // TODO: Promote to fp32.
1352 continue;
1353 }
1354
1357 VT, Custom);
1358
1361
1362 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1363 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1364 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1365 ISD::VP_SCATTER},
1366 VT, Custom);
1367
1372 VT, Custom);
1373
1376 VT, Custom);
1377
1378 setCondCodeAction(VFPCCToExpand, VT, Expand);
1379
1382
1384
1385 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1386
1387 setOperationAction(FloatingPointVPOps, VT, Custom);
1388
1395 VT, Custom);
1396 }
1397
1398 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1399 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1400 Custom);
1401 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1403 if (Subtarget.hasStdExtFOrZfinx())
1405 if (Subtarget.hasStdExtDOrZdinx())
1407 }
1408 }
1409
1410 if (Subtarget.hasStdExtA()) {
1412 if (RV64LegalI32 && Subtarget.is64Bit())
1414 }
1415
1416 if (Subtarget.hasForcedAtomics()) {
1417 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1423 XLenVT, LibCall);
1424 }
1425
1426 if (Subtarget.hasVendorXTHeadMemIdx()) {
1427 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1428 setIndexedLoadAction(im, MVT::i8, Legal);
1429 setIndexedStoreAction(im, MVT::i8, Legal);
1430 setIndexedLoadAction(im, MVT::i16, Legal);
1431 setIndexedStoreAction(im, MVT::i16, Legal);
1432 setIndexedLoadAction(im, MVT::i32, Legal);
1433 setIndexedStoreAction(im, MVT::i32, Legal);
1434
1435 if (Subtarget.is64Bit()) {
1436 setIndexedLoadAction(im, MVT::i64, Legal);
1437 setIndexedStoreAction(im, MVT::i64, Legal);
1438 }
1439 }
1440 }
1441
1442 if (Subtarget.hasVendorXCVmem()) {
1446
1450 }
1451
1452 if (Subtarget.hasVendorXCValu()) {
1460 }
1461
1462 // Function alignments.
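 // Functions only need 2-byte alignment when 16-bit compressed instructions
 // (C or Zca) are available; otherwise the minimum is the 4-byte instruction
 // size.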
1463 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1464 setMinFunctionAlignment(FunctionAlignment);
1465 // Set preferred alignments.
1468
1472 if (Subtarget.is64Bit())
1474
1475 if (Subtarget.hasStdExtFOrZfinx())
1477
1478 if (Subtarget.hasStdExtZbb())
1480
1481 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1482 Subtarget.hasStdExtV())
1484
1485 if (Subtarget.hasStdExtZbkb())
1487 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1489 if (Subtarget.hasStdExtFOrZfinx())
1492 if (Subtarget.hasVInstructions())
1494 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1497 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1500 if (Subtarget.hasVendorXTHeadMemPair())
1502 if (Subtarget.useRVVForFixedLengthVectors())
1504
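 // Use the standard compiler-rt/libgcc soft-float names for f16 <-> f32
 // conversions that end up as libcalls.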
1505 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1506 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1507
1508 // Disable strict node mutation.
1509 IsStrictFPEnabled = true;
1510
1511 // Let the subtarget decide if a predictable select is more expensive than the
1512 // corresponding branch. This information is used in CGP/SelectOpt to decide
1513 // when to convert selects into branches.
1514 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1515}
1516
1517EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1518 LLVMContext &Context,
1519 EVT VT) const {
1520 if (!VT.isVector())
1521 return getPointerTy(DL);
1522 if (Subtarget.hasVInstructions() &&
1523 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1524 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1525 return VT.changeVectorElementTypeToInteger();
1526}
1527
1528MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1529 return Subtarget.getXLenVT();
1530}
1531
1532// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1533bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1534 unsigned VF,
1535 bool IsScalable) const {
1536 if (!Subtarget.hasVInstructions())
1537 return true;
1538
1539 if (!IsScalable)
1540 return true;
1541
1542 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1543 return true;
1544
1545 // Don't allow VF=1 if those types aren't legal.
1546 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1547 return true;
1548
1549 // VLEN=32 support is incomplete.
1550 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1551 return true;
1552
1553 // The maximum VF is for the smallest element width with LMUL=8.
1554 // VF must be a power of 2.
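 // With RVVBitsPerBlock = 64 this gives MaxVF = 64, i.e. e8 elements at
 // LMUL=8.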
1555 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1556 return VF > MaxVF || !isPowerOf2_32(VF);
1557}
1558
1560 return !Subtarget.hasVInstructions() ||
1561 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1562}
1563
1564bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1565 const CallInst &I,
1566 MachineFunction &MF,
1567 unsigned Intrinsic) const {
1568 auto &DL = I.getDataLayout();
1569
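 // Shared helper for the RVV load/store intrinsics below: it records the
 // memory VT and alignment, and uses either the pointer operand itself or
 // just its address space, depending on whether the access is known to start
 // at the pointer.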
1570 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1571 bool IsUnitStrided, bool UsePtrVal = false) {
1573 // We can't use ptrVal if the intrinsic can access memory before the
1574 // pointer. This means we can't use it for strided or indexed intrinsics.
1575 if (UsePtrVal)
1576 Info.ptrVal = I.getArgOperand(PtrOp);
1577 else
1578 Info.fallbackAddressSpace =
1579 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1580 Type *MemTy;
1581 if (IsStore) {
1582 // Store value is the first operand.
1583 MemTy = I.getArgOperand(0)->getType();
1584 } else {
1585 // Use return type. If it's segment load, return type is a struct.
1586 MemTy = I.getType();
1587 if (MemTy->isStructTy())
1588 MemTy = MemTy->getStructElementType(0);
1589 }
1590 if (!IsUnitStrided)
1591 MemTy = MemTy->getScalarType();
1592
1593 Info.memVT = getValueType(DL, MemTy);
1594 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1596 Info.flags |=
1598 return true;
1599 };
1600
1601 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1602 Info.flags |= MachineMemOperand::MONonTemporal;
1603
1605 switch (Intrinsic) {
1606 default:
1607 return false;
1608 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1609 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1610 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1611 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1612 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1613 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1614 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1615 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1616 case Intrinsic::riscv_masked_cmpxchg_i32:
1617 Info.opc = ISD::INTRINSIC_W_CHAIN;
1618 Info.memVT = MVT::i32;
1619 Info.ptrVal = I.getArgOperand(0);
1620 Info.offset = 0;
1621 Info.align = Align(4);
1624 return true;
1625 case Intrinsic::riscv_seg2_load:
1626 case Intrinsic::riscv_seg3_load:
1627 case Intrinsic::riscv_seg4_load:
1628 case Intrinsic::riscv_seg5_load:
1629 case Intrinsic::riscv_seg6_load:
1630 case Intrinsic::riscv_seg7_load:
1631 case Intrinsic::riscv_seg8_load:
1632 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1633 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1634 case Intrinsic::riscv_seg2_store:
1635 case Intrinsic::riscv_seg3_store:
1636 case Intrinsic::riscv_seg4_store:
1637 case Intrinsic::riscv_seg5_store:
1638 case Intrinsic::riscv_seg6_store:
1639 case Intrinsic::riscv_seg7_store:
1640 case Intrinsic::riscv_seg8_store:
1641 // Operands are (vec, ..., vec, ptr, vl)
1642 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1643 /*IsStore*/ true,
1644 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1645 case Intrinsic::riscv_vle:
1646 case Intrinsic::riscv_vle_mask:
1647 case Intrinsic::riscv_vleff:
1648 case Intrinsic::riscv_vleff_mask:
1649 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1650 /*IsStore*/ false,
1651 /*IsUnitStrided*/ true,
1652 /*UsePtrVal*/ true);
1653 case Intrinsic::riscv_vse:
1654 case Intrinsic::riscv_vse_mask:
1655 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1656 /*IsStore*/ true,
1657 /*IsUnitStrided*/ true,
1658 /*UsePtrVal*/ true);
1659 case Intrinsic::riscv_vlse:
1660 case Intrinsic::riscv_vlse_mask:
1661 case Intrinsic::riscv_vloxei:
1662 case Intrinsic::riscv_vloxei_mask:
1663 case Intrinsic::riscv_vluxei:
1664 case Intrinsic::riscv_vluxei_mask:
1665 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1666 /*IsStore*/ false,
1667 /*IsUnitStrided*/ false);
1668 case Intrinsic::riscv_vsse:
1669 case Intrinsic::riscv_vsse_mask:
1670 case Intrinsic::riscv_vsoxei:
1671 case Intrinsic::riscv_vsoxei_mask:
1672 case Intrinsic::riscv_vsuxei:
1673 case Intrinsic::riscv_vsuxei_mask:
1674 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1675 /*IsStore*/ true,
1676 /*IsUnitStrided*/ false);
1677 case Intrinsic::riscv_vlseg2:
1678 case Intrinsic::riscv_vlseg3:
1679 case Intrinsic::riscv_vlseg4:
1680 case Intrinsic::riscv_vlseg5:
1681 case Intrinsic::riscv_vlseg6:
1682 case Intrinsic::riscv_vlseg7:
1683 case Intrinsic::riscv_vlseg8:
1684 case Intrinsic::riscv_vlseg2ff:
1685 case Intrinsic::riscv_vlseg3ff:
1686 case Intrinsic::riscv_vlseg4ff:
1687 case Intrinsic::riscv_vlseg5ff:
1688 case Intrinsic::riscv_vlseg6ff:
1689 case Intrinsic::riscv_vlseg7ff:
1690 case Intrinsic::riscv_vlseg8ff:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1694 case Intrinsic::riscv_vlseg2_mask:
1695 case Intrinsic::riscv_vlseg3_mask:
1696 case Intrinsic::riscv_vlseg4_mask:
1697 case Intrinsic::riscv_vlseg5_mask:
1698 case Intrinsic::riscv_vlseg6_mask:
1699 case Intrinsic::riscv_vlseg7_mask:
1700 case Intrinsic::riscv_vlseg8_mask:
1701 case Intrinsic::riscv_vlseg2ff_mask:
1702 case Intrinsic::riscv_vlseg3ff_mask:
1703 case Intrinsic::riscv_vlseg4ff_mask:
1704 case Intrinsic::riscv_vlseg5ff_mask:
1705 case Intrinsic::riscv_vlseg6ff_mask:
1706 case Intrinsic::riscv_vlseg7ff_mask:
1707 case Intrinsic::riscv_vlseg8ff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1711 case Intrinsic::riscv_vlsseg2:
1712 case Intrinsic::riscv_vlsseg3:
1713 case Intrinsic::riscv_vlsseg4:
1714 case Intrinsic::riscv_vlsseg5:
1715 case Intrinsic::riscv_vlsseg6:
1716 case Intrinsic::riscv_vlsseg7:
1717 case Intrinsic::riscv_vlsseg8:
1718 case Intrinsic::riscv_vloxseg2:
1719 case Intrinsic::riscv_vloxseg3:
1720 case Intrinsic::riscv_vloxseg4:
1721 case Intrinsic::riscv_vloxseg5:
1722 case Intrinsic::riscv_vloxseg6:
1723 case Intrinsic::riscv_vloxseg7:
1724 case Intrinsic::riscv_vloxseg8:
1725 case Intrinsic::riscv_vluxseg2:
1726 case Intrinsic::riscv_vluxseg3:
1727 case Intrinsic::riscv_vluxseg4:
1728 case Intrinsic::riscv_vluxseg5:
1729 case Intrinsic::riscv_vluxseg6:
1730 case Intrinsic::riscv_vluxseg7:
1731 case Intrinsic::riscv_vluxseg8:
1732 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1733 /*IsStore*/ false,
1734 /*IsUnitStrided*/ false);
1735 case Intrinsic::riscv_vlsseg2_mask:
1736 case Intrinsic::riscv_vlsseg3_mask:
1737 case Intrinsic::riscv_vlsseg4_mask:
1738 case Intrinsic::riscv_vlsseg5_mask:
1739 case Intrinsic::riscv_vlsseg6_mask:
1740 case Intrinsic::riscv_vlsseg7_mask:
1741 case Intrinsic::riscv_vlsseg8_mask:
1742 case Intrinsic::riscv_vloxseg2_mask:
1743 case Intrinsic::riscv_vloxseg3_mask:
1744 case Intrinsic::riscv_vloxseg4_mask:
1745 case Intrinsic::riscv_vloxseg5_mask:
1746 case Intrinsic::riscv_vloxseg6_mask:
1747 case Intrinsic::riscv_vloxseg7_mask:
1748 case Intrinsic::riscv_vloxseg8_mask:
1749 case Intrinsic::riscv_vluxseg2_mask:
1750 case Intrinsic::riscv_vluxseg3_mask:
1751 case Intrinsic::riscv_vluxseg4_mask:
1752 case Intrinsic::riscv_vluxseg5_mask:
1753 case Intrinsic::riscv_vluxseg6_mask:
1754 case Intrinsic::riscv_vluxseg7_mask:
1755 case Intrinsic::riscv_vluxseg8_mask:
1756 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1757 /*IsStore*/ false,
1758 /*IsUnitStrided*/ false);
1759 case Intrinsic::riscv_vsseg2:
1760 case Intrinsic::riscv_vsseg3:
1761 case Intrinsic::riscv_vsseg4:
1762 case Intrinsic::riscv_vsseg5:
1763 case Intrinsic::riscv_vsseg6:
1764 case Intrinsic::riscv_vsseg7:
1765 case Intrinsic::riscv_vsseg8:
1766 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1767 /*IsStore*/ true,
1768 /*IsUnitStrided*/ false);
1769 case Intrinsic::riscv_vsseg2_mask:
1770 case Intrinsic::riscv_vsseg3_mask:
1771 case Intrinsic::riscv_vsseg4_mask:
1772 case Intrinsic::riscv_vsseg5_mask:
1773 case Intrinsic::riscv_vsseg6_mask:
1774 case Intrinsic::riscv_vsseg7_mask:
1775 case Intrinsic::riscv_vsseg8_mask:
1776 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1777 /*IsStore*/ true,
1778 /*IsUnitStrided*/ false);
1779 case Intrinsic::riscv_vssseg2:
1780 case Intrinsic::riscv_vssseg3:
1781 case Intrinsic::riscv_vssseg4:
1782 case Intrinsic::riscv_vssseg5:
1783 case Intrinsic::riscv_vssseg6:
1784 case Intrinsic::riscv_vssseg7:
1785 case Intrinsic::riscv_vssseg8:
1786 case Intrinsic::riscv_vsoxseg2:
1787 case Intrinsic::riscv_vsoxseg3:
1788 case Intrinsic::riscv_vsoxseg4:
1789 case Intrinsic::riscv_vsoxseg5:
1790 case Intrinsic::riscv_vsoxseg6:
1791 case Intrinsic::riscv_vsoxseg7:
1792 case Intrinsic::riscv_vsoxseg8:
1793 case Intrinsic::riscv_vsuxseg2:
1794 case Intrinsic::riscv_vsuxseg3:
1795 case Intrinsic::riscv_vsuxseg4:
1796 case Intrinsic::riscv_vsuxseg5:
1797 case Intrinsic::riscv_vsuxseg6:
1798 case Intrinsic::riscv_vsuxseg7:
1799 case Intrinsic::riscv_vsuxseg8:
1800 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1801 /*IsStore*/ true,
1802 /*IsUnitStrided*/ false);
1803 case Intrinsic::riscv_vssseg2_mask:
1804 case Intrinsic::riscv_vssseg3_mask:
1805 case Intrinsic::riscv_vssseg4_mask:
1806 case Intrinsic::riscv_vssseg5_mask:
1807 case Intrinsic::riscv_vssseg6_mask:
1808 case Intrinsic::riscv_vssseg7_mask:
1809 case Intrinsic::riscv_vssseg8_mask:
1810 case Intrinsic::riscv_vsoxseg2_mask:
1811 case Intrinsic::riscv_vsoxseg3_mask:
1812 case Intrinsic::riscv_vsoxseg4_mask:
1813 case Intrinsic::riscv_vsoxseg5_mask:
1814 case Intrinsic::riscv_vsoxseg6_mask:
1815 case Intrinsic::riscv_vsoxseg7_mask:
1816 case Intrinsic::riscv_vsoxseg8_mask:
1817 case Intrinsic::riscv_vsuxseg2_mask:
1818 case Intrinsic::riscv_vsuxseg3_mask:
1819 case Intrinsic::riscv_vsuxseg4_mask:
1820 case Intrinsic::riscv_vsuxseg5_mask:
1821 case Intrinsic::riscv_vsuxseg6_mask:
1822 case Intrinsic::riscv_vsuxseg7_mask:
1823 case Intrinsic::riscv_vsuxseg8_mask:
1824 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1825 /*IsStore*/ true,
1826 /*IsUnitStrided*/ false);
1827 }
1828}
1829
1830bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1831 const AddrMode &AM, Type *Ty,
1832 unsigned AS,
1833 Instruction *I) const {
1834 // No global is ever allowed as a base.
1835 if (AM.BaseGV)
1836 return false;
1837
1838 // RVV instructions only support register addressing.
1839 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1840 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1841
1842 // Require a 12-bit signed offset.
1843 if (!isInt<12>(AM.BaseOffs))
1844 return false;
1845
1846 switch (AM.Scale) {
1847 case 0: // "r+i" or just "i", depending on HasBaseReg.
1848 break;
1849 case 1:
1850 if (!AM.HasBaseReg) // allow "r+i".
1851 break;
1852 return false; // disallow "r+r" or "r+r+i".
1853 default:
1854 return false;
1855 }
1856
1857 return true;
1858}
1859
1861 return isInt<12>(Imm);
1862}
1863
1865 return isInt<12>(Imm);
1866}
1867
1868// On RV32, 64-bit integers are split into their high and low parts and held
1869// in two different registers, so the trunc is free since the low register can
1870// just be used.
1871// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1872// isTruncateFree?
1874 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1875 return false;
1876 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1877 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1878 return (SrcBits == 64 && DestBits == 32);
1879}
1880
1882 // We consider i64->i32 free on RV64 since we have good selection of W
1883 // instructions that make promoting operations back to i64 free in many cases.
1884 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1885 !DstVT.isInteger())
1886 return false;
1887 unsigned SrcBits = SrcVT.getSizeInBits();
1888 unsigned DestBits = DstVT.getSizeInBits();
1889 return (SrcBits == 64 && DestBits == 32);
1890}
1891
1893 EVT SrcVT = Val.getValueType();
1894 // free truncate from vnsrl and vnsra
1895 if (Subtarget.hasStdExtV() &&
1896 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1897 SrcVT.isVector() && VT2.isVector()) {
1898 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1899 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1900 if (SrcBits == DestBits * 2) {
1901 return true;
1902 }
1903 }
1904 return TargetLowering::isTruncateFree(Val, VT2);
1905}
1906
1908 // Zexts are free if they can be combined with a load.
1909 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1910 // poorly with type legalization of compares preferring sext.
1911 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1912 EVT MemVT = LD->getMemoryVT();
1913 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1914 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1915 LD->getExtensionType() == ISD::ZEXTLOAD))
1916 return true;
1917 }
1918
1919 return TargetLowering::isZExtFree(Val, VT2);
1920}
1921
1923 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1924}
1925
1927 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1928}
1929
1931 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1932}
1933
1935 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1936 Subtarget.hasVendorXCVbitmanip();
1937}
1938
1940 const Instruction &AndI) const {
1941 // We expect to be able to match a bit extraction instruction if the Zbs
1942 // extension is supported and the mask is a power of two. However, we
1943 // conservatively return false if the mask would fit in an ANDI instruction,
1944 // on the basis that it's possible the sinking+duplication of the AND in
1945 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1946 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1947 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1948 return false;
1949 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1950 if (!Mask)
1951 return false;
1952 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1953}
1954
1956 EVT VT = Y.getValueType();
1957
1958 // FIXME: Support vectors once we have tests.
1959 if (VT.isVector())
1960 return false;
1961
1962 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1963 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1964}
1965
1967 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1968 if (Subtarget.hasStdExtZbs())
1969 return X.getValueType().isScalarInteger();
1970 auto *C = dyn_cast<ConstantSDNode>(Y);
1971 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1972 if (Subtarget.hasVendorXTHeadBs())
1973 return C != nullptr;
1974 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
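 // The mask (1 << Y) must also fit in ANDI's 12-bit signed immediate, which
 // limits the bit position to 10.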
1975 return C && C->getAPIntValue().ule(10);
1976}
1977
1979 EVT VT) const {
1980 // Only enable for rvv.
1981 if (!VT.isVector() || !Subtarget.hasVInstructions())
1982 return false;
1983
1984 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1985 return false;
1986
1987 return true;
1988}
1989
1991 Type *Ty) const {
1992 assert(Ty->isIntegerTy());
1993
1994 unsigned BitSize = Ty->getIntegerBitWidth();
1995 if (BitSize > Subtarget.getXLen())
1996 return false;
1997
1998 // Fast path, assume 32-bit immediates are cheap.
1999 int64_t Val = Imm.getSExtValue();
2000 if (isInt<32>(Val))
2001 return true;
2002
2003 // A constant pool entry may be more aligned than the load we're trying to
2004 // replace. If we don't support unaligned scalar mem, prefer the constant
2005 // pool.
2006 // TODO: Can the caller pass down the alignment?
2007 if (!Subtarget.enableUnalignedScalarMem())
2008 return true;
2009
2010 // Prefer to keep the load if it would require many instructions.
2011 // This uses the same threshold we use for constant pools but doesn't
2012 // check useConstantPoolForLargeInts.
2013 // TODO: Should we keep the load only when we're definitely going to emit a
2014 // constant pool?
2015
2016 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2017 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2018}
2019
2023 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2024 SelectionDAG &DAG) const {
2025 // One interesting pattern that we'd want to form is 'bit extract':
2026 // ((1 >> Y) & 1) ==/!= 0
2027 // But we also need to be careful not to try to reverse that fold.
2028
2029 // Is this '((1 >> Y) & 1)'?
2030 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2031 return false; // Keep the 'bit extract' pattern.
2032
2033 // Will this be '((1 >> Y) & 1)' after the transform?
2034 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2035 return true; // Do form the 'bit extract' pattern.
2036
2037 // If 'X' is a constant, and we transform, then we will immediately
2038 // try to undo the fold, thus causing endless combine loop.
2039 // So only do the transform if X is not a constant. This matches the default
2040 // implementation of this function.
2041 return !XC;
2042}
2043
2044bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2045 switch (Opcode) {
2046 case Instruction::Add:
2047 case Instruction::Sub:
2048 case Instruction::Mul:
2049 case Instruction::And:
2050 case Instruction::Or:
2051 case Instruction::Xor:
2052 case Instruction::FAdd:
2053 case Instruction::FSub:
2054 case Instruction::FMul:
2055 case Instruction::FDiv:
2056 case Instruction::ICmp:
2057 case Instruction::FCmp:
2058 return true;
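 // These only allow splatting operand 1, which corresponds to the
 // vector-scalar (.vx/.vi) forms such as vsll.vx, vdivu.vx and vmerge.vxm.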
2059 case Instruction::Shl:
2060 case Instruction::LShr:
2061 case Instruction::AShr:
2062 case Instruction::UDiv:
2063 case Instruction::SDiv:
2064 case Instruction::URem:
2065 case Instruction::SRem:
2066 case Instruction::Select:
2067 return Operand == 1;
2068 default:
2069 return false;
2070 }
2071}
2072
2073
2075 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2076 return false;
2077
2078 if (canSplatOperand(I->getOpcode(), Operand))
2079 return true;
2080
2081 auto *II = dyn_cast<IntrinsicInst>(I);
2082 if (!II)
2083 return false;
2084
2085 switch (II->getIntrinsicID()) {
2086 case Intrinsic::fma:
2087 case Intrinsic::vp_fma:
2088 return Operand == 0 || Operand == 1;
2089 case Intrinsic::vp_shl:
2090 case Intrinsic::vp_lshr:
2091 case Intrinsic::vp_ashr:
2092 case Intrinsic::vp_udiv:
2093 case Intrinsic::vp_sdiv:
2094 case Intrinsic::vp_urem:
2095 case Intrinsic::vp_srem:
2096 case Intrinsic::ssub_sat:
2097 case Intrinsic::vp_ssub_sat:
2098 case Intrinsic::usub_sat:
2099 case Intrinsic::vp_usub_sat:
2100 return Operand == 1;
2101 // These intrinsics are commutative.
2102 case Intrinsic::vp_add:
2103 case Intrinsic::vp_mul:
2104 case Intrinsic::vp_and:
2105 case Intrinsic::vp_or:
2106 case Intrinsic::vp_xor:
2107 case Intrinsic::vp_fadd:
2108 case Intrinsic::vp_fmul:
2109 case Intrinsic::vp_icmp:
2110 case Intrinsic::vp_fcmp:
2111 case Intrinsic::smin:
2112 case Intrinsic::vp_smin:
2113 case Intrinsic::umin:
2114 case Intrinsic::vp_umin:
2115 case Intrinsic::smax:
2116 case Intrinsic::vp_smax:
2117 case Intrinsic::umax:
2118 case Intrinsic::vp_umax:
2119 case Intrinsic::sadd_sat:
2120 case Intrinsic::vp_sadd_sat:
2121 case Intrinsic::uadd_sat:
2122 case Intrinsic::vp_uadd_sat:
2123 // These intrinsics have 'vr' versions.
2124 case Intrinsic::vp_sub:
2125 case Intrinsic::vp_fsub:
2126 case Intrinsic::vp_fdiv:
2127 return Operand == 0 || Operand == 1;
2128 default:
2129 return false;
2130 }
2131}
2132
2133/// Check if sinking \p I's operands to I's basic block is profitable, because
2134/// the operands can be folded into a target instruction, e.g.
2135/// splats of scalars can fold into vector instructions.
2137 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2138 using namespace llvm::PatternMatch;
2139
2140 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2141 return false;
2142
2143 // Don't sink splat operands if the target prefers not to. Some targets require
2144 // S2V transfer buffers and we can run out of them copying the same value
2145 // repeatedly.
2146 // FIXME: It could still be worth doing if it would improve vector register
2147 // pressure and prevent a vector spill.
2148 if (!Subtarget.sinkSplatOperands())
2149 return false;
2150
2151 for (auto OpIdx : enumerate(I->operands())) {
2152 if (!canSplatOperand(I, OpIdx.index()))
2153 continue;
2154
2155 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2156 // Make sure we are not already sinking this operand
2157 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2158 continue;
2159
2160 // We are looking for a splat that can be sunk.
2161 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
2162 m_Undef(), m_ZeroMask())))
2163 continue;
2164
2165 // Don't sink i1 splats.
2166 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2167 continue;
2168
2169 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2170 // and vector registers.
2171 for (Use &U : Op->uses()) {
2172 Instruction *Insn = cast<Instruction>(U.getUser());
2173 if (!canSplatOperand(Insn, U.getOperandNo()))
2174 return false;
2175 }
2176
2177 Ops.push_back(&Op->getOperandUse(0));
2178 Ops.push_back(&OpIdx.value());
2179 }
2180 return true;
2181}
2182
2184 unsigned Opc = VecOp.getOpcode();
2185
2186 // Assume target opcodes can't be scalarized.
2187 // TODO - do we have any exceptions?
2188 if (Opc >= ISD::BUILTIN_OP_END)
2189 return false;
2190
2191 // If the vector op is not supported, try to convert to scalar.
2192 EVT VecVT = VecOp.getValueType();
2193 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2194 return true;
2195
2196 // If the vector op is supported, but the scalar op is not, the transform may
2197 // not be worthwhile.
2198 // Permit converting a vector binary operation to a scalar binary
2199 // operation that is custom lowered with an illegal type.
2200 EVT ScalarVT = VecVT.getScalarType();
2201 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2202 isOperationCustom(Opc, ScalarVT);
2203}
2204
2206 const GlobalAddressSDNode *GA) const {
2207 // In order to maximise the opportunity for common subexpression elimination,
2208 // keep a separate ADD node for the global address offset instead of folding
2209 // it in the global address node. Later peephole optimisations may choose to
2210 // fold it back in when profitable.
2211 return false;
2212}
2213
2214// Return one of the following:
2215// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2216// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2217// positive counterpart, which will be materialized from the first returned
2218// element. The second returned element indicates that an FNEG should
2219// follow.
2220// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
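// For example, 0.5 has an FLI encoding while -0.5 does not, so -0.5 is
// returned as the index of +0.5 with the FNEG flag set.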
2221std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2222 EVT VT) const {
2223 if (!Subtarget.hasStdExtZfa())
2224 return std::make_pair(-1, false);
2225
2226 bool IsSupportedVT = false;
2227 if (VT == MVT::f16) {
2228 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2229 } else if (VT == MVT::f32) {
2230 IsSupportedVT = true;
2231 } else if (VT == MVT::f64) {
2232 assert(Subtarget.hasStdExtD() && "Expect D extension");
2233 IsSupportedVT = true;
2234 }
2235
2236 if (!IsSupportedVT)
2237 return std::make_pair(-1, false);
2238
2239  int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2240  if (Index < 0 && Imm.isNegative())
2241 // Try the combination of its positive counterpart + FNEG.
2242 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2243 else
2244 return std::make_pair(Index, false);
2245}
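// Example: with Zfa and VT == MVT::f32, Imm == 1.0 has a direct FLI encoding, so
// this returns {Index, false}; Imm == -2.0 has none, but +2.0 does, so this
// returns {index-of(+2.0), true} and the caller emits fli.s followed by fneg.s.
// (Illustrative; the concrete index values come from RISCVLoadFPImm.)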
2246
2247bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2248                                       bool ForCodeSize) const {
2249 bool IsLegalVT = false;
2250 if (VT == MVT::f16)
2251 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2252 else if (VT == MVT::f32)
2253 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2254 else if (VT == MVT::f64)
2255 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2256 else if (VT == MVT::bf16)
2257 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2258
2259 if (!IsLegalVT)
2260 return false;
2261
2262 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2263 return true;
2264
2265  // Cannot create a 64-bit floating-point immediate value for RV32.
2266 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2267 // td can handle +0.0 or -0.0 already.
2268 // -0.0 can be created by fmv + fneg.
2269 return Imm.isZero();
2270 }
2271
2272 // Special case: fmv + fneg
2273 if (Imm.isNegZero())
2274 return true;
2275
2276 // Building an integer and then converting requires a fmv at the end of
2277 // the integer sequence.
2278 const int Cost =
2279 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2280 Subtarget);
2281 return Cost <= FPImmCost;
2282}
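// Rough reading of the above: +/-0.0 and Zfa-encodable constants are always
// cheap; otherwise the immediate is accepted only if its bit pattern can be
// built in a GPR in at most FPImmCost - 1 instructions (one by default, e.g. a
// single lui or li) before the final fmv into an FP register.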
2283
2284// TODO: This is very conservative.
2285bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2286                                                  unsigned Index) const {
2287  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2288    return false;
2289
2290 // Only support extracting a fixed from a fixed vector for now.
2291 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2292 return false;
2293
2294 EVT EltVT = ResVT.getVectorElementType();
2295 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2296
2297 // The smallest type we can slide is i8.
2298 // TODO: We can extract index 0 from a mask vector without a slide.
2299 if (EltVT == MVT::i1)
2300 return false;
2301
2302 unsigned ResElts = ResVT.getVectorNumElements();
2303 unsigned SrcElts = SrcVT.getVectorNumElements();
2304
2305 unsigned MinVLen = Subtarget.getRealMinVLen();
2306 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2307
2308 // If we're extracting only data from the first VLEN bits of the source
2309 // then we can always do this with an m1 vslidedown.vx. Restricting the
2310 // Index ensures we can use a vslidedown.vi.
2311 // TODO: We can generalize this when the exact VLEN is known.
2312 if (Index + ResElts <= MinVLMAX && Index < 31)
2313 return true;
2314
2315  // Conservatively only handle extracting half of a vector.
2316  // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2317  // a cheap extract. However, this case is important in practice for
2318  // shuffled extracts of longer vectors. How should we resolve this?
2319 if ((ResElts * 2) != SrcElts)
2320 return false;
2321
2322 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2323 // cheap.
2324 if (Index >= 32)
2325 return false;
2326
2327 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2328 // the upper half of a vector until we have more test coverage.
2329 return Index == 0 || Index == ResElts;
2330}
2331
2332MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2333                                                       CallingConv::ID CC,
2334                                                       EVT VT) const {
2335 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2336 // We might still end up using a GPR but that will be decided based on ABI.
2337 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2338 !Subtarget.hasStdExtZfhminOrZhinxmin())
2339 return MVT::f32;
2340
2341  MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2342
2343 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2344 return MVT::i64;
2345
2346 return PartVT;
2347}
2348
2349unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
2350    LLVMContext &Context, CallingConv::ID CC,
2351    EVT VT) const {
2352 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2353 // We might still end up using a GPR but that will be decided based on ABI.
2354 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2355 !Subtarget.hasStdExtZfhminOrZhinxmin())
2356 return 1;
2357
2358  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2359}
2360
2361unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2362    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2363 unsigned &NumIntermediates, MVT &RegisterVT) const {
2364  unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2365      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2366
2367 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2368 IntermediateVT = MVT::i64;
2369
2370 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2371 RegisterVT = MVT::i64;
2372
2373 return NumRegs;
2374}
2375
2376// Changes the condition code and swaps operands if necessary, so the SetCC
2377// operation matches one of the comparisons supported directly by branches
2378// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2379// with 1/-1.
2380static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2381 ISD::CondCode &CC, SelectionDAG &DAG) {
2382 // If this is a single bit test that can't be handled by ANDI, shift the
2383 // bit to be tested to the MSB and perform a signed compare with 0.
2384 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2385 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2386 isa<ConstantSDNode>(LHS.getOperand(1))) {
2387 uint64_t Mask = LHS.getConstantOperandVal(1);
2388 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2389 unsigned ShAmt = 0;
2390 if (isPowerOf2_64(Mask)) {
2391        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2392        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2393 } else {
2394 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2395 }
2396
2397 LHS = LHS.getOperand(0);
2398 if (ShAmt != 0)
2399 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2400 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2401 return;
2402 }
2403 }
2404
2405 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2406 int64_t C = RHSC->getSExtValue();
2407 switch (CC) {
2408 default: break;
2409 case ISD::SETGT:
2410 // Convert X > -1 to X >= 0.
2411 if (C == -1) {
2412 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2413 CC = ISD::SETGE;
2414 return;
2415 }
2416 break;
2417 case ISD::SETLT:
2418 // Convert X < 1 to 0 >= X.
2419 if (C == 1) {
2420 RHS = LHS;
2421 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2422 CC = ISD::SETGE;
2423 return;
2424 }
2425 break;
2426 }
2427 }
2428
2429 switch (CC) {
2430 default:
2431 break;
2432 case ISD::SETGT:
2433 case ISD::SETLE:
2434 case ISD::SETUGT:
2435 case ISD::SETULE:
2436    CC = ISD::getSetCCSwappedOperands(CC);
2437    std::swap(LHS, RHS);
2438 break;
2439 }
2440}
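// Examples of the rewrites above: (setgt X, -1) becomes (setge X, 0), which maps
// directly onto bge X, zero; (seteq (and X, 0x4000), 0), whose mask does not fit
// a 12-bit immediate, shifts the tested bit into the sign bit and compares the
// result with 0 instead.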
2441
2442RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2443  assert(VT.isScalableVector() && "Expecting a scalable vector type");
2444 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2445 if (VT.getVectorElementType() == MVT::i1)
2446 KnownSize *= 8;
2447
2448 switch (KnownSize) {
2449 default:
2450 llvm_unreachable("Invalid LMUL.");
2451  case 8:
2452    return RISCVII::VLMUL::LMUL_F8;
2453  case 16:
2454    return RISCVII::VLMUL::LMUL_F4;
2455  case 32:
2456    return RISCVII::VLMUL::LMUL_F2;
2457  case 64:
2458    return RISCVII::VLMUL::LMUL_1;
2459  case 128:
2460    return RISCVII::VLMUL::LMUL_2;
2461  case 256:
2462    return RISCVII::VLMUL::LMUL_4;
2463  case 512:
2464    return RISCVII::VLMUL::LMUL_8;
2465 }
2466}
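// Worked examples: nxv4i32 has a known minimum size of 128 bits, giving LMUL_2;
// nxv1i8 is 8 bits, giving the fractional LMUL_F8. i1 vectors are scaled by 8
// first, so nxv8i1 (8 bits * 8 = 64) maps to LMUL_1.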
2467
2468unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2469  switch (LMul) {
2470 default:
2471 llvm_unreachable("Invalid LMUL.");
2472  case RISCVII::VLMUL::LMUL_F8:
2473  case RISCVII::VLMUL::LMUL_F4:
2474  case RISCVII::VLMUL::LMUL_F2:
2475  case RISCVII::VLMUL::LMUL_1:
2476    return RISCV::VRRegClassID;
2477  case RISCVII::VLMUL::LMUL_2:
2478    return RISCV::VRM2RegClassID;
2479  case RISCVII::VLMUL::LMUL_4:
2480    return RISCV::VRM4RegClassID;
2481  case RISCVII::VLMUL::LMUL_8:
2482    return RISCV::VRM8RegClassID;
2483 }
2484}
2485
2486unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2487  RISCVII::VLMUL LMUL = getLMUL(VT);
2488 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2489 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2490 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2491 LMUL == RISCVII::VLMUL::LMUL_1) {
2492 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2493 "Unexpected subreg numbering");
2494 return RISCV::sub_vrm1_0 + Index;
2495 }
2496 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2497 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2498 "Unexpected subreg numbering");
2499 return RISCV::sub_vrm2_0 + Index;
2500 }
2501 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2502 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2503 "Unexpected subreg numbering");
2504 return RISCV::sub_vrm4_0 + Index;
2505 }
2506 llvm_unreachable("Invalid vector type.");
2507}
2508
2509unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2510  if (VT.getVectorElementType() == MVT::i1)
2511 return RISCV::VRRegClassID;
2512 return getRegClassIDForLMUL(getLMUL(VT));
2513}
2514
2515// Attempt to decompose a subvector insert/extract between VecVT and
2516// SubVecVT via subregister indices. Returns the subregister index that
2517// can perform the subvector insert/extract with the given element index, as
2518// well as the index corresponding to any leftover subvectors that must be
2519// further inserted/extracted within the register class for SubVecVT.
2520std::pair<unsigned, unsigned>
2521RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2522    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2523 const RISCVRegisterInfo *TRI) {
2524 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2525 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2526 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2527 "Register classes not ordered");
2528 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2529 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2530 // Try to compose a subregister index that takes us from the incoming
2531 // LMUL>1 register class down to the outgoing one. At each step we half
2532 // the LMUL:
2533 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2534 // Note that this is not guaranteed to find a subregister index, such as
2535 // when we are extracting from one VR type to another.
2536 unsigned SubRegIdx = RISCV::NoSubRegister;
2537 for (const unsigned RCID :
2538 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2539 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2540 VecVT = VecVT.getHalfNumVectorElementsVT();
2541 bool IsHi =
2542 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2543 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2544 getSubregIndexByMVT(VecVT, IsHi));
2545 if (IsHi)
2546 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2547 }
2548 return {SubRegIdx, InsertExtractIdx};
2549}
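// Tracing the example from the comment above: VecVT = nxv16i32 (VRM8), SubVecVT
// = nxv2i32 (VR), Idx = 12. The first halving selects the high nxv8i32
// (sub_vrm4_1, Idx -> 4), the second the high nxv4i32 (sub_vrm2_1, Idx -> 0),
// and the last the low nxv2i32 (sub_vrm1_0), giving the composed index
// sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 with a leftover index of 0.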
2550
2551// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2552// stores for those types.
2553bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2554 return !Subtarget.useRVVForFixedLengthVectors() ||
2555 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2556}
2557
2558bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2559  if (!ScalarTy.isSimple())
2560 return false;
2561 switch (ScalarTy.getSimpleVT().SimpleTy) {
2562 case MVT::iPTR:
2563 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2564 case MVT::i8:
2565 case MVT::i16:
2566 case MVT::i32:
2567 return true;
2568 case MVT::i64:
2569 return Subtarget.hasVInstructionsI64();
2570 case MVT::f16:
2571 return Subtarget.hasVInstructionsF16();
2572 case MVT::f32:
2573 return Subtarget.hasVInstructionsF32();
2574 case MVT::f64:
2575 return Subtarget.hasVInstructionsF64();
2576 default:
2577 return false;
2578 }
2579}
2580
2581
2582unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2583 return NumRepeatedDivisors;
2584}
2585
2586static SDValue getVLOperand(SDValue Op) {
2587  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2588 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2589 "Unexpected opcode");
2590 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2591 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2592  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2593      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2594 if (!II)
2595 return SDValue();
2596 return Op.getOperand(II->VLOperand + 1 + HasChain);
2597}
2598
2599static bool useRVVForFixedLengthVectorVT(MVT VT,
2600                                         const RISCVSubtarget &Subtarget) {
2601 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2602 if (!Subtarget.useRVVForFixedLengthVectors())
2603 return false;
2604
2605 // We only support a set of vector types with a consistent maximum fixed size
2606 // across all supported vector element types to avoid legalization issues.
2607 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2608 // fixed-length vector type we support is 1024 bytes.
2609 if (VT.getFixedSizeInBits() > 1024 * 8)
2610 return false;
2611
2612 unsigned MinVLen = Subtarget.getRealMinVLen();
2613
2614 MVT EltVT = VT.getVectorElementType();
2615
2616 // Don't use RVV for vectors we cannot scalarize if required.
2617 switch (EltVT.SimpleTy) {
2618 // i1 is supported but has different rules.
2619 default:
2620 return false;
2621 case MVT::i1:
2622 // Masks can only use a single register.
2623 if (VT.getVectorNumElements() > MinVLen)
2624 return false;
2625 MinVLen /= 8;
2626 break;
2627 case MVT::i8:
2628 case MVT::i16:
2629 case MVT::i32:
2630 break;
2631 case MVT::i64:
2632 if (!Subtarget.hasVInstructionsI64())
2633 return false;
2634 break;
2635 case MVT::f16:
2636 if (!Subtarget.hasVInstructionsF16Minimal())
2637 return false;
2638 break;
2639 case MVT::bf16:
2640 if (!Subtarget.hasVInstructionsBF16())
2641 return false;
2642 break;
2643 case MVT::f32:
2644 if (!Subtarget.hasVInstructionsF32())
2645 return false;
2646 break;
2647 case MVT::f64:
2648 if (!Subtarget.hasVInstructionsF64())
2649 return false;
2650 break;
2651 }
2652
2653 // Reject elements larger than ELEN.
2654 if (EltVT.getSizeInBits() > Subtarget.getELen())
2655 return false;
2656
2657 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2658 // Don't use RVV for types that don't fit.
2659 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2660 return false;
2661
2662 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2663 // the base fixed length RVV support in place.
2664 if (!VT.isPow2VectorType())
2665 return false;
2666
2667 return true;
2668}
2669
2670bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2671 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2672}
2673
2674// Return the largest legal scalable vector type that matches VT's element type.
2675static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2676                                            const RISCVSubtarget &Subtarget) {
2677 // This may be called before legal types are setup.
2678 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2679 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2680 "Expected legal fixed length vector!");
2681
2682 unsigned MinVLen = Subtarget.getRealMinVLen();
2683 unsigned MaxELen = Subtarget.getELen();
2684
2685 MVT EltVT = VT.getVectorElementType();
2686 switch (EltVT.SimpleTy) {
2687 default:
2688 llvm_unreachable("unexpected element type for RVV container");
2689 case MVT::i1:
2690 case MVT::i8:
2691 case MVT::i16:
2692 case MVT::i32:
2693 case MVT::i64:
2694 case MVT::bf16:
2695 case MVT::f16:
2696 case MVT::f32:
2697 case MVT::f64: {
2698 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2699 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2700 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2701 unsigned NumElts =
2702        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2703    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2704 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2705 return MVT::getScalableVectorVT(EltVT, NumElts);
2706 }
2707 }
2708}
2709
2710static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2711                                            const RISCVSubtarget &Subtarget) {
2712  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2713                                          Subtarget);
2714}
2715
2716MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2717  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2718}
2719
2720// Grow V to consume an entire RVV register.
2721static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2722                                       const RISCVSubtarget &Subtarget) {
2723 assert(VT.isScalableVector() &&
2724 "Expected to convert into a scalable vector!");
2725 assert(V.getValueType().isFixedLengthVector() &&
2726 "Expected a fixed length vector operand!");
2727 SDLoc DL(V);
2728 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2729 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2730}
2731
2732// Shrink V so it's just big enough to maintain a VT's worth of data.
2733static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2734                                         const RISCVSubtarget &Subtarget) {
2735  assert(VT.isFixedLengthVector() &&
2736         "Expected to convert into a fixed length vector!");
2737 assert(V.getValueType().isScalableVector() &&
2738 "Expected a scalable vector operand!");
2739 SDLoc DL(V);
2740 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2741 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2742}
2743
2744/// Return the mask type suitable for masking the provided
2745/// vector type. This is simply an i1 element type vector of the same
2746/// (possibly scalable) length.
2747static MVT getMaskTypeFor(MVT VecVT) {
2748 assert(VecVT.isVector());
2749  ElementCount EC = VecVT.getVectorElementCount();
2750  return MVT::getVectorVT(MVT::i1, EC);
2751}
2752
2753/// Creates an all ones mask suitable for masking a vector of type VecTy with
2754/// vector length VL.
2755static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2756 SelectionDAG &DAG) {
2757 MVT MaskVT = getMaskTypeFor(VecVT);
2758 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2759}
2760
2761static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2762 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2763 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2764 // canonicalize the representation. InsertVSETVLI will pick the immediate
2765 // encoding later if profitable.
2766 const auto [MinVLMAX, MaxVLMAX] =
2767 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2768 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2769 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2770
2771 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2772}
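// E.g. with an exactly known VLEN of 128, a v4i32 value lives in an nxv2i32
// container whose VLMAX is also 4, so an AVL of 4 is canonicalized to the X0
// (VLMAX) form here and InsertVSETVLI can later pick the cheapest encoding.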
2773
2774static std::pair<SDValue, SDValue>
2775getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2776                        const RISCVSubtarget &Subtarget) {
2777 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2778 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2779 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2780 return {Mask, VL};
2781}
2782
2783static std::pair<SDValue, SDValue>
2784getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2785 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2786 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2787 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2788 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2789 return {Mask, VL};
2790}
2791
2792// Gets the two common "VL" operands: an all-ones mask and the vector length.
2793// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2794// fixed-length, ContainerVT is the scalable container type it is lowered to;
2795// if VecVT is scalable, ContainerVT should be the same as VecVT.
2796static std::pair<SDValue, SDValue>
2797getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2798 const RISCVSubtarget &Subtarget) {
2799 if (VecVT.isFixedLengthVector())
2800 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2801 Subtarget);
2802 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2803 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2804}
2805
2806SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2807                                          SelectionDAG &DAG) const {
2808 assert(VecVT.isScalableVector() && "Expected scalable vector");
2809 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2810 VecVT.getVectorElementCount());
2811}
2812
2813std::pair<unsigned, unsigned>
2814RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2815                                        const RISCVSubtarget &Subtarget) {
2816 assert(VecVT.isScalableVector() && "Expected scalable vector");
2817
2818 unsigned EltSize = VecVT.getScalarSizeInBits();
2819 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2820
2821 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2822 unsigned MaxVLMAX =
2823 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2824
2825 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2826 unsigned MinVLMAX =
2827 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2828
2829 return std::make_pair(MinVLMAX, MaxVLMAX);
2830}
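// E.g. for nxv4i32 (LMUL = 2, SEW = 32) on a subtarget whose VLEN may range from
// 128 to 512, VLMAX = LMUL * VLEN / SEW gives bounds of {8, 32}.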
2831
2832// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2833// of either is (currently) supported. This can get us into an infinite loop
2834// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2835// as a ..., etc.
2836// Until either (or both) of these can reliably lower any node, reporting that
2837// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2838// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2839// which is not desirable.
2840bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2841    EVT VT, unsigned DefinedValues) const {
2842 return false;
2843}
2844
2845InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2846  // TODO: Here we assume reciprocal throughput is 1 for LMUL_1; it is
2847  // implementation-defined.
2848  if (!VT.isVector())
2849    return InstructionCost::getInvalid();
2850  unsigned DLenFactor = Subtarget.getDLenFactor();
2851 unsigned Cost;
2852 if (VT.isScalableVector()) {
2853 unsigned LMul;
2854 bool Fractional;
2855 std::tie(LMul, Fractional) =
2856        RISCVVType::decodeVLMUL(getLMUL(VT));
2857    if (Fractional)
2858 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2859 else
2860 Cost = (LMul * DLenFactor);
2861 } else {
2862 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2863 }
2864 return Cost;
2865}
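// E.g. with DLEN == VLEN (DLenFactor 1), a scalable LMUL_4 operation costs 4 and
// any fractional LMUL costs 1; a fixed-length v8i32 (256 bits) on a 128-bit
// minimum VLEN costs ceil(256 / 128) = 2.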
2866
2867
2868/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2869/// is generally quadratic in the number of vregs implied by LMUL. Note that
2870/// the operands (index and possibly mask) are handled separately.
2871InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2872  return getLMULCost(VT) * getLMULCost(VT);
2873}
2874
2875/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2876/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2877/// or may track the vrgather.vv cost. It is implementation-dependent.
2878InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2879  return getLMULCost(VT);
2880}
2881
2882/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2883/// for the type VT. (This does not cover the vslide1up or vslide1down
2884/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2885/// or may track the vrgather.vv cost. It is implementation-dependent.
2886InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2887  return getLMULCost(VT);
2888}
2889
2890/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2891/// for the type VT. (This does not cover the vslide1up or vslide1down
2892/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2893/// or may track the vrgather.vv cost. It is implementation-dependent.
2894InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2895  return getLMULCost(VT);
2896}
2897
2898static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2899                                  const RISCVSubtarget &Subtarget) {
2900  // RISC-V FP-to-int conversions saturate to the destination register size,
2901  // but don't produce 0 for NaN. We can use a conversion instruction and fix
2902  // the NaN case with a compare and a select.
2903 SDValue Src = Op.getOperand(0);
2904
2905 MVT DstVT = Op.getSimpleValueType();
2906 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2907
2908 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2909
2910 if (!DstVT.isVector()) {
2911    // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2912 // the result.
2913 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2914 Src.getValueType() == MVT::bf16) {
2915 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2916 }
2917
2918 unsigned Opc;
2919 if (SatVT == DstVT)
2920 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2921 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2922      Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2923    else
2924 return SDValue();
2925 // FIXME: Support other SatVTs by clamping before or after the conversion.
2926
2927 SDLoc DL(Op);
2928 SDValue FpToInt = DAG.getNode(
2929 Opc, DL, DstVT, Src,
2931
2932 if (Opc == RISCVISD::FCVT_WU_RV64)
2933 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2934
2935 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2936 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2937                           ISD::CondCode::SETUO);
2938  }
2939
2940 // Vectors.
2941
2942 MVT DstEltVT = DstVT.getVectorElementType();
2943 MVT SrcVT = Src.getSimpleValueType();
2944 MVT SrcEltVT = SrcVT.getVectorElementType();
2945 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2946 unsigned DstEltSize = DstEltVT.getSizeInBits();
2947
2948 // Only handle saturating to the destination type.
2949 if (SatVT != DstEltVT)
2950 return SDValue();
2951
2952 MVT DstContainerVT = DstVT;
2953 MVT SrcContainerVT = SrcVT;
2954 if (DstVT.isFixedLengthVector()) {
2955 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2956 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2957 assert(DstContainerVT.getVectorElementCount() ==
2958 SrcContainerVT.getVectorElementCount() &&
2959 "Expected same element count");
2960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2961 }
2962
2963 SDLoc DL(Op);
2964
2965 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2966
2967 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2968 {Src, Src, DAG.getCondCode(ISD::SETNE),
2969 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2970
2971 // Need to widen by more than 1 step, promote the FP type, then do a widening
2972 // convert.
2973 if (DstEltSize > (2 * SrcEltSize)) {
2974 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2975 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2976 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2977 }
2978
2979 MVT CvtContainerVT = DstContainerVT;
2980 MVT CvtEltVT = DstEltVT;
2981 if (SrcEltSize > (2 * DstEltSize)) {
2982 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2983 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2984 }
2985
2986 unsigned RVVOpc =
2987      IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2988  SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2989
2990 while (CvtContainerVT != DstContainerVT) {
2991 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2992 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2993 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2994 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2995                                  : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
2996    Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2997 }
2998
2999 SDValue SplatZero = DAG.getNode(
3000 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3001 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3002 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3003 Res, DAG.getUNDEF(DstContainerVT), VL);
3004
3005 if (DstVT.isFixedLengthVector())
3006 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3007
3008 return Res;
3009}
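// For a scalar f32 -> i32 fptosi.sat on RV32 this boils down to roughly
//   fcvt.w.s a0, fa0, rtz   ; saturating convert with round-toward-zero
//   feq.s    a1, fa0, fa0   ; ordered check, 0 iff the input is NaN
//   neg      a1, a1
//   and      a0, a0, a1     ; force the result to 0 for NaN
// (an illustrative sequence; the exact select lowering may differ).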
3010
3011static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
3012  switch (Opc) {
3013 case ISD::FROUNDEVEN:
3014  case ISD::STRICT_FROUNDEVEN:
3015  case ISD::VP_FROUNDEVEN:
3016 return RISCVFPRndMode::RNE;
3017 case ISD::FTRUNC:
3018 case ISD::STRICT_FTRUNC:
3019 case ISD::VP_FROUNDTOZERO:
3020 return RISCVFPRndMode::RTZ;
3021 case ISD::FFLOOR:
3022 case ISD::STRICT_FFLOOR:
3023 case ISD::VP_FFLOOR:
3024 return RISCVFPRndMode::RDN;
3025 case ISD::FCEIL:
3026 case ISD::STRICT_FCEIL:
3027 case ISD::VP_FCEIL:
3028 return RISCVFPRndMode::RUP;
3029 case ISD::FROUND:
3030 case ISD::STRICT_FROUND:
3031 case ISD::VP_FROUND:
3032 return RISCVFPRndMode::RMM;
3033 case ISD::FRINT:
3034 return RISCVFPRndMode::DYN;
3035 }
3036
3037  return RISCVFPRndMode::Invalid;
3038}
3039
3040// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3041// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3042// the integer domain and back, taking care to avoid converting values that
3043// are NaN or already correct.
3044static SDValue
3045lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3046                                      const RISCVSubtarget &Subtarget) {
3047 MVT VT = Op.getSimpleValueType();
3048 assert(VT.isVector() && "Unexpected type");
3049
3050 SDLoc DL(Op);
3051
3052 SDValue Src = Op.getOperand(0);
3053
3054 MVT ContainerVT = VT;
3055 if (VT.isFixedLengthVector()) {
3056 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3057 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3058 }
3059
3060 SDValue Mask, VL;
3061 if (Op->isVPOpcode()) {
3062 Mask = Op.getOperand(1);
3063 if (VT.isFixedLengthVector())
3064 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3065 Subtarget);
3066 VL = Op.getOperand(2);
3067 } else {
3068 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3069 }
3070
3071 // Freeze the source since we are increasing the number of uses.
3072 Src = DAG.getFreeze(Src);
3073
3074 // We do the conversion on the absolute value and fix the sign at the end.
3075 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3076
3077 // Determine the largest integer that can be represented exactly. This and
3078 // values larger than it don't have any fractional bits so don't need to
3079 // be converted.
3080 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3081 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3082 APFloat MaxVal = APFloat(FltSem);
3083 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3084 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3085 SDValue MaxValNode =
3086 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3087 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3088 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3089
3090 // If abs(Src) was larger than MaxVal or nan, keep it.
3091 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3092 Mask =
3093 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3094 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3095 Mask, Mask, VL});
3096
3097 // Truncate to integer and convert back to FP.
3098 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3099 MVT XLenVT = Subtarget.getXLenVT();
3100 SDValue Truncated;
3101
3102 switch (Op.getOpcode()) {
3103 default:
3104 llvm_unreachable("Unexpected opcode");
3105 case ISD::FCEIL:
3106 case ISD::VP_FCEIL:
3107 case ISD::FFLOOR:
3108 case ISD::VP_FFLOOR:
3109 case ISD::FROUND:
3110 case ISD::FROUNDEVEN:
3111 case ISD::VP_FROUND:
3112 case ISD::VP_FROUNDEVEN:
3113 case ISD::VP_FROUNDTOZERO: {
3114    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3115    assert(FRM != RISCVFPRndMode::Invalid);
3116    Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3117 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3118 break;
3119 }
3120 case ISD::FTRUNC:
3121 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3122 Mask, VL);
3123 break;
3124 case ISD::FRINT:
3125 case ISD::VP_FRINT:
3126 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3127 break;
3128 case ISD::FNEARBYINT:
3129 case ISD::VP_FNEARBYINT:
3130 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3131 Mask, VL);
3132 break;
3133 }
3134
3135 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3136 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3137 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3138 Mask, VL);
3139
3140 // Restore the original sign so that -0.0 is preserved.
3141 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3142 Src, Src, Mask, VL);
3143
3144 if (!VT.isFixedLengthVector())
3145 return Truncated;
3146
3147 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3148}
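// Net effect for, e.g., vp.ceil on a float vector: lanes whose magnitude is
// already >= 2^23 (so they have no fractional bits) or that are NaN are masked
// off, the remaining lanes go through vfcvt.x.f.v with the RUP static rounding
// mode and back through vfcvt.f.x.v, and the original sign is copied back so
// that -0.0 is preserved.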
3149
3150// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3151// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
3152// qNaN and converting the new source to integer and back to FP.
3153static SDValue
3154lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3155                                            const RISCVSubtarget &Subtarget) {
3156 SDLoc DL(Op);
3157 MVT VT = Op.getSimpleValueType();
3158 SDValue Chain = Op.getOperand(0);
3159 SDValue Src = Op.getOperand(1);
3160
3161 MVT ContainerVT = VT;
3162 if (VT.isFixedLengthVector()) {
3163 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3164 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3165 }
3166
3167 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3168
3169 // Freeze the source since we are increasing the number of uses.
3170 Src = DAG.getFreeze(Src);
3171
3172  // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3173 MVT MaskVT = Mask.getSimpleValueType();
3174  SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3175                                DAG.getVTList(MaskVT, MVT::Other),
3176 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3177 DAG.getUNDEF(MaskVT), Mask, VL});
3178 Chain = Unorder.getValue(1);
3180 DAG.getVTList(ContainerVT, MVT::Other),
3181 {Chain, Src, Src, Src, Unorder, VL});
3182 Chain = Src.getValue(1);
3183
3184 // We do the conversion on the absolute value and fix the sign at the end.
3185 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3186
3187 // Determine the largest integer that can be represented exactly. This and
3188 // values larger than it don't have any fractional bits so don't need to
3189 // be converted.
3190 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3191 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3192 APFloat MaxVal = APFloat(FltSem);
3193 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3194 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3195 SDValue MaxValNode =
3196 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3197 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3198 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3199
3200 // If abs(Src) was larger than MaxVal or nan, keep it.
3201 Mask = DAG.getNode(
3202 RISCVISD::SETCC_VL, DL, MaskVT,
3203 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3204
3205 // Truncate to integer and convert back to FP.
3206 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3207 MVT XLenVT = Subtarget.getXLenVT();
3208 SDValue Truncated;
3209
3210 switch (Op.getOpcode()) {
3211 default:
3212 llvm_unreachable("Unexpected opcode");
3213 case ISD::STRICT_FCEIL:
3214 case ISD::STRICT_FFLOOR:
3215 case ISD::STRICT_FROUND:
3216  case ISD::STRICT_FROUNDEVEN: {
3217    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3218    assert(FRM != RISCVFPRndMode::Invalid);
3219    Truncated = DAG.getNode(
3220 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3221 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3222 break;
3223 }
3224 case ISD::STRICT_FTRUNC:
3225 Truncated =
3226        DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3227                    DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3228 break;
3231 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3232 Mask, VL);
3233 break;
3234 }
3235 Chain = Truncated.getValue(1);
3236
3237 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3238 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3239 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3240 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3241 Truncated, Mask, VL);
3242 Chain = Truncated.getValue(1);
3243 }
3244
3245 // Restore the original sign so that -0.0 is preserved.
3246 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3247 Src, Src, Mask, VL);
3248
3249 if (VT.isFixedLengthVector())
3250 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3251 return DAG.getMergeValues({Truncated, Chain}, DL);
3252}
3253
3254static SDValue
3255lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3256                                const RISCVSubtarget &Subtarget) {
3257 MVT VT = Op.getSimpleValueType();
3258 if (VT.isVector())
3259 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3260
3261 if (DAG.shouldOptForSize())
3262 return SDValue();
3263
3264 SDLoc DL(Op);
3265 SDValue Src = Op.getOperand(0);
3266
3267 // Create an integer the size of the mantissa with the MSB set. This and all
3268 // values larger than it don't have any fractional bits so don't need to be
3269 // converted.
3270 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3271 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3272 APFloat MaxVal = APFloat(FltSem);
3273 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3274 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3275 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3276
3277  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3278  return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3279 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3280}
3281
3282// Expand vector LRINT and LLRINT by converting to the integer domain.
3283static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3284                                const RISCVSubtarget &Subtarget) {
3285 MVT VT = Op.getSimpleValueType();
3286 assert(VT.isVector() && "Unexpected type");
3287
3288 SDLoc DL(Op);
3289 SDValue Src = Op.getOperand(0);
3290 MVT ContainerVT = VT;
3291
3292 if (VT.isFixedLengthVector()) {
3293 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3294 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3295 }
3296
3297 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3298 SDValue Truncated =
3299 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3300
3301 if (!VT.isFixedLengthVector())
3302 return Truncated;
3303
3304 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3305}
3306
3307static SDValue
3308getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3309              const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3310 SDValue Offset, SDValue Mask, SDValue VL,
3311              unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3312  if (Merge.isUndef())
3313    Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3314  SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3315 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3316 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3317}
3318
3319static SDValue
3320getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3321            EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3322            SDValue VL,
3323            unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3324  if (Merge.isUndef())
3325    Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3326  SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3327 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3328 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3329}
3330
3331static MVT getLMUL1VT(MVT VT) {
3332  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3333         "Unexpected vector MVT");
3334  return MVT::getScalableVectorVT(
3335      VT.getVectorElementType(),
3336      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3337}
3338
3339struct VIDSequence {
3340  int64_t StepNumerator;
3341  unsigned StepDenominator;
3342  int64_t Addend;
3343};
3344
3345static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3346                                               unsigned BitWidth) {
3347  // We will use a SINT_TO_FP to materialize this constant so we should use a
3348 // signed APSInt here.
3349 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3350 // We use an arbitrary rounding mode here. If a floating-point is an exact
3351 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3352 // the rounding mode changes the output value, then it is not an exact
3353 // integer.
3355 bool IsExact;
3356 // If it is out of signed integer range, it will return an invalid operation.
3357 // If it is not an exact integer, IsExact is false.
3358 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3359       APFloatBase::opInvalidOp) ||
3360      !IsExact)
3361 return std::nullopt;
3362 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3363}
3364
3365// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3366// to the (non-zero) step S and start value X. This can then be lowered as the
3367// RVV sequence (VID * S) + X, for example.
3368// The step S is represented as an integer numerator divided by a positive
3369// denominator. Note that the implementation currently only identifies
3370// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3371// cannot detect 2/3, for example.
3372// Note that this method will also match potentially unappealing index
3373// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3374// determine whether this is worth generating code for.
3375static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3376 unsigned EltSizeInBits) {
3377 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3378 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3379 return std::nullopt;
3380 bool IsInteger = Op.getValueType().isInteger();
3381
3382 std::optional<unsigned> SeqStepDenom;
3383 std::optional<int64_t> SeqStepNum, SeqAddend;
3384 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3385 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3386
3387 // First extract the ops into a list of constant integer values. This may not
3388 // be possible for floats if they're not all representable as integers.
3390 const unsigned OpSize = Op.getScalarValueSizeInBits();
3391 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3392 if (Elt.isUndef()) {
3393 Elts[Idx] = std::nullopt;
3394 continue;
3395 }
3396 if (IsInteger) {
3397 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3398 } else {
3399 auto ExactInteger =
3400 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3401 if (!ExactInteger)
3402 return std::nullopt;
3403 Elts[Idx] = *ExactInteger;
3404 }
3405 }
3406
3407 for (auto [Idx, Elt] : enumerate(Elts)) {
3408 // Assume undef elements match the sequence; we just have to be careful
3409 // when interpolating across them.
3410 if (!Elt)
3411 continue;
3412
3413 if (PrevElt) {
3414 // Calculate the step since the last non-undef element, and ensure
3415 // it's consistent across the entire sequence.
3416 unsigned IdxDiff = Idx - PrevElt->second;
3417 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3418
3419      // A zero value difference means that we're somewhere in the middle
3420 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3421 // step change before evaluating the sequence.
3422 if (ValDiff == 0)
3423 continue;
3424
3425 int64_t Remainder = ValDiff % IdxDiff;
3426 // Normalize the step if it's greater than 1.
3427 if (Remainder != ValDiff) {
3428 // The difference must cleanly divide the element span.
3429 if (Remainder != 0)
3430 return std::nullopt;
3431 ValDiff /= IdxDiff;
3432 IdxDiff = 1;
3433 }
3434
3435 if (!SeqStepNum)
3436 SeqStepNum = ValDiff;
3437 else if (ValDiff != SeqStepNum)
3438 return std::nullopt;
3439
3440 if (!SeqStepDenom)
3441 SeqStepDenom = IdxDiff;
3442 else if (IdxDiff != *SeqStepDenom)
3443 return std::nullopt;
3444 }
3445
3446 // Record this non-undef element for later.
3447 if (!PrevElt || PrevElt->first != *Elt)
3448 PrevElt = std::make_pair(*Elt, Idx);
3449 }
3450
3451 // We need to have logged a step for this to count as a legal index sequence.
3452 if (!SeqStepNum || !SeqStepDenom)
3453 return std::nullopt;
3454
3455 // Loop back through the sequence and validate elements we might have skipped
3456 // while waiting for a valid step. While doing this, log any sequence addend.
3457 for (auto [Idx, Elt] : enumerate(Elts)) {
3458 if (!Elt)
3459 continue;
3460 uint64_t ExpectedVal =
3461 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3462 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3463 if (!SeqAddend)
3464 SeqAddend = Addend;
3465 else if (Addend != SeqAddend)
3466 return std::nullopt;
3467 }
3468
3469 assert(SeqAddend && "Must have an addend if we have a step");
3470
3471 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3472}
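// Examples: <0, 2, 4, 6> is matched as {StepNumerator = 2, StepDenominator = 1,
// Addend = 0}, i.e. vid.v shifted left by one; <1, 1, 2, 2> is matched as
// {StepNumerator = 1, StepDenominator = 2, Addend = 1}, i.e. vid.v shifted right
// by one, plus one.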
3473
3474// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3475// and lower it as a VRGATHER_VX_VL from the source vector.
3476static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3477 SelectionDAG &DAG,
3478 const RISCVSubtarget &Subtarget) {
3479 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3480 return SDValue();
3481 SDValue Vec = SplatVal.getOperand(0);
3482 // Only perform this optimization on vectors of the same size for simplicity.
3483 // Don't perform this optimization for i1 vectors.
3484 // FIXME: Support i1 vectors, maybe by promoting to i8?
3485 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3486 return SDValue();
3487 SDValue Idx = SplatVal.getOperand(1);
3488 // The index must be a legal type.
3489 if (Idx.getValueType() != Subtarget.getXLenVT())
3490 return SDValue();
3491
3492 MVT ContainerVT = VT;
3493 if (VT.isFixedLengthVector()) {
3494 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3495 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3496 }
3497
3498 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3499
3500 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3501 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3502
3503 if (!VT.isFixedLengthVector())
3504 return Gather;
3505
3506 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3507}
3508
3509
3510/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3511/// which constitute a large proportion of the elements. In such cases we can
3512/// splat a vector with the dominant element and make up the shortfall with
3513/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3514/// Note that this includes vectors of 2 elements by association. The
3515/// upper-most element is the "dominant" one, allowing us to use a splat to
3516/// "insert" the upper element, and an insert of the lower element at position
3517/// 0, which improves codegen.
3518static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3519                                                 const RISCVSubtarget &Subtarget) {
3520 MVT VT = Op.getSimpleValueType();
3521 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3522
3523 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3524
3525 SDLoc DL(Op);
3526 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3527
3528 MVT XLenVT = Subtarget.getXLenVT();
3529 unsigned NumElts = Op.getNumOperands();
3530
3531 SDValue DominantValue;
3532 unsigned MostCommonCount = 0;
3533 DenseMap<SDValue, unsigned> ValueCounts;
3534 unsigned NumUndefElts =
3535 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3536
3537 // Track the number of scalar loads we know we'd be inserting, estimated as
3538 // any non-zero floating-point constant. Other kinds of element are either
3539 // already in registers or are materialized on demand. The threshold at which
3540 // a vector load is more desirable than several scalar materializion and
3541 // vector-insertion instructions is not known.
3542 unsigned NumScalarLoads = 0;
3543
3544 for (SDValue V : Op->op_values()) {
3545 if (V.isUndef())
3546 continue;
3547
3548 ValueCounts.insert(std::make_pair(V, 0));
3549 unsigned &Count = ValueCounts[V];
3550 if (0 == Count)
3551 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3552 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3553
3554 // Is this value dominant? In case of a tie, prefer the highest element as
3555 // it's cheaper to insert near the beginning of a vector than it is at the
3556 // end.
3557 if (++Count >= MostCommonCount) {
3558 DominantValue = V;
3559 MostCommonCount = Count;
3560 }
3561 }
3562
3563 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3564 unsigned NumDefElts = NumElts - NumUndefElts;
3565 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3566
3567 // Don't perform this optimization when optimizing for size, since
3568 // materializing elements and inserting them tends to cause code bloat.
3569 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3570 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3571 ((MostCommonCount > DominantValueCountThreshold) ||
3572 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3573 // Start by splatting the most common element.
3574 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3575
3576 DenseSet<SDValue> Processed{DominantValue};
3577
3578 // We can handle an insert into the last element (of a splat) via
3579 // v(f)slide1down. This is slightly better than the vslideup insert
3580 // lowering as it avoids the need for a vector group temporary. It
3581 // is also better than using vmerge.vx as it avoids the need to
3582 // materialize the mask in a vector register.
3583 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3584 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3585 LastOp != DominantValue) {
3586 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3587 auto OpCode =
3588          VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3589      if (!VT.isFloatingPoint())
3590 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3591 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3592 LastOp, Mask, VL);
3593 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3594 Processed.insert(LastOp);
3595 }
3596
3597 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3598 for (const auto &OpIdx : enumerate(Op->ops())) {
3599 const SDValue &V = OpIdx.value();
3600 if (V.isUndef() || !Processed.insert(V).second)
3601 continue;
3602 if (ValueCounts[V] == 1) {
3603 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3604 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3605 } else {
3606 // Blend in all instances of this value using a VSELECT, using a
3607 // mask where each bit signals whether that element is the one
3608 // we're after.
3609        SmallVector<SDValue> Ops;
3610        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3611 return DAG.getConstant(V == V1, DL, XLenVT);
3612 });
3613 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3614 DAG.getBuildVector(SelMaskTy, DL, Ops),
3615 DAG.getSplatBuildVector(VT, DL, V), Vec);
3616 }
3617 }
3618
3619 return Vec;
3620 }
3621
3622 return SDValue();
3623}
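// E.g. <4 x float> <2.0, 2.0, 2.0, 3.0>: 2.0 is the dominant value, so this
// emits a splat of 2.0 followed by a single vfslide1down.vf that brings 3.0 into
// the last lane.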
3624
3625static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3626                                 const RISCVSubtarget &Subtarget) {
3627 MVT VT = Op.getSimpleValueType();
3628 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3629
3630 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3631
3632 SDLoc DL(Op);
3633 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3634
3635 MVT XLenVT = Subtarget.getXLenVT();
3636 unsigned NumElts = Op.getNumOperands();
3637
3638 if (VT.getVectorElementType() == MVT::i1) {
3639 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3640 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3641 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3642 }
3643
3644 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3645 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3646 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3647 }
3648
3649 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3650 // scalar integer chunks whose bit-width depends on the number of mask
3651 // bits and XLEN.
3652 // First, determine the most appropriate scalar integer type to use. This
3653 // is at most XLenVT, but may be shrunk to a smaller vector element type
3654 // according to the size of the final vector - use i8 chunks rather than
3655 // XLenVT if we're producing a v8i1. This results in more consistent
3656 // codegen across RV32 and RV64.
3657 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3658 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3659 // If we have to use more than one INSERT_VECTOR_ELT then this
3660    // optimization is likely to increase code size; avoid performing it in
3661 // such a case. We can use a load from a constant pool in this case.
3662 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3663 return SDValue();
3664 // Now we can create our integer vector type. Note that it may be larger
3665 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3666 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3667 MVT IntegerViaVecVT =
3668 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3669 IntegerViaVecElts);
3670
3671 uint64_t Bits = 0;
3672 unsigned BitPos = 0, IntegerEltIdx = 0;
3673 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3674
3675 for (unsigned I = 0; I < NumElts;) {
3676 SDValue V = Op.getOperand(I);
3677 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3678 Bits |= ((uint64_t)BitValue << BitPos);
3679 ++BitPos;
3680 ++I;
3681
3682 // Once we accumulate enough bits to fill our scalar type or process the
3683 // last element, insert into our vector and clear our accumulated data.
3684 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3685 if (NumViaIntegerBits <= 32)
3686 Bits = SignExtend64<32>(Bits);
3687 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3688 Elts[IntegerEltIdx] = Elt;
3689 Bits = 0;
3690 BitPos = 0;
3691 IntegerEltIdx++;
3692 }
3693 }
3694
3695 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3696
3697 if (NumElts < NumViaIntegerBits) {
3698 // If we're producing a smaller vector than our minimum legal integer
3699 // type, bitcast to the equivalent (known-legal) mask type, and extract
3700 // our final mask.
3701 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3702 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3703 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3704 DAG.getConstant(0, DL, XLenVT));
3705 } else {
3706 // Else we must have produced an integer type with the same size as the
3707 // mask type; bitcast for the final result.
3708 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3709 Vec = DAG.getBitcast(VT, Vec);
3710 }
3711
3712 return Vec;
3713 }
3714
3715 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3716 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3717                                          : RISCVISD::VMV_V_X_VL;
3718    if (!VT.isFloatingPoint())
3719 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3720 Splat =
3721 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3722 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3723 }
3724
3725 // Try and match index sequences, which we can lower to the vid instruction
3726 // with optional modifications. An all-undef vector is matched by
3727 // getSplatValue, above.
3728 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3729 int64_t StepNumerator = SimpleVID->StepNumerator;
3730 unsigned StepDenominator = SimpleVID->StepDenominator;
3731 int64_t Addend = SimpleVID->Addend;
3732
3733 assert(StepNumerator != 0 && "Invalid step");
3734 bool Negate = false;
3735 int64_t SplatStepVal = StepNumerator;
3736 unsigned StepOpcode = ISD::MUL;
3737 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3738 // anyway as the shift of 63 won't fit in uimm5.
3739 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3740 isPowerOf2_64(std::abs(StepNumerator))) {
3741 Negate = StepNumerator < 0;
3742 StepOpcode = ISD::SHL;
3743 SplatStepVal = Log2_64(std::abs(StepNumerator));
3744 }
3745
3746    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3747 // threshold since it's the immediate value many RVV instructions accept.
3748 // There is no vmul.vi instruction so ensure multiply constant can fit in
3749 // a single addi instruction.
3750 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3751 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3752 isPowerOf2_32(StepDenominator) &&
3753 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3754 MVT VIDVT =
3756 MVT VIDContainerVT =
3757 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3758 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3759 // Convert right out of the scalable type so we can use standard ISD
3760 // nodes for the rest of the computation. If we used scalable types with
3761 // these, we'd lose the fixed-length vector info and generate worse
3762 // vsetvli code.
3763 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3764 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3765 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3766 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3767 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3768 }
3769 if (StepDenominator != 1) {
3770 SDValue SplatStep =
3771 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3772 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3773 }
3774 if (Addend != 0 || Negate) {
3775 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3776 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3777 VID);
3778 }
3779 if (VT.isFloatingPoint()) {
3780 // TODO: Use vfwcvt to reduce register pressure.
3781 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3782 }
3783 return VID;
3784 }
3785 }
3786
3787 // For very small build_vectors, use a single scalar insert of a constant.
3788 // TODO: Base this on constant rematerialization cost, not size.
3789 const unsigned EltBitSize = VT.getScalarSizeInBits();
3790 if (VT.getSizeInBits() <= 32 &&
3791      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3792    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3793 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3794 "Unexpected sequence type");
3795 // If we can use the original VL with the modified element type, this
3796 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3797 // be moved into InsertVSETVLI?
3798 unsigned ViaVecLen =
3799 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3800 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3801
3802 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3803 uint64_t SplatValue = 0;
3804 // Construct the amalgamated value at this larger vector type.
3805 for (const auto &OpIdx : enumerate(Op->op_values())) {
3806 const auto &SeqV = OpIdx.value();
3807 if (!SeqV.isUndef())
3808 SplatValue |=
3809 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3810 }
3811
3812 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3813     // achieve better constant materialization.
3814 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3815 SplatValue = SignExtend64<32>(SplatValue);
3816
3817 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3818 DAG.getUNDEF(ViaVecVT),
3819 DAG.getConstant(SplatValue, DL, XLenVT),
3820 DAG.getVectorIdxConstant(0, DL));
3821     if (ViaVecLen != 1)
3822       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3823                         MVT::getVectorVT(ViaIntVT, 1), Vec,
3824 DAG.getConstant(0, DL, XLenVT));
3825 return DAG.getBitcast(VT, Vec);
3826 }
3827
3828
3829 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3830 // when re-interpreted as a vector with a larger element type. For example,
3831 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3832 // could be instead splat as
3833 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3834 // TODO: This optimization could also work on non-constant splats, but it
3835 // would require bit-manipulation instructions to construct the splat value.
3836 SmallVector<SDValue> Sequence;
3837 const auto *BV = cast<BuildVectorSDNode>(Op);
3838   if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3839       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3840       BV->getRepeatedSequence(Sequence) &&
3841 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3842 unsigned SeqLen = Sequence.size();
3843 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3844 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3845 ViaIntVT == MVT::i64) &&
3846 "Unexpected sequence type");
3847
3848 // If we can use the original VL with the modified element type, this
3849 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3850 // be moved into InsertVSETVLI?
3851 const unsigned RequiredVL = NumElts / SeqLen;
3852 const unsigned ViaVecLen =
3853 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3854 NumElts : RequiredVL;
3855 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3856
3857 unsigned EltIdx = 0;
3858 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3859 uint64_t SplatValue = 0;
3860 // Construct the amalgamated value which can be splatted as this larger
3861 // vector type.
3862 for (const auto &SeqV : Sequence) {
3863 if (!SeqV.isUndef())
3864 SplatValue |=
3865 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3866 EltIdx++;
3867 }
3868
3869 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3870   // achieve better constant materialization.
3871 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3872 SplatValue = SignExtend64<32>(SplatValue);
3873
3874 // Since we can't introduce illegal i64 types at this stage, we can only
3875 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3876 // way we can use RVV instructions to splat.
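    // For instance (illustrative): on RV32, an amalgamated i64 splat value of
    // -1 (all source lanes 0xff) can still be splatted via vmv.v.x of an i32
    // -1, since -1 is its own sign extension, whereas a value like
    // 0x0102030405060708 cannot and this path is skipped.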
3877 assert((ViaIntVT.bitsLE(XLenVT) ||
3878 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3879 "Unexpected bitcast sequence");
3880 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3881 SDValue ViaVL =
3882 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3883 MVT ViaContainerVT =
3884 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3885 SDValue Splat =
3886 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3887 DAG.getUNDEF(ViaContainerVT),
3888 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3889 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3890       if (ViaVecLen != RequiredVL)
3891         Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3892                             MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3893 DAG.getConstant(0, DL, XLenVT));
3894 return DAG.getBitcast(VT, Splat);
3895 }
3896 }
3897
3898 // If the number of signbits allows, see if we can lower as a <N x i8>.
3899 // Our main goal here is to reduce LMUL (and thus work) required to
3900 // build the constant, but we will also narrow if the resulting
3901 // narrow vector is known to materialize cheaply.
3902 // TODO: We really should be costing the smaller vector. There are
3903 // profitable cases this misses.
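  // For example (illustrative): a v4i16 constant whose lanes all lie in
  // [-128, 127] has at least 9 sign bits per element, so it is built as a
  // v4i8 build_vector (half the work) and sign-extended back to v4i16 with a
  // vsext.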
3904 if (EltBitSize > 8 && VT.isInteger() &&
3905 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3906 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3907 if (EltBitSize - SignBits < 8) {
3908 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3909 DL, Op->ops());
3910 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3911 Source, DAG, Subtarget);
3912 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3913 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3914 }
3915 }
3916
3917 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3918 return Res;
3919
3920 // For constant vectors, use generic constant pool lowering. Otherwise,
3921 // we'd have to materialize constants in GPRs just to move them into the
3922 // vector.
3923 return SDValue();
3924}
3925
3926static unsigned getPACKOpcode(unsigned DestBW,
3927 const RISCVSubtarget &Subtarget) {
3928 switch (DestBW) {
3929 default:
3930 llvm_unreachable("Unsupported pack size");
3931 case 16:
3932 return RISCV::PACKH;
3933 case 32:
3934 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3935 case 64:
3936 assert(Subtarget.is64Bit());
3937 return RISCV::PACK;
3938 }
3939}
3940
3941/// Double the element size of the build vector to reduce the number
3942/// of vslide1down in the build vector chain. In the worst case, this
3943/// trades three scalar operations for 1 vector operation. Scalar
3944/// operations are generally lower latency, and for out-of-order cores
3945/// we also benefit from additional parallelism.
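/// For instance (illustrative): on RV64 with Zba/Zbb and Zbkb, a v8i16
/// build_vector is rewritten as a v4i32 build_vector whose scalars are formed
/// with packw from adjacent element pairs, halving the number of vslide1down
/// steps, and the result is bitcast back to v8i16.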
3946 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3947                                           const RISCVSubtarget &Subtarget) {
3948 SDLoc DL(Op);
3949 MVT VT = Op.getSimpleValueType();
3950 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3951 MVT ElemVT = VT.getVectorElementType();
3952 if (!ElemVT.isInteger())
3953 return SDValue();
3954
3955 // TODO: Relax these architectural restrictions, possibly with costing
3956 // of the actual instructions required.
3957 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3958 return SDValue();
3959
3960 unsigned NumElts = VT.getVectorNumElements();
3961 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
3962 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
3963 NumElts % 2 != 0)
3964 return SDValue();
3965
3966 // Produce [B,A] packed into a type twice as wide. Note that all
3967 // scalars are XLenVT, possibly masked (see below).
3968 MVT XLenVT = Subtarget.getXLenVT();
3969 SDValue Mask = DAG.getConstant(
3970 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
3971 auto pack = [&](SDValue A, SDValue B) {
3972 // Bias the scheduling of the inserted operations to near the
3973 // definition of the element - this tends to reduce register
3974 // pressure overall.
3975 SDLoc ElemDL(B);
3976 if (Subtarget.hasStdExtZbkb())
3977 // Note that we're relying on the high bits of the result being
3978 // don't care. For PACKW, the result is *sign* extended.
3979 return SDValue(
3980 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
3981 ElemDL, XLenVT, A, B),
3982 0);
3983
3984 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
3985 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
3986 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
3987 SDNodeFlags Flags;
3988 Flags.setDisjoint(true);
3989 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
3990 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);
3991 };
3992
3993 SmallVector<SDValue> NewOperands;
3994 NewOperands.reserve(NumElts / 2);
3995 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
3996 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
3997 assert(NumElts == NewOperands.size() * 2);
3998 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
3999 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4000 return DAG.getNode(ISD::BITCAST, DL, VT,
4001 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4002}
4003
4004 // Convert a vXf16 build_vector to a vXi16 one with bitcasts.
4005 static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
4006   MVT VT = Op.getSimpleValueType();
4007   MVT IVT = VT.changeVectorElementType(MVT::i16);
4008   SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4009   for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
4010 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4011 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
4012 return DAG.getBitcast(VT, Res);
4013}
4014
4015 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4016                                  const RISCVSubtarget &Subtarget) {
4017 MVT VT = Op.getSimpleValueType();
4018 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4019
4020 // If we don't have scalar f16, we need to bitcast to an i16 vector.
4021 if (VT.getVectorElementType() == MVT::f16 &&
4022 !Subtarget.hasStdExtZfhmin())
4023 return lowerBUILD_VECTORvXf16(Op, DAG);
4024
4025 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4026       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4027     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4028
4029 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4030
4031 SDLoc DL(Op);
4032 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4033
4034 MVT XLenVT = Subtarget.getXLenVT();
4035
4036 if (VT.getVectorElementType() == MVT::i1) {
4037 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4038 // vector type, we have a legal equivalently-sized i8 type, so we can use
4039 // that.
4040 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4041 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4042
4043 SDValue WideVec;
4044 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4045 // For a splat, perform a scalar truncate before creating the wider
4046 // vector.
4047 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4048 DAG.getConstant(1, DL, Splat.getValueType()));
4049 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4050 } else {
4051 SmallVector<SDValue, 8> Ops(Op->op_values());
4052 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4053 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4054 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4055 }
4056
4057 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4058 }
4059
4060 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4061 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4062 return Gather;
4063     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4064                                         : RISCVISD::VMV_V_X_VL;
4065     if (!VT.isFloatingPoint())
4066 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4067 Splat =
4068 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4069 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4070 }
4071
4072 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4073 return Res;
4074
4075 // If we're compiling for an exact VLEN value, we can split our work per
4076 // register in the register group.
4077 if (const auto VLen = Subtarget.getRealVLen();
4078 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4079 MVT ElemVT = VT.getVectorElementType();
4080 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4081 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4082 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4083 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4084 assert(M1VT == getLMUL1VT(M1VT));
4085
4086 // The following semantically builds up a fixed length concat_vector
4087 // of the component build_vectors. We eagerly lower to scalable and
4088 // insert_subvector here to avoid DAG combining it back to a large
4089 // build_vector.
4090 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
4091 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4092 SDValue Vec = DAG.getUNDEF(ContainerVT);
4093 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4094 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4095 SDValue SubBV =
4096 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4097 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4098 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4099 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4100 DAG.getVectorIdxConstant(InsertIdx, DL));
4101 }
4102 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4103 }
4104
4105 // If we're about to resort to vslide1down (or stack usage), pack our
4106 // elements into the widest scalar type we can. This will force a VL/VTYPE
4107 // toggle, but reduces the critical path, the number of vslide1down ops
4108 // required, and possibly enables scalar folds of the values.
4109 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4110 return Res;
4111
4112 // For m1 vectors, if we have non-undef values in both halves of our vector,
4113 // split the vector into low and high halves, build them separately, then
4114 // use a vselect to combine them. For long vectors, this cuts the critical
4115 // path of the vslide1down sequence in half, and gives us an opportunity
4116 // to special case each half independently. Note that we don't change the
4117 // length of the sub-vectors here, so if both fallback to the generic
4118 // vslide1down path, we should be able to fold the vselect into the final
4119 // vslidedown (for the undef tail) for the first half w/ masking.
4120 unsigned NumElts = VT.getVectorNumElements();
4121 unsigned NumUndefElts =
4122 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4123 unsigned NumDefElts = NumElts - NumUndefElts;
4124 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4125 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4126 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4127 SmallVector<SDValue> MaskVals;
4128 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4129 SubVecAOps.reserve(NumElts);
4130 SubVecBOps.reserve(NumElts);
4131 for (unsigned i = 0; i < NumElts; i++) {
4132 SDValue Elem = Op->getOperand(i);
4133 if (i < NumElts / 2) {
4134 SubVecAOps.push_back(Elem);
4135 SubVecBOps.push_back(UndefElem);
4136 } else {
4137 SubVecAOps.push_back(UndefElem);
4138 SubVecBOps.push_back(Elem);
4139 }
4140 bool SelectMaskVal = (i < NumElts / 2);
4141 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4142 }
4143 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4144 MaskVals.size() == NumElts);
4145
4146 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4147 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4148 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4149 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4150 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4151 }
4152
4153 // Cap the cost at a value linear to the number of elements in the vector.
4154 // The default lowering is to use the stack. The vector store + scalar loads
4155 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4156 // being (at least) linear in LMUL. As a result, using the vslidedown
4157   // lowering for every element ends up being VL*LMUL.
4158 // TODO: Should we be directly costing the stack alternative? Doing so might
4159 // give us a more accurate upper bound.
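  // For example (illustrative): an 8-element build_vector at LMUL_4 gets a
  // budget of 16 against a per-slide cost of 4 (see the switch below), so at
  // most four slide operations are allowed before we bail out and let the
  // default stack-based lowering run.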
4160 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4161
4162 // TODO: unify with TTI getSlideCost.
4163 InstructionCost PerSlideCost = 1;
4164 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4165   default: break;
4166   case RISCVII::VLMUL::LMUL_2:
4167     PerSlideCost = 2;
4168     break;
4169   case RISCVII::VLMUL::LMUL_4:
4170     PerSlideCost = 4;
4171     break;
4172   case RISCVII::VLMUL::LMUL_8:
4173     PerSlideCost = 8;
4174     break;
4175 }
4176
4177 // TODO: Should we be using the build instseq then cost + evaluate scheme
4178 // we use for integer constants here?
4179 unsigned UndefCount = 0;
4180 for (const SDValue &V : Op->ops()) {
4181 if (V.isUndef()) {
4182 UndefCount++;
4183 continue;
4184 }
4185 if (UndefCount) {
4186 LinearBudget -= PerSlideCost;
4187 UndefCount = 0;
4188 }
4189 LinearBudget -= PerSlideCost;
4190 }
4191 if (UndefCount) {
4192 LinearBudget -= PerSlideCost;
4193 }
4194
4195 if (LinearBudget < 0)
4196 return SDValue();
4197
4198 assert((!VT.isFloatingPoint() ||
4199 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4200 "Illegal type which will result in reserved encoding");
4201
4202 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4203
4204 SDValue Vec;
4205 UndefCount = 0;
4206 for (SDValue V : Op->ops()) {
4207 if (V.isUndef()) {
4208 UndefCount++;
4209 continue;
4210 }
4211
4212 // Start our sequence with a TA splat in the hopes that hardware is able to
4213 // recognize there's no dependency on the prior value of our temporary
4214 // register.
4215 if (!Vec) {
4216 Vec = DAG.getSplatVector(VT, DL, V);
4217 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4218 UndefCount = 0;
4219 continue;
4220 }
4221
4222 if (UndefCount) {
4223 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4224 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4225 Vec, Offset, Mask, VL, Policy);
4226 UndefCount = 0;
4227 }
4228     auto OpCode =
4229         VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4230     if (!VT.isFloatingPoint())
4231 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4232 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4233 V, Mask, VL);
4234 }
4235 if (UndefCount) {
4236 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4237 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4238 Vec, Offset, Mask, VL, Policy);
4239 }
4240 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4241}
4242
4243 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4244                                    SDValue Lo, SDValue Hi, SDValue VL,
4245                                    SelectionDAG &DAG) {
4246 if (!Passthru)
4247 Passthru = DAG.getUNDEF(VT);
4248 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4249 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4250 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4251 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4252 // node in order to try and match RVV vector/scalar instructions.
4253 if ((LoC >> 31) == HiC)
4254 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4255
4256 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4257 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4258 // vlmax vsetvli or vsetivli to change the VL.
4259 // FIXME: Support larger constants?
4260 // FIXME: Support non-constant VLs by saturating?
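    // For instance (illustrative): splatting the i64 constant
    // 0x0000000500000005 with VL=4 on RV32 becomes a vmv.v.x of 5 into an i32
    // vector with VL=8 (hence the doubling of VL below), followed by a
    // bitcast back to the i64 vector type.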
4261 if (LoC == HiC) {
4262 SDValue NewVL;
4263 if (isAllOnesConstant(VL) ||
4264 (isa<RegisterSDNode>(VL) &&
4265 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4266 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4267 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4268 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4269
4270 if (NewVL) {
4271 MVT InterVT =
4272 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4273 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4274 DAG.getUNDEF(InterVT), Lo, NewVL);
4275 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4276 }
4277 }
4278 }
4279
4280 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4281 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4282 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4283 Hi.getConstantOperandVal(1) == 31)
4284 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4285
4286 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4287 // even if it might be sign extended.
4288 if (Hi.isUndef())
4289 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4290
4291 // Fall back to a stack store and stride x0 vector load.
4292 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4293 Hi, VL);
4294}
4295
4296// Called by type legalization to handle splat of i64 on RV32.
4297// FIXME: We can optimize this when the type has sign or zero bits in one
4298// of the halves.
4299static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4300 SDValue Scalar, SDValue VL,
4301 SelectionDAG &DAG) {
4302 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4303 SDValue Lo, Hi;
4304 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4305 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4306}
4307
4308 // This function lowers a splat of a scalar operand Scalar with the vector
4309// length VL. It ensures the final sequence is type legal, which is useful when
4310// lowering a splat after type legalization.
4311static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4312 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4313 const RISCVSubtarget &Subtarget) {
4314 bool HasPassthru = Passthru && !Passthru.isUndef();
4315 if (!HasPassthru && !Passthru)
4316 Passthru = DAG.getUNDEF(VT);
4317 if (VT.isFloatingPoint())
4318 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4319
4320 MVT XLenVT = Subtarget.getXLenVT();
4321
4322 // Simplest case is that the operand needs to be promoted to XLenVT.
4323 if (Scalar.getValueType().bitsLE(XLenVT)) {
4324 // If the operand is a constant, sign extend to increase our chances
4325     // of being able to use a .vi instruction. ANY_EXTEND would become a
4326     // zero extend and the simm5 check in isel would fail.
4327 // FIXME: Should we ignore the upper bits in isel instead?
4328 unsigned ExtOpc =
4329 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4330 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4331 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4332 }
4333
4334 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4335 "Unexpected scalar for splat lowering!");
4336
4337 if (isOneConstant(VL) && isNullConstant(Scalar))
4338 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4339 DAG.getConstant(0, DL, XLenVT), VL);
4340
4341 // Otherwise use the more complicated splatting algorithm.
4342 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4343}
4344
4345// This function lowers an insert of a scalar operand Scalar into lane
4346// 0 of the vector regardless of the value of VL. The contents of the
4347// remaining lanes of the result vector are unspecified. VL is assumed
4348// to be non-zero.
4349 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4350                                  const SDLoc &DL, SelectionDAG &DAG,
4351 const RISCVSubtarget &Subtarget) {
4352 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4353
4354 const MVT XLenVT = Subtarget.getXLenVT();
4355 SDValue Passthru = DAG.getUNDEF(VT);
4356
4357 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4358 isNullConstant(Scalar.getOperand(1))) {
4359 SDValue ExtractedVal = Scalar.getOperand(0);
4360 // The element types must be the same.
4361 if (ExtractedVal.getValueType().getVectorElementType() ==
4362 VT.getVectorElementType()) {
4363 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4364 MVT ExtractedContainerVT = ExtractedVT;
4365 if (ExtractedContainerVT.isFixedLengthVector()) {
4366 ExtractedContainerVT = getContainerForFixedLengthVector(
4367 DAG, ExtractedContainerVT, Subtarget);
4368 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4369 ExtractedVal, DAG, Subtarget);
4370 }
4371 if (ExtractedContainerVT.bitsLE(VT))
4372 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4373 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4374 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4375 DAG.getVectorIdxConstant(0, DL));
4376 }
4377 }
4378
4379
4380 if (VT.isFloatingPoint())
4381 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4382 DAG.getUNDEF(VT), Scalar, VL);
4383
4384 // Avoid the tricky legalization cases by falling back to using the
4385 // splat code which already handles it gracefully.
4386 if (!Scalar.getValueType().bitsLE(XLenVT))
4387 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4388 DAG.getConstant(1, DL, XLenVT),
4389 VT, DL, DAG, Subtarget);
4390
4391 // If the operand is a constant, sign extend to increase our chances
4392   // of being able to use a .vi instruction. ANY_EXTEND would become a
4393   // zero extend and the simm5 check in isel would fail.
4394 // FIXME: Should we ignore the upper bits in isel instead?
4395 unsigned ExtOpc =
4396 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4397 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4398 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4399 DAG.getUNDEF(VT), Scalar, VL);
4400}
4401
4402 // Is this a shuffle that extracts either the even or odd elements of a vector?
4403// That is, specifically, either (a) or (b) below.
4404// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4405// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4406// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4407// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4408 // Returns {Src Vector, Even Elements} on success.
4409static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4410 SDValue V2, ArrayRef<int> Mask,
4411 const RISCVSubtarget &Subtarget) {
4412 // Need to be able to widen the vector.
4413 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4414 return false;
4415
4416   // Both inputs must be extracts.
4417 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4418 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4419 return false;
4420
4421 // Extracting from the same source.
4422 SDValue Src = V1.getOperand(0);
4423 if (Src != V2.getOperand(0))
4424 return false;
4425
4426 // Src needs to have twice the number of elements.
4427 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4428 return false;
4429
4430 // The extracts must extract the two halves of the source.
4431 if (V1.getConstantOperandVal(1) != 0 ||
4432 V2.getConstantOperandVal(1) != Mask.size())
4433 return false;
4434
4435 // First index must be the first even or odd element from V1.
4436 if (Mask[0] != 0 && Mask[0] != 1)
4437 return false;
4438
4439 // The others must increase by 2 each time.
4440 // TODO: Support undef elements?
4441 for (unsigned i = 1; i != Mask.size(); ++i)
4442 if (Mask[i] != Mask[i - 1] + 2)
4443 return false;
4444
4445 return true;
4446}
4447
4448 /// Is this a shuffle that interleaves contiguous elements from one vector into
4449 /// the even elements and contiguous elements from another vector into the odd
4450 /// elements? \p EvenSrc will contain the element that should be in the first
4451/// even element. \p OddSrc will contain the element that should be in the first
4452/// odd element. These can be the first element in a source or the element half
4453/// way through the source.
4454static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4455 int &OddSrc, const RISCVSubtarget &Subtarget) {
4456 // We need to be able to widen elements to the next larger integer type.
4457 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4458 return false;
4459
4460 int Size = Mask.size();
4461 int NumElts = VT.getVectorNumElements();
4462 assert(Size == (int)NumElts && "Unexpected mask size");
4463
4464 SmallVector<unsigned, 2> StartIndexes;
4465 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4466 return false;
4467
4468 EvenSrc = StartIndexes[0];
4469 OddSrc = StartIndexes[1];
4470
4471 // One source should be low half of first vector.
4472 if (EvenSrc != 0 && OddSrc != 0)
4473 return false;
4474
4475   // Subvectors will be extracted from either the start of the two input
4476   // vectors, or from the start and middle of the first vector if it's a unary
4477   // interleave.
4478 // In both cases, HalfNumElts will be extracted.
4479 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4480 // we'll create an illegal extract_subvector.
4481 // FIXME: We could support other values using a slidedown first.
4482 int HalfNumElts = NumElts / 2;
4483 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4484}
4485
4486/// Match shuffles that concatenate two vectors, rotate the concatenation,
4487/// and then extract the original number of elements from the rotated result.
4488/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4489/// returned rotation amount is for a rotate right, where elements move from
4490/// higher elements to lower elements. \p LoSrc indicates the first source
4491/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4492/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4493/// 0 or 1 if a rotation is found.
4494///
4495/// NOTE: We talk about rotate to the right which matches how bit shift and
4496/// rotate instructions are described where LSBs are on the right, but LLVM IR
4497/// and the table below write vectors with the lowest elements on the left.
4498static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4499 int Size = Mask.size();
4500
4501 // We need to detect various ways of spelling a rotation:
4502 // [11, 12, 13, 14, 15, 0, 1, 2]
4503 // [-1, 12, 13, 14, -1, -1, 1, -1]
4504 // [-1, -1, -1, -1, -1, -1, 1, 2]
4505 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4506 // [-1, 4, 5, 6, -1, -1, 9, -1]
4507 // [-1, 4, 5, 6, -1, -1, -1, -1]
4508 int Rotation = 0;
4509 LoSrc = -1;
4510 HiSrc = -1;
4511 for (int i = 0; i != Size; ++i) {
4512 int M = Mask[i];
4513 if (M < 0)
4514 continue;
4515
4516 // Determine where a rotate vector would have started.
4517 int StartIdx = i - (M % Size);
4518 // The identity rotation isn't interesting, stop.
4519 if (StartIdx == 0)
4520 return -1;
4521
4522 // If we found the tail of a vector the rotation must be the missing
4523 // front. If we found the head of a vector, it must be how much of the
4524 // head.
4525 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4526
4527 if (Rotation == 0)
4528 Rotation = CandidateRotation;
4529 else if (Rotation != CandidateRotation)
4530 // The rotations don't match, so we can't match this mask.
4531 return -1;
4532
4533 // Compute which value this mask is pointing at.
4534 int MaskSrc = M < Size ? 0 : 1;
4535
4536 // Compute which of the two target values this index should be assigned to.
4537     // This reflects whether the high elements are remaining or the low elements
4538 // are remaining.
4539 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4540
4541 // Either set up this value if we've not encountered it before, or check
4542 // that it remains consistent.
4543 if (TargetSrc < 0)
4544 TargetSrc = MaskSrc;
4545 else if (TargetSrc != MaskSrc)
4546 // This may be a rotation, but it pulls from the inputs in some
4547 // unsupported interleaving.
4548 return -1;
4549 }
4550
4551 // Check that we successfully analyzed the mask, and normalize the results.
4552 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4553 assert((LoSrc >= 0 || HiSrc >= 0) &&
4554 "Failed to find a rotated input vector!");
4555
4556 return Rotation;
4557}
4558
4559// Lower a deinterleave shuffle to vnsrl.
4560// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4561// -> [p, q, r, s] (EvenElts == false)
4562// VT is the type of the vector to return, <[vscale x ]n x ty>
4563// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
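// For example (illustrative): taking the even elements of a v8i8 source into
// a v4i8 result bitcasts the source to v4i16 and narrows it with a shift of 0
// (roughly vnsrl.wi vd, vs, 0); the odd elements use a shift equal to the
// element width, i.e. vnsrl.wi vd, vs, 8.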
4564 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4565                                        bool EvenElts,
4566 const RISCVSubtarget &Subtarget,
4567 SelectionDAG &DAG) {
4568 // The result is a vector of type <m x n x ty>
4569 MVT ContainerVT = VT;
4570 // Convert fixed vectors to scalable if needed
4571 if (ContainerVT.isFixedLengthVector()) {
4572 assert(Src.getSimpleValueType().isFixedLengthVector());
4573 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4574
4575 // The source is a vector of type <m x n*2 x ty>
4576     MVT SrcContainerVT =
4577         MVT::getVectorVT(ContainerVT.getVectorElementType(),
4578                          ContainerVT.getVectorElementCount() * 2);
4579 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4580 }
4581
4582 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4583
4584 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4585 // This also converts FP to int.
4586 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4587 MVT WideSrcContainerVT = MVT::getVectorVT(
4588 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4589 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4590
4591 // The integer version of the container type.
4592 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4593
4594 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4595 // the original element size.
4596 unsigned Shift = EvenElts ? 0 : EltBits;
4597 SDValue SplatShift = DAG.getNode(
4598 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4599 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4600 SDValue Res =
4601 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4602 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4603 // Cast back to FP if needed.
4604 Res = DAG.getBitcast(ContainerVT, Res);
4605
4606 if (VT.isFixedLengthVector())
4607 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4608 return Res;
4609}
4610
4611// Lower the following shuffle to vslidedown.
4612// a)
4613// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4614// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4615// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4616// b)
4617// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4618// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4619// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4620// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4621// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4622// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4623 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4624                                                SDValue V1, SDValue V2,
4625 ArrayRef<int> Mask,
4626 const RISCVSubtarget &Subtarget,
4627 SelectionDAG &DAG) {
4628 auto findNonEXTRACT_SUBVECTORParent =
4629 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4630 uint64_t Offset = 0;
4631 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4632 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4633 // a scalable vector. But we don't want to match the case.
4634 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4635 Offset += Parent.getConstantOperandVal(1);
4636 Parent = Parent.getOperand(0);
4637 }
4638 return std::make_pair(Parent, Offset);
4639 };
4640
4641 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4642 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4643
4644 // Extracting from the same source.
4645 SDValue Src = V1Src;
4646 if (Src != V2Src)
4647 return SDValue();
4648
4649 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4650 SmallVector<int, 16> NewMask(Mask);
4651 for (size_t i = 0; i != NewMask.size(); ++i) {
4652 if (NewMask[i] == -1)
4653 continue;
4654
4655 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4656 NewMask[i] = NewMask[i] + V1IndexOffset;
4657 } else {
4658 // Minus NewMask.size() is needed. Otherwise, the b case would be
4659 // <5,6,7,12> instead of <5,6,7,8>.
4660 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4661 }
4662 }
4663
4664 // First index must be known and non-zero. It will be used as the slidedown
4665 // amount.
4666 if (NewMask[0] <= 0)
4667 return SDValue();
4668
4669   // NewMask must also be contiguous.
4670 for (unsigned i = 1; i != NewMask.size(); ++i)
4671 if (NewMask[i - 1] + 1 != NewMask[i])
4672 return SDValue();
4673
4674 MVT XLenVT = Subtarget.getXLenVT();
4675 MVT SrcVT = Src.getSimpleValueType();
4676 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4677 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4678 SDValue Slidedown =
4679 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4680 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4681 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4682   return DAG.getNode(
4683       ISD::EXTRACT_SUBVECTOR, DL, VT,
4684       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4685 DAG.getConstant(0, DL, XLenVT));
4686}
4687
4688// Because vslideup leaves the destination elements at the start intact, we can
4689// use it to perform shuffles that insert subvectors:
4690//
4691// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4692// ->
4693// vsetvli zero, 8, e8, mf2, ta, ma
4694// vslideup.vi v8, v9, 4
4695//
4696// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4697// ->
4698// vsetvli zero, 5, e8, mf2, tu, ma
4699 // vslideup.vi v8, v9, 2
4700 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4701                                              SDValue V1, SDValue V2,
4702 ArrayRef<int> Mask,
4703 const RISCVSubtarget &Subtarget,
4704 SelectionDAG &DAG) {
4705 unsigned NumElts = VT.getVectorNumElements();
4706 int NumSubElts, Index;
4707 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4708 Index))
4709 return SDValue();
4710
4711 bool OpsSwapped = Mask[Index] < (int)NumElts;
4712 SDValue InPlace = OpsSwapped ? V2 : V1;
4713 SDValue ToInsert = OpsSwapped ? V1 : V2;
4714
4715 MVT XLenVT = Subtarget.getXLenVT();
4716 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4717 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4718 // We slide up by the index that the subvector is being inserted at, and set
4719 // VL to the index + the number of elements being inserted.
4720   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4721   // If we're adding a suffix to the in place vector, i.e. inserting right
4722 // up to the very end of it, then we don't actually care about the tail.
4723 if (NumSubElts + Index >= (int)NumElts)
4724 Policy |= RISCVII::TAIL_AGNOSTIC;
4725
4726 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4727 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4728 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4729
4730 SDValue Res;
4731 // If we're inserting into the lowest elements, use a tail undisturbed
4732 // vmv.v.v.
4733 if (Index == 0)
4734 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4735 VL);
4736 else
4737 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4738 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4739 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4740}
4741
4742/// Match v(f)slide1up/down idioms. These operations involve sliding
4743/// N-1 elements to make room for an inserted scalar at one end.
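/// For example (illustrative): a shuffle producing [S, V[0], V[1], V[2]],
/// where S is a splatted scalar, maps to a vslide1up.vx of V by S, and the
/// mirrored pattern [V[1], V[2], V[3], S] maps to vslide1down.vx.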
4744 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4745                                             SDValue V1, SDValue V2,
4746 ArrayRef<int> Mask,
4747 const RISCVSubtarget &Subtarget,
4748 SelectionDAG &DAG) {
4749 bool OpsSwapped = false;
4750 if (!isa<BuildVectorSDNode>(V1)) {
4751 if (!isa<BuildVectorSDNode>(V2))
4752 return SDValue();
4753 std::swap(V1, V2);
4754 OpsSwapped = true;
4755 }
4756 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4757 if (!Splat)
4758 return SDValue();
4759
4760 // Return true if the mask could describe a slide of Mask.size() - 1
4761 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4762 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4763 const unsigned S = (Offset > 0) ? 0 : -Offset;
4764 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4765 for (unsigned i = S; i != E; ++i)
4766 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4767 return false;
4768 return true;
4769 };
4770
4771 const unsigned NumElts = VT.getVectorNumElements();
4772 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4773 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4774 return SDValue();
4775
4776 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4777 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4778 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4779 return SDValue();
4780
4781 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4782 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4783   auto OpCode = IsVSlidedown ?
4784     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4785     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4786   if (!VT.isFloatingPoint())
4787 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4788 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4789 DAG.getUNDEF(ContainerVT),
4790 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4791 Splat, TrueMask, VL);
4792 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4793}
4794
4795// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4796// to create an interleaved vector of <[vscale x] n*2 x ty>.
4797// This requires that the size of ty is less than the subtarget's maximum ELEN.
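// Illustrative example (not from the source): interleaving EvenV = [a0, a1]
// and OddV = [b0, b1] with SEW=16 computes zext(a) + zext(b) with vwaddu.vv,
// then adds b * 0xffff with vwmaccu.vx, leaving a + (b << 16) in each i32
// lane; bitcasting back to i16 yields [a0, b0, a1, b1].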
4798 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4799                                      const SDLoc &DL, SelectionDAG &DAG,
4800 const RISCVSubtarget &Subtarget) {
4801 MVT VecVT = EvenV.getSimpleValueType();
4802 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4803 // Convert fixed vectors to scalable if needed
4804 if (VecContainerVT.isFixedLengthVector()) {
4805 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4806 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4807 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4808 }
4809
4810 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4811
4812 // We're working with a vector of the same size as the resulting
4813 // interleaved vector, but with half the number of elements and
4814 // twice the SEW (Hence the restriction on not using the maximum
4815 // ELEN)
4816   MVT WideVT =
4817       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4818                        VecVT.getVectorElementCount());
4819 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4820 if (WideContainerVT.isFixedLengthVector())
4821 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4822
4823 // Bitcast the input vectors to integers in case they are FP
4824 VecContainerVT = VecContainerVT.changeTypeToInteger();
4825 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4826 OddV = DAG.getBitcast(VecContainerVT, OddV);
4827
4828 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4829 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4830
4831 SDValue Interleaved;
4832 if (OddV.isUndef()) {
4833 // If OddV is undef, this is a zero extend.
4834 // FIXME: Not only does this optimize the code, it fixes some correctness
4835 // issues because MIR does not have freeze.
4836 Interleaved =
4837 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4838 } else if (Subtarget.hasStdExtZvbb()) {
4839 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4840 SDValue OffsetVec =
4841 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4842 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4843 OffsetVec, Passthru, Mask, VL);
4844 if (!EvenV.isUndef())
4845 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4846 Interleaved, EvenV, Passthru, Mask, VL);
4847 } else if (EvenV.isUndef()) {
4848 Interleaved =
4849 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4850
4851 SDValue OffsetVec =
4852 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4853 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4854 Interleaved, OffsetVec, Passthru, Mask, VL);
4855 } else {
4856 // FIXME: We should freeze the odd vector here. We already handled the case
4857 // of provably undef/poison above.
4858
4859 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4860 // vwaddu.vv
4861 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4862 OddV, Passthru, Mask, VL);
4863
4864     // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV times all-ones.
4865 SDValue AllOnesVec = DAG.getSplatVector(
4866 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4867 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4868 OddV, AllOnesVec, Passthru, Mask, VL);
4869
4870 // Add the two together so we get
4871 // (OddV * 0xff...ff) + (OddV + EvenV)
4872 // = (OddV * 0x100...00) + EvenV
4873 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4874     // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4875 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4876 Interleaved, OddsMul, Passthru, Mask, VL);
4877 }
4878
4879 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4880 MVT ResultContainerVT = MVT::getVectorVT(
4881 VecVT.getVectorElementType(), // Make sure to use original type
4882 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4883 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4884
4885 // Convert back to a fixed vector if needed
4886   MVT ResultVT =
4887       MVT::getVectorVT(VecVT.getVectorElementType(),
4888                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4889   if (ResultVT.isFixedLengthVector())
4890 Interleaved =
4891 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4892
4893 return Interleaved;
4894}
4895
4896// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4897// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4898 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4899                                       SelectionDAG &DAG,
4900 const RISCVSubtarget &Subtarget) {
4901 SDLoc DL(SVN);
4902 MVT VT = SVN->getSimpleValueType(0);
4903 SDValue V = SVN->getOperand(0);
4904 unsigned NumElts = VT.getVectorNumElements();
4905
4906 assert(VT.getVectorElementType() == MVT::i1);
4907
4908   if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4909                                         SVN->getMask().size()) ||
4910 !SVN->getOperand(1).isUndef())
4911 return SDValue();
4912
4913 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4914 EVT ViaVT = EVT::getVectorVT(
4915 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4916 EVT ViaBitVT =
4917 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4918
4919 // If we don't have zvbb or the larger element type > ELEN, the operation will
4920 // be illegal.
4921   if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4922                                                                ViaVT) ||
4923 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4924 return SDValue();
4925
4926 // If the bit vector doesn't fit exactly into the larger element type, we need
4927 // to insert it into the larger vector and then shift up the reversed bits
4928 // afterwards to get rid of the gap introduced.
4929 if (ViaEltSize > NumElts)
4930 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4931 V, DAG.getVectorIdxConstant(0, DL));
4932
4933 SDValue Res =
4934 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4935
4936 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4937 // element type.
4938 if (ViaEltSize > NumElts)
4939 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4940 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4941
4942 Res = DAG.getBitcast(ViaBitVT, Res);
4943
4944 if (ViaEltSize > NumElts)
4945 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4946 DAG.getVectorIdxConstant(0, DL));
4947 return Res;
4948}
4949
4950 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4951                              SelectionDAG &DAG,
4952 const RISCVSubtarget &Subtarget,
4953 MVT &RotateVT, unsigned &RotateAmt) {
4954 SDLoc DL(SVN);
4955
4956 EVT VT = SVN->getValueType(0);
4957 unsigned NumElts = VT.getVectorNumElements();
4958 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4959 unsigned NumSubElts;
4960 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4961 NumElts, NumSubElts, RotateAmt))
4962 return false;
4963 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4964 NumElts / NumSubElts);
4965
4966 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4967 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4968}
4969
4970// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4971// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4972// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4973 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4974                                            SelectionDAG &DAG,
4975 const RISCVSubtarget &Subtarget) {
4976 SDLoc DL(SVN);
4977
4978 EVT VT = SVN->getValueType(0);
4979 unsigned RotateAmt;
4980 MVT RotateVT;
4981 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4982 return SDValue();
4983
4984 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4985
4986 SDValue Rotate;
4987 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4988 // so canonicalize to vrev8.
4989 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4990 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4991 else
4992 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4993 DAG.getConstant(RotateAmt, DL, RotateVT));
4994
4995 return DAG.getBitcast(VT, Rotate);
4996}
4997
4998// If compiling with an exactly known VLEN, see if we can split a
4999// shuffle on m2 or larger into a small number of m1 sized shuffles
5000 // which write each destination register exactly once.
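// For instance (illustrative): with a known VLEN of 128, a v16i32 shuffle
// (an m4 register group) whose mask moves whole 4-element chunks between
// registers can be emitted as up to four independent m1 shuffles plus
// insert_subvectors, rather than one large vrgather.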
5001 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5002                                             SelectionDAG &DAG,
5003 const RISCVSubtarget &Subtarget) {
5004 SDLoc DL(SVN);
5005 MVT VT = SVN->getSimpleValueType(0);
5006 SDValue V1 = SVN->getOperand(0);
5007 SDValue V2 = SVN->getOperand(1);
5008 ArrayRef<int> Mask = SVN->getMask();
5009 unsigned NumElts = VT.getVectorNumElements();
5010
5011 // If we don't know exact data layout, not much we can do. If this
5012 // is already m1 or smaller, no point in splitting further.
5013 const auto VLen = Subtarget.getRealVLen();
5014 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5015 return SDValue();
5016
5017 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5018 // expansion for.
5019 unsigned RotateAmt;
5020 MVT RotateVT;
5021 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5022 return SDValue();
5023
5024 MVT ElemVT = VT.getVectorElementType();
5025 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5026 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5027
5028   SmallVector<std::pair<int, SmallVector<int>>>
5029     OutMasks(VRegsPerSrc, {-1, {}});
5030
5031 // Check if our mask can be done as a 1-to-1 mapping from source
5032 // to destination registers in the group without needing to
5033 // write each destination more than once.
5034 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5035 int DstVecIdx = DstIdx / ElemsPerVReg;
5036 int DstSubIdx = DstIdx % ElemsPerVReg;
5037 int SrcIdx = Mask[DstIdx];
5038 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5039 continue;
5040 int SrcVecIdx = SrcIdx / ElemsPerVReg;
5041 int SrcSubIdx = SrcIdx % ElemsPerVReg;
5042 if (OutMasks[DstVecIdx].first == -1)
5043 OutMasks[DstVecIdx].first = SrcVecIdx;
5044 if (OutMasks[DstVecIdx].first != SrcVecIdx)
5045 // Note: This case could easily be handled by keeping track of a chain
5046 // of source values and generating two element shuffles below. This is
5047 // less an implementation question, and more a profitability one.
5048 return SDValue();
5049
5050 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5051 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5052 }
5053
5054 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5055 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5056 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5057 assert(M1VT == getLMUL1VT(M1VT));
5058 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5059 SDValue Vec = DAG.getUNDEF(ContainerVT);
5060 // The following semantically builds up a fixed length concat_vector
5061 // of the component shuffle_vectors. We eagerly lower to scalable here
5062 // to avoid DAG combining it back to a large shuffle_vector again.
5063 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5064 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5065 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5066 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5067 if (SrcVecIdx == -1)
5068 continue;
5069 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5070 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5071 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5072 DAG.getVectorIdxConstant(ExtractIdx, DL));
5073 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5074 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5075 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5076 unsigned InsertIdx = DstVecIdx * NumOpElts;
5077 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5078 DAG.getVectorIdxConstant(InsertIdx, DL));
5079 }
5080 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5081}
5082
5083 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5084                                    const RISCVSubtarget &Subtarget) {
5085 SDValue V1 = Op.getOperand(0);
5086 SDValue V2 = Op.getOperand(1);
5087 SDLoc DL(Op);
5088 MVT XLenVT = Subtarget.getXLenVT();
5089 MVT VT = Op.getSimpleValueType();
5090 unsigned NumElts = VT.getVectorNumElements();
5091 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5092
5093 if (VT.getVectorElementType() == MVT::i1) {
5094 // Lower to a vror.vi of a larger element type if possible before we promote
5095 // i1s to i8s.
5096 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5097 return V;
5098 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5099 return V;
5100
5101 // Promote i1 shuffle to i8 shuffle.
5102 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5103 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5104 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5105 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5106 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5107 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5108 ISD::SETNE);
5109 }
5110
5111 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5112
5113 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5114
5115 if (SVN->isSplat()) {
5116 const int Lane = SVN->getSplatIndex();
5117 if (Lane >= 0) {
5118 MVT SVT = VT.getVectorElementType();
5119
5120 // Turn splatted vector load into a strided load with an X0 stride.
5121 SDValue V = V1;
5122 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5123 // with undef.
5124 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5125 int Offset = Lane;
5126 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5127 int OpElements =
5128 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5129 V = V.getOperand(Offset / OpElements);
5130 Offset %= OpElements;
5131 }
5132
5133 // We need to ensure the load isn't atomic or volatile.
5134 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5135 auto *Ld = cast<LoadSDNode>(V);
5136 Offset *= SVT.getStoreSize();
5137 SDValue NewAddr = DAG.getMemBasePlusOffset(
5138 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5139
5140 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5141 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5142 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5143 SDValue IntID =
5144 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5145 SDValue Ops[] = {Ld->getChain(),
5146 IntID,
5147 DAG.getUNDEF(ContainerVT),
5148 NewAddr,
5149 DAG.getRegister(RISCV::X0, XLenVT),
5150 VL};
5151 SDValue NewLoad = DAG.getMemIntrinsicNode(
5152 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5153             DAG.getMachineFunction().getMachineMemOperand(
5154                 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5155 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5156 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5157 }
5158
5159 MVT SplatVT = ContainerVT;
5160
5161 // If we don't have Zfh, we need to use an integer scalar load.
5162 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5163 SVT = MVT::i16;
5164 SplatVT = ContainerVT.changeVectorElementType(SVT);
5165 }
5166
5167 // Otherwise use a scalar load and splat. This will give the best
5168 // opportunity to fold a splat into the operation. ISel can turn it into
5169 // the x0 strided load if we aren't able to fold away the select.
5170 if (SVT.isFloatingPoint())
5171 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5172 Ld->getPointerInfo().getWithOffset(Offset),
5173 Ld->getOriginalAlign(),
5174 Ld->getMemOperand()->getFlags());
5175 else
5176 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5177 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5178 Ld->getOriginalAlign(),
5179 Ld->getMemOperand()->getFlags());
5180         DAG.makeEquivalentMemoryOrdering(Ld, V);
5181
5182 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5183                                                  : RISCVISD::VMV_V_X_VL;
5184         SDValue Splat =
5185 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5186 Splat = DAG.getBitcast(ContainerVT, Splat);
5187 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5188 }
5189
5190 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5191 assert(Lane < (int)NumElts && "Unexpected lane!");
5192 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5193 V1, DAG.getConstant(Lane, DL, XLenVT),
5194 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5195 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5196 }
5197 }
5198
5199 // For exact VLEN m2 or greater, try to split to m1 operations if we
5200 // can split cleanly.
5201 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5202 return V;
5203
5204 ArrayRef<int> Mask = SVN->getMask();
5205
5206 if (SDValue V =
5207 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5208 return V;
5209
5210 if (SDValue V =
5211 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5212 return V;
5213
5214 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5215 // available.
5216 if (Subtarget.hasStdExtZvkb())
5217 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5218 return V;
5219
5220 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5221 // be undef which can be handled with a single SLIDEDOWN/UP.
5222 int LoSrc, HiSrc;
5223 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5224 if (Rotation > 0) {
5225 SDValue LoV, HiV;
5226 if (LoSrc >= 0) {
5227 LoV = LoSrc == 0 ? V1 : V2;
5228 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5229 }
5230 if (HiSrc >= 0) {
5231 HiV = HiSrc == 0 ? V1 : V2;
5232 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5233 }
5234
5235 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5236 // to slide LoV up by (NumElts - Rotation).
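// For example, the single-source mask <2,3,0,1> on a <4 x i32> vector gives
// Rotation = 2: the source is slid down by 2 to fill lanes 0-1 and slid up by
// InvRotate = 2 to fill lanes 2-3.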
5237 unsigned InvRotate = NumElts - Rotation;
5238
5239 SDValue Res = DAG.getUNDEF(ContainerVT);
5240 if (HiV) {
5241 // Even though we could use a smaller VL, don't do so, to avoid a vsetivli
5242 // toggle.
5243 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5244 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5245 }
5246 if (LoV)
5247 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5248 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5249 RISCVII::TAIL_AGNOSTIC);
5250
5251 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5252 }
5253
5254 // If this is a deinterleave and we can widen the vector, then we can use
5255 // vnsrl to deinterleave.
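// For example, taking the even elements <0,2,4,6> of a <8 x i8> source amounts
// to viewing it as <4 x i16> and doing a vnsrl by 0; the odd elements <1,3,5,7>
// use a vnsrl by 8.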
5256 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5257 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5258 Subtarget, DAG);
5259 }
5260
5261 if (SDValue V =
5262 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5263 return V;
5264
5265 // Detect an interleave shuffle and lower to
5266 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5267 int EvenSrc, OddSrc;
5268 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5269 // Extract the halves of the vectors.
5270 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5271
5272 int Size = Mask.size();
5273 SDValue EvenV, OddV;
5274 assert(EvenSrc >= 0 && "Undef source?");
5275 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5276 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5277 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5278
5279 assert(OddSrc >= 0 && "Undef source?");
5280 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5281 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5282 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5283
5284 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5285 }
5286
5287
5288 // Handle any remaining single source shuffles
5289 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5290 if (V2.isUndef()) {
5291 // We might be able to express the shuffle as a bitrotate. But even if we
5292 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5293 // shifts and a vor will have a higher throughput than a vrgather.
5294 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5295 return V;
5296
5297 if (VT.getScalarSizeInBits() == 8 &&
5298 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5299 // On such a vector we're unable to use i8 as the index type.
5300 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5301 // may involve vector splitting if we're already at LMUL=8, or our
5302 // user-supplied maximum fixed-length LMUL.
5303 return SDValue();
5304 }
5305
5306 // Base case for the two operand recursion below - handle the worst case
5307 // single source shuffle.
5308 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5309 MVT IndexVT = VT.changeTypeToInteger();
5310 // Since we can't introduce illegal index types at this stage, use i16 and
5311 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5312 // than XLenVT.
5313 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5314 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5315 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5316 }
5317
5318 // If the mask allows, we can do all the index computation in 16 bits. This
5319 // requires less work and less register pressure at high LMUL, and creates
5320 // smaller constants which may be cheaper to materialize.
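// For example, an i64 gather whose i64 index vector would occupy LMUL=8 only
// needs an LMUL=2 vector of i16 indices with vrgatherei16, and the index
// constants are materialized as i16 rather than i64.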
5321 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5322 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5323 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5324 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5325 }
5326
5327 MVT IndexContainerVT =
5328 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5329
5330 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5331 SmallVector<SDValue> GatherIndicesLHS;
5332 for (int MaskIndex : Mask) {
5333 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5334 GatherIndicesLHS.push_back(IsLHSIndex
5335 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5336 : DAG.getUNDEF(XLenVT));
5337 }
5338 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5339 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5340 Subtarget);
5341 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5342 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5343 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5344 }
5345
5346 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5347 // merged with a second vrgather.
5348 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5349
5350 // Now construct the mask that will be used by the blended vrgather operation.
5351 // Construct the appropriate indices into each vector.
5352 for (int MaskIndex : Mask) {
5353 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5354 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5355 ? MaskIndex : -1);
5356 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5357 }
5358
5359 // Try to pick a profitable operand order.
5360 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5361 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5362
5363 // Recursively invoke lowering for each operand if we had two
5364 // independent single source shuffles, and then combine the result via a
5365 // vselect. Note that the vselect will likely be folded back into the
5366 // second permute (vrgather, or other) by the post-isel combine.
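// For example, the two-source mask <0,5,2,7> splits into LHS mask <0,-1,2,-1>
// and RHS mask <-1,1,-1,3>; each half is a single-source shuffle and the final
// vselect takes lanes 0 and 2 from the shuffled LHS and lanes 1 and 3 from the
// shuffled RHS.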
5367 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5368 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5369
5370 SmallVector<SDValue> MaskVals;
5371 for (int MaskIndex : Mask) {
5372 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5373 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5374 }
5375
5376 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5377 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5378 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5379
5380 if (SwapOps)
5381 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5382 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5383}
5384
5385 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5386 // Support splats for any type. These should type legalize well.
5387 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5388 return true;
5389
5390 // Only support legal VTs for other shuffles for now.
5391 if (!isTypeLegal(VT))
5392 return false;
5393
5394 MVT SVT = VT.getSimpleVT();
5395
5396 // Not for i1 vectors.
5397 if (SVT.getScalarType() == MVT::i1)
5398 return false;
5399
5400 int Dummy1, Dummy2;
5401 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5402 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5403}
5404
5405// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5406// the exponent.
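// For example, for an i32 element with value 8: after isolating the low set bit
// (8 & -8 == 8) and converting to floating point, the biased exponent field
// holds Bias + 3, so cttz = 3 after subtracting Bias, and ctlz = (Bias + 31) -
// (Bias + 3) = 28.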
5407SDValue
5408RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5409 SelectionDAG &DAG) const {
5410 MVT VT = Op.getSimpleValueType();
5411 unsigned EltSize = VT.getScalarSizeInBits();
5412 SDValue Src = Op.getOperand(0);
5413 SDLoc DL(Op);
5414 MVT ContainerVT = VT;
5415
5416 SDValue Mask, VL;
5417 if (Op->isVPOpcode()) {
5418 Mask = Op.getOperand(1);
5419 if (VT.isFixedLengthVector())
5420 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5421 Subtarget);
5422 VL = Op.getOperand(2);
5423 }
5424
5425 // We choose an FP type that can represent the value exactly if possible.
5426 // Otherwise, we use a round-to-zero conversion so the exponent of the result is correct.
5427 // TODO: Use f16 for i8 when possible?
5428 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5429 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5430 FloatEltVT = MVT::f32;
5431 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5432
5433 // Legal types should have been checked in the RISCVTargetLowering
5434 // constructor.
5435 // TODO: Splitting may make sense in some cases.
5436 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5437 "Expected legal float type!");
5438
5439 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5440 // The trailing zero count is equal to log2 of this single bit value.
5441 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5442 SDValue Neg = DAG.getNegative(Src, DL, VT);
5443 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5444 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5445 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5446 Src, Mask, VL);
5447 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5448 }
5449
5450 // We have a legal FP type, convert to it.
5451 SDValue FloatVal;
5452 if (FloatVT.bitsGT(VT)) {
5453 if (Op->isVPOpcode())
5454 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5455 else
5456 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5457 } else {
5458 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5459 if (VT.isFixedLengthVector()) {
5460 ContainerVT = getContainerForFixedLengthVector(VT);
5461 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5462 }
5463 if (!Op->isVPOpcode())
5464 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5465 SDValue RTZRM =
5466 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5467 MVT ContainerFloatVT =
5468 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5469 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5470 Src, Mask, RTZRM, VL);
5471 if (VT.isFixedLengthVector())
5472 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5473 }
5474 // Bitcast to integer and shift the exponent to the LSB.
5475 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5476 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5477 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5478
5479 SDValue Exp;
5480 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5481 if (Op->isVPOpcode()) {
5482 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5483 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5484 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5485 } else {
5486 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5487 DAG.getConstant(ShiftAmt, DL, IntVT));
5488 if (IntVT.bitsLT(VT))
5489 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5490 else if (IntVT.bitsGT(VT))
5491 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5492 }
5493
5494 // The exponent contains log2 of the value in biased form.
5495 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5496 // For trailing zeros, we just need to subtract the bias.
5497 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5498 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5499 DAG.getConstant(ExponentBias, DL, VT));
5500 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5501 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5502 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5503
5504 // For leading zeros, we need to remove the bias and convert from log2 to
5505 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5506 unsigned Adjust = ExponentBias + (EltSize - 1);
5507 SDValue Res;
5508 if (Op->isVPOpcode())
5509 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5510 Mask, VL);
5511 else
5512 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5513
5514 // With a zero input, the result above equals Adjust, which is greater than
5515 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5516 if (Op.getOpcode() == ISD::CTLZ)
5517 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5518 else if (Op.getOpcode() == ISD::VP_CTLZ)
5519 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5520 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5521 return Res;
5522}
5523
5524SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5525 SelectionDAG &DAG) const {
5526 SDLoc DL(Op);
5527 MVT XLenVT = Subtarget.getXLenVT();
5528 SDValue Source = Op->getOperand(0);
5529 MVT SrcVT = Source.getSimpleValueType();
5530 SDValue Mask = Op->getOperand(1);
5531 SDValue EVL = Op->getOperand(2);
5532
5533 if (SrcVT.isFixedLengthVector()) {
5534 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5535 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5536 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5537 Subtarget);
5538 SrcVT = ContainerVT;
5539 }
5540
5541 // Convert to boolean vector.
5542 if (SrcVT.getScalarType() != MVT::i1) {
5543 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5544 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5545 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5546 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5547 DAG.getUNDEF(SrcVT), Mask, EVL});
5548 }
5549
5550 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5551 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5552 // In this case, we can interpret poison as -1, so nothing to do further.
5553 return Res;
5554
5555 // Convert -1 to VL.
5556 SDValue SetCC =
5557 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5558 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5559 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5560}
5561
5562// While RVV has alignment restrictions, we should always be able to load as a
5563// legal equivalently-sized byte-typed vector instead. This method is
5564 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5565// the load is already correctly-aligned, it returns SDValue().
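// For example, an underaligned load of <vscale x 2 x i32> is re-emitted as a
// load of <vscale x 8 x i8>, which only needs byte alignment, and the result is
// bitcast back to the original type.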
5566SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5567 SelectionDAG &DAG) const {
5568 auto *Load = cast<LoadSDNode>(Op);
5569 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5570
5571 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5572 Load->getMemoryVT(),
5573 *Load->getMemOperand()))
5574 return SDValue();
5575
5576 SDLoc DL(Op);
5577 MVT VT = Op.getSimpleValueType();
5578 unsigned EltSizeBits = VT.getScalarSizeInBits();
5579 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5580 "Unexpected unaligned RVV load type");
5581 MVT NewVT =
5582 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5583 assert(NewVT.isValid() &&
5584 "Expecting equally-sized RVV vector types to be legal");
5585 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5586 Load->getPointerInfo(), Load->getOriginalAlign(),
5587 Load->getMemOperand()->getFlags());
5588 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5589}
5590
5591// While RVV has alignment restrictions, we should always be able to store as a
5592// legal equivalently-sized byte-typed vector instead. This method is
5593 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5594// returns SDValue() if the store is already correctly aligned.
5595SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5596 SelectionDAG &DAG) const {
5597 auto *Store = cast<StoreSDNode>(Op);
5598 assert(Store && Store->getValue().getValueType().isVector() &&
5599 "Expected vector store");
5600
5601 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5602 Store->getMemoryVT(),
5603 *Store->getMemOperand()))
5604 return SDValue();
5605
5606 SDLoc DL(Op);
5607 SDValue StoredVal = Store->getValue();
5608 MVT VT = StoredVal.getSimpleValueType();
5609 unsigned EltSizeBits = VT.getScalarSizeInBits();
5610 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5611 "Unexpected unaligned RVV store type");
5612 MVT NewVT =
5613 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5614 assert(NewVT.isValid() &&
5615 "Expecting equally-sized RVV vector types to be legal");
5616 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5617 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5618 Store->getPointerInfo(), Store->getOriginalAlign(),
5619 Store->getMemOperand()->getFlags());
5620}
5621
5622 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5623 const RISCVSubtarget &Subtarget) {
5624 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5625
5626 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5627
5628 // All simm32 constants should be handled by isel.
5629 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5630 // this check redundant, but small immediates are common so this check
5631 // should have better compile time.
5632 if (isInt<32>(Imm))
5633 return Op;
5634
5635 // We only need to cost the immediate, if constant pool lowering is enabled.
5636 if (!Subtarget.useConstantPoolForLargeInts())
5637 return Op;
5638
5640 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5641 return Op;
5642
5643 // Optimizations below are disabled for opt size. If we're optimizing for
5644 // size, use a constant pool.
5645 if (DAG.shouldOptForSize())
5646 return SDValue();
5647
5648 // Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
5649 // that if it will avoid a constant pool.
5650 // It will require an extra temporary register though.
5651 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5652 // low and high 32 bits are the same and bit 31 and 63 are set.
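// For example, 0x8000000180000001 has equal halves with bits 31 and 63 set;
// materializing X = 0x80000001 once and forming (ADD_UW X, (SLLI X, 32))
// avoids a constant pool load at the cost of a temporary register.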
5653 unsigned ShiftAmt, AddOpc;
5654 RISCVMatInt::InstSeq SeqLo =
5655 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5656 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5657 return Op;
5658
5659 return SDValue();
5660}
5661
5662 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5663 const RISCVSubtarget &Subtarget) {
5664 SDLoc dl(Op);
5665 AtomicOrdering FenceOrdering =
5666 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5667 SyncScope::ID FenceSSID =
5668 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5669
5670 if (Subtarget.hasStdExtZtso()) {
5671 // The only fence that needs an instruction is a sequentially-consistent
5672 // cross-thread fence.
5673 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5674 FenceSSID == SyncScope::System)
5675 return Op;
5676
5677 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5678 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5679 }
5680
5681 // singlethread fences only synchronize with signal handlers on the same
5682 // thread and thus only need to preserve instruction order, not actually
5683 // enforce memory ordering.
5684 if (FenceSSID == SyncScope::SingleThread)
5685 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5686 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5687
5688 return Op;
5689}
5690
5691 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5692 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5693 "Unexpected custom legalisation");
5694
5695 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
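// For example, saddsat(i32 0x7fffffff, 1) widens to the i64 value 0x80000000,
// which smin clamps back to INT32_MAX before the result is truncated to i32.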
5696 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5697 SDLoc DL(Op);
5698 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5699 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5700 SDValue Result =
5701 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5702
5703 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5704 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5705 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5706 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5707 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5708 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5709 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5710}
5711
5712 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5713 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5714 "Unexpected custom legalisation");
5715
5716 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5717 // sign extend allows overflow of the lower 32 bits to be detected on
5718 // the promoted size.
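// For example, for uaddsat(i32 0xffffffff, 1) the sign-extended i64 addition of
// -1 and 1 overflows when viewed as unsigned, so the widened uaddsat saturates
// to all-ones and the truncation yields the expected 0xffffffff.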
5719 SDLoc DL(Op);
5720 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5721 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5722 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5723 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5724}
5725
5726// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5727 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5728 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5729 "Unexpected custom legalisation");
5730 if (isa<ConstantSDNode>(Op.getOperand(1)))
5731 return SDValue();
5732
5733 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5734 SDLoc DL(Op);
5735 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5736 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5737 SDValue WideOp =
5738 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5739 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5740 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5741 DAG.getValueType(MVT::i32));
5742 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5743 ISD::SETNE);
5744 return DAG.getMergeValues({Res, Ovf}, DL);
5745}
5746
5747// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5748 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5749 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5750 "Unexpected custom legalisation");
5751 SDLoc DL(Op);
5752 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5753 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5754 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5755 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5756 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5757 DAG.getValueType(MVT::i32));
5758 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5759 ISD::SETNE);
5760 return DAG.getMergeValues({Res, Ovf}, DL);
5761}
5762
5763SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5764 SelectionDAG &DAG) const {
5765 SDLoc DL(Op);
5766 MVT VT = Op.getSimpleValueType();
5767 MVT XLenVT = Subtarget.getXLenVT();
5768 unsigned Check = Op.getConstantOperandVal(1);
5769 unsigned TDCMask = 0;
5770 if (Check & fcSNan)
5771 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5772 if (Check & fcQNan)
5773 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5774 if (Check & fcPosInf)
5775 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5776 if (Check & fcNegInf)
5777 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5778 if (Check & fcPosNormal)
5779 TDCMask |= RISCV::FPMASK_Positive_Normal;
5780 if (Check & fcNegNormal)
5781 TDCMask |= RISCV::FPMASK_Negative_Normal;
5782 if (Check & fcPosSubnormal)
5783 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5784 if (Check & fcNegSubnormal)
5785 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5786 if (Check & fcPosZero)
5787 TDCMask |= RISCV::FPMASK_Positive_Zero;
5788 if (Check & fcNegZero)
5789 TDCMask |= RISCV::FPMASK_Negative_Zero;
5790
5791 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5792
5793 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5794
5795 if (VT.isVector()) {
5796 SDValue Op0 = Op.getOperand(0);
5797 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5798
5799 if (VT.isScalableVector()) {
5800 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5801 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5802 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5803 Mask = Op.getOperand(2);
5804 VL = Op.getOperand(3);
5805 }
5806 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5807 VL, Op->getFlags());
5808 if (IsOneBitMask)
5809 return DAG.getSetCC(DL, VT, FPCLASS,
5810 DAG.getConstant(TDCMask, DL, DstVT),
5811 ISD::SETEQ);
5812 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5813 DAG.getConstant(TDCMask, DL, DstVT));
5814 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5815 ISD::SETNE);
5816 }
5817
5818 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5819 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5820 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5821 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5822 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5823 Mask = Op.getOperand(2);
5824 MVT MaskContainerVT =
5825 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5826 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5827 VL = Op.getOperand(3);
5828 }
5829 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5830
5831 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5832 Mask, VL, Op->getFlags());
5833
5834 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5835 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5836 if (IsOneBitMask) {
5837 SDValue VMSEQ =
5838 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5839 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5840 DAG.getUNDEF(ContainerVT), Mask, VL});
5841 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5842 }
5843 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5844 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5845
5846 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5847 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5848 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5849
5850 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5851 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5852 DAG.getUNDEF(ContainerVT), Mask, VL});
5853 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5854 }
5855
5856 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5857 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5858 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5859 ISD::SETNE);
5860 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5861}
5862
5863// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5864// operations propagate nans.
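// For example, fmaximum(NaN, 1.0) must return NaN, whereas the fmax instruction
// would return 1.0; the selects below force both inputs to NaN in that case so
// that fmax/fmin then propagates it.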
5865 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5866 const RISCVSubtarget &Subtarget) {
5867 SDLoc DL(Op);
5868 MVT VT = Op.getSimpleValueType();
5869
5870 SDValue X = Op.getOperand(0);
5871 SDValue Y = Op.getOperand(1);
5872
5873 if (!VT.isVector()) {
5874 MVT XLenVT = Subtarget.getXLenVT();
5875
5876 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5877 // ensures that when one input is a nan, the other will also be a nan
5878 // allowing the nan to propagate. If both inputs are nan, this will swap the
5879 // inputs which is harmless.
5880
5881 SDValue NewY = Y;
5882 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5883 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5884 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5885 }
5886
5887 SDValue NewX = X;
5888 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5889 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5890 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5891 }
5892
5893 unsigned Opc =
5894 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5895 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5896 }
5897
5898 // Check for NaNs before converting fixed-length vectors to scalable ones.
5899 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5900 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5901
5902 MVT ContainerVT = VT;
5903 if (VT.isFixedLengthVector()) {
5904 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5905 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5906 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5907 }
5908
5909 SDValue Mask, VL;
5910 if (Op->isVPOpcode()) {
5911 Mask = Op.getOperand(2);
5912 if (VT.isFixedLengthVector())
5913 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5914 Subtarget);
5915 VL = Op.getOperand(3);
5916 } else {
5917 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5918 }
5919
5920 SDValue NewY = Y;
5921 if (!XIsNeverNan) {
5922 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5923 {X, X, DAG.getCondCode(ISD::SETOEQ),
5924 DAG.getUNDEF(ContainerVT), Mask, VL});
5925 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5926 DAG.getUNDEF(ContainerVT), VL);
5927 }
5928
5929 SDValue NewX = X;
5930 if (!YIsNeverNan) {
5931 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5932 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5933 DAG.getUNDEF(ContainerVT), Mask, VL});
5934 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5935 DAG.getUNDEF(ContainerVT), VL);
5936 }
5937
5938 unsigned Opc =
5939 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5940 ? RISCVISD::VFMAX_VL
5941 : RISCVISD::VFMIN_VL;
5942 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5943 DAG.getUNDEF(ContainerVT), Mask, VL);
5944 if (VT.isFixedLengthVector())
5945 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5946 return Res;
5947}
5948
5949/// Get a RISC-V target specified VL op for a given SDNode.
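/// For example, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL, and
/// ISD::FMA / ISD::VP_FMA map to RISCVISD::VFMADD_VL.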
5950static unsigned getRISCVVLOp(SDValue Op) {
5951#define OP_CASE(NODE) \
5952 case ISD::NODE: \
5953 return RISCVISD::NODE##_VL;
5954#define VP_CASE(NODE) \
5955 case ISD::VP_##NODE: \
5956 return RISCVISD::NODE##_VL;
5957 // clang-format off
5958 switch (Op.getOpcode()) {
5959 default:
5960 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5961 OP_CASE(ADD)
5962 OP_CASE(SUB)
5963 OP_CASE(MUL)
5964 OP_CASE(MULHS)
5965 OP_CASE(MULHU)
5966 OP_CASE(SDIV)
5967 OP_CASE(SREM)
5968 OP_CASE(UDIV)
5969 OP_CASE(UREM)
5970 OP_CASE(SHL)
5971 OP_CASE(SRA)
5972 OP_CASE(SRL)
5973 OP_CASE(ROTL)
5974 OP_CASE(ROTR)
5975 OP_CASE(BSWAP)
5976 OP_CASE(CTTZ)
5977 OP_CASE(CTLZ)
5978 OP_CASE(CTPOP)
5979 OP_CASE(BITREVERSE)
5980 OP_CASE(SADDSAT)
5981 OP_CASE(UADDSAT)
5982 OP_CASE(SSUBSAT)
5983 OP_CASE(USUBSAT)
5984 OP_CASE(AVGFLOORS)
5985 OP_CASE(AVGFLOORU)
5986 OP_CASE(AVGCEILS)
5987 OP_CASE(AVGCEILU)
5988 OP_CASE(FADD)
5989 OP_CASE(FSUB)
5990 OP_CASE(FMUL)
5991 OP_CASE(FDIV)
5992 OP_CASE(FNEG)
5993 OP_CASE(FABS)
5994 OP_CASE(FSQRT)
5995 OP_CASE(SMIN)
5996 OP_CASE(SMAX)
5997 OP_CASE(UMIN)
5998 OP_CASE(UMAX)
5999 OP_CASE(STRICT_FADD)
6000 OP_CASE(STRICT_FSUB)
6001 OP_CASE(STRICT_FMUL)
6002 OP_CASE(STRICT_FDIV)
6003 OP_CASE(STRICT_FSQRT)
6004 VP_CASE(ADD) // VP_ADD
6005 VP_CASE(SUB) // VP_SUB
6006 VP_CASE(MUL) // VP_MUL
6007 VP_CASE(SDIV) // VP_SDIV
6008 VP_CASE(SREM) // VP_SREM
6009 VP_CASE(UDIV) // VP_UDIV
6010 VP_CASE(UREM) // VP_UREM
6011 VP_CASE(SHL) // VP_SHL
6012 VP_CASE(FADD) // VP_FADD
6013 VP_CASE(FSUB) // VP_FSUB
6014 VP_CASE(FMUL) // VP_FMUL
6015 VP_CASE(FDIV) // VP_FDIV
6016 VP_CASE(FNEG) // VP_FNEG
6017 VP_CASE(FABS) // VP_FABS
6018 VP_CASE(SMIN) // VP_SMIN
6019 VP_CASE(SMAX) // VP_SMAX
6020 VP_CASE(UMIN) // VP_UMIN
6021 VP_CASE(UMAX) // VP_UMAX
6022 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6023 VP_CASE(SETCC) // VP_SETCC
6024 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6025 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6026 VP_CASE(BITREVERSE) // VP_BITREVERSE
6027 VP_CASE(SADDSAT) // VP_SADDSAT
6028 VP_CASE(UADDSAT) // VP_UADDSAT
6029 VP_CASE(SSUBSAT) // VP_SSUBSAT
6030 VP_CASE(USUBSAT) // VP_USUBSAT
6031 VP_CASE(BSWAP) // VP_BSWAP
6032 VP_CASE(CTLZ) // VP_CTLZ
6033 VP_CASE(CTTZ) // VP_CTTZ
6034 VP_CASE(CTPOP) // VP_CTPOP
6035 case ISD::CTLZ_ZERO_UNDEF:
6036 case ISD::VP_CTLZ_ZERO_UNDEF:
6037 return RISCVISD::CTLZ_VL;
6038 case ISD::CTTZ_ZERO_UNDEF:
6039 case ISD::VP_CTTZ_ZERO_UNDEF:
6040 return RISCVISD::CTTZ_VL;
6041 case ISD::FMA:
6042 case ISD::VP_FMA:
6043 return RISCVISD::VFMADD_VL;
6044 case ISD::STRICT_FMA:
6045 return RISCVISD::STRICT_VFMADD_VL;
6046 case ISD::AND:
6047 case ISD::VP_AND:
6048 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6049 return RISCVISD::VMAND_VL;
6050 return RISCVISD::AND_VL;
6051 case ISD::OR:
6052 case ISD::VP_OR:
6053 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6054 return RISCVISD::VMOR_VL;
6055 return RISCVISD::OR_VL;
6056 case ISD::XOR:
6057 case ISD::VP_XOR:
6058 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6059 return RISCVISD::VMXOR_VL;
6060 return RISCVISD::XOR_VL;
6061 case ISD::VP_SELECT:
6062 case ISD::VP_MERGE:
6063 return RISCVISD::VMERGE_VL;
6064 case ISD::VP_SRA:
6065 return RISCVISD::SRA_VL;
6066 case ISD::VP_SRL:
6067 return RISCVISD::SRL_VL;
6068 case ISD::VP_SQRT:
6069 return RISCVISD::FSQRT_VL;
6070 case ISD::VP_SIGN_EXTEND:
6071 return RISCVISD::VSEXT_VL;
6072 case ISD::VP_ZERO_EXTEND:
6073 return RISCVISD::VZEXT_VL;
6074 case ISD::VP_FP_TO_SINT:
6075 return RISCVISD::VFCVT_RTZ_X_F_VL;
6076 case ISD::VP_FP_TO_UINT:
6077 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6078 case ISD::FMINNUM:
6079 case ISD::VP_FMINNUM:
6080 return RISCVISD::VFMIN_VL;
6081 case ISD::FMAXNUM:
6082 case ISD::VP_FMAXNUM:
6083 return RISCVISD::VFMAX_VL;
6084 case ISD::LRINT:
6085 case ISD::VP_LRINT:
6086 case ISD::LLRINT:
6087 case ISD::VP_LLRINT:
6088 return RISCVISD::VFCVT_X_F_VL;
6089 }
6090 // clang-format on
6091#undef OP_CASE
6092#undef VP_CASE
6093}
6094
6095/// Return true if a RISC-V target specified op has a merge operand.
6096static bool hasMergeOp(unsigned Opcode) {
6097 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6098 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
6099 "not a RISC-V target specific op");
6100 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6101 130 &&
6102 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
6103 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
6104 21 &&
6105 "adding target specific op should update this function");
6106 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6107 return true;
6108 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6109 return true;
6110 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6111 return true;
6112 if (Opcode == RISCVISD::SETCC_VL)
6113 return true;
6114 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6115 return true;
6116 if (Opcode == RISCVISD::VMERGE_VL)
6117 return true;
6118 return false;
6119}
6120
6121/// Return true if a RISC-V target specified op has a mask operand.
6122static bool hasMaskOp(unsigned Opcode) {
6123 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6124 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
6125 "not a RISC-V target specific op");
6126 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6127 130 &&
6128 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
6129 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
6130 21 &&
6131 "adding target specific op should update this function");
6132 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6133 return true;
6134 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6135 return true;
6136 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6137 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
6138 return true;
6139 return false;
6140}
6141
6142 SDValue RISCVTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
6143 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6144 SDLoc DL(Op);
6145
6146 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6147 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6148
6149 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6150 if (!Op.getOperand(j).getValueType().isVector()) {
6151 LoOperands[j] = Op.getOperand(j);
6152 HiOperands[j] = Op.getOperand(j);
6153 continue;
6154 }
6155 std::tie(LoOperands[j], HiOperands[j]) =
6156 DAG.SplitVector(Op.getOperand(j), DL);
6157 }
6158
6159 SDValue LoRes =
6160 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6161 SDValue HiRes =
6162 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6163
6164 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6165}
6166
6167 SDValue RISCVTargetLowering::SplitVPOp(SDValue Op, SelectionDAG &DAG) const {
6168 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6169 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6170 SDLoc DL(Op);
6171
6172 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6173 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6174
6175 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6176 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6177 std::tie(LoOperands[j], HiOperands[j]) =
6178 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6179 continue;
6180 }
6181 if (!Op.getOperand(j).getValueType().isVector()) {
6182 LoOperands[j] = Op.getOperand(j);
6183 HiOperands[j] = Op.getOperand(j);
6184 continue;
6185 }
6186 std::tie(LoOperands[j], HiOperands[j]) =
6187 DAG.SplitVector(Op.getOperand(j), DL);
6188 }
6189
6190 SDValue LoRes =
6191 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6192 SDValue HiRes =
6193 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6194
6195 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6196}
6197
6198 SDValue RISCVTargetLowering::SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) const {
6199 SDLoc DL(Op);
6200
6201 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6202 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6203 auto [EVLLo, EVLHi] =
6204 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6205
6206 SDValue ResLo =
6207 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6208 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6209 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6210 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6211}
6212
6213 SDValue RISCVTargetLowering::SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) const {
6214
6215 assert(Op->isStrictFPOpcode());
6216
6217 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6218
6219 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6220 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6221
6222 SDLoc DL(Op);
6223
6224 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6225 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6226
6227 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6228 if (!Op.getOperand(j).getValueType().isVector()) {
6229 LoOperands[j] = Op.getOperand(j);
6230 HiOperands[j] = Op.getOperand(j);
6231 continue;
6232 }
6233 std::tie(LoOperands[j], HiOperands[j]) =
6234 DAG.SplitVector(Op.getOperand(j), DL);
6235 }
6236
6237 SDValue LoRes =
6238 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6239 HiOperands[0] = LoRes.getValue(1);
6240 SDValue HiRes =
6241 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6242
6243 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6244 LoRes.getValue(0), HiRes.getValue(0));
6245 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6246}
6247
6248 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6249 SelectionDAG &DAG) const {
6250 switch (Op.getOpcode()) {
6251 default:
6252 report_fatal_error("unimplemented operand");
6253 case ISD::ATOMIC_FENCE:
6254 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6255 case ISD::GlobalAddress:
6256 return lowerGlobalAddress(Op, DAG);
6257 case ISD::BlockAddress:
6258 return lowerBlockAddress(Op, DAG);
6259 case ISD::ConstantPool:
6260 return lowerConstantPool(Op, DAG);
6261 case ISD::JumpTable:
6262 return lowerJumpTable(Op, DAG);
6263 case ISD::GlobalTLSAddress:
6264 return lowerGlobalTLSAddress(Op, DAG);
6265 case ISD::Constant:
6266 return lowerConstant(Op, DAG, Subtarget);
6267 case ISD::SELECT:
6268 return lowerSELECT(Op, DAG);
6269 case ISD::BRCOND:
6270 return lowerBRCOND(Op, DAG);
6271 case ISD::VASTART:
6272 return lowerVASTART(Op, DAG);
6273 case ISD::FRAMEADDR:
6274 return lowerFRAMEADDR(Op, DAG);
6275 case ISD::RETURNADDR:
6276 return lowerRETURNADDR(Op, DAG);
6277 case ISD::SADDO:
6278 case ISD::SSUBO:
6279 return lowerSADDO_SSUBO(Op, DAG);
6280 case ISD::SMULO:
6281 return lowerSMULO(Op, DAG);
6282 case ISD::SHL_PARTS:
6283 return lowerShiftLeftParts(Op, DAG);
6284 case ISD::SRA_PARTS:
6285 return lowerShiftRightParts(Op, DAG, true);
6286 case ISD::SRL_PARTS:
6287 return lowerShiftRightParts(Op, DAG, false);
6288 case ISD::ROTL:
6289 case ISD::ROTR:
6290 if (Op.getValueType().isFixedLengthVector()) {
6291 assert(Subtarget.hasStdExtZvkb());
6292 return lowerToScalableOp(Op, DAG);
6293 }
6294 assert(Subtarget.hasVendorXTHeadBb() &&
6295 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6296 "Unexpected custom legalization");
6297 // XTHeadBb only supports rotate by constant.
6298 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6299 return SDValue();
6300 return Op;
6301 case ISD::BITCAST: {
6302 SDLoc DL(Op);
6303 EVT VT = Op.getValueType();
6304 SDValue Op0 = Op.getOperand(0);
6305 EVT Op0VT = Op0.getValueType();
6306 MVT XLenVT = Subtarget.getXLenVT();
6307 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6308 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6309 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6310 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6311 return FPConv;
6312 }
6313 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6314 Subtarget.hasStdExtZfbfmin()) {
6315 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6316 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6317 return FPConv;
6318 }
6319 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6320 Subtarget.hasStdExtFOrZfinx()) {
6321 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6322 SDValue FPConv =
6323 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6324 return FPConv;
6325 }
6326 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6327 SDValue Lo, Hi;
6328 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6329 SDValue RetReg =
6330 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6331 return RetReg;
6332 }
6333
6334 // Consider other scalar<->scalar casts as legal if the types are legal.
6335 // Otherwise expand them.
6336 if (!VT.isVector() && !Op0VT.isVector()) {
6337 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6338 return Op;
6339 return SDValue();
6340 }
6341
6342 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6343 "Unexpected types");
6344
6345 if (VT.isFixedLengthVector()) {
6346 // We can handle fixed length vector bitcasts with a simple replacement
6347 // in isel.
6348 if (Op0VT.isFixedLengthVector())
6349 return Op;
6350 // When bitcasting from scalar to fixed-length vector, insert the scalar
6351 // into a one-element vector of the result type, and perform a vector
6352 // bitcast.
6353 if (!Op0VT.isVector()) {
6354 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6355 if (!isTypeLegal(BVT))
6356 return SDValue();
6357 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6358 DAG.getUNDEF(BVT), Op0,
6359 DAG.getVectorIdxConstant(0, DL)));
6360 }
6361 return SDValue();
6362 }
6363 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6364 // thus: bitcast the vector to a one-element vector type whose element type
6365 // is the same as the result type, and extract the first element.
6366 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6367 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6368 if (!isTypeLegal(BVT))
6369 return SDValue();
6370 SDValue BVec = DAG.getBitcast(BVT, Op0);
6371 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6372 DAG.getVectorIdxConstant(0, DL));
6373 }
6374 return SDValue();
6375 }
6376 case ISD::INTRINSIC_WO_CHAIN:
6377 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6378 case ISD::INTRINSIC_W_CHAIN:
6379 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6380 case ISD::INTRINSIC_VOID:
6381 return LowerINTRINSIC_VOID(Op, DAG);
6382 case ISD::IS_FPCLASS:
6383 return LowerIS_FPCLASS(Op, DAG);
6384 case ISD::BITREVERSE: {
6385 MVT VT = Op.getSimpleValueType();
6386 if (VT.isFixedLengthVector()) {
6387 assert(Subtarget.hasStdExtZvbb());
6388 return lowerToScalableOp(Op, DAG);
6389 }
6390 SDLoc DL(Op);
6391 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6392 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6393 // Expand bitreverse to a bswap(rev8) followed by brev8.
6394 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6395 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6396 }
6397 case ISD::TRUNCATE:
6398 // Only custom-lower vector truncates
6399 if (!Op.getSimpleValueType().isVector())
6400 return Op;
6401 return lowerVectorTruncLike(Op, DAG);
6402 case ISD::ANY_EXTEND:
6403 case ISD::ZERO_EXTEND:
6404 if (Op.getOperand(0).getValueType().isVector() &&
6405 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6406 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6407 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6408 case ISD::SIGN_EXTEND:
6409 if (Op.getOperand(0).getValueType().isVector() &&
6410 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6411 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6412 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6413 case ISD::SPLAT_VECTOR_PARTS:
6414 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6415 case ISD::INSERT_VECTOR_ELT:
6416 return lowerINSERT_VECTOR_ELT(Op, DAG);
6417 case ISD::EXTRACT_VECTOR_ELT:
6418 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6419 case ISD::SCALAR_TO_VECTOR: {
6420 MVT VT = Op.getSimpleValueType();
6421 SDLoc DL(Op);
6422 SDValue Scalar = Op.getOperand(0);
6423 if (VT.getVectorElementType() == MVT::i1) {
6424 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6425 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6426 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6427 }
6428 MVT ContainerVT = VT;
6429 if (VT.isFixedLengthVector())
6430 ContainerVT = getContainerForFixedLengthVector(VT);
6431 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6432 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6433 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6434 DAG.getUNDEF(ContainerVT), Scalar, VL);
6435 if (VT.isFixedLengthVector())
6436 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6437 return V;
6438 }
6439 case ISD::VSCALE: {
6440 MVT XLenVT = Subtarget.getXLenVT();
6441 MVT VT = Op.getSimpleValueType();
6442 SDLoc DL(Op);
6443 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6444 // We define our scalable vector types for lmul=1 to use a 64 bit known
6445 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6446 // vscale as VLENB / 8.
6447 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6448 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6449 report_fatal_error("Support for VLEN==32 is incomplete.");
6450 // We assume VLENB is a multiple of 8. We manually choose the best shift
6451 // here because SimplifyDemandedBits isn't always able to simplify it.
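// For example, VSCALE(4) with VLEN=256: VLENB is 32 and vscale is VLENB/8 = 4,
// so the result is 16; since log2(4) = 2 < 3 this is a single srli of VLENB
// by 1.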
6452 uint64_t Val = Op.getConstantOperandVal(0);
6453 if (isPowerOf2_64(Val)) {
6454 uint64_t Log2 = Log2_64(Val);
6455 if (Log2 < 3)
6456 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6457 DAG.getConstant(3 - Log2, DL, VT));
6458 else if (Log2 > 3)
6459 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6460 DAG.getConstant(Log2 - 3, DL, XLenVT));
6461 } else if ((Val % 8) == 0) {
6462 // If the multiplier is a multiple of 8, scale it down to avoid needing
6463 // to shift the VLENB value.
6464 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6465 DAG.getConstant(Val / 8, DL, XLenVT));
6466 } else {
6467 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6468 DAG.getConstant(3, DL, XLenVT));
6469 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6470 DAG.getConstant(Val, DL, XLenVT));
6471 }
6472 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6473 }
6474 case ISD::FPOWI: {
6475 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6476 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6477 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6478 Op.getOperand(1).getValueType() == MVT::i32) {
6479 SDLoc DL(Op);
6480 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6481 SDValue Powi =
6482 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6483 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6484 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6485 }
6486 return SDValue();
6487 }
6488 case ISD::FMAXIMUM:
6489 case ISD::FMINIMUM:
6490 if (Op.getValueType() == MVT::nxv32f16 &&
6491 (Subtarget.hasVInstructionsF16Minimal() &&
6492 !Subtarget.hasVInstructionsF16()))
6493 return SplitVectorOp(Op, DAG);
6494 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6495 case ISD::FP_EXTEND: {
6496 SDLoc DL(Op);
6497 EVT VT = Op.getValueType();
6498 SDValue Op0 = Op.getOperand(0);
6499 EVT Op0VT = Op0.getValueType();
6500 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6501 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6502 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6503 SDValue FloatVal =
6504 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6505 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6506 }
6507
6508 if (!Op.getValueType().isVector())
6509 return Op;
6510 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6511 }
6512 case ISD::FP_ROUND: {
6513 SDLoc DL(Op);
6514 EVT VT = Op.getValueType();
6515 SDValue Op0 = Op.getOperand(0);
6516 EVT Op0VT = Op0.getValueType();
6517 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6518 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6519 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6520 Subtarget.hasStdExtDOrZdinx()) {
6521 SDValue FloatVal =
6522 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6523 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6524 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6525 }
6526
6527 if (!Op.getValueType().isVector())
6528 return Op;
6529 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6530 }
6531 case ISD::STRICT_FP_ROUND:
6532 case ISD::STRICT_FP_EXTEND:
6533 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6534 case ISD::SINT_TO_FP:
6535 case ISD::UINT_TO_FP:
6536 if (Op.getValueType().isVector() &&
6537 Op.getValueType().getScalarType() == MVT::f16 &&
6538 (Subtarget.hasVInstructionsF16Minimal() &&
6539 !Subtarget.hasVInstructionsF16())) {
6540 if (Op.getValueType() == MVT::nxv32f16)
6541 return SplitVectorOp(Op, DAG);
6542 // int -> f32
6543 SDLoc DL(Op);
6544 MVT NVT =
6545 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6546 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6547 // f32 -> f16
6548 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6549 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6550 }
6551 [[fallthrough]];
6552 case ISD::FP_TO_SINT:
6553 case ISD::FP_TO_UINT:
6554 if (SDValue Op1 = Op.getOperand(0);
6555 Op1.getValueType().isVector() &&
6556 Op1.getValueType().getScalarType() == MVT::f16 &&
6557 (Subtarget.hasVInstructionsF16Minimal() &&
6558 !Subtarget.hasVInstructionsF16())) {
6559 if (Op1.getValueType() == MVT::nxv32f16)
6560 return SplitVectorOp(Op, DAG);
6561 // f16 -> f32
6562 SDLoc DL(Op);
6563 MVT NVT = MVT::getVectorVT(MVT::f32,
6564 Op1.getValueType().getVectorElementCount());
6565 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6566 // f32 -> int
6567 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6568 }
6569 [[fallthrough]];
6570 case ISD::STRICT_FP_TO_SINT:
6571 case ISD::STRICT_FP_TO_UINT:
6572 case ISD::STRICT_SINT_TO_FP:
6573 case ISD::STRICT_UINT_TO_FP: {
6574 // RVV can only do fp<->int conversions to types half/double the size as
6575 // the source. We custom-lower any conversions that do two hops into
6576 // sequences.
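// For example, a <vscale x 2 x i8> -> <vscale x 2 x f64> sint_to_fp (an 8x
// widening) is emitted as a sign extension to i32 followed by one widening
// int->fp, while f64 -> i8 goes through a narrowing fp->int to i32 followed by
// a truncate.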
6577 MVT VT = Op.getSimpleValueType();
6578 if (!VT.isVector())
6579 return Op;
6580 SDLoc DL(Op);
6581 bool IsStrict = Op->isStrictFPOpcode();
6582 SDValue Src = Op.getOperand(0 + IsStrict);
6583 MVT EltVT = VT.getVectorElementType();
6584 MVT SrcVT = Src.getSimpleValueType();
6585 MVT SrcEltVT = SrcVT.getVectorElementType();
6586 unsigned EltSize = EltVT.getSizeInBits();
6587 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6588 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6589 "Unexpected vector element types");
6590
6591 bool IsInt2FP = SrcEltVT.isInteger();
6592 // Widening conversions
6593 if (EltSize > (2 * SrcEltSize)) {
6594 if (IsInt2FP) {
6595 // Do a regular integer sign/zero extension then convert to float.
6596 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6597 VT.getVectorElementCount());
6598 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6599 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6600 ? ISD::ZERO_EXTEND
6601 : ISD::SIGN_EXTEND;
6602 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6603 if (IsStrict)
6604 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6605 Op.getOperand(0), Ext);
6606 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6607 }
6608 // FP2Int
6609 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6610 // Do one doubling fp_extend then complete the operation by converting
6611 // to int.
6612 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6613 if (IsStrict) {
6614 auto [FExt, Chain] =
6615 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6616 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6617 }
6618 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6619 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6620 }
6621
6622 // Narrowing conversions
6623 if (SrcEltSize > (2 * EltSize)) {
6624 if (IsInt2FP) {
6625 // One narrowing int_to_fp, then an fp_round.
6626 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6627 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6628 if (IsStrict) {
6629 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6630 DAG.getVTList(InterimFVT, MVT::Other),
6631 Op.getOperand(0), Src);
6632 SDValue Chain = Int2FP.getValue(1);
6633 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6634 }
6635 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6636 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6637 }
6638 // FP2Int
6639 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6640 // representable by the integer, the result is poison.
6641 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6642 VT.getVectorElementCount());
6643 if (IsStrict) {
6644 SDValue FP2Int =
6645 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6646 Op.getOperand(0), Src);
6647 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6648 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6649 }
6650 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6651 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6652 }
6653
6654 // Scalable vectors can exit here. Patterns will handle equally-sized
6655 // conversions as well as halving/doubling ones.
6656 if (!VT.isFixedLengthVector())
6657 return Op;
6658
6659 // For fixed-length vectors we lower to a custom "VL" node.
6660 unsigned RVVOpc = 0;
6661 switch (Op.getOpcode()) {
6662 default:
6663 llvm_unreachable("Impossible opcode");
6664 case ISD::FP_TO_SINT:
6665 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6666 break;
6667 case ISD::FP_TO_UINT:
6668 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6669 break;
6670 case ISD::SINT_TO_FP:
6671 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6672 break;
6673 case ISD::UINT_TO_FP:
6674 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6675 break;
6676 case ISD::STRICT_FP_TO_SINT:
6677 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6678 break;
6679 case ISD::STRICT_FP_TO_UINT:
6680 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6681 break;
6682 case ISD::STRICT_SINT_TO_FP:
6683 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6684 break;
6685 case ISD::STRICT_UINT_TO_FP:
6686 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6687 break;
6688 }
6689
6690 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6691 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6692 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6693 "Expected same element count");
6694
6695 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6696
6697 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6698 if (IsStrict) {
6699 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6700 Op.getOperand(0), Src, Mask, VL);
6701 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6702 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6703 }
6704 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6705 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6706 }
6707 case ISD::FP_TO_SINT_SAT:
6708 case ISD::FP_TO_UINT_SAT:
6709 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6710 case ISD::FP_TO_BF16: {
6711 // Custom lower to ensure the libcall return is passed in an FPR on hard
6712 // float ABIs.
6713 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6714 SDLoc DL(Op);
6715 MakeLibCallOptions CallOptions;
6716 RTLIB::Libcall LC =
6717 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6718 SDValue Res =
6719 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6720 if (Subtarget.is64Bit() && !RV64LegalI32)
6721 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6722 return DAG.getBitcast(MVT::i32, Res);
6723 }
6724 case ISD::BF16_TO_FP: {
6725 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6726 MVT VT = Op.getSimpleValueType();
6727 SDLoc DL(Op);
6728 Op = DAG.getNode(
6729 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6730 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6731 SDValue Res = Subtarget.is64Bit()
6732 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6733 : DAG.getBitcast(MVT::f32, Op);
6734 // fp_extend if the target VT is bigger than f32.
6735 if (VT != MVT::f32)
6736 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6737 return Res;
6738 }
6739 case ISD::FP_TO_FP16: {
6740 // Custom lower to ensure the libcall return is passed in an FPR on hard
6741 // float ABIs.
6742 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6743 SDLoc DL(Op);
6744 MakeLibCallOptions CallOptions;
6745 RTLIB::Libcall LC =
6746 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6747 SDValue Res =
6748 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6749 if (Subtarget.is64Bit() && !RV64LegalI32)
6750 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6751 return DAG.getBitcast(MVT::i32, Res);
6752 }
6753 case ISD::FP16_TO_FP: {
6754 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6755 // float ABIs.
6756 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6757 SDLoc DL(Op);
6758 MakeLibCallOptions CallOptions;
6759 SDValue Arg = Subtarget.is64Bit()
6760 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6761 Op.getOperand(0))
6762 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6763 SDValue Res =
6764 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6765 .first;
6766 return Res;
6767 }
6768 case ISD::FTRUNC:
6769 case ISD::FCEIL:
6770 case ISD::FFLOOR:
6771 case ISD::FNEARBYINT:
6772 case ISD::FRINT:
6773 case ISD::FROUND:
6774 case ISD::FROUNDEVEN:
6775 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6776 case ISD::LRINT:
6777 case ISD::LLRINT:
6778 return lowerVectorXRINT(Op, DAG, Subtarget);
6779 case ISD::VECREDUCE_ADD:
6780 case ISD::VECREDUCE_UMAX:
6781 case ISD::VECREDUCE_SMAX:
6782 case ISD::VECREDUCE_UMIN:
6783 case ISD::VECREDUCE_SMIN:
6784 return lowerVECREDUCE(Op, DAG);
6785 case ISD::VECREDUCE_AND:
6786 case ISD::VECREDUCE_OR:
6787 case ISD::VECREDUCE_XOR:
6788 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6789 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6790 return lowerVECREDUCE(Op, DAG);
6791 case ISD::VECREDUCE_FADD:
6792 case ISD::VECREDUCE_SEQ_FADD:
6793 case ISD::VECREDUCE_FMIN:
6794 case ISD::VECREDUCE_FMAX:
6795 case ISD::VECREDUCE_FMAXIMUM:
6796 case ISD::VECREDUCE_FMINIMUM:
6797 return lowerFPVECREDUCE(Op, DAG);
6798 case ISD::VP_REDUCE_ADD:
6799 case ISD::VP_REDUCE_UMAX:
6800 case ISD::VP_REDUCE_SMAX:
6801 case ISD::VP_REDUCE_UMIN:
6802 case ISD::VP_REDUCE_SMIN:
6803 case ISD::VP_REDUCE_FADD:
6804 case ISD::VP_REDUCE_SEQ_FADD:
6805 case ISD::VP_REDUCE_FMIN:
6806 case ISD::VP_REDUCE_FMAX:
6807 case ISD::VP_REDUCE_FMINIMUM:
6808 case ISD::VP_REDUCE_FMAXIMUM:
6809 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6810 (Subtarget.hasVInstructionsF16Minimal() &&
6811 !Subtarget.hasVInstructionsF16()))
6812 return SplitVectorReductionOp(Op, DAG);
6813 return lowerVPREDUCE(Op, DAG);
6814 case ISD::VP_REDUCE_AND:
6815 case ISD::VP_REDUCE_OR:
6816 case ISD::VP_REDUCE_XOR:
6817 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6818 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6819 return lowerVPREDUCE(Op, DAG);
6820 case ISD::VP_CTTZ_ELTS:
6821 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6822 return lowerVPCttzElements(Op, DAG);
6823 case ISD::UNDEF: {
6824 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6825 return convertFromScalableVector(Op.getSimpleValueType(),
6826 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6827 }
6828 case ISD::INSERT_SUBVECTOR:
6829 return lowerINSERT_SUBVECTOR(Op, DAG);
6830 case ISD::EXTRACT_SUBVECTOR:
6831 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6832 case ISD::VECTOR_DEINTERLEAVE:
6833 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6834 case ISD::VECTOR_INTERLEAVE:
6835 return lowerVECTOR_INTERLEAVE(Op, DAG);
6836 case ISD::STEP_VECTOR:
6837 return lowerSTEP_VECTOR(Op, DAG);
6838 case ISD::VECTOR_REVERSE:
6839 return lowerVECTOR_REVERSE(Op, DAG);
6840 case ISD::VECTOR_SPLICE:
6841 return lowerVECTOR_SPLICE(Op, DAG);
6842 case ISD::BUILD_VECTOR:
6843 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6844 case ISD::SPLAT_VECTOR:
6845 if ((Op.getValueType().getScalarType() == MVT::f16 &&
6846 (Subtarget.hasVInstructionsF16Minimal() &&
6847 Subtarget.hasStdExtZfhminOrZhinxmin() &&
6848 !Subtarget.hasVInstructionsF16())) ||
6849 (Op.getValueType().getScalarType() == MVT::bf16 &&
6850 (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) {
6851 if (Op.getValueType() == MVT::nxv32f16 ||
6852 Op.getValueType() == MVT::nxv32bf16)
6853 return SplitVectorOp(Op, DAG);
6854 SDLoc DL(Op);
6855 SDValue NewScalar =
6856 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6857 SDValue NewSplat = DAG.getNode(
6858 ISD::SPLAT_VECTOR, DL,
6859 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6860 NewScalar);
6861 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6862 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6863 }
6864 if (Op.getValueType().getVectorElementType() == MVT::i1)
6865 return lowerVectorMaskSplat(Op, DAG);
6866 return SDValue();
6867 case ISD::VECTOR_SHUFFLE:
6868 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6869 case ISD::CONCAT_VECTORS: {
6870 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6871 // better than going through the stack, as the default expansion does.
6872 SDLoc DL(Op);
6873 MVT VT = Op.getSimpleValueType();
6874 MVT ContainerVT = VT;
6875 if (VT.isFixedLengthVector())
6876 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6877
6878 // Recursively split concat_vectors with more than 2 operands:
6879 //
6880 // concat_vector op1, op2, op3, op4
6881 // ->
6882 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6883 //
6884 // This reduces the length of the chain of vslideups and allows us to
6885 // perform the vslideups at a smaller LMUL, limited to MF2.
6886 if (Op.getNumOperands() > 2 &&
6887 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6888 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6890 size_t HalfNumOps = Op.getNumOperands() / 2;
6891 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6892 Op->ops().take_front(HalfNumOps));
6893 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6894 Op->ops().drop_front(HalfNumOps));
6895 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6896 }
6897
6898 unsigned NumOpElts =
6899 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6900 SDValue Vec = DAG.getUNDEF(VT);
6901 for (const auto &OpIdx : enumerate(Op->ops())) {
6902 SDValue SubVec = OpIdx.value();
6903 // Don't insert undef subvectors.
6904 if (SubVec.isUndef())
6905 continue;
6906 Vec =
6907 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6908 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6909 }
6910 return Vec;
6911 }
6912 case ISD::LOAD:
6913 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6914 return V;
6915 if (Op.getValueType().isFixedLengthVector())
6916 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6917 return Op;
6918 case ISD::STORE:
6919 if (auto V = expandUnalignedRVVStore(Op, DAG))
6920 return V;
6921 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6922 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6923 return Op;
6924 case ISD::MLOAD:
6925 case ISD::VP_LOAD:
6926 return lowerMaskedLoad(Op, DAG);
6927 case ISD::MSTORE:
6928 case ISD::VP_STORE:
6929 return lowerMaskedStore(Op, DAG);
6930 case ISD::SELECT_CC: {
6931 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6932 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6933 // into separate SETCC+SELECT just like LegalizeDAG.
6934 SDValue Tmp1 = Op.getOperand(0);
6935 SDValue Tmp2 = Op.getOperand(1);
6936 SDValue True = Op.getOperand(2);
6937 SDValue False = Op.getOperand(3);
6938 EVT VT = Op.getValueType();
6939 SDValue CC = Op.getOperand(4);
6940 EVT CmpVT = Tmp1.getValueType();
6941 EVT CCVT =
6942 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6943 SDLoc DL(Op);
6944 SDValue Cond =
6945 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6946 return DAG.getSelect(DL, VT, Cond, True, False);
6947 }
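// For example, (select_cc a, b, setgt, t, f) re-expands here to
// (select (setcc a, b, setgt), t, f); the setcc then flows through the
// custom ISD::SETCC handling below.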
6948 case ISD::SETCC: {
6949 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6950 if (OpVT.isScalarInteger()) {
6951 MVT VT = Op.getSimpleValueType();
6952 SDValue LHS = Op.getOperand(0);
6953 SDValue RHS = Op.getOperand(1);
6954 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6955 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6956 "Unexpected CondCode");
6957
6958 SDLoc DL(Op);
6959
6960 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6961 // convert this to the equivalent of (set(u)ge X, C+1) by using
6962 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6963 // in a register.
6964 if (isa<ConstantSDNode>(RHS)) {
6965 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6966 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6967 // If this is an unsigned compare and the constant is -1, incrementing
6968 // the constant would change behavior. The result should be false.
6969 if (CCVal == ISD::SETUGT && Imm == -1)
6970 return DAG.getConstant(0, DL, VT);
6971 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6972 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6973 SDValue SetCC = DAG.getSetCC(
6974 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6975 return DAG.getLogicalNOT(DL, SetCC, VT);
6976 }
6977 }
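// For example, (setugt X, 5) is rewritten above to (xori (sltiu X, 6), 1),
// i.e. X >= 6, so the constant 5 never needs to be materialized in a
// register.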
6978
6979 // Not a constant we could handle, swap the operands and condition code to
6980 // SETLT/SETULT.
6981 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6982 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6983 }
6984
6985 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6986 (Subtarget.hasVInstructionsF16Minimal() &&
6987 !Subtarget.hasVInstructionsF16()))
6988 return SplitVectorOp(Op, DAG);
6989
6990 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6991 }
6992 case ISD::ADD:
6993 case ISD::SUB:
6994 case ISD::MUL:
6995 case ISD::MULHS:
6996 case ISD::MULHU:
6997 case ISD::AND:
6998 case ISD::OR:
6999 case ISD::XOR:
7000 case ISD::SDIV:
7001 case ISD::SREM:
7002 case ISD::UDIV:
7003 case ISD::UREM:
7004 case ISD::BSWAP:
7005 case ISD::CTPOP:
7006 return lowerToScalableOp(Op, DAG);
7007 case ISD::SHL:
7008 case ISD::SRA:
7009 case ISD::SRL:
7010 if (Op.getSimpleValueType().isFixedLengthVector())
7011 return lowerToScalableOp(Op, DAG);
7012 // This can be called for an i32 shift amount that needs to be promoted.
7013 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7014 "Unexpected custom legalisation");
7015 return SDValue();
7016 case ISD::FADD:
7017 case ISD::FSUB:
7018 case ISD::FMUL:
7019 case ISD::FDIV:
7020 case ISD::FNEG:
7021 case ISD::FABS:
7022 case ISD::FSQRT:
7023 case ISD::FMA:
7024 case ISD::FMINNUM:
7025 case ISD::FMAXNUM:
7026 if (Op.getValueType() == MVT::nxv32f16 &&
7027 (Subtarget.hasVInstructionsF16Minimal() &&
7028 !Subtarget.hasVInstructionsF16()))
7029 return SplitVectorOp(Op, DAG);
7030 [[fallthrough]];
7031 case ISD::AVGFLOORS:
7032 case ISD::AVGFLOORU:
7033 case ISD::AVGCEILS:
7034 case ISD::AVGCEILU:
7035 case ISD::SMIN:
7036 case ISD::SMAX:
7037 case ISD::UMIN:
7038 case ISD::UMAX:
7039 return lowerToScalableOp(Op, DAG);
7040 case ISD::UADDSAT:
7041 case ISD::USUBSAT:
7042 if (!Op.getValueType().isVector())
7043 return lowerUADDSAT_USUBSAT(Op, DAG);
7044 return lowerToScalableOp(Op, DAG);
7045 case ISD::SADDSAT:
7046 case ISD::SSUBSAT:
7047 if (!Op.getValueType().isVector())
7048 return lowerSADDSAT_SSUBSAT(Op, DAG);
7049 return lowerToScalableOp(Op, DAG);
7050 case ISD::ABDS:
7051 case ISD::ABDU: {
7052 SDLoc dl(Op);
7053 EVT VT = Op->getValueType(0);
7054 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7055 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7056 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7057
7058 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7059 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7060 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7061 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7062 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7063 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7064 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7065 }
7066 case ISD::ABS:
7067 case ISD::VP_ABS:
7068 return lowerABS(Op, DAG);
7069 case ISD::CTLZ:
7070 case ISD::CTLZ_ZERO_UNDEF:
7071 case ISD::CTTZ:
7072 case ISD::CTTZ_ZERO_UNDEF:
7073 if (Subtarget.hasStdExtZvbb())
7074 return lowerToScalableOp(Op, DAG);
7075 assert(Op.getOpcode() != ISD::CTTZ);
7076 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7077 case ISD::VSELECT:
7078 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7079 case ISD::FCOPYSIGN:
7080 if (Op.getValueType() == MVT::nxv32f16 &&
7081 (Subtarget.hasVInstructionsF16Minimal() &&
7082 !Subtarget.hasVInstructionsF16()))
7083 return SplitVectorOp(Op, DAG);
7084 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7085 case ISD::STRICT_FADD:
7086 case ISD::STRICT_FSUB:
7087 case ISD::STRICT_FMUL:
7088 case ISD::STRICT_FDIV:
7089 case ISD::STRICT_FSQRT:
7090 case ISD::STRICT_FMA:
7091 if (Op.getValueType() == MVT::nxv32f16 &&
7092 (Subtarget.hasVInstructionsF16Minimal() &&
7093 !Subtarget.hasVInstructionsF16()))
7094 return SplitStrictFPVectorOp(Op, DAG);
7095 return lowerToScalableOp(Op, DAG);
7096 case ISD::STRICT_FSETCC:
7097 case ISD::STRICT_FSETCCS:
7098 return lowerVectorStrictFSetcc(Op, DAG);
7099 case ISD::STRICT_FCEIL:
7100 case ISD::STRICT_FRINT:
7101 case ISD::STRICT_FFLOOR:
7102 case ISD::STRICT_FTRUNC:
7103 case ISD::STRICT_FNEARBYINT:
7104 case ISD::STRICT_FROUND:
7105 case ISD::STRICT_FROUNDEVEN:
7106 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7107 case ISD::MGATHER:
7108 case ISD::VP_GATHER:
7109 return lowerMaskedGather(Op, DAG);
7110 case ISD::MSCATTER:
7111 case ISD::VP_SCATTER:
7112 return lowerMaskedScatter(Op, DAG);
7113 case ISD::GET_ROUNDING:
7114 return lowerGET_ROUNDING(Op, DAG);
7115 case ISD::SET_ROUNDING:
7116 return lowerSET_ROUNDING(Op, DAG);
7117 case ISD::EH_DWARF_CFA:
7118 return lowerEH_DWARF_CFA(Op, DAG);
7119 case ISD::VP_SELECT:
7120 case ISD::VP_MERGE:
7121 case ISD::VP_ADD:
7122 case ISD::VP_SUB:
7123 case ISD::VP_MUL:
7124 case ISD::VP_SDIV:
7125 case ISD::VP_UDIV:
7126 case ISD::VP_SREM:
7127 case ISD::VP_UREM:
7128 case ISD::VP_UADDSAT:
7129 case ISD::VP_USUBSAT:
7130 case ISD::VP_SADDSAT:
7131 case ISD::VP_SSUBSAT:
7132 case ISD::VP_LRINT:
7133 case ISD::VP_LLRINT:
7134 return lowerVPOp(Op, DAG);
7135 case ISD::VP_AND:
7136 case ISD::VP_OR:
7137 case ISD::VP_XOR:
7138 return lowerLogicVPOp(Op, DAG);
7139 case ISD::VP_FADD:
7140 case ISD::VP_FSUB:
7141 case ISD::VP_FMUL:
7142 case ISD::VP_FDIV:
7143 case ISD::VP_FNEG:
7144 case ISD::VP_FABS:
7145 case ISD::VP_SQRT:
7146 case ISD::VP_FMA:
7147 case ISD::VP_FMINNUM:
7148 case ISD::VP_FMAXNUM:
7149 case ISD::VP_FCOPYSIGN:
7150 if (Op.getValueType() == MVT::nxv32f16 &&
7151 (Subtarget.hasVInstructionsF16Minimal() &&
7152 !Subtarget.hasVInstructionsF16()))
7153 return SplitVPOp(Op, DAG);
7154 [[fallthrough]];
7155 case ISD::VP_SRA:
7156 case ISD::VP_SRL:
7157 case ISD::VP_SHL:
7158 return lowerVPOp(Op, DAG);
7159 case ISD::VP_IS_FPCLASS:
7160 return LowerIS_FPCLASS(Op, DAG);
7161 case ISD::VP_SIGN_EXTEND:
7162 case ISD::VP_ZERO_EXTEND:
7163 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7164 return lowerVPExtMaskOp(Op, DAG);
7165 return lowerVPOp(Op, DAG);
7166 case ISD::VP_TRUNCATE:
7167 return lowerVectorTruncLike(Op, DAG);
7168 case ISD::VP_FP_EXTEND:
7169 case ISD::VP_FP_ROUND:
7170 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7171 case ISD::VP_SINT_TO_FP:
7172 case ISD::VP_UINT_TO_FP:
7173 if (Op.getValueType().isVector() &&
7174 Op.getValueType().getScalarType() == MVT::f16 &&
7175 (Subtarget.hasVInstructionsF16Minimal() &&
7176 !Subtarget.hasVInstructionsF16())) {
7177 if (Op.getValueType() == MVT::nxv32f16)
7178 return SplitVPOp(Op, DAG);
7179 // int -> f32
7180 SDLoc DL(Op);
7181 MVT NVT =
7182 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7183 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7184 // f32 -> f16
7185 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7186 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7187 }
7188 [[fallthrough]];
7189 case ISD::VP_FP_TO_SINT:
7190 case ISD::VP_FP_TO_UINT:
7191 if (SDValue Op1 = Op.getOperand(0);
7192 Op1.getValueType().isVector() &&
7193 Op1.getValueType().getScalarType() == MVT::f16 &&
7194 (Subtarget.hasVInstructionsF16Minimal() &&
7195 !Subtarget.hasVInstructionsF16())) {
7196 if (Op1.getValueType() == MVT::nxv32f16)
7197 return SplitVPOp(Op, DAG);
7198 // f16 -> f32
7199 SDLoc DL(Op);
7200 MVT NVT = MVT::getVectorVT(MVT::f32,
7201 Op1.getValueType().getVectorElementCount());
7202 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7203 // f32 -> int
7204 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7205 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7206 }
7207 return lowerVPFPIntConvOp(Op, DAG);
7208 case ISD::VP_SETCC:
7209 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7210 (Subtarget.hasVInstructionsF16Minimal() &&
7211 !Subtarget.hasVInstructionsF16()))
7212 return SplitVPOp(Op, DAG);
7213 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7214 return lowerVPSetCCMaskOp(Op, DAG);
7215 [[fallthrough]];
7216 case ISD::VP_SMIN:
7217 case ISD::VP_SMAX:
7218 case ISD::VP_UMIN:
7219 case ISD::VP_UMAX:
7220 case ISD::VP_BITREVERSE:
7221 case ISD::VP_BSWAP:
7222 return lowerVPOp(Op, DAG);
7223 case ISD::VP_CTLZ:
7224 case ISD::VP_CTLZ_ZERO_UNDEF:
7225 if (Subtarget.hasStdExtZvbb())
7226 return lowerVPOp(Op, DAG);
7227 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7228 case ISD::VP_CTTZ:
7229 case ISD::VP_CTTZ_ZERO_UNDEF:
7230 if (Subtarget.hasStdExtZvbb())
7231 return lowerVPOp(Op, DAG);
7232 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7233 case ISD::VP_CTPOP:
7234 return lowerVPOp(Op, DAG);
7235 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7236 return lowerVPStridedLoad(Op, DAG);
7237 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7238 return lowerVPStridedStore(Op, DAG);
7239 case ISD::VP_FCEIL:
7240 case ISD::VP_FFLOOR:
7241 case ISD::VP_FRINT:
7242 case ISD::VP_FNEARBYINT:
7243 case ISD::VP_FROUND:
7244 case ISD::VP_FROUNDEVEN:
7245 case ISD::VP_FROUNDTOZERO:
7246 if (Op.getValueType() == MVT::nxv32f16 &&
7247 (Subtarget.hasVInstructionsF16Minimal() &&
7248 !Subtarget.hasVInstructionsF16()))
7249 return SplitVPOp(Op, DAG);
7250 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7251 case ISD::VP_FMAXIMUM:
7252 case ISD::VP_FMINIMUM:
7253 if (Op.getValueType() == MVT::nxv32f16 &&
7254 (Subtarget.hasVInstructionsF16Minimal() &&
7255 !Subtarget.hasVInstructionsF16()))
7256 return SplitVPOp(Op, DAG);
7257 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7258 case ISD::EXPERIMENTAL_VP_SPLICE:
7259 return lowerVPSpliceExperimental(Op, DAG);
7260 case ISD::EXPERIMENTAL_VP_REVERSE:
7261 return lowerVPReverseExperimental(Op, DAG);
7262 case ISD::EXPERIMENTAL_VP_SPLAT:
7263 return lowerVPSplatExperimental(Op, DAG);
7264 case ISD::CLEAR_CACHE: {
7265 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7266 "llvm.clear_cache only needs custom lower on Linux targets");
7267 SDLoc DL(Op);
7268 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7269 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7270 Op.getOperand(2), Flags, DL);
7271 }
7272 }
7273}
7274
7275SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7276 SDValue Start, SDValue End,
7277 SDValue Flags, SDLoc DL) const {
7278 MakeLibCallOptions CallOptions;
7279 std::pair<SDValue, SDValue> CallResult =
7280 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7281 {Start, End, Flags}, CallOptions, DL, InChain);
7282
7283 // This function returns void so only the out chain matters.
7284 return CallResult.second;
7285}
7286
7287 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7288 SelectionDAG &DAG, unsigned Flags) {
7289 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7290}
7291
7292 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7293 SelectionDAG &DAG, unsigned Flags) {
7294 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7295 Flags);
7296}
7297
7298 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7299 SelectionDAG &DAG, unsigned Flags) {
7300 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7301 N->getOffset(), Flags);
7302}
7303
7304 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7305 SelectionDAG &DAG, unsigned Flags) {
7306 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7307}
7308
7309template <class NodeTy>
7310SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7311 bool IsLocal, bool IsExternWeak) const {
7312 SDLoc DL(N);
7313 EVT Ty = getPointerTy(DAG.getDataLayout());
7314
7315 // When HWASAN is used and tagging of global variables is enabled
7316 // they should be accessed via the GOT, since the tagged address of a global
7317 // is incompatible with existing code models. This also applies to non-pic
7318 // mode.
7319 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7320 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7321 if (IsLocal && !Subtarget.allowTaggedGlobals())
7322 // Use PC-relative addressing to access the symbol. This generates the
7323 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7324 // %pcrel_lo(auipc)).
7325 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7326
7327 // Use PC-relative addressing to access the GOT for this symbol, then load
7328 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7329 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7330 SDValue Load =
7331 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7332 MachineFunction &MF = DAG.getMachineFunction();
7333 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7334 MachinePointerInfo::getGOT(MF),
7335 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7336 MachineMemOperand::MOInvariant,
7337 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7338 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7339 return Load;
7340 }
7341
7342 switch (getTargetMachine().getCodeModel()) {
7343 default:
7344 report_fatal_error("Unsupported code model for lowering");
7345 case CodeModel::Small: {
7346 // Generate a sequence for accessing addresses within the first 2 GiB of
7347 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7348 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7349 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7350 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7351 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7352 }
7353 case CodeModel::Medium: {
7354 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7355 if (IsExternWeak) {
7356 // An extern weak symbol may be undefined, i.e. have value 0, which may
7357 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7358 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7359 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7360 SDValue Load =
7361 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7362 MachineFunction &MF = DAG.getMachineFunction();
7363 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7364 MachinePointerInfo::getGOT(MF),
7365 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7366 MachineMemOperand::MOInvariant,
7367 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7368 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7369 return Load;
7370 }
7371
7372 // Generate a sequence for accessing addresses within any 2GiB range within
7373 // the address space. This generates the pattern (PseudoLLA sym), which
7374 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7375 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7376 }
7377 }
7378}
7379
7380SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7381 SelectionDAG &DAG) const {
7382 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7383 assert(N->getOffset() == 0 && "unexpected offset in global node");
7384 const GlobalValue *GV = N->getGlobal();
7385 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7386}
7387
7388SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7389 SelectionDAG &DAG) const {
7390 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7391
7392 return getAddr(N, DAG);
7393}
7394
7395SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7396 SelectionDAG &DAG) const {
7397 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7398
7399 return getAddr(N, DAG);
7400}
7401
7402SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7403 SelectionDAG &DAG) const {
7404 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7405
7406 return getAddr(N, DAG);
7407}
7408
7409SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7410 SelectionDAG &DAG,
7411 bool UseGOT) const {
7412 SDLoc DL(N);
7413 EVT Ty = getPointerTy(DAG.getDataLayout());
7414 const GlobalValue *GV = N->getGlobal();
7415 MVT XLenVT = Subtarget.getXLenVT();
7416
7417 if (UseGOT) {
7418 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7419 // load the address from the GOT and add the thread pointer. This generates
7420 // the pattern (PseudoLA_TLS_IE sym), which expands to
7421 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7422 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7423 SDValue Load =
7424 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7425 MachineFunction &MF = DAG.getMachineFunction();
7426 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7427 MachinePointerInfo::getGOT(MF),
7428 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7429 MachineMemOperand::MOInvariant,
7430 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7431 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7432
7433 // Add the thread pointer.
7434 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7435 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7436 }
7437
7438 // Generate a sequence for accessing the address relative to the thread
7439 // pointer, with the appropriate adjustment for the thread pointer offset.
7440 // This generates the pattern
7441 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7442 SDValue AddrHi =
7443 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7444 SDValue AddrAdd =
7445 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7446 SDValue AddrLo =
7447 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7448
7449 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7450 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7451 SDValue MNAdd =
7452 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7453 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7454}
7455
7456SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7457 SelectionDAG &DAG) const {
7458 SDLoc DL(N);
7459 EVT Ty = getPointerTy(DAG.getDataLayout());
7460 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7461 const GlobalValue *GV = N->getGlobal();
7462
7463 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7464 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7465 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7466 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7467 SDValue Load =
7468 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7469
7470 // Prepare argument list to generate call.
7471 ArgListTy Args;
7472 ArgListEntry Entry;
7473 Entry.Node = Load;
7474 Entry.Ty = CallTy;
7475 Args.push_back(Entry);
7476
7477 // Setup call to __tls_get_addr.
7478 TargetLowering::CallLoweringInfo CLI(DAG);
7479 CLI.setDebugLoc(DL)
7480 .setChain(DAG.getEntryNode())
7481 .setLibCallee(CallingConv::C, CallTy,
7482 DAG.getExternalSymbol("__tls_get_addr", Ty),
7483 std::move(Args));
7484
7485 return LowerCallTo(CLI).first;
7486}
7487
7488SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7489 SelectionDAG &DAG) const {
7490 SDLoc DL(N);
7491 EVT Ty = getPointerTy(DAG.getDataLayout());
7492 const GlobalValue *GV = N->getGlobal();
7493
7494 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7495 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7496 //
7497 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7498 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7499 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7500 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7501 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7502 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7503}
7504
7505SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7506 SelectionDAG &DAG) const {
7507 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7508 assert(N->getOffset() == 0 && "unexpected offset in global node");
7509
7510 if (DAG.getTarget().useEmulatedTLS())
7511 return LowerToTLSEmulatedModel(N, DAG);
7512
7513 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7514
7515 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7516 CallingConv::GHC)
7517 report_fatal_error("In GHC calling convention TLS is not supported");
7518
7519 SDValue Addr;
7520 switch (Model) {
7522 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7523 break;
7525 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7526 break;
7529 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7530 : getDynamicTLSAddr(N, DAG);
7531 break;
7532 }
7533
7534 return Addr;
7535}
7536
7537// Return true if Val is equal to (setcc LHS, RHS, CC).
7538// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7539// Otherwise, return std::nullopt.
7540static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7541 ISD::CondCode CC, SDValue Val) {
7542 assert(Val->getOpcode() == ISD::SETCC);
7543 SDValue LHS2 = Val.getOperand(0);
7544 SDValue RHS2 = Val.getOperand(1);
7545 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7546
7547 if (LHS == LHS2 && RHS == RHS2) {
7548 if (CC == CC2)
7549 return true;
7550 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7551 return false;
7552 } else if (LHS == RHS2 && RHS == LHS2) {
7553 CC = ISD::getSetCCSwappedOperands(CC);
7554 if (CC == CC2)
7555 return true;
7556 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7557 return false;
7558 }
7559
7560 return std::nullopt;
7561}
7562
7563 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7564 const RISCVSubtarget &Subtarget) {
7565 SDValue CondV = N->getOperand(0);
7566 SDValue TrueV = N->getOperand(1);
7567 SDValue FalseV = N->getOperand(2);
7568 MVT VT = N->getSimpleValueType(0);
7569 SDLoc DL(N);
7570
7571 if (!Subtarget.hasConditionalMoveFusion()) {
7572 // (select c, -1, y) -> -c | y
7573 if (isAllOnesConstant(TrueV)) {
7574 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7575 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7576 }
7577 // (select c, y, -1) -> (c-1) | y
7578 if (isAllOnesConstant(FalseV)) {
7579 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7580 DAG.getAllOnesConstant(DL, VT));
7581 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7582 }
7583
7584 // (select c, 0, y) -> (c-1) & y
7585 if (isNullConstant(TrueV)) {
7586 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7587 DAG.getAllOnesConstant(DL, VT));
7588 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7589 }
7590 // (select c, y, 0) -> -c & y
7591 if (isNullConstant(FalseV)) {
7592 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7593 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7594 }
7595 }
7596
7597 // select c, ~x, x --> xor -c, x
7598 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7599 const APInt &TrueVal = TrueV->getAsAPIntVal();
7600 const APInt &FalseVal = FalseV->getAsAPIntVal();
7601 if (~TrueVal == FalseVal) {
7602 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7603 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7604 }
7605 }
7606
7607 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7608 // when both truev and falsev are also setcc.
7609 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7610 FalseV.getOpcode() == ISD::SETCC) {
7611 SDValue LHS = CondV.getOperand(0);
7612 SDValue RHS = CondV.getOperand(1);
7613 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7614
7615 // (select x, x, y) -> x | y
7616 // (select !x, x, y) -> x & y
7617 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7618 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7619 DAG.getFreeze(FalseV));
7620 }
7621 // (select x, y, x) -> x & y
7622 // (select !x, y, x) -> x | y
7623 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7624 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7625 DAG.getFreeze(TrueV), FalseV);
7626 }
7627 }
7628
7629 return SDValue();
7630}
7631
7632// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7633// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7634// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7635// being `0` or `-1`. In such cases we can replace `select` with `and`.
7636// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7637// than `c0`?
7638static SDValue
7639 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7640 const RISCVSubtarget &Subtarget) {
7641 if (Subtarget.hasShortForwardBranchOpt())
7642 return SDValue();
7643
7644 unsigned SelOpNo = 0;
7645 SDValue Sel = BO->getOperand(0);
7646 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7647 SelOpNo = 1;
7648 Sel = BO->getOperand(1);
7649 }
7650
7651 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7652 return SDValue();
7653
7654 unsigned ConstSelOpNo = 1;
7655 unsigned OtherSelOpNo = 2;
7656 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7657 ConstSelOpNo = 2;
7658 OtherSelOpNo = 1;
7659 }
7660 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7661 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7662 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7663 return SDValue();
7664
7665 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7666 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7667 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7668 return SDValue();
7669
7670 SDLoc DL(Sel);
7671 EVT VT = BO->getValueType(0);
7672
7673 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7674 if (SelOpNo == 1)
7675 std::swap(NewConstOps[0], NewConstOps[1]);
7676
7677 SDValue NewConstOp =
7678 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7679 if (!NewConstOp)
7680 return SDValue();
7681
7682 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7683 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7684 return SDValue();
7685
7686 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7687 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7688 if (SelOpNo == 1)
7689 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7690 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7691
7692 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7693 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7694 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7695}
7696
7697SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7698 SDValue CondV = Op.getOperand(0);
7699 SDValue TrueV = Op.getOperand(1);
7700 SDValue FalseV = Op.getOperand(2);
7701 SDLoc DL(Op);
7702 MVT VT = Op.getSimpleValueType();
7703 MVT XLenVT = Subtarget.getXLenVT();
7704
7705 // Lower vector SELECTs to VSELECTs by splatting the condition.
7706 if (VT.isVector()) {
7707 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7708 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7709 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7710 }
7711
7712 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7713 // nodes to implement the SELECT. Performing the lowering here allows for
7714 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7715 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7716 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7717 VT.isScalarInteger()) {
7718 // (select c, t, 0) -> (czero_eqz t, c)
7719 if (isNullConstant(FalseV))
7720 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7721 // (select c, 0, f) -> (czero_nez f, c)
7722 if (isNullConstant(TrueV))
7723 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7724
7725 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7726 if (TrueV.getOpcode() == ISD::AND &&
7727 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7728 return DAG.getNode(
7729 ISD::OR, DL, VT, TrueV,
7730 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7731 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7732 if (FalseV.getOpcode() == ISD::AND &&
7733 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7734 return DAG.getNode(
7735 ISD::OR, DL, VT, FalseV,
7736 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7737
7738 // Try some other optimizations before falling back to generic lowering.
7739 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7740 return V;
7741
7742 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7743 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7744 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7745 const APInt &TrueVal = TrueV->getAsAPIntVal();
7746 const APInt &FalseVal = FalseV->getAsAPIntVal();
7747 const int TrueValCost = RISCVMatInt::getIntMatCost(
7748 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7749 const int FalseValCost = RISCVMatInt::getIntMatCost(
7750 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7751 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7752 SDValue LHSVal = DAG.getConstant(
7753 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7754 SDValue RHSVal =
7755 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7756 SDValue CMOV =
7757 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7758 DL, VT, LHSVal, CondV);
7759 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7760 }
7761
7762 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7763 // Unless we have the short forward branch optimization.
7764 if (!Subtarget.hasConditionalMoveFusion())
7765 return DAG.getNode(
7766 ISD::OR, DL, VT,
7767 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7768 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7769 }
7770
7771 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7772 return V;
7773
7774 if (Op.hasOneUse()) {
7775 unsigned UseOpc = Op->use_begin()->getOpcode();
7776 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7777 SDNode *BinOp = *Op->use_begin();
7778 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7779 DAG, Subtarget)) {
7780 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7781 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
7782 // may return a constant node and cause crash in lowerSELECT.
7783 if (NewSel.getOpcode() == ISD::SELECT)
7784 return lowerSELECT(NewSel, DAG);
7785 return NewSel;
7786 }
7787 }
7788 }
7789
7790 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7791 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7792 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7793 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7794 if (FPTV && FPFV) {
7795 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7796 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7797 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7798 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7799 DAG.getConstant(1, DL, XLenVT));
7800 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7801 }
7802 }
7803
7804 // If the condition is not an integer SETCC which operates on XLenVT, we need
7805 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7806 // (select condv, truev, falsev)
7807 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7808 if (CondV.getOpcode() != ISD::SETCC ||
7809 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7810 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7811 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7812
7813 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7814
7815 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7816 }
7817
7818 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7819 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7820 // advantage of the integer compare+branch instructions. i.e.:
7821 // (select (setcc lhs, rhs, cc), truev, falsev)
7822 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7823 SDValue LHS = CondV.getOperand(0);
7824 SDValue RHS = CondV.getOperand(1);
7825 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7826
7827 // Special case for a select of 2 constants that have a difference of 1.
7828 // Normally this is done by DAGCombine, but if the select is introduced by
7829 // type legalization or op legalization, we miss it. Restricting to SETLT
7830 // case for now because that is what signed saturating add/sub need.
7831 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7832 // but we would probably want to swap the true/false values if the condition
7833 // is SETGE/SETLE to avoid an XORI.
7834 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7835 CCVal == ISD::SETLT) {
7836 const APInt &TrueVal = TrueV->getAsAPIntVal();
7837 const APInt &FalseVal = FalseV->getAsAPIntVal();
7838 if (TrueVal - 1 == FalseVal)
7839 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7840 if (TrueVal + 1 == FalseVal)
7841 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7842 }
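// For example, (select (setlt a, b), 5, 4) hits the TrueVal - 1 == FalseVal
// case and becomes (add (setlt a, b), 4), since the scalar setcc produces 0
// or 1.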
7843
7844 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7845 // 1 < x ? x : 1 -> 0 < x ? x : 1
7846 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7847 RHS == TrueV && LHS == FalseV) {
7848 LHS = DAG.getConstant(0, DL, VT);
7849 // 0 <u x is the same as x != 0.
7850 if (CCVal == ISD::SETULT) {
7851 std::swap(LHS, RHS);
7852 CCVal = ISD::SETNE;
7853 }
7854 }
7855
7856 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7857 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7858 RHS == FalseV) {
7859 RHS = DAG.getConstant(0, DL, VT);
7860 }
7861
7862 SDValue TargetCC = DAG.getCondCode(CCVal);
7863
7864 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7865 // (select (setcc lhs, rhs, CC), constant, falsev)
7866 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7867 std::swap(TrueV, FalseV);
7868 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7869 }
7870
7871 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7872 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7873}
7874
7875SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7876 SDValue CondV = Op.getOperand(1);
7877 SDLoc DL(Op);
7878 MVT XLenVT = Subtarget.getXLenVT();
7879
7880 if (CondV.getOpcode() == ISD::SETCC &&
7881 CondV.getOperand(0).getValueType() == XLenVT) {
7882 SDValue LHS = CondV.getOperand(0);
7883 SDValue RHS = CondV.getOperand(1);
7884 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7885
7886 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7887
7888 SDValue TargetCC = DAG.getCondCode(CCVal);
7889 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7890 LHS, RHS, TargetCC, Op.getOperand(2));
7891 }
7892
7893 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7894 CondV, DAG.getConstant(0, DL, XLenVT),
7895 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7896}
7897
7898 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7899 MachineFunction &MF = DAG.getMachineFunction();
7900 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7901
7902 SDLoc DL(Op);
7903 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7904 getPointerTy(MF.getDataLayout()));
7905
7906 // vastart just stores the address of the VarArgsFrameIndex slot into the
7907 // memory location argument.
7908 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7909 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7910 MachinePointerInfo(SV));
7911}
7912
7913SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7914 SelectionDAG &DAG) const {
7915 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7916 MachineFunction &MF = DAG.getMachineFunction();
7917 MachineFrameInfo &MFI = MF.getFrameInfo();
7918 MFI.setFrameAddressIsTaken(true);
7919 Register FrameReg = RI.getFrameRegister(MF);
7920 int XLenInBytes = Subtarget.getXLen() / 8;
7921
7922 EVT VT = Op.getValueType();
7923 SDLoc DL(Op);
7924 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7925 unsigned Depth = Op.getConstantOperandVal(0);
7926 while (Depth--) {
7927 int Offset = -(XLenInBytes * 2);
7928 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7930 FrameAddr =
7931 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7932 }
7933 return FrameAddr;
7934}
7935
7936SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7937 SelectionDAG &DAG) const {
7938 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7939 MachineFunction &MF = DAG.getMachineFunction();
7940 MachineFrameInfo &MFI = MF.getFrameInfo();
7941 MFI.setReturnAddressIsTaken(true);
7942 MVT XLenVT = Subtarget.getXLenVT();
7943 int XLenInBytes = Subtarget.getXLen() / 8;
7944
7945 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7946 return SDValue();
7947
7948 EVT VT = Op.getValueType();
7949 SDLoc DL(Op);
7950 unsigned Depth = Op.getConstantOperandVal(0);
7951 if (Depth) {
7952 int Off = -XLenInBytes;
7953 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7954 SDValue Offset = DAG.getConstant(Off, DL, VT);
7955 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7956 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7957 MachinePointerInfo());
7958 }
7959
7960 // Return the value of the return address register, marking it an implicit
7961 // live-in.
7962 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7963 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7964}
7965
7966SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7967 SelectionDAG &DAG) const {
7968 SDLoc DL(Op);
7969 SDValue Lo = Op.getOperand(0);
7970 SDValue Hi = Op.getOperand(1);
7971 SDValue Shamt = Op.getOperand(2);
7972 EVT VT = Lo.getValueType();
7973
7974 // if Shamt-XLEN < 0: // Shamt < XLEN
7975 // Lo = Lo << Shamt
7976 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7977 // else:
7978 // Lo = 0
7979 // Hi = Lo << (Shamt-XLEN)
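// For example, on RV32 with Shamt = 40, Shamt-XLEN = 8 >= 0, so Lo = 0 and
// Hi = Lo << 8; with Shamt = 4 the first branch applies and Hi picks up the
// four bits shifted out of Lo via (Lo >>u 1) >>u (31 - 4).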
7980
7981 SDValue Zero = DAG.getConstant(0, DL, VT);
7982 SDValue One = DAG.getConstant(1, DL, VT);
7983 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7984 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7985 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7986 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7987
7988 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7989 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7990 SDValue ShiftRightLo =
7991 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7992 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7993 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7994 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7995
7996 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7997
7998 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7999 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8000
8001 SDValue Parts[2] = {Lo, Hi};
8002 return DAG.getMergeValues(Parts, DL);
8003}
8004
8005SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8006 bool IsSRA) const {
8007 SDLoc DL(Op);
8008 SDValue Lo = Op.getOperand(0);
8009 SDValue Hi = Op.getOperand(1);
8010 SDValue Shamt = Op.getOperand(2);
8011 EVT VT = Lo.getValueType();
8012
8013 // SRA expansion:
8014 // if Shamt-XLEN < 0: // Shamt < XLEN
8015 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8016 // Hi = Hi >>s Shamt
8017 // else:
8018 // Lo = Hi >>s (Shamt-XLEN);
8019 // Hi = Hi >>s (XLEN-1)
8020 //
8021 // SRL expansion:
8022 // if Shamt-XLEN < 0: // Shamt < XLEN
8023 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8024 // Hi = Hi >>u Shamt
8025 // else:
8026 // Lo = Hi >>u (Shamt-XLEN);
8027 // Hi = 0;
8028
8029 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8030
8031 SDValue Zero = DAG.getConstant(0, DL, VT);
8032 SDValue One = DAG.getConstant(1, DL, VT);
8033 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
8034 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8035 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8036 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8037
8038 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8039 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8040 SDValue ShiftLeftHi =
8041 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8042 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8043 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8044 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8045 SDValue HiFalse =
8046 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8047
8048 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8049
8050 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8051 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8052
8053 SDValue Parts[2] = {Lo, Hi};
8054 return DAG.getMergeValues(Parts, DL);
8055}
8056
8057// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8058// legal equivalently-sized i8 type, so we can use that as a go-between.
8059SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8060 SelectionDAG &DAG) const {
8061 SDLoc DL(Op);
8062 MVT VT = Op.getSimpleValueType();
8063 SDValue SplatVal = Op.getOperand(0);
8064 // All-zeros or all-ones splats are handled specially.
8065 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8066 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8067 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8068 }
8069 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8070 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8071 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8072 }
8073 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8074 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8075 DAG.getConstant(1, DL, SplatVal.getValueType()));
8076 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8077 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8078 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8079}
8080
8081// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8082// illegal (currently only vXi64 RV32).
8083// FIXME: We could also catch non-constant sign-extended i32 values and lower
8084// them to VMV_V_X_VL.
8085SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 SDLoc DL(Op);
8088 MVT VecVT = Op.getSimpleValueType();
8089 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8090 "Unexpected SPLAT_VECTOR_PARTS lowering");
8091
8092 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8093 SDValue Lo = Op.getOperand(0);
8094 SDValue Hi = Op.getOperand(1);
8095
8096 MVT ContainerVT = VecVT;
8097 if (VecVT.isFixedLengthVector())
8098 ContainerVT = getContainerForFixedLengthVector(VecVT);
8099
8100 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8101
8102 SDValue Res =
8103 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8104
8105 if (VecVT.isFixedLengthVector())
8106 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8107
8108 return Res;
8109}
8110
8111// Custom-lower extensions from mask vectors by using a vselect either with 1
8112// for zero/any-extension or -1 for sign-extension:
8113// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8114// Note that any-extension is lowered identically to zero-extension.
8115SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8116 int64_t ExtTrueVal) const {
8117 SDLoc DL(Op);
8118 MVT VecVT = Op.getSimpleValueType();
8119 SDValue Src = Op.getOperand(0);
8120 // Only custom-lower extensions from mask types
8121 assert(Src.getValueType().isVector() &&
8122 Src.getValueType().getVectorElementType() == MVT::i1);
8123
8124 if (VecVT.isScalableVector()) {
8125 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8126 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
8127 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8128 }
8129
8130 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8131 MVT I1ContainerVT =
8132 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8133
8134 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8135
8136 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8137
8138 MVT XLenVT = Subtarget.getXLenVT();
8139 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8140 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
8141
8142 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8143 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8144 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8145 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8146 SDValue Select =
8147 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8148 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8149
8150 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8151}
8152
8153SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8154 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8155 MVT ExtVT = Op.getSimpleValueType();
8156 // Only custom-lower extensions from fixed-length vector types.
8157 if (!ExtVT.isFixedLengthVector())
8158 return Op;
8159 MVT VT = Op.getOperand(0).getSimpleValueType();
8160 // Grab the canonical container type for the extended type. Infer the smaller
8161 // type from that to ensure the same number of vector elements, as we know
8162 // the LMUL will be sufficient to hold the smaller type.
8163 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8164 // Get the extended container type manually to ensure the same number of
8165 // vector elements between source and dest.
8166 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8167 ContainerExtVT.getVectorElementCount());
8168
8169 SDValue Op1 =
8170 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8171
8172 SDLoc DL(Op);
8173 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8174
8175 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8176
8177 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8178}
8179
8180// Custom-lower truncations from vectors to mask vectors by using a mask and a
8181// setcc operation:
8182// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8183SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8184 SelectionDAG &DAG) const {
8185 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8186 SDLoc DL(Op);
8187 EVT MaskVT = Op.getValueType();
8188 // Only expect to custom-lower truncations to mask types
8189 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8190 "Unexpected type for vector mask lowering");
8191 SDValue Src = Op.getOperand(0);
8192 MVT VecVT = Src.getSimpleValueType();
8193 SDValue Mask, VL;
8194 if (IsVPTrunc) {
8195 Mask = Op.getOperand(1);
8196 VL = Op.getOperand(2);
8197 }
8198 // If this is a fixed vector, we need to convert it to a scalable vector.
8199 MVT ContainerVT = VecVT;
8200
8201 if (VecVT.isFixedLengthVector()) {
8202 ContainerVT = getContainerForFixedLengthVector(VecVT);
8203 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8204 if (IsVPTrunc) {
8205 MVT MaskContainerVT =
8206 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8207 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8208 }
8209 }
8210
8211 if (!IsVPTrunc) {
8212 std::tie(Mask, VL) =
8213 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8214 }
8215
8216 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8217 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8218
8219 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8220 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8221 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8222 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8223
8224 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8225 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8226 DAG.getUNDEF(ContainerVT), Mask, VL);
8227 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8228 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8229 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8230 if (MaskVT.isFixedLengthVector())
8231 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8232 return Trunc;
8233}
8234
8235SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8236 SelectionDAG &DAG) const {
8237 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8238 SDLoc DL(Op);
8239
8240 MVT VT = Op.getSimpleValueType();
8241 // Only custom-lower vector truncates
8242 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8243
8244 // Truncates to mask types are handled differently
8245 if (VT.getVectorElementType() == MVT::i1)
8246 return lowerVectorMaskTruncLike(Op, DAG);
8247
8248 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8249 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8250 // truncate by one power of two at a time.
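// For example (illustrative): truncating v8i64 to v8i8 emits three chained
// TRUNCATE_VECTOR_VL nodes, narrowing i64->i32->i16->i8 one step at a time.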
8251 MVT DstEltVT = VT.getVectorElementType();
8252
8253 SDValue Src = Op.getOperand(0);
8254 MVT SrcVT = Src.getSimpleValueType();
8255 MVT SrcEltVT = SrcVT.getVectorElementType();
8256
8257 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8258 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8259 "Unexpected vector truncate lowering");
8260
8261 MVT ContainerVT = SrcVT;
8262 SDValue Mask, VL;
8263 if (IsVPTrunc) {
8264 Mask = Op.getOperand(1);
8265 VL = Op.getOperand(2);
8266 }
8267 if (SrcVT.isFixedLengthVector()) {
8268 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8269 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8270 if (IsVPTrunc) {
8271 MVT MaskVT = getMaskTypeFor(ContainerVT);
8272 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8273 }
8274 }
8275
8276 SDValue Result = Src;
8277 if (!IsVPTrunc) {
8278 std::tie(Mask, VL) =
8279 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8280 }
8281
8282 do {
8283 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8284 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8285 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8286 Mask, VL);
8287 } while (SrcEltVT != DstEltVT);
8288
8289 if (SrcVT.isFixedLengthVector())
8290 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8291
8292 return Result;
8293}
8294
8295SDValue
8296RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8297 SelectionDAG &DAG) const {
8298 SDLoc DL(Op);
8299 SDValue Chain = Op.getOperand(0);
8300 SDValue Src = Op.getOperand(1);
8301 MVT VT = Op.getSimpleValueType();
8302 MVT SrcVT = Src.getSimpleValueType();
8303 MVT ContainerVT = VT;
8304 if (VT.isFixedLengthVector()) {
8305 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8306 ContainerVT =
8307 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8308 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8309 }
8310
8311 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8312
8313 // RVV can only widen/truncate fp to types double/half the size of the source.
8314 if ((VT.getVectorElementType() == MVT::f64 &&
8315 (SrcVT.getVectorElementType() == MVT::f16 ||
8316 SrcVT.getVectorElementType() == MVT::bf16)) ||
8317 ((VT.getVectorElementType() == MVT::f16 ||
8318 VT.getVectorElementType() == MVT::bf16) &&
8319 SrcVT.getVectorElementType() == MVT::f64)) {
8320 // For double rounding, the intermediate rounding should be round-to-odd.
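// That is, an f64 -> f16/bf16 round is emitted as f64 -> f32 using the
// round-to-odd conversion followed by f32 -> f16/bf16, so the intermediate
// step cannot introduce a double-rounding error.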
8321 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8322 ? RISCVISD::STRICT_FP_EXTEND_VL
8323 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8324 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8325 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8326 Chain, Src, Mask, VL);
8327 Chain = Src.getValue(1);
8328 }
8329
8330 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8331 ? RISCVISD::STRICT_FP_EXTEND_VL
8332 : RISCVISD::STRICT_FP_ROUND_VL;
8333 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8334 Chain, Src, Mask, VL);
8335 if (VT.isFixedLengthVector()) {
8336 // StrictFP operations have two result values. Their lowered result should
8337 // have the same result count.
8338 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8339 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8340 }
8341 return Res;
8342}
8343
8344SDValue
8345RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8346 SelectionDAG &DAG) const {
8347 bool IsVP =
8348 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8349 bool IsExtend =
8350 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8351 // RVV can only truncate fp to types half the size of the source. We
8352 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8353 // conversion instruction.
8354 SDLoc DL(Op);
8355 MVT VT = Op.getSimpleValueType();
8356
8357 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8358
8359 SDValue Src = Op.getOperand(0);
8360 MVT SrcVT = Src.getSimpleValueType();
8361
8362 bool IsDirectExtend =
8363 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8364 (SrcVT.getVectorElementType() != MVT::f16 &&
8365 SrcVT.getVectorElementType() != MVT::bf16));
8366 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8367 VT.getVectorElementType() != MVT::bf16) ||
8368 SrcVT.getVectorElementType() != MVT::f64);
8369
8370 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8371
8372 // Prepare any fixed-length vector operands.
8373 MVT ContainerVT = VT;
8374 SDValue Mask, VL;
8375 if (IsVP) {
8376 Mask = Op.getOperand(1);
8377 VL = Op.getOperand(2);
8378 }
8379 if (VT.isFixedLengthVector()) {
8380 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8381 ContainerVT =
8382 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8383 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8384 if (IsVP) {
8385 MVT MaskVT = getMaskTypeFor(ContainerVT);
8386 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8387 }
8388 }
8389
8390 if (!IsVP)
8391 std::tie(Mask, VL) =
8392 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8393
8394 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8395
8396 if (IsDirectConv) {
8397 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8398 if (VT.isFixedLengthVector())
8399 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8400 return Src;
8401 }
8402
8403 unsigned InterConvOpc =
8404 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8405
8406 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8407 SDValue IntermediateConv =
8408 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8409 SDValue Result =
8410 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8411 if (VT.isFixedLengthVector())
8412 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8413 return Result;
8414}
8415
8416// Given a scalable vector type and an index into it, returns the type for the
8417// smallest subvector that the index fits in. This can be used to reduce LMUL
8418// for operations like vslidedown.
8419//
8420// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8421static std::optional<MVT>
8422getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8423 const RISCVSubtarget &Subtarget) {
8424 assert(VecVT.isScalableVector());
8425 const unsigned EltSize = VecVT.getScalarSizeInBits();
8426 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8427 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8428 MVT SmallerVT;
8429 if (MaxIdx < MinVLMAX)
8430 SmallerVT = getLMUL1VT(VecVT);
8431 else if (MaxIdx < MinVLMAX * 2)
8432 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8433 else if (MaxIdx < MinVLMAX * 4)
8434 SmallerVT = getLMUL1VT(VecVT)
8435 .getDoubleNumVectorElementsVT()
8436 .getDoubleNumVectorElementsVT();
8437 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8438 return std::nullopt;
8439 return SmallerVT;
8440}
8441
8442// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8443// first position of a vector, and that vector is slid up to the insert index.
8444// By limiting the active vector length to index+1 and merging with the
8445// original vector (with an undisturbed tail policy for elements >= VL), we
8446// achieve the desired result of leaving all elements untouched except the one
8447// at VL-1, which is replaced with the desired value.
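// For example (illustrative; exact codegen depends on VLEN, LMUL and policy
// bits), inserting a scalar at index 2 of a v4i32 becomes roughly:
//   vmv.s.x v9, a0          ; place the value at element 0 of a temporary
//   vslideup.vi v8, v9, 2   ; VL = idx + 1 = 3, tail undisturbed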
8448SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8449 SelectionDAG &DAG) const {
8450 SDLoc DL(Op);
8451 MVT VecVT = Op.getSimpleValueType();
8452 SDValue Vec = Op.getOperand(0);
8453 SDValue Val = Op.getOperand(1);
8454 SDValue Idx = Op.getOperand(2);
8455
8456 if (VecVT.getVectorElementType() == MVT::i1) {
8457 // FIXME: For now we just promote to an i8 vector and insert into that,
8458 // but this is probably not optimal.
8459 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8460 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8461 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8462 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8463 }
8464
8465 MVT ContainerVT = VecVT;
8466 // If the operand is a fixed-length vector, convert to a scalable one.
8467 if (VecVT.isFixedLengthVector()) {
8468 ContainerVT = getContainerForFixedLengthVector(VecVT);
8469 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8470 }
8471
8472 // If we know the index we're going to insert at, we can shrink Vec so that
8473 // we're performing the scalar inserts and slideup on a smaller LMUL.
8474 MVT OrigContainerVT = ContainerVT;
8475 SDValue OrigVec = Vec;
8476 SDValue AlignedIdx;
8477 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8478 const unsigned OrigIdx = IdxC->getZExtValue();
8479 // Do we know an upper bound on LMUL?
8480 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8481 DL, DAG, Subtarget)) {
8482 ContainerVT = *ShrunkVT;
8483 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8484 }
8485
8486 // If we're compiling for an exact VLEN value, we can always perform
8487 // the insert in m1 as we can determine the register corresponding to
8488 // the index in the register group.
8489 const MVT M1VT = getLMUL1VT(ContainerVT);
8490 if (auto VLEN = Subtarget.getRealVLen();
8491 VLEN && ContainerVT.bitsGT(M1VT)) {
8492 EVT ElemVT = VecVT.getVectorElementType();
8493 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8494 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8495 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8496 unsigned ExtractIdx =
8497 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8498 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8499 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8500 ContainerVT = M1VT;
8501 }
8502
8503 if (AlignedIdx)
8504 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8505 AlignedIdx);
8506 }
8507
8508 MVT XLenVT = Subtarget.getXLenVT();
8509
8510 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8511 // Even i64-element vectors on RV32 can be lowered without scalar
8512 // legalization if the most-significant 32 bits of the value are not affected
8513 // by the sign-extension of the lower 32 bits.
8514 // TODO: We could also catch sign extensions of a 32-bit value.
8515 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8516 const auto *CVal = cast<ConstantSDNode>(Val);
8517 if (isInt<32>(CVal->getSExtValue())) {
8518 IsLegalInsert = true;
8519 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8520 }
8521 }
8522
8523 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8524
8525 SDValue ValInVec;
8526
8527 if (IsLegalInsert) {
8528 unsigned Opc =
8529 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8530 if (isNullConstant(Idx)) {
8531 if (!VecVT.isFloatingPoint())
8532 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8533 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8534
8535 if (AlignedIdx)
8536 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8537 Vec, AlignedIdx);
8538 if (!VecVT.isFixedLengthVector())
8539 return Vec;
8540 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8541 }
8542 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8543 } else {
8544 // On RV32, i64-element vectors must be specially handled to place the
8545 // value at element 0, by using two vslide1down instructions in sequence on
8546 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8547 // this.
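// For example (illustrative), with VL limited to 2 the i64 value {lo, hi} is
// materialized as:
//   vslide1down.vx v9, v8, a0   ; insert lo
//   vslide1down.vx v9, v9, a1   ; insert hi, leaving {lo, hi} in elements 0..1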
8548 SDValue ValLo, ValHi;
8549 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8550 MVT I32ContainerVT =
8551 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8552 SDValue I32Mask =
8553 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8554 // Limit the active VL to two.
8555 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8556 // If the Idx is 0 we can insert directly into the vector.
8557 if (isNullConstant(Idx)) {
8558 // First slide in the lo value, then the hi in above it. We use slide1down
8559 // to avoid the register group overlap constraint of vslide1up.
8560 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8561 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8562 // If the source vector is undef don't pass along the tail elements from
8563 // the previous slide1down.
8564 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8565 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8566 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8567 // Bitcast back to the right container type.
8568 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8569
8570 if (AlignedIdx)
8571 ValInVec =
8572 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8573 ValInVec, AlignedIdx);
8574 if (!VecVT.isFixedLengthVector())
8575 return ValInVec;
8576 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8577 }
8578
8579 // First slide in the lo value, then the hi in above it. We use slide1down
8580 // to avoid the register group overlap constraint of vslide1up.
8581 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8582 DAG.getUNDEF(I32ContainerVT),
8583 DAG.getUNDEF(I32ContainerVT), ValLo,
8584 I32Mask, InsertI64VL);
8585 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8586 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8587 I32Mask, InsertI64VL);
8588 // Bitcast back to the right container type.
8589 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8590 }
8591
8592 // Now that the value is in a vector, slide it into position.
8593 SDValue InsertVL =
8594 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8595
8596 // Use tail agnostic policy if Idx is the last index of Vec.
8597 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8598 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8599 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8600 Policy = RISCVII::TAIL_AGNOSTIC;
8601 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8602 Idx, Mask, InsertVL, Policy);
8603
8604 if (AlignedIdx)
8605 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8606 Slideup, AlignedIdx);
8607 if (!VecVT.isFixedLengthVector())
8608 return Slideup;
8609 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8610}
8611
8612// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8613// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8614// types this is done using VMV_X_S to allow us to glean information about the
8615// sign bits of the result.
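// For example (illustrative), extracting element 5 of a v8i32 becomes roughly:
//   vslidedown.vi v8, v8, 5   ; performed with VL = 1
//   vmv.x.s a0, v8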
8616SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8617 SelectionDAG &DAG) const {
8618 SDLoc DL(Op);
8619 SDValue Idx = Op.getOperand(1);
8620 SDValue Vec = Op.getOperand(0);
8621 EVT EltVT = Op.getValueType();
8622 MVT VecVT = Vec.getSimpleValueType();
8623 MVT XLenVT = Subtarget.getXLenVT();
8624
8625 if (VecVT.getVectorElementType() == MVT::i1) {
8626 // Use vfirst.m to extract the first bit.
8627 if (isNullConstant(Idx)) {
8628 MVT ContainerVT = VecVT;
8629 if (VecVT.isFixedLengthVector()) {
8630 ContainerVT = getContainerForFixedLengthVector(VecVT);
8631 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8632 }
8633 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8634 SDValue Vfirst =
8635 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8636 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8637 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8638 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8639 }
8640 if (VecVT.isFixedLengthVector()) {
8641 unsigned NumElts = VecVT.getVectorNumElements();
8642 if (NumElts >= 8) {
8643 MVT WideEltVT;
8644 unsigned WidenVecLen;
8645 SDValue ExtractElementIdx;
8646 SDValue ExtractBitIdx;
8647 unsigned MaxEEW = Subtarget.getELen();
8648 MVT LargestEltVT = MVT::getIntegerVT(
8649 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8650 if (NumElts <= LargestEltVT.getSizeInBits()) {
8651 assert(isPowerOf2_32(NumElts) &&
8652 "the number of elements should be power of 2");
8653 WideEltVT = MVT::getIntegerVT(NumElts);
8654 WidenVecLen = 1;
8655 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8656 ExtractBitIdx = Idx;
8657 } else {
8658 WideEltVT = LargestEltVT;
8659 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8660 // extract element index = index / element width
8661 ExtractElementIdx = DAG.getNode(
8662 ISD::SRL, DL, XLenVT, Idx,
8663 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8664 // mask bit index = index % element width
8665 ExtractBitIdx = DAG.getNode(
8666 ISD::AND, DL, XLenVT, Idx,
8667 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8668 }
8669 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8670 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8671 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8672 Vec, ExtractElementIdx);
8673 // Extract the bit from GPR.
8674 SDValue ShiftRight =
8675 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8676 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8677 DAG.getConstant(1, DL, XLenVT));
8678 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8679 }
8680 }
8681 // Otherwise, promote to an i8 vector and extract from that.
8682 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8683 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8684 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8685 }
8686
8687 // If this is a fixed vector, we need to convert it to a scalable vector.
8688 MVT ContainerVT = VecVT;
8689 if (VecVT.isFixedLengthVector()) {
8690 ContainerVT = getContainerForFixedLengthVector(VecVT);
8691 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8692 }
8693
8694 // If we're compiling for an exact VLEN value and we have a known
8695 // constant index, we can always perform the extract in m1 (or
8696 // smaller) as we can determine the register corresponding to
8697 // the index in the register group.
8698 const auto VLen = Subtarget.getRealVLen();
8699 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8700 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8701 MVT M1VT = getLMUL1VT(ContainerVT);
8702 unsigned OrigIdx = IdxC->getZExtValue();
8703 EVT ElemVT = VecVT.getVectorElementType();
8704 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8705 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8706 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8707 unsigned ExtractIdx =
8708 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8709 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8710 DAG.getVectorIdxConstant(ExtractIdx, DL));
8711 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8712 ContainerVT = M1VT;
8713 }
8714
8715 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8716 // contains our index.
8717 std::optional<uint64_t> MaxIdx;
8718 if (VecVT.isFixedLengthVector())
8719 MaxIdx = VecVT.getVectorNumElements() - 1;
8720 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8721 MaxIdx = IdxC->getZExtValue();
8722 if (MaxIdx) {
8723 if (auto SmallerVT =
8724 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8725 ContainerVT = *SmallerVT;
8726 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8727 DAG.getConstant(0, DL, XLenVT));
8728 }
8729 }
8730
8731 // If after narrowing, the required slide is still greater than LMUL2,
8732 // fallback to generic expansion and go through the stack. This is done
8733 // for a subtle reason: extracting *all* elements out of a vector is
8734 // widely expected to be linear in vector size, but because vslidedown
8735 // is linear in LMUL, performing N extracts using vslidedown becomes
8736 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8737 // seems to have the same problem (the store is linear in LMUL), but the
8738 // generic expansion *memoizes* the store, and thus for many extracts of
8739 // the same vector we end up with one store and a bunch of loads.
8740 // TODO: We don't have the same code for insert_vector_elt because we
8741 // have BUILD_VECTOR and handle the degenerate case there. Should we
8742 // consider adding an inverse BUILD_VECTOR node?
8743 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8744 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8745 return SDValue();
8746
8747 // If the index is 0, the vector is already in the right position.
8748 if (!isNullConstant(Idx)) {
8749 // Use a VL of 1 to avoid processing more elements than we need.
8750 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8751 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8752 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8753 }
8754
8755 if (!EltVT.isInteger()) {
8756 // Floating-point extracts are handled in TableGen.
8757 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8758 DAG.getVectorIdxConstant(0, DL));
8759 }
8760
8761 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8762 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8763}
8764
8765// Some RVV intrinsics may claim that they want an integer operand to be
8766// promoted or expanded.
8767 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8768 const RISCVSubtarget &Subtarget) {
8769 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8770 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8771 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8772 "Unexpected opcode");
8773
8774 if (!Subtarget.hasVInstructions())
8775 return SDValue();
8776
8777 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8778 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8779 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8780
8781 SDLoc DL(Op);
8782
8783 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8784 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8785 if (!II || !II->hasScalarOperand())
8786 return SDValue();
8787
8788 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8789 assert(SplatOp < Op.getNumOperands());
8790
8791 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8792 SDValue &ScalarOp = Operands[SplatOp];
8793 MVT OpVT = ScalarOp.getSimpleValueType();
8794 MVT XLenVT = Subtarget.getXLenVT();
8795
8796 // If this isn't a scalar, or its type is XLenVT we're done.
8797 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8798 return SDValue();
8799
8800 // Simplest case is that the operand needs to be promoted to XLenVT.
8801 if (OpVT.bitsLT(XLenVT)) {
8802 // If the operand is a constant, sign extend to increase our chances
8803 // of being able to use a .vi instruction. ANY_EXTEND would become a
8804 // zero extend and the simm5 check in isel would fail.
8805 // FIXME: Should we ignore the upper bits in isel instead?
8806 unsigned ExtOpc =
8807 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8808 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8809 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8810 }
8811
8812 // Use the previous operand to get the vXi64 VT. The result might be a mask
8813 // VT for compares. Using the previous operand assumes that the previous
8814 // operand will never have a smaller element size than a scalar operand and
8815 // that a widening operation never uses SEW=64.
8816 // NOTE: If this fails the below assert, we can probably just find the
8817 // element count from any operand or result and use it to construct the VT.
8818 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8819 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8820
8821 // The more complex case is when the scalar is larger than XLenVT.
8822 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8823 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8824
8825 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8826 // instruction to sign-extend since SEW>XLEN.
8827 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8828 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8829 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8830 }
8831
8832 switch (IntNo) {
8833 case Intrinsic::riscv_vslide1up:
8834 case Intrinsic::riscv_vslide1down:
8835 case Intrinsic::riscv_vslide1up_mask:
8836 case Intrinsic::riscv_vslide1down_mask: {
8837 // We need to special case these when the scalar is larger than XLen.
8838 unsigned NumOps = Op.getNumOperands();
8839 bool IsMasked = NumOps == 7;
8840
8841 // Convert the vector source to the equivalent nxvXi32 vector.
8842 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8843 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8844 SDValue ScalarLo, ScalarHi;
8845 std::tie(ScalarLo, ScalarHi) =
8846 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8847
8848 // Double the VL since we halved SEW.
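// e.g. (illustrative) a vslide1up.vx of i64 elements with AVL=4 on RV32 is
// emitted as two SEW=32 vslide1up steps (hi then lo) with an i32 VL of 8.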
8849 SDValue AVL = getVLOperand(Op);
8850 SDValue I32VL;
8851
8852 // Optimize for constant AVL
8853 if (isa<ConstantSDNode>(AVL)) {
8854 const auto [MinVLMAX, MaxVLMAX] =
8855 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8856
8857 uint64_t AVLInt = AVL->getAsZExtVal();
8858 if (AVLInt <= MinVLMAX) {
8859 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8860 } else if (AVLInt >= 2 * MaxVLMAX) {
8861 // Just set vl to VLMAX in this situation
8862 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8863 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8864 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8865 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8866 SDValue SETVLMAX = DAG.getTargetConstant(
8867 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8868 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8869 LMUL);
8870 } else {
8871 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8872 // is related to the hardware implementation.
8873 // So let the following code handle it.
8874 }
8875 }
8876 if (!I32VL) {
8877 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8878 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8879 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8880 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8881 SDValue SETVL =
8882 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8883 // Use the vsetvli instruction to get the actually used length, which
8884 // depends on the hardware implementation.
8885 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8886 SEW, LMUL);
8887 I32VL =
8888 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8889 }
8890
8891 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8892
8893 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8894 // instructions.
8895 SDValue Passthru;
8896 if (IsMasked)
8897 Passthru = DAG.getUNDEF(I32VT);
8898 else
8899 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8900
8901 if (IntNo == Intrinsic::riscv_vslide1up ||
8902 IntNo == Intrinsic::riscv_vslide1up_mask) {
8903 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8904 ScalarHi, I32Mask, I32VL);
8905 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8906 ScalarLo, I32Mask, I32VL);
8907 } else {
8908 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8909 ScalarLo, I32Mask, I32VL);
8910 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8911 ScalarHi, I32Mask, I32VL);
8912 }
8913
8914 // Convert back to nxvXi64.
8915 Vec = DAG.getBitcast(VT, Vec);
8916
8917 if (!IsMasked)
8918 return Vec;
8919 // Apply mask after the operation.
8920 SDValue Mask = Operands[NumOps - 3];
8921 SDValue MaskedOff = Operands[1];
8922 // Assume Policy operand is the last operand.
8923 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8924 // We don't need to select maskedoff if it's undef.
8925 if (MaskedOff.isUndef())
8926 return Vec;
8927 // TAMU
8928 if (Policy == RISCVII::TAIL_AGNOSTIC)
8929 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8930 DAG.getUNDEF(VT), AVL);
8931 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8932 // It's fine because vmerge does not care about the mask policy.
8933 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8934 MaskedOff, AVL);
8935 }
8936 }
8937
8938 // We need to convert the scalar to a splat vector.
8939 SDValue VL = getVLOperand(Op);
8940 assert(VL.getValueType() == XLenVT);
8941 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8942 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8943}
8944
8945// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8946// scalable vector llvm.get.vector.length for now.
8947//
8948// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8949// (vscale * VF). The vscale and VF are independent of element width. We use
8950// SEW=8 for the vsetvli because it is the only element width that supports all
8951 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8952 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8953// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8954// SEW and LMUL are better for the surrounding vector instructions.
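// For example (illustrative): for VF=4 this emits a vsetvli with SEW=8 and
// LMUL=1/2, giving VLMax = (VLEN/8)/2 = VLEN/16 = (VLEN/RVVBitsPerBlock) * 4,
// i.e. exactly vscale * 4 as requested.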
8955 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8956 const RISCVSubtarget &Subtarget) {
8957 MVT XLenVT = Subtarget.getXLenVT();
8958
8959 // The smallest LMUL is only valid for the smallest element width.
8960 const unsigned ElementWidth = 8;
8961
8962 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8963 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8964 // We don't support VF==1 with ELEN==32.
8965 [[maybe_unused]] unsigned MinVF =
8966 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8967
8968 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8969 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8970 "Unexpected VF");
8971
8972 bool Fractional = VF < LMul1VF;
8973 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8974 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8975 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8976
8977 SDLoc DL(N);
8978
8979 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8980 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8981
8982 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8983
8984 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8985 SDValue Res =
8986 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8987 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8988}
8989
8990 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8991 const RISCVSubtarget &Subtarget) {
8992 SDValue Op0 = N->getOperand(1);
8993 MVT OpVT = Op0.getSimpleValueType();
8994 MVT ContainerVT = OpVT;
8995 if (OpVT.isFixedLengthVector()) {
8996 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8997 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8998 }
8999 MVT XLenVT = Subtarget.getXLenVT();
9000 SDLoc DL(N);
9001 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9002 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9003 if (isOneConstant(N->getOperand(2)))
9004 return Res;
9005
9006 // Convert -1 to VL.
9007 SDValue Setcc =
9008 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9009 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9010 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9011}
9012
9013static inline void promoteVCIXScalar(const SDValue &Op,
9014 SmallVectorImpl<SDValue> &Operands,
9015 SelectionDAG &DAG) {
9016 const RISCVSubtarget &Subtarget =
9017 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9018
9019 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9020 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9021 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9022 SDLoc DL(Op);
9023
9024 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9025 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9026 if (!II || !II->hasScalarOperand())
9027 return;
9028
9029 unsigned SplatOp = II->ScalarOperand + 1;
9030 assert(SplatOp < Op.getNumOperands());
9031
9032 SDValue &ScalarOp = Operands[SplatOp];
9033 MVT OpVT = ScalarOp.getSimpleValueType();
9034 MVT XLenVT = Subtarget.getXLenVT();
9035
9036 // The code below is partially copied from lowerVectorIntrinsicScalars.
9037 // If this isn't a scalar, or its type is XLenVT we're done.
9038 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9039 return;
9040
9041 // Manually emit promote operation for scalar operation.
9042 if (OpVT.bitsLT(XLenVT)) {
9043 unsigned ExtOpc =
9044 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9045 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9046 }
9047
9048 return;
9049}
9050
9051static void processVCIXOperands(SDValue &OrigOp,
9052 SmallVectorImpl<SDValue> &Operands,
9053 SelectionDAG &DAG) {
9054 promoteVCIXScalar(OrigOp, Operands, DAG);
9055 const RISCVSubtarget &Subtarget =
9056 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9057 for (SDValue &V : Operands) {
9058 EVT ValType = V.getValueType();
9059 if (ValType.isVector() && ValType.isFloatingPoint()) {
9060 MVT InterimIVT =
9061 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9062 ValType.getVectorElementCount());
9063 V = DAG.getBitcast(InterimIVT, V);
9064 }
9065 if (ValType.isFixedLengthVector()) {
9066 MVT OpContainerVT = getContainerForFixedLengthVector(
9067 DAG, V.getSimpleValueType(), Subtarget);
9068 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9069 }
9070 }
9071}
9072
9073// LMUL * VLEN should be greater than or equal to EGS * SEW
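// e.g. (illustrative): with VLEN >= 128, an nxv2i32 operand (LMUL=1) is valid
// for EGS=4 and SEW=32 because 1 * 128 >= 4 * 32.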
9074static inline bool isValidEGW(int EGS, EVT VT,
9075 const RISCVSubtarget &Subtarget) {
9076 return (Subtarget.getRealMinVLen() *
9077 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9078 EGS * VT.getScalarSizeInBits();
9079}
9080
9081SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9082 SelectionDAG &DAG) const {
9083 unsigned IntNo = Op.getConstantOperandVal(0);
9084 SDLoc DL(Op);
9085 MVT XLenVT = Subtarget.getXLenVT();
9086
9087 switch (IntNo) {
9088 default:
9089 break; // Don't custom lower most intrinsics.
9090 case Intrinsic::thread_pointer: {
9091 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9092 return DAG.getRegister(RISCV::X4, PtrVT);
9093 }
9094 case Intrinsic::riscv_orc_b:
9095 case Intrinsic::riscv_brev8:
9096 case Intrinsic::riscv_sha256sig0:
9097 case Intrinsic::riscv_sha256sig1:
9098 case Intrinsic::riscv_sha256sum0:
9099 case Intrinsic::riscv_sha256sum1:
9100 case Intrinsic::riscv_sm3p0:
9101 case Intrinsic::riscv_sm3p1: {
9102 unsigned Opc;
9103 switch (IntNo) {
9104 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9105 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9106 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9107 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9108 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9109 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9110 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9111 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9112 }
9113
9114 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9115 SDValue NewOp =
9116 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9117 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
9118 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9119 }
9120
9121 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9122 }
9123 case Intrinsic::riscv_sm4ks:
9124 case Intrinsic::riscv_sm4ed: {
9125 unsigned Opc =
9126 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9127
9128 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9129 SDValue NewOp0 =
9130 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9131 SDValue NewOp1 =
9132 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9133 SDValue Res =
9134 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
9135 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9136 }
9137
9138 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9139 Op.getOperand(3));
9140 }
9141 case Intrinsic::riscv_zip:
9142 case Intrinsic::riscv_unzip: {
9143 unsigned Opc =
9144 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9145 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9146 }
9147 case Intrinsic::riscv_mopr: {
9148 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9149 SDValue NewOp =
9150 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9151 SDValue Res = DAG.getNode(
9152 RISCVISD::MOPR, DL, MVT::i64, NewOp,
9153 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
9154 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9155 }
9156 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9157 Op.getOperand(2));
9158 }
9159
9160 case Intrinsic::riscv_moprr: {
9161 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9162 SDValue NewOp0 =
9163 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9164 SDValue NewOp1 =
9165 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9166 SDValue Res = DAG.getNode(
9167 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
9168 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
9169 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9170 }
9171 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9172 Op.getOperand(2), Op.getOperand(3));
9173 }
9174 case Intrinsic::riscv_clmul:
9175 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9176 SDValue NewOp0 =
9177 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9178 SDValue NewOp1 =
9179 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9180 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
9181 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9182 }
9183 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9184 Op.getOperand(2));
9185 case Intrinsic::riscv_clmulh:
9186 case Intrinsic::riscv_clmulr: {
9187 unsigned Opc =
9188 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9189 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9190 SDValue NewOp0 =
9191 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9192 SDValue NewOp1 =
9193 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9194 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
9195 DAG.getConstant(32, DL, MVT::i64));
9196 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
9197 DAG.getConstant(32, DL, MVT::i64));
9198 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
9199 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
9200 DAG.getConstant(32, DL, MVT::i64));
9201 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9202 }
9203
9204 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9205 }
9206 case Intrinsic::experimental_get_vector_length:
9207 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9208 case Intrinsic::experimental_cttz_elts:
9209 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9210 case Intrinsic::riscv_vmv_x_s: {
9211 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9212 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9213 }
9214 case Intrinsic::riscv_vfmv_f_s:
9215 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9216 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9217 case Intrinsic::riscv_vmv_v_x:
9218 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9219 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9220 Subtarget);
9221 case Intrinsic::riscv_vfmv_v_f:
9222 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9223 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9224 case Intrinsic::riscv_vmv_s_x: {
9225 SDValue Scalar = Op.getOperand(2);
9226
9227 if (Scalar.getValueType().bitsLE(XLenVT)) {
9228 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9229 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9230 Op.getOperand(1), Scalar, Op.getOperand(3));
9231 }
9232
9233 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9234
9235 // This is an i64 value that lives in two scalar registers. We have to
9236 // insert this in a convoluted way. First we build vXi64 splat containing
9237 // the two values that we assemble using some bit math. Next we'll use
9238 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9239 // to merge element 0 from our splat into the source vector.
9240 // FIXME: This is probably not the best way to do this, but it is
9241 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9242 // point.
9243 // sw lo, (a0)
9244 // sw hi, 4(a0)
9245 // vlse vX, (a0)
9246 //
9247 // vid.v vVid
9248 // vmseq.vx mMask, vVid, 0
9249 // vmerge.vvm vDest, vSrc, vVal, mMask
9250 MVT VT = Op.getSimpleValueType();
9251 SDValue Vec = Op.getOperand(1);
9252 SDValue VL = getVLOperand(Op);
9253
9254 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9255 if (Op.getOperand(1).isUndef())
9256 return SplattedVal;
9257 SDValue SplattedIdx =
9258 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9259 DAG.getConstant(0, DL, MVT::i32), VL);
9260
9261 MVT MaskVT = getMaskTypeFor(VT);
9262 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9263 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9264 SDValue SelectCond =
9265 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9266 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9267 DAG.getUNDEF(MaskVT), Mask, VL});
9268 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9269 Vec, DAG.getUNDEF(VT), VL);
9270 }
9271 case Intrinsic::riscv_vfmv_s_f:
9272 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9273 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9274 // EGS * EEW >= 128 bits
9275 case Intrinsic::riscv_vaesdf_vv:
9276 case Intrinsic::riscv_vaesdf_vs:
9277 case Intrinsic::riscv_vaesdm_vv:
9278 case Intrinsic::riscv_vaesdm_vs:
9279 case Intrinsic::riscv_vaesef_vv:
9280 case Intrinsic::riscv_vaesef_vs:
9281 case Intrinsic::riscv_vaesem_vv:
9282 case Intrinsic::riscv_vaesem_vs:
9283 case Intrinsic::riscv_vaeskf1:
9284 case Intrinsic::riscv_vaeskf2:
9285 case Intrinsic::riscv_vaesz_vs:
9286 case Intrinsic::riscv_vsm4k:
9287 case Intrinsic::riscv_vsm4r_vv:
9288 case Intrinsic::riscv_vsm4r_vs: {
9289 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9290 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9291 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9292 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9293 return Op;
9294 }
9295 // EGS * EEW >= 256 bits
9296 case Intrinsic::riscv_vsm3c:
9297 case Intrinsic::riscv_vsm3me: {
9298 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9299 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9300 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9301 return Op;
9302 }
9303 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9304 case Intrinsic::riscv_vsha2ch:
9305 case Intrinsic::riscv_vsha2cl:
9306 case Intrinsic::riscv_vsha2ms: {
9307 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9308 !Subtarget.hasStdExtZvknhb())
9309 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9310 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9311 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9312 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9313 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9314 return Op;
9315 }
9316 case Intrinsic::riscv_sf_vc_v_x:
9317 case Intrinsic::riscv_sf_vc_v_i:
9318 case Intrinsic::riscv_sf_vc_v_xv:
9319 case Intrinsic::riscv_sf_vc_v_iv:
9320 case Intrinsic::riscv_sf_vc_v_vv:
9321 case Intrinsic::riscv_sf_vc_v_fv:
9322 case Intrinsic::riscv_sf_vc_v_xvv:
9323 case Intrinsic::riscv_sf_vc_v_ivv:
9324 case Intrinsic::riscv_sf_vc_v_vvv:
9325 case Intrinsic::riscv_sf_vc_v_fvv:
9326 case Intrinsic::riscv_sf_vc_v_xvw:
9327 case Intrinsic::riscv_sf_vc_v_ivw:
9328 case Intrinsic::riscv_sf_vc_v_vvw:
9329 case Intrinsic::riscv_sf_vc_v_fvw: {
9330 MVT VT = Op.getSimpleValueType();
9331
9332 SmallVector<SDValue> Operands{Op->op_values()};
9333 processVCIXOperands(Op, Operands, DAG);
9334
9335 MVT RetVT = VT;
9336 if (VT.isFixedLengthVector())
9337 RetVT = getContainerForFixedLengthVector(VT);
9338 else if (VT.isFloatingPoint())
9339 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9340 VT.getVectorElementCount());
9341
9342 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9343
9344 if (VT.isFixedLengthVector())
9345 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9346 else if (VT.isFloatingPoint())
9347 NewNode = DAG.getBitcast(VT, NewNode);
9348
9349 if (Op == NewNode)
9350 break;
9351
9352 return NewNode;
9353 }
9354 }
9355
9356 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9357}
9358
9359 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9360 unsigned Type) {
9361 SDLoc DL(Op);
9362 SmallVector<SDValue> Operands{Op->op_values()};
9363 Operands.erase(Operands.begin() + 1);
9364
9365 const RISCVSubtarget &Subtarget =
9367 MVT VT = Op.getSimpleValueType();
9368 MVT RetVT = VT;
9369 MVT FloatVT = VT;
9370
9371 if (VT.isFloatingPoint()) {
9372 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9373 VT.getVectorElementCount());
9374 FloatVT = RetVT;
9375 }
9376 if (VT.isFixedLengthVector())
9377 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9378 Subtarget);
9379
9380 processVCIXOperands(Op, Operands, DAG);
9381
9382 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9383 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9384 SDValue Chain = NewNode.getValue(1);
9385
9386 if (VT.isFixedLengthVector())
9387 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9388 if (VT.isFloatingPoint())
9389 NewNode = DAG.getBitcast(VT, NewNode);
9390
9391 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9392
9393 return NewNode;
9394}
9395
9396 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9397 unsigned Type) {
9398 SmallVector<SDValue> Operands{Op->op_values()};
9399 Operands.erase(Operands.begin() + 1);
9400 processVCIXOperands(Op, Operands, DAG);
9401
9402 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9403}
9404
9405SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9406 SelectionDAG &DAG) const {
9407 unsigned IntNo = Op.getConstantOperandVal(1);
9408 switch (IntNo) {
9409 default:
9410 break;
9411 case Intrinsic::riscv_seg2_load:
9412 case Intrinsic::riscv_seg3_load:
9413 case Intrinsic::riscv_seg4_load:
9414 case Intrinsic::riscv_seg5_load:
9415 case Intrinsic::riscv_seg6_load:
9416 case Intrinsic::riscv_seg7_load:
9417 case Intrinsic::riscv_seg8_load: {
9418 SDLoc DL(Op);
9419 static const Intrinsic::ID VlsegInts[7] = {
9420 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9421 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9422 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9423 Intrinsic::riscv_vlseg8};
9424 unsigned NF = Op->getNumValues() - 1;
9425 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9426 MVT XLenVT = Subtarget.getXLenVT();
9427 MVT VT = Op->getSimpleValueType(0);
9428 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9429
9430 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9431 Subtarget);
9432 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9433 auto *Load = cast<MemIntrinsicSDNode>(Op);
9434 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9435 ContainerVTs.push_back(MVT::Other);
9436 SDVTList VTs = DAG.getVTList(ContainerVTs);
9437 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9438 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9439 Ops.push_back(Op.getOperand(2));
9440 Ops.push_back(VL);
9441 SDValue Result =
9442 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9443 Load->getMemoryVT(), Load->getMemOperand());
9444 SmallVector<SDValue, 9> Results;
9445 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9446 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9447 DAG, Subtarget));
9448 Results.push_back(Result.getValue(NF));
9449 return DAG.getMergeValues(Results, DL);
9450 }
9451 case Intrinsic::riscv_sf_vc_v_x_se:
9452 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9453 case Intrinsic::riscv_sf_vc_v_i_se:
9454 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9455 case Intrinsic::riscv_sf_vc_v_xv_se:
9456 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9457 case Intrinsic::riscv_sf_vc_v_iv_se:
9458 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9459 case Intrinsic::riscv_sf_vc_v_vv_se:
9460 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9461 case Intrinsic::riscv_sf_vc_v_fv_se:
9462 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9463 case Intrinsic::riscv_sf_vc_v_xvv_se:
9464 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9465 case Intrinsic::riscv_sf_vc_v_ivv_se:
9466 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9467 case Intrinsic::riscv_sf_vc_v_vvv_se:
9468 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9469 case Intrinsic::riscv_sf_vc_v_fvv_se:
9470 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9471 case Intrinsic::riscv_sf_vc_v_xvw_se:
9472 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9473 case Intrinsic::riscv_sf_vc_v_ivw_se:
9474 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9475 case Intrinsic::riscv_sf_vc_v_vvw_se:
9476 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9477 case Intrinsic::riscv_sf_vc_v_fvw_se:
9478 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9479 }
9480
9481 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9482}
9483
9484SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9485 SelectionDAG &DAG) const {
9486 unsigned IntNo = Op.getConstantOperandVal(1);
9487 switch (IntNo) {
9488 default:
9489 break;
9490 case Intrinsic::riscv_seg2_store:
9491 case Intrinsic::riscv_seg3_store:
9492 case Intrinsic::riscv_seg4_store:
9493 case Intrinsic::riscv_seg5_store:
9494 case Intrinsic::riscv_seg6_store:
9495 case Intrinsic::riscv_seg7_store:
9496 case Intrinsic::riscv_seg8_store: {
9497 SDLoc DL(Op);
9498 static const Intrinsic::ID VssegInts[] = {
9499 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9500 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9501 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9502 Intrinsic::riscv_vsseg8};
9503 // Operands are (chain, int_id, vec*, ptr, vl)
9504 unsigned NF = Op->getNumOperands() - 4;
9505 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9506 MVT XLenVT = Subtarget.getXLenVT();
9507 MVT VT = Op->getOperand(2).getSimpleValueType();
9508 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9509
9510 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9511 Subtarget);
9512 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9513 SDValue Ptr = Op->getOperand(NF + 2);
9514
9515 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9516 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9517 for (unsigned i = 0; i < NF; i++)
9518 Ops.push_back(convertToScalableVector(
9519 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9520 Ops.append({Ptr, VL});
9521
9522 return DAG.getMemIntrinsicNode(
9523 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9524 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9525 }
9526 case Intrinsic::riscv_sf_vc_xv_se:
9527 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9528 case Intrinsic::riscv_sf_vc_iv_se:
9529 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9530 case Intrinsic::riscv_sf_vc_vv_se:
9531 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9532 case Intrinsic::riscv_sf_vc_fv_se:
9533 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9534 case Intrinsic::riscv_sf_vc_xvv_se:
9535 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9536 case Intrinsic::riscv_sf_vc_ivv_se:
9537 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9538 case Intrinsic::riscv_sf_vc_vvv_se:
9539 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9540 case Intrinsic::riscv_sf_vc_fvv_se:
9541 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9542 case Intrinsic::riscv_sf_vc_xvw_se:
9543 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9544 case Intrinsic::riscv_sf_vc_ivw_se:
9545 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9546 case Intrinsic::riscv_sf_vc_vvw_se:
9547 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9548 case Intrinsic::riscv_sf_vc_fvw_se:
9549 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9550 }
9551
9552 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9553}
9554
9555static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9556 switch (ISDOpcode) {
9557 default:
9558 llvm_unreachable("Unhandled reduction");
9559 case ISD::VP_REDUCE_ADD:
9560 case ISD::VECREDUCE_ADD:
9561 return RISCVISD::VECREDUCE_ADD_VL;
9562 case ISD::VP_REDUCE_UMAX:
9563 case ISD::VECREDUCE_UMAX:
9564 return RISCVISD::VECREDUCE_UMAX_VL;
9565 case ISD::VP_REDUCE_SMAX:
9566 case ISD::VECREDUCE_SMAX:
9567 return RISCVISD::VECREDUCE_SMAX_VL;
9568 case ISD::VP_REDUCE_UMIN:
9569 case ISD::VECREDUCE_UMIN:
9570 return RISCVISD::VECREDUCE_UMIN_VL;
9571 case ISD::VP_REDUCE_SMIN:
9572 case ISD::VECREDUCE_SMIN:
9573 return RISCVISD::VECREDUCE_SMIN_VL;
9574 case ISD::VP_REDUCE_AND:
9575 case ISD::VECREDUCE_AND:
9576 return RISCVISD::VECREDUCE_AND_VL;
9577 case ISD::VP_REDUCE_OR:
9578 case ISD::VECREDUCE_OR:
9579 return RISCVISD::VECREDUCE_OR_VL;
9580 case ISD::VP_REDUCE_XOR:
9581 case ISD::VECREDUCE_XOR:
9582 return RISCVISD::VECREDUCE_XOR_VL;
9583 case ISD::VP_REDUCE_FADD:
9584 return RISCVISD::VECREDUCE_FADD_VL;
9585 case ISD::VP_REDUCE_SEQ_FADD:
9586 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9587 case ISD::VP_REDUCE_FMAX:
9588 case ISD::VP_REDUCE_FMAXIMUM:
9589 return RISCVISD::VECREDUCE_FMAX_VL;
9590 case ISD::VP_REDUCE_FMIN:
9591 case ISD::VP_REDUCE_FMINIMUM:
9592 return RISCVISD::VECREDUCE_FMIN_VL;
9593 }
9594
9595}
9596
9597SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9598 SelectionDAG &DAG,
9599 bool IsVP) const {
9600 SDLoc DL(Op);
9601 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9602 MVT VecVT = Vec.getSimpleValueType();
9603 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9604 Op.getOpcode() == ISD::VECREDUCE_OR ||
9605 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9606 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9607 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9608 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9609 "Unexpected reduction lowering");
9610
9611 MVT XLenVT = Subtarget.getXLenVT();
9612
9613 MVT ContainerVT = VecVT;
9614 if (VecVT.isFixedLengthVector()) {
9615 ContainerVT = getContainerForFixedLengthVector(VecVT);
9616 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9617 }
9618
9619 SDValue Mask, VL;
9620 if (IsVP) {
9621 Mask = Op.getOperand(2);
9622 VL = Op.getOperand(3);
9623 } else {
9624 std::tie(Mask, VL) =
9625 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9626 }
9627
9628 unsigned BaseOpc;
9629 ISD::CondCode CC;
9630 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9631
9632 switch (Op.getOpcode()) {
9633 default:
9634 llvm_unreachable("Unhandled reduction");
9635 case ISD::VECREDUCE_AND:
9636 case ISD::VP_REDUCE_AND: {
9637 // vcpop ~x == 0
9638 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9639 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9640 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9641 CC = ISD::SETEQ;
9642 BaseOpc = ISD::AND;
9643 break;
9644 }
9645 case ISD::VECREDUCE_OR:
9646 case ISD::VP_REDUCE_OR:
9647 // vcpop x != 0
9648 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9649 CC = ISD::SETNE;
9650 BaseOpc = ISD::OR;
9651 break;
9652 case ISD::VECREDUCE_XOR:
9653 case ISD::VP_REDUCE_XOR: {
9654 // ((vcpop x) & 1) != 0
9655 SDValue One = DAG.getConstant(1, DL, XLenVT);
9656 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9657 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9658 CC = ISD::SETNE;
9659 BaseOpc = ISD::XOR;
9660 break;
9661 }
9662 }
9663
9664 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9665 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9666
9667 if (!IsVP)
9668 return SetCC;
9669
9670 // Now include the start value in the operation.
9671 // Note that we must return the start value when no elements are operated
9672 // upon. The vcpop instructions we've emitted in each case above will return
9673 // 0 for an inactive vector, and so we've already received the neutral value:
9674 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9675 // can simply include the start value.
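// For example (illustrative), vp.reduce.or(%start, %v, %m, evl=0) evaluates
// to (0 != 0) | %start, i.e. %start, as required.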
9676 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9677}
9678
9679static bool isNonZeroAVL(SDValue AVL) {
9680 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9681 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9682 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9683 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9684}
9685
9686/// Helper to lower a reduction sequence of the form:
9687/// scalar = reduce_op vec, scalar_start
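/// This (illustratively) inserts the start value into element 0 of an LMUL1
/// vector, emits the requested RVV reduction (e.g. vredsum.vs), and extracts
/// element 0 of the result as the scalar value.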
9688static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9689 SDValue StartValue, SDValue Vec, SDValue Mask,
9690 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9691 const RISCVSubtarget &Subtarget) {
9692 const MVT VecVT = Vec.getSimpleValueType();
9693 const MVT M1VT = getLMUL1VT(VecVT);
9694 const MVT XLenVT = Subtarget.getXLenVT();
9695 const bool NonZeroAVL = isNonZeroAVL(VL);
9696
9697 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9698 // or the original VT if fractional.
9699 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9700 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9701 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9702 // be the result of the reduction operation.
9703 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9704 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9705 DAG, Subtarget);
9706 if (M1VT != InnerVT)
9707 InitialValue =
9708 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9709 InitialValue, DAG.getVectorIdxConstant(0, DL));
9710 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9711 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9712 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9713 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9714 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9715 DAG.getVectorIdxConstant(0, DL));
9716}
9717
9718SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9719 SelectionDAG &DAG) const {
9720 SDLoc DL(Op);
9721 SDValue Vec = Op.getOperand(0);
9722 EVT VecEVT = Vec.getValueType();
9723
9724 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9725
9726 // Due to ordering in legalize types we may have a vector type that needs to
9727 // be split. Do that manually so we can get down to a legal type.
9728 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9729 TargetLowering::TypeSplitVector) {
9730 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9731 VecEVT = Lo.getValueType();
9732 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9733 }
9734
9735 // TODO: The type may need to be widened rather than split. Or widened before
9736 // it can be split.
9737 if (!isTypeLegal(VecEVT))
9738 return SDValue();
9739
9740 MVT VecVT = VecEVT.getSimpleVT();
9741 MVT VecEltVT = VecVT.getVectorElementType();
9742 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9743
9744 MVT ContainerVT = VecVT;
9745 if (VecVT.isFixedLengthVector()) {
9746 ContainerVT = getContainerForFixedLengthVector(VecVT);
9747 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9748 }
9749
9750 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9751
9752 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9753 switch (BaseOpc) {
9754 case ISD::AND:
9755 case ISD::OR:
9756 case ISD::UMAX:
9757 case ISD::UMIN:
9758 case ISD::SMAX:
9759 case ISD::SMIN:
9760 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9761 DAG.getVectorIdxConstant(0, DL));
9762 }
9763 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9764 Mask, VL, DL, DAG, Subtarget);
9765}
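// [Illustrative note, not part of the upstream source] As a concrete sketch,
// vecreduce_add of a v4i32 value might select to something like:
//   vsetivli   zero, 4, e32, m1, ta, ma
//   vmv.s.x    v9, zero
//   vredsum.vs v9, v8, v9
//   vmv.x.s    a0, v9
// (register assignment and vtype are only illustrative).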
9766
9767// Given a reduction op, this function returns the matching reduction opcode,
9768// the vector SDValue and the scalar SDValue required to lower this to a
9769// RISCVISD node.
9770static std::tuple<unsigned, SDValue, SDValue>
9772 const RISCVSubtarget &Subtarget) {
9773 SDLoc DL(Op);
9774 auto Flags = Op->getFlags();
9775 unsigned Opcode = Op.getOpcode();
9776 switch (Opcode) {
9777 default:
9778 llvm_unreachable("Unhandled reduction");
9779 case ISD::VECREDUCE_FADD: {
9780 // Use positive zero if we can. It is cheaper to materialize.
9781 SDValue Zero =
9782 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9783 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9784 }
9785 case ISD::VECREDUCE_SEQ_FADD:
9786 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9787 Op.getOperand(0));
9788 case ISD::VECREDUCE_FMINIMUM:
9789 case ISD::VECREDUCE_FMAXIMUM:
9790 case ISD::VECREDUCE_FMIN:
9791 case ISD::VECREDUCE_FMAX: {
9792 SDValue Front =
9793 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9794 DAG.getVectorIdxConstant(0, DL));
9795 unsigned RVVOpc =
9796 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9797 ? RISCVISD::VECREDUCE_FMIN_VL
9798 : RISCVISD::VECREDUCE_FMAX_VL;
9799 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9800 }
9801 }
9802}
9803
9804SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9805 SelectionDAG &DAG) const {
9806 SDLoc DL(Op);
9807 MVT VecEltVT = Op.getSimpleValueType();
9808
9809 unsigned RVVOpcode;
9810 SDValue VectorVal, ScalarVal;
9811 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9812 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9813 MVT VecVT = VectorVal.getSimpleValueType();
9814
9815 MVT ContainerVT = VecVT;
9816 if (VecVT.isFixedLengthVector()) {
9817 ContainerVT = getContainerForFixedLengthVector(VecVT);
9818 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9819 }
9820
9821 MVT ResVT = Op.getSimpleValueType();
9822 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9823 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9824 VL, DL, DAG, Subtarget);
9825 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9826 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9827 return Res;
9828
9829 if (Op->getFlags().hasNoNaNs())
9830 return Res;
9831
9832 // Force the output to NaN if any element is NaN.
9833 SDValue IsNan =
9834 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9835 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9836 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9837 MVT XLenVT = Subtarget.getXLenVT();
9838 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9839 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9840 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9841 return DAG.getSelect(
9842 DL, ResVT, NoNaNs, Res,
9843 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9844 ResVT));
9845}
9846
9847SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9848 SelectionDAG &DAG) const {
9849 SDLoc DL(Op);
9850 unsigned Opc = Op.getOpcode();
9851 SDValue Start = Op.getOperand(0);
9852 SDValue Vec = Op.getOperand(1);
9853 EVT VecEVT = Vec.getValueType();
9854 MVT XLenVT = Subtarget.getXLenVT();
9855
9856 // TODO: The type may need to be widened rather than split. Or widened before
9857 // it can be split.
9858 if (!isTypeLegal(VecEVT))
9859 return SDValue();
9860
9861 MVT VecVT = VecEVT.getSimpleVT();
9862 unsigned RVVOpcode = getRVVReductionOp(Opc);
9863
9864 if (VecVT.isFixedLengthVector()) {
9865 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9866 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9867 }
9868
9869 SDValue VL = Op.getOperand(3);
9870 SDValue Mask = Op.getOperand(2);
9871 SDValue Res =
9872 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9873 Vec, Mask, VL, DL, DAG, Subtarget);
9874 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9875 Op->getFlags().hasNoNaNs())
9876 return Res;
9877
9878 // Propagate NaNs.
9879 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9880 // Check if any of the elements in Vec is NaN.
9881 SDValue IsNaN = DAG.getNode(
9882 RISCVISD::SETCC_VL, DL, PredVT,
9883 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9884 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9885 // Check if the start value is NaN.
9886 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9887 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9888 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9889 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9890 MVT ResVT = Res.getSimpleValueType();
9891 return DAG.getSelect(
9892 DL, ResVT, NoNaNs, Res,
9893 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9894 ResVT));
9895}
9896
9897SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9898 SelectionDAG &DAG) const {
9899 SDValue Vec = Op.getOperand(0);
9900 SDValue SubVec = Op.getOperand(1);
9901 MVT VecVT = Vec.getSimpleValueType();
9902 MVT SubVecVT = SubVec.getSimpleValueType();
9903
9904 SDLoc DL(Op);
9905 MVT XLenVT = Subtarget.getXLenVT();
9906 unsigned OrigIdx = Op.getConstantOperandVal(2);
9907 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9908
9909 // We don't have the ability to slide mask vectors up indexed by their i1
9910 // elements; the smallest we can do is i8. Often we are able to bitcast to
9911 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9912 // into a scalable one, we might not necessarily have enough scalable
9913 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9914 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9915 (OrigIdx != 0 || !Vec.isUndef())) {
9916 if (VecVT.getVectorMinNumElements() >= 8 &&
9917 SubVecVT.getVectorMinNumElements() >= 8) {
9918 assert(OrigIdx % 8 == 0 && "Invalid index");
9919 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9920 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9921 "Unexpected mask vector lowering");
9922 OrigIdx /= 8;
9923 SubVecVT =
9924 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9925 SubVecVT.isScalableVector());
9926 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9927 VecVT.isScalableVector());
9928 Vec = DAG.getBitcast(VecVT, Vec);
9929 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9930 } else {
9931 // We can't slide this mask vector up indexed by its i1 elements.
9932 // This poses a problem when we wish to insert a scalable vector which
9933 // can't be re-expressed as a larger type. Just choose the slow path and
9934 // extend to a larger type, then truncate back down.
9935 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9936 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9937 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9938 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9939 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9940 Op.getOperand(2));
9941 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9942 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9943 }
9944 }
9945
9946 // If the subvector is a fixed-length type and we don't know VLEN
9947 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9948 // don't know which register of an LMUL group contains the specific subvector
9949 // as we only know the minimum register size. Therefore we must slide the
9950 // vector group up the full amount.
9951 const auto VLen = Subtarget.getRealVLen();
9952 if (SubVecVT.isFixedLengthVector() && !VLen) {
9953 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9954 return Op;
9955 MVT ContainerVT = VecVT;
9956 if (VecVT.isFixedLengthVector()) {
9957 ContainerVT = getContainerForFixedLengthVector(VecVT);
9958 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9959 }
9960
9961 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9962 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9963 DAG.getUNDEF(ContainerVT), SubVec,
9964 DAG.getVectorIdxConstant(0, DL));
9965 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9966 return DAG.getBitcast(Op.getValueType(), SubVec);
9967 }
9968
9969 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9970 DAG.getUNDEF(ContainerVT), SubVec,
9971 DAG.getVectorIdxConstant(0, DL));
9972 SDValue Mask =
9973 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9974 // Set the vector length to only the number of elements we care about. Note
9975 // that for slideup this includes the offset.
9976 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9977 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9978
9979 // Use tail agnostic policy if we're inserting over Vec's tail.
9980 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9981 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9982 Policy = RISCVII::TAIL_AGNOSTIC;
9983
9984 // If we're inserting into the lowest elements, use a tail undisturbed
9985 // vmv.v.v.
9986 if (OrigIdx == 0) {
9987 SubVec =
9988 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9989 } else {
9990 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9991 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9992 SlideupAmt, Mask, VL, Policy);
9993 }
9994
9995 if (VecVT.isFixedLengthVector())
9996 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9997 return DAG.getBitcast(Op.getValueType(), SubVec);
9998 }
9999
10000 MVT ContainerVecVT = VecVT;
10001 if (VecVT.isFixedLengthVector()) {
10002 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10003 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10004 }
10005
10006 MVT ContainerSubVecVT = SubVecVT;
10007 if (SubVecVT.isFixedLengthVector()) {
10008 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10009 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10010 }
10011
10012 unsigned SubRegIdx;
10013 ElementCount RemIdx;
10014 // insert_subvector scales the index by vscale if the subvector is scalable,
10015 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10016 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10017 if (SubVecVT.isFixedLengthVector()) {
10018 assert(VLen);
10019 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10020 auto Decompose =
10021 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10022 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10023 SubRegIdx = Decompose.first;
10024 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10025 (OrigIdx % Vscale));
10026 } else {
10027 auto Decompose =
10028 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10029 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10030 SubRegIdx = Decompose.first;
10031 RemIdx = ElementCount::getScalable(Decompose.second);
10032 }
10033
10034 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10035 assert(isPowerOf2_64(
10036 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10037 bool ExactlyVecRegSized =
10038 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10039 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10040
10041 // 1. If the Idx has been completely eliminated and this subvector's size is
10042 // a vector register or a multiple thereof, or the surrounding elements are
10043 // undef, then this is a subvector insert which naturally aligns to a vector
10044 // register. These can easily be handled using subregister manipulation.
10045 // 2. If the subvector isn't an exact multiple of a valid register group size,
10046 // then the insertion must preserve the undisturbed elements of the register.
10047 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10048 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10049 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10050 // of that LMUL=1 type back into the larger vector (resolving to another
10051 // subregister operation). See below for how our VSLIDEUP works. We go via a
10052 // LMUL=1 type to avoid allocating a large register group to hold our
10053 // subvector.
10054 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10055 if (SubVecVT.isFixedLengthVector()) {
10056 // We may get NoSubRegister if inserting at index 0 and the subvec
10057 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10058 if (SubRegIdx == RISCV::NoSubRegister) {
10059 assert(OrigIdx == 0);
10060 return Op;
10061 }
10062
10063 SDValue Insert =
10064 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
10065 if (VecVT.isFixedLengthVector())
10066 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10067 return Insert;
10068 }
10069 return Op;
10070 }
10071
10072 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10073 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10074 // (in our case undisturbed). This means we can set up a subvector insertion
10075 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10076 // size of the subvector.
10077 MVT InterSubVT = ContainerVecVT;
10078 SDValue AlignedExtract = Vec;
10079 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10080 if (SubVecVT.isFixedLengthVector())
10081 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10082 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10083 InterSubVT = getLMUL1VT(ContainerVecVT);
10084 // Extract a subvector equal to the nearest full vector register type. This
10085 // should resolve to a EXTRACT_SUBREG instruction.
10086 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10087 DAG.getVectorIdxConstant(AlignedIdx, DL));
10088 }
10089
10090 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10091 DAG.getUNDEF(InterSubVT), SubVec,
10092 DAG.getVectorIdxConstant(0, DL));
10093
10094 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10095
10096 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10097 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10098
10099 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10100 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10101 if (Subtarget.expandVScale(EndIndex) ==
10102 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10103 Policy = RISCVII::TAIL_AGNOSTIC;
10104
10105 // If we're inserting into the lowest elements, use a tail undisturbed
10106 // vmv.v.v.
10107 if (RemIdx.isZero()) {
10108 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10109 SubVec, VL);
10110 } else {
10111 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10112
10113 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10114 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10115
10116 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10117 SlideupAmt, Mask, VL, Policy);
10118 }
10119
10120 // If required, insert this subvector back into the correct vector register.
10121 // This should resolve to an INSERT_SUBREG instruction.
10122 if (ContainerVecVT.bitsGT(InterSubVT))
10123 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10124 DAG.getVectorIdxConstant(AlignedIdx, DL));
10125
10126 if (VecVT.isFixedLengthVector())
10127 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10128
10129 // We might have bitcast from a mask type: cast back to the original type if
10130 // required.
10131 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10132}
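// [Illustrative note, not part of the upstream source] For the unknown-VLEN
// fixed-length path above, inserting e.g. a v2i32 subvector into a v8i32
// vector at index 2 would typically become a tail-undisturbed slideup with
// VL = index + subvector length, roughly:
//   vsetivli    zero, 4, e32, m2, tu, ma
//   vslideup.vi v8, v10, 2
// (vtype and registers shown only as a sketch).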
10133
10134SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10135 SelectionDAG &DAG) const {
10136 SDValue Vec = Op.getOperand(0);
10137 MVT SubVecVT = Op.getSimpleValueType();
10138 MVT VecVT = Vec.getSimpleValueType();
10139
10140 SDLoc DL(Op);
10141 MVT XLenVT = Subtarget.getXLenVT();
10142 unsigned OrigIdx = Op.getConstantOperandVal(1);
10143 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10144
10145 // We don't have the ability to slide mask vectors down indexed by their i1
10146 // elements; the smallest we can do is i8. Often we are able to bitcast to
10147 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10148 // from a scalable one, we might not necessarily have enough scalable
10149 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10150 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
10151 if (VecVT.getVectorMinNumElements() >= 8 &&
10152 SubVecVT.getVectorMinNumElements() >= 8) {
10153 assert(OrigIdx % 8 == 0 && "Invalid index");
10154 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10155 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10156 "Unexpected mask vector lowering");
10157 OrigIdx /= 8;
10158 SubVecVT =
10159 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10160 SubVecVT.isScalableVector());
10161 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10162 VecVT.isScalableVector());
10163 Vec = DAG.getBitcast(VecVT, Vec);
10164 } else {
10165 // We can't slide this mask vector down, indexed by its i1 elements.
10166 // This poses a problem when we wish to extract a scalable vector which
10167 // can't be re-expressed as a larger type. Just choose the slow path and
10168 // extend to a larger type, then truncate back down.
10169 // TODO: We could probably improve this when extracting certain fixed-length
10170 // vectors from other fixed-length vectors, where we can extract as i8 and
10171 // shift the correct element right to reach the desired subvector.
10172 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10173 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10174 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10175 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10176 Op.getOperand(1));
10177 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10178 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10179 }
10180 }
10181
10182 // With an index of 0 this is a cast-like subvector, which can be performed
10183 // with subregister operations.
10184 if (OrigIdx == 0)
10185 return Op;
10186
10187 const auto VLen = Subtarget.getRealVLen();
10188
10189 // If the subvector is a fixed-length type and we don't know VLEN
10190 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10191 // don't know which register of an LMUL group contains the specific subvector
10192 // as we only know the minimum register size. Therefore we must slide the
10193 // vector group down the full amount.
10194 if (SubVecVT.isFixedLengthVector() && !VLen) {
10195 MVT ContainerVT = VecVT;
10196 if (VecVT.isFixedLengthVector()) {
10197 ContainerVT = getContainerForFixedLengthVector(VecVT);
10198 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10199 }
10200
10201 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10202 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10203 if (auto ShrunkVT =
10204 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10205 ContainerVT = *ShrunkVT;
10206 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10207 DAG.getVectorIdxConstant(0, DL));
10208 }
10209
10210 SDValue Mask =
10211 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10212 // Set the vector length to only the number of elements we care about. This
10213 // avoids sliding down elements we're going to discard straight away.
10214 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
10215 Subtarget);
10216 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10217 SDValue Slidedown =
10218 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10219 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10220 // Now we can use a cast-like subvector extract to get the result.
10221 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10222 DAG.getVectorIdxConstant(0, DL));
10223 return DAG.getBitcast(Op.getValueType(), Slidedown);
10224 }
10225
10226 if (VecVT.isFixedLengthVector()) {
10227 VecVT = getContainerForFixedLengthVector(VecVT);
10228 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10229 }
10230
10231 MVT ContainerSubVecVT = SubVecVT;
10232 if (SubVecVT.isFixedLengthVector())
10233 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10234
10235 unsigned SubRegIdx;
10236 ElementCount RemIdx;
10237 // extract_subvector scales the index by vscale if the subvector is scalable,
10238 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10239 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10240 if (SubVecVT.isFixedLengthVector()) {
10241 assert(VLen);
10242 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10243 auto Decompose =
10244 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10245 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10246 SubRegIdx = Decompose.first;
10247 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10248 (OrigIdx % Vscale));
10249 } else {
10250 auto Decompose =
10251 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10252 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10253 SubRegIdx = Decompose.first;
10254 RemIdx = ElementCount::getScalable(Decompose.second);
10255 }
10256
10257 // If the Idx has been completely eliminated then this is a subvector extract
10258 // which naturally aligns to a vector register. These can easily be handled
10259 // using subregister manipulation.
10260 if (RemIdx.isZero()) {
10261 if (SubVecVT.isFixedLengthVector()) {
10262 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10263 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10264 }
10265 return Op;
10266 }
10267
10268 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10269 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10270 // divide exactly.
10271 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10272 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10273
10274 // If the vector type is an LMUL-group type, extract a subvector equal to the
10275 // nearest full vector register type.
10276 MVT InterSubVT = VecVT;
10277 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10278 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10279 // we should have successfully decomposed the extract into a subregister.
10280 assert(SubRegIdx != RISCV::NoSubRegister);
10281 InterSubVT = getLMUL1VT(VecVT);
10282 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10283 }
10284
10285 // Slide this vector register down by the desired number of elements in order
10286 // to place the desired subvector starting at element 0.
10287 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10288 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10289 if (SubVecVT.isFixedLengthVector())
10290 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10291 Subtarget);
10292 SDValue Slidedown =
10293 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10294 Vec, SlidedownAmt, Mask, VL);
10295
10296 // Now the vector is in the right position, extract our final subvector. This
10297 // should resolve to a COPY.
10298 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10299 DAG.getVectorIdxConstant(0, DL));
10300
10301 // We might have bitcast from a mask type: cast back to the original type if
10302 // required.
10303 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10304}
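// [Illustrative note, not part of the upstream source] For the unknown-VLEN
// fixed-length path, extracting e.g. a v2i32 subvector from a v4i32 vector at
// index 2 becomes a slidedown with VL limited to the subvector length,
// roughly:
//   vsetivli      zero, 2, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 2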
10305
10306// Widen a vector's operands to i8, then truncate its results back to the
10307// original type, typically i1. All operand and result types must be the same.
10308 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10309 SelectionDAG &DAG) {
10310 MVT VT = N.getSimpleValueType();
10311 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10312 SmallVector<SDValue, 4> WideOps;
10313 for (SDValue Op : N->ops()) {
10314 assert(Op.getSimpleValueType() == VT &&
10315 "Operands and result must be same type");
10316 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10317 }
10318
10319 unsigned NumVals = N->getNumValues();
10320
10321 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10322 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10323 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10324 SmallVector<SDValue, 4> TruncVals;
10325 for (unsigned I = 0; I < NumVals; I++) {
10326 TruncVals.push_back(
10327 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10328 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10329 }
10330
10331 if (TruncVals.size() > 1)
10332 return DAG.getMergeValues(TruncVals, DL);
10333 return TruncVals.front();
10334}
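// [Illustrative note, not part of the upstream source] In other words, an i1
// (mask) operation N(a, b) is rewritten here as roughly
//   setcc (N (zext a to i8), (zext b to i8)), 0, ne
// so the operation itself runs at SEW=8 and each result is converted back to
// a mask by comparing against zero.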
10335
10336SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10337 SelectionDAG &DAG) const {
10338 SDLoc DL(Op);
10339 MVT VecVT = Op.getSimpleValueType();
10340
10341 assert(VecVT.isScalableVector() &&
10342 "vector_interleave on non-scalable vector!");
10343
10344 // 1 bit element vectors need to be widened to e8
10345 if (VecVT.getVectorElementType() == MVT::i1)
10346 return widenVectorOpsToi8(Op, DL, DAG);
10347
10348 // If the VT is LMUL=8, we need to split and reassemble.
10349 if (VecVT.getSizeInBits().getKnownMinValue() ==
10350 (8 * RISCV::RVVBitsPerBlock)) {
10351 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10352 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10353 EVT SplitVT = Op0Lo.getValueType();
10354
10355 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10356 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10357 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10358 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10359
10360 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10361 ResLo.getValue(0), ResHi.getValue(0));
10362 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10363 ResHi.getValue(1));
10364 return DAG.getMergeValues({Even, Odd}, DL);
10365 }
10366
10367 // Concatenate the two vectors as one vector to deinterleave
10368 MVT ConcatVT =
10369 MVT::getVectorVT(VecVT.getVectorElementType(),
10370 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10371 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10372 Op.getOperand(0), Op.getOperand(1));
10373
10374 // We want to operate on all lanes, so get the mask and VL for it
10375 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10376 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10377
10378 // We can deinterleave through vnsrl.wi if the element type is smaller than
10379 // ELEN
10380 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10381 SDValue Even =
10382 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10383 SDValue Odd =
10384 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10385 return DAG.getMergeValues({Even, Odd}, DL);
10386 }
10387
10388 // For the indices, use the same SEW to avoid an extra vsetvli
10389 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10390 // Create a vector of even indices {0, 2, 4, ...}
10391 SDValue EvenIdx =
10392 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10393 // Create a vector of odd indices {1, 3, 5, ... }
10394 SDValue OddIdx =
10395 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10396
10397 // Gather the even and odd elements into two separate vectors
10398 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10399 Concat, EvenIdx, Passthru, Mask, VL);
10400 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10401 Concat, OddIdx, Passthru, Mask, VL);
10402
10403 // Extract the result half of the gather for even and odd
10404 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10405 DAG.getVectorIdxConstant(0, DL));
10406 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10407 DAG.getVectorIdxConstant(0, DL));
10408
10409 return DAG.getMergeValues({Even, Odd}, DL);
10410}
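// [Illustrative note, not part of the upstream source] When SEW < ELEN the
// deinterleave above is done with narrowing shifts on the concatenated vector
// viewed at 2*SEW: the even elements are the low halves (vnsrl.wi vd, vs2, 0)
// and the odd elements are the high halves (a vnsrl by SEW, e.g.
// vnsrl.wi vd, vs2, 8 for e8, or vnsrl.wx with the shift amount in a register
// for larger SEW).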
10411
10412SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10413 SelectionDAG &DAG) const {
10414 SDLoc DL(Op);
10415 MVT VecVT = Op.getSimpleValueType();
10416
10417 assert(VecVT.isScalableVector() &&
10418 "vector_interleave on non-scalable vector!");
10419
10420 // i1 vectors need to be widened to i8
10421 if (VecVT.getVectorElementType() == MVT::i1)
10422 return widenVectorOpsToi8(Op, DL, DAG);
10423
10424 MVT XLenVT = Subtarget.getXLenVT();
10425 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10426
10427 // If the VT is LMUL=8, we need to split and reassemble.
10428 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10429 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10430 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10431 EVT SplitVT = Op0Lo.getValueType();
10432
10433 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10434 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10435 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10436 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10437
10438 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10439 ResLo.getValue(0), ResLo.getValue(1));
10440 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10441 ResHi.getValue(0), ResHi.getValue(1));
10442 return DAG.getMergeValues({Lo, Hi}, DL);
10443 }
10444
10445 SDValue Interleaved;
10446
10447 // If the element type is smaller than ELEN, then we can interleave with
10448 // vwaddu.vv and vwmaccu.vx
10449 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10450 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10451 DAG, Subtarget);
10452 } else {
10453 // Otherwise, fallback to using vrgathere16.vv
10454 MVT ConcatVT =
10455 MVT::getVectorVT(VecVT.getVectorElementType(),
10456 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10457 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10458 Op.getOperand(0), Op.getOperand(1));
10459
10460 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10461
10462 // 0 1 2 3 4 5 6 7 ...
10463 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10464
10465 // 1 1 1 1 1 1 1 1 ...
10466 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10467
10468 // 1 0 1 0 1 0 1 0 ...
10469 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10470 OddMask = DAG.getSetCC(
10471 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10472 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10473 ISD::CondCode::SETNE);
10474
10475 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10476
10477 // Build up the index vector for interleaving the concatenated vector
10478 // 0 0 1 1 2 2 3 3 ...
10479 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10480 // 0 n 1 n+1 2 n+2 3 n+3 ...
10481 Idx =
10482 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10483
10484 // Then perform the interleave
10485 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10486 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10487 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10488 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10489 }
10490
10491 // Extract the two halves from the interleaved result
10492 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10493 DAG.getVectorIdxConstant(0, DL));
10494 SDValue Hi = DAG.getNode(
10495 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10496 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10497
10498 return DAG.getMergeValues({Lo, Hi}, DL);
10499}
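// [Illustrative note, not part of the upstream source] The widening form of
// the interleave computes zext(a) + 2^SEW * zext(b) when the result is viewed
// at 2*SEW, which lays the elements out as a0 b0 a1 b1 ...
// A rough sketch of the selected sequence:
//   vwaddu.vv  v12, v8, v10
//   li         a0, -1
//   vwmaccu.vx v12, a0, v10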
10500
10501// Lower step_vector to the vid instruction. Any non-identity step value must
10502 // be accounted for by manual expansion.
10503SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10504 SelectionDAG &DAG) const {
10505 SDLoc DL(Op);
10506 MVT VT = Op.getSimpleValueType();
10507 assert(VT.isScalableVector() && "Expected scalable vector");
10508 MVT XLenVT = Subtarget.getXLenVT();
10509 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10510 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10511 uint64_t StepValImm = Op.getConstantOperandVal(0);
10512 if (StepValImm != 1) {
10513 if (isPowerOf2_64(StepValImm)) {
10514 SDValue StepVal =
10515 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10516 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10517 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10518 } else {
10519 SDValue StepVal = lowerScalarSplat(
10520 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10521 VL, VT, DL, DAG, Subtarget);
10522 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10523 }
10524 }
10525 return StepVec;
10526}
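// [Illustrative note, not part of the upstream source] Examples of the
// expansion above (sketch only):
//   step_vector with step 4: vid.v v8 ; vsll.vi v8, v8, 2
//   step_vector with step 3: vid.v v8 ; li a0, 3 ; vmul.vx v8, v8, a0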
10527
10528// Implement vector_reverse using vrgather.vv with indices determined by
10529// subtracting the id of each element from (VLMAX-1). This will convert
10530// the indices like so:
10531// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10532// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10533SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10534 SelectionDAG &DAG) const {
10535 SDLoc DL(Op);
10536 MVT VecVT = Op.getSimpleValueType();
10537 if (VecVT.getVectorElementType() == MVT::i1) {
10538 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10539 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10540 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10541 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10542 }
10543 unsigned EltSize = VecVT.getScalarSizeInBits();
10544 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10545 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10546 unsigned MaxVLMAX =
10547 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10548
10549 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10550 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10551
10552 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10553 // to use vrgatherei16.vv.
10554 // TODO: It's also possible to use vrgatherei16.vv for other types to
10555 // decrease register width for the index calculation.
10556 if (MaxVLMAX > 256 && EltSize == 8) {
10557 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10558 // Reverse each half, then reassemble them in reverse order.
10559 // NOTE: It's also possible that after splitting that VLMAX no longer
10560 // requires vrgatherei16.vv.
10561 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10562 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10563 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10564 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10565 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10566 // Reassemble the low and high pieces reversed.
10567 // FIXME: This is a CONCAT_VECTORS.
10568 SDValue Res =
10569 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10570 DAG.getVectorIdxConstant(0, DL));
10571 return DAG.getNode(
10572 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10573 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10574 }
10575
10576 // Just promote the int type to i16 which will double the LMUL.
10577 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10578 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10579 }
10580
10581 MVT XLenVT = Subtarget.getXLenVT();
10582 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10583
10584 // Calculate VLMAX-1 for the desired SEW.
10585 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10586 computeVLMax(VecVT, DL, DAG),
10587 DAG.getConstant(1, DL, XLenVT));
10588
10589 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10590 bool IsRV32E64 =
10591 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10592 SDValue SplatVL;
10593 if (!IsRV32E64)
10594 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10595 else
10596 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10597 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10598
10599 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10600 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10601 DAG.getUNDEF(IntVT), Mask, VL);
10602
10603 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10604 DAG.getUNDEF(VecVT), Mask, VL);
10605}
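// [Illustrative note, not part of the upstream source] The reverse above
// roughly selects to (with a0 holding VLMAX-1):
//   vid.v       v9
//   vrsub.vx    v9, v9, a0
//   vrgather.vv v10, v8, v9
// using vrgatherei16.vv instead when SEW=8 and VLMAX may exceed 256.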
10606
10607SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10608 SelectionDAG &DAG) const {
10609 SDLoc DL(Op);
10610 SDValue V1 = Op.getOperand(0);
10611 SDValue V2 = Op.getOperand(1);
10612 MVT XLenVT = Subtarget.getXLenVT();
10613 MVT VecVT = Op.getSimpleValueType();
10614
10615 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10616
10617 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10618 SDValue DownOffset, UpOffset;
10619 if (ImmValue >= 0) {
10620 // The operand is a TargetConstant, we need to rebuild it as a regular
10621 // constant.
10622 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10623 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10624 } else {
10625 // The operand is a TargetConstant, we need to rebuild it as a regular
10626 // constant rather than negating the original operand.
10627 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10628 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10629 }
10630
10631 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10632
10633 SDValue SlideDown =
10634 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10635 DownOffset, TrueMask, UpOffset);
10636 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10637 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10638 RISCVII::TAIL_AGNOSTIC);
10639}
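// [Illustrative note, not part of the upstream source] For a non-negative
// splice offset OFF this composes as: slide V1 down by OFF while keeping
// VLMAX-OFF elements, then slide V2 up by VLMAX-OFF over that result, so the
// first VLMAX-OFF lanes come from the tail of V1 and the remaining lanes from
// the start of V2.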
10640
10641SDValue
10642RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10643 SelectionDAG &DAG) const {
10644 SDLoc DL(Op);
10645 auto *Load = cast<LoadSDNode>(Op);
10646
10647 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10648 Load->getMemoryVT(),
10649 *Load->getMemOperand()) &&
10650 "Expecting a correctly-aligned load");
10651
10652 MVT VT = Op.getSimpleValueType();
10653 MVT XLenVT = Subtarget.getXLenVT();
10654 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10655
10656 // If we know the exact VLEN and our fixed length vector completely fills
10657 // the container, use a whole register load instead.
10658 const auto [MinVLMAX, MaxVLMAX] =
10659 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10660 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10661 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10662 MachineMemOperand *MMO = Load->getMemOperand();
10663 SDValue NewLoad =
10664 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10665 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10666 MMO->getAAInfo(), MMO->getRanges());
10667 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10668 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10669 }
10670
10671 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10672
10673 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10674 SDValue IntID = DAG.getTargetConstant(
10675 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10676 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10677 if (!IsMaskOp)
10678 Ops.push_back(DAG.getUNDEF(ContainerVT));
10679 Ops.push_back(Load->getBasePtr());
10680 Ops.push_back(VL);
10681 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10682 SDValue NewLoad =
10683 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10684 Load->getMemoryVT(), Load->getMemOperand());
10685
10686 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10687 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10688}
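// [Illustrative note, not part of the upstream source] Concretely, when VLEN
// is known exactly and the fixed-length type fills a whole register group,
// this emits an ordinary whole-register load; otherwise it emits a VL-limited
// unit-stride load, roughly:
//   vsetivli zero, <#elts>, e<SEW>, <lmul>, ta, ma
//   vle<SEW>.v v8, (a0)        ; or vlm.v for i1 mask vectors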
10689
10690SDValue
10691RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10692 SelectionDAG &DAG) const {
10693 SDLoc DL(Op);
10694 auto *Store = cast<StoreSDNode>(Op);
10695
10696 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10697 Store->getMemoryVT(),
10698 *Store->getMemOperand()) &&
10699 "Expecting a correctly-aligned store");
10700
10701 SDValue StoreVal = Store->getValue();
10702 MVT VT = StoreVal.getSimpleValueType();
10703 MVT XLenVT = Subtarget.getXLenVT();
10704
10705 // If the size is less than a byte, we need to pad with zeros to make a byte.
10706 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10707 VT = MVT::v8i1;
10708 StoreVal =
10709 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10710 StoreVal, DAG.getVectorIdxConstant(0, DL));
10711 }
10712
10713 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10714
10715 SDValue NewValue =
10716 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10717
10718
10719 // If we know the exact VLEN and our fixed length vector completely fills
10720 // the container, use a whole register store instead.
10721 const auto [MinVLMAX, MaxVLMAX] =
10722 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10723 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10724 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10725 MachineMemOperand *MMO = Store->getMemOperand();
10726 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10727 MMO->getPointerInfo(), MMO->getBaseAlign(),
10728 MMO->getFlags(), MMO->getAAInfo());
10729 }
10730
10731 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10732 Subtarget);
10733
10734 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10735 SDValue IntID = DAG.getTargetConstant(
10736 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10737 return DAG.getMemIntrinsicNode(
10738 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10739 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10740 Store->getMemoryVT(), Store->getMemOperand());
10741}
10742
10743SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10744 SelectionDAG &DAG) const {
10745 SDLoc DL(Op);
10746 MVT VT = Op.getSimpleValueType();
10747
10748 const auto *MemSD = cast<MemSDNode>(Op);
10749 EVT MemVT = MemSD->getMemoryVT();
10750 MachineMemOperand *MMO = MemSD->getMemOperand();
10751 SDValue Chain = MemSD->getChain();
10752 SDValue BasePtr = MemSD->getBasePtr();
10753
10754 SDValue Mask, PassThru, VL;
10755 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10756 Mask = VPLoad->getMask();
10757 PassThru = DAG.getUNDEF(VT);
10758 VL = VPLoad->getVectorLength();
10759 } else {
10760 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10761 Mask = MLoad->getMask();
10762 PassThru = MLoad->getPassThru();
10763 }
10764
10765 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10766
10767 MVT XLenVT = Subtarget.getXLenVT();
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10773 if (!IsUnmasked) {
10774 MVT MaskVT = getMaskTypeFor(ContainerVT);
10775 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10776 }
10777 }
10778
10779 if (!VL)
10780 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10781
10782 unsigned IntID =
10783 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10784 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10785 if (IsUnmasked)
10786 Ops.push_back(DAG.getUNDEF(ContainerVT));
10787 else
10788 Ops.push_back(PassThru);
10789 Ops.push_back(BasePtr);
10790 if (!IsUnmasked)
10791 Ops.push_back(Mask);
10792 Ops.push_back(VL);
10793 if (!IsUnmasked)
10794 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10795
10796 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10797
10798 SDValue Result =
10799 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10800 Chain = Result.getValue(1);
10801
10802 if (VT.isFixedLengthVector())
10803 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10804
10805 return DAG.getMergeValues({Result, Chain}, DL);
10806}
10807
10808SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10809 SelectionDAG &DAG) const {
10810 SDLoc DL(Op);
10811
10812 const auto *MemSD = cast<MemSDNode>(Op);
10813 EVT MemVT = MemSD->getMemoryVT();
10814 MachineMemOperand *MMO = MemSD->getMemOperand();
10815 SDValue Chain = MemSD->getChain();
10816 SDValue BasePtr = MemSD->getBasePtr();
10817 SDValue Val, Mask, VL;
10818
10819 bool IsCompressingStore = false;
10820 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10821 Val = VPStore->getValue();
10822 Mask = VPStore->getMask();
10823 VL = VPStore->getVectorLength();
10824 } else {
10825 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10826 Val = MStore->getValue();
10827 Mask = MStore->getMask();
10828 IsCompressingStore = MStore->isCompressingStore();
10829 }
10830
10831 bool IsUnmasked =
10832 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10833
10834 MVT VT = Val.getSimpleValueType();
10835 MVT XLenVT = Subtarget.getXLenVT();
10836
10837 MVT ContainerVT = VT;
10838 if (VT.isFixedLengthVector()) {
10839 ContainerVT = getContainerForFixedLengthVector(VT);
10840
10841 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10842 if (!IsUnmasked || IsCompressingStore) {
10843 MVT MaskVT = getMaskTypeFor(ContainerVT);
10844 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10845 }
10846 }
10847
10848 if (!VL)
10849 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10850
10851 if (IsCompressingStore) {
10852 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10853 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10854 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10855 VL =
10856 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10857 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10858 }
10859
10860 unsigned IntID =
10861 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10862 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10863 Ops.push_back(Val);
10864 Ops.push_back(BasePtr);
10865 if (!IsUnmasked)
10866 Ops.push_back(Mask);
10867 Ops.push_back(VL);
10868
10869 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10870 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10871}
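// [Illustrative note, not part of the upstream source] For a compressing
// store the value is first packed with vcompress.vm, the new element count is
// taken from vcpop.m on the mask, and the packed data is then written with an
// unmasked vse<SEW>.v using that count as the VL.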
10872
10873SDValue
10874RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10875 SelectionDAG &DAG) const {
10876 MVT InVT = Op.getOperand(0).getSimpleValueType();
10877 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10878
10879 MVT VT = Op.getSimpleValueType();
10880
10881 SDValue Op1 =
10882 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10883 SDValue Op2 =
10884 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10885
10886 SDLoc DL(Op);
10887 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10888 DAG, Subtarget);
10889 MVT MaskVT = getMaskTypeFor(ContainerVT);
10890
10891 SDValue Cmp =
10892 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10893 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10894
10895 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10896}
10897
10898SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10899 SelectionDAG &DAG) const {
10900 unsigned Opc = Op.getOpcode();
10901 SDLoc DL(Op);
10902 SDValue Chain = Op.getOperand(0);
10903 SDValue Op1 = Op.getOperand(1);
10904 SDValue Op2 = Op.getOperand(2);
10905 SDValue CC = Op.getOperand(3);
10906 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10907 MVT VT = Op.getSimpleValueType();
10908 MVT InVT = Op1.getSimpleValueType();
10909
10910 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10911 // condition code.
10912 if (Opc == ISD::STRICT_FSETCCS) {
10913 // Expand strict_fsetccs(x, oeq) to
10914 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10915 SDVTList VTList = Op->getVTList();
10916 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10917 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10918 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10919 Op2, OLECCVal);
10920 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10921 Op1, OLECCVal);
10922 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10923 Tmp1.getValue(1), Tmp2.getValue(1));
10924 // Tmp1 and Tmp2 might be the same node.
10925 if (Tmp1 != Tmp2)
10926 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10927 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10928 }
10929
10930 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10931 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10932 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10933 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10934 Op2, OEQCCVal);
10935 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10936 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10937 }
10938 }
10939
10940 MVT ContainerInVT = InVT;
10941 if (InVT.isFixedLengthVector()) {
10942 ContainerInVT = getContainerForFixedLengthVector(InVT);
10943 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10944 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10945 }
10946 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10947
10948 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10949
10950 SDValue Res;
10951 if (Opc == ISD::STRICT_FSETCC &&
10952 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10953 CCVal == ISD::SETOLE)) {
10954 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
10955 // only active when both input elements are ordered.
10956 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10957 SDValue OrderMask1 = DAG.getNode(
10958 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10959 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10960 True, VL});
10961 SDValue OrderMask2 = DAG.getNode(
10962 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10963 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10964 True, VL});
10965 Mask =
10966 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10967 // Use Mask as the merge operand to let the result be 0 if either of the
10968 // inputs is unordered.
10969 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10970 DAG.getVTList(MaskVT, MVT::Other),
10971 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10972 } else {
10973 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10974 : RISCVISD::STRICT_FSETCCS_VL;
10975 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10976 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10977 }
10978
10979 if (VT.isFixedLengthVector()) {
10980 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10981 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10982 }
10983 return Res;
10984}
10985
10986// Lower vector ABS to smax(X, sub(0, X)).
10987SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10988 SDLoc DL(Op);
10989 MVT VT = Op.getSimpleValueType();
10990 SDValue X = Op.getOperand(0);
10991
10992 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10993 "Unexpected type for ISD::ABS");
10994
10995 MVT ContainerVT = VT;
10996 if (VT.isFixedLengthVector()) {
10997 ContainerVT = getContainerForFixedLengthVector(VT);
10998 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10999 }
11000
11001 SDValue Mask, VL;
11002 if (Op->getOpcode() == ISD::VP_ABS) {
11003 Mask = Op->getOperand(1);
11004 if (VT.isFixedLengthVector())
11005 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11006 Subtarget);
11007 VL = Op->getOperand(2);
11008 } else
11009 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11010
11011 SDValue SplatZero = DAG.getNode(
11012 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11013 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11014 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11015 DAG.getUNDEF(ContainerVT), Mask, VL);
11016 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11017 DAG.getUNDEF(ContainerVT), Mask, VL);
11018
11019 if (VT.isFixedLengthVector())
11020 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11021 return Max;
11022}
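// [Illustrative note, not part of the upstream source] The smax(X, 0-X) form
// above typically selects to a reverse-subtract from zero plus a signed max,
// e.g.:
//   vrsub.vi v9, v8, 0
//   vmax.vv  v8, v8, v9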
11023
11024SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11025 SDValue Op, SelectionDAG &DAG) const {
11026 SDLoc DL(Op);
11027 MVT VT = Op.getSimpleValueType();
11028 SDValue Mag = Op.getOperand(0);
11029 SDValue Sign = Op.getOperand(1);
11030 assert(Mag.getValueType() == Sign.getValueType() &&
11031 "Can only handle COPYSIGN with matching types.");
11032
11033 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11034 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11035 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11036
11037 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11038
11039 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11040 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11041
11042 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11043}
11044
11045SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11046 SDValue Op, SelectionDAG &DAG) const {
11047 MVT VT = Op.getSimpleValueType();
11048 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11049
11050 MVT I1ContainerVT =
11051 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11052
11053 SDValue CC =
11054 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11055 SDValue Op1 =
11056 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11057 SDValue Op2 =
11058 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11059
11060 SDLoc DL(Op);
11061 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11062
11063 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11064 Op2, DAG.getUNDEF(ContainerVT), VL);
11065
11066 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11067}
11068
11069SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11070 SelectionDAG &DAG) const {
11071 unsigned NewOpc = getRISCVVLOp(Op);
11072 bool HasMergeOp = hasMergeOp(NewOpc);
11073 bool HasMask = hasMaskOp(NewOpc);
11074
11075 MVT VT = Op.getSimpleValueType();
11076 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11077
11078 // Create list of operands by converting existing ones to scalable types.
11079 SmallVector<SDValue, 6> Ops;
11080 for (const SDValue &V : Op->op_values()) {
11081 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11082
11083 // Pass through non-vector operands.
11084 if (!V.getValueType().isVector()) {
11085 Ops.push_back(V);
11086 continue;
11087 }
11088
11089 // "cast" fixed length vector to a scalable vector.
11090 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11091 "Only fixed length vectors are supported!");
11092 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11093 }
11094
11095 SDLoc DL(Op);
11096 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11097 if (HasMergeOp)
11098 Ops.push_back(DAG.getUNDEF(ContainerVT));
11099 if (HasMask)
11100 Ops.push_back(Mask);
11101 Ops.push_back(VL);
11102
11103 // StrictFP operations have two result values. Their lowered result should
11104 // have the same result count.
11105 if (Op->isStrictFPOpcode()) {
11106 SDValue ScalableRes =
11107 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11108 Op->getFlags());
11109 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11110 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11111 }
11112
11113 SDValue ScalableRes =
11114 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11115 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11116}
11117
11118// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11119// * Operands of each node are assumed to be in the same order.
11120// * The EVL operand is promoted from i32 to i64 on RV64.
11121// * Fixed-length vectors are converted to their scalable-vector container
11122// types.
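// For example, a fixed-length VP_ADD on v4i32 becomes RISCVISD::ADD_VL on the
// scalable container type (e.g. nxv2i32 when the minimum VLEN is 128), with the
// mask converted to the container's i1 type and the EVL passed through as VL.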
11123SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11124 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11125 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
11126
11127 SDLoc DL(Op);
11128 MVT VT = Op.getSimpleValueType();
11129  SmallVector<SDValue, 16> Ops;
11130
11131 MVT ContainerVT = VT;
11132 if (VT.isFixedLengthVector())
11133 ContainerVT = getContainerForFixedLengthVector(VT);
11134
11135 for (const auto &OpIdx : enumerate(Op->ops())) {
11136 SDValue V = OpIdx.value();
11137 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11138 // Add dummy merge value before the mask. Or if there isn't a mask, before
11139 // EVL.
11140 if (HasMergeOp) {
11141 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11142 if (MaskIdx) {
11143 if (*MaskIdx == OpIdx.index())
11144 Ops.push_back(DAG.getUNDEF(ContainerVT));
11145 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11146 OpIdx.index()) {
11147 if (Op.getOpcode() == ISD::VP_MERGE) {
11148 // For VP_MERGE, copy the false operand instead of an undef value.
11149 Ops.push_back(Ops.back());
11150 } else {
11151 assert(Op.getOpcode() == ISD::VP_SELECT);
11152 // For VP_SELECT, add an undef value.
11153 Ops.push_back(DAG.getUNDEF(ContainerVT));
11154 }
11155 }
11156 }
11157 // Pass through operands which aren't fixed-length vectors.
11158 if (!V.getValueType().isFixedLengthVector()) {
11159 Ops.push_back(V);
11160 continue;
11161 }
11162 // "cast" fixed length vector to a scalable vector.
11163 MVT OpVT = V.getSimpleValueType();
11164 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11165 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11166 "Only fixed length vectors are supported!");
11167 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11168 }
11169
11170 if (!VT.isFixedLengthVector())
11171 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11172
11173 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11174
11175 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11176}
11177
11178SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11179 SelectionDAG &DAG) const {
11180 SDLoc DL(Op);
11181 MVT VT = Op.getSimpleValueType();
11182
11183 SDValue Src = Op.getOperand(0);
11184 // NOTE: Mask is dropped.
11185 SDValue VL = Op.getOperand(2);
11186
11187 MVT ContainerVT = VT;
11188 if (VT.isFixedLengthVector()) {
11189 ContainerVT = getContainerForFixedLengthVector(VT);
11190 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11191 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11192 }
11193
11194 MVT XLenVT = Subtarget.getXLenVT();
11195 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11196 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11197 DAG.getUNDEF(ContainerVT), Zero, VL);
11198
11199 SDValue SplatValue = DAG.getConstant(
11200 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11201 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11202 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11203
11204 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11205 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11206 if (!VT.isFixedLengthVector())
11207 return Result;
11208 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11209}
11210
11211SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11212 SelectionDAG &DAG) const {
11213 SDLoc DL(Op);
11214 MVT VT = Op.getSimpleValueType();
11215
11216 SDValue Op1 = Op.getOperand(0);
11217 SDValue Op2 = Op.getOperand(1);
11218 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11219 // NOTE: Mask is dropped.
11220 SDValue VL = Op.getOperand(4);
11221
11222 MVT ContainerVT = VT;
11223 if (VT.isFixedLengthVector()) {
11224 ContainerVT = getContainerForFixedLengthVector(VT);
11225 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11226 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11227 }
11228
11229  SDValue Result;
11230 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11231
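  // For i1 vectors the compare reduces to boolean algebra on the mask bits: with
  // true == 1, X <u Y can only hold when X == 0 and Y == 1, i.e. ~X & Y. The NOT
  // is implemented below as a vmxor.mm with the all-ones mask created above.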
11232 switch (Condition) {
11233 default:
11234 break;
11235 // X != Y --> (X^Y)
11236 case ISD::SETNE:
11237 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11238 break;
11239 // X == Y --> ~(X^Y)
11240 case ISD::SETEQ: {
11241 SDValue Temp =
11242 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11243 Result =
11244 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11245 break;
11246 }
11247 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11248 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11249 case ISD::SETGT:
11250 case ISD::SETULT: {
11251 SDValue Temp =
11252 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11253 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11254 break;
11255 }
11256 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11257 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11258 case ISD::SETLT:
11259 case ISD::SETUGT: {
11260 SDValue Temp =
11261 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11262 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11263 break;
11264 }
11265 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11266 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11267 case ISD::SETGE:
11268 case ISD::SETULE: {
11269 SDValue Temp =
11270 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11271 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11272 break;
11273 }
11274 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11275 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11276 case ISD::SETLE:
11277 case ISD::SETUGE: {
11278 SDValue Temp =
11279 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11280 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11281 break;
11282 }
11283 }
11284
11285 if (!VT.isFixedLengthVector())
11286 return Result;
11287 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11288}
11289
11290// Lower Floating-Point/Integer Type-Convert VP SDNodes
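// For example, a VP_SITOFP from nxv2i8 to nxv2f64 is more than a 2x widening, so
// the source is first sign-extended to nxv2i32 (half the destination element
// width) and then converted; the narrowing direction converts to an intermediate
// type and then truncates in halving steps via TRUNCATE_VECTOR_VL.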
11291SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11292 SelectionDAG &DAG) const {
11293 SDLoc DL(Op);
11294
11295 SDValue Src = Op.getOperand(0);
11296 SDValue Mask = Op.getOperand(1);
11297 SDValue VL = Op.getOperand(2);
11298 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11299
11300 MVT DstVT = Op.getSimpleValueType();
11301 MVT SrcVT = Src.getSimpleValueType();
11302 if (DstVT.isFixedLengthVector()) {
11303 DstVT = getContainerForFixedLengthVector(DstVT);
11304 SrcVT = getContainerForFixedLengthVector(SrcVT);
11305 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11306 MVT MaskVT = getMaskTypeFor(DstVT);
11307 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11308 }
11309
11310 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11311 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11312
11313  SDValue Result;
11314 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11315 if (SrcVT.isInteger()) {
11316 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11317
11318 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11319                                         ? RISCVISD::VSEXT_VL
11320                                         : RISCVISD::VZEXT_VL;
11321
11322 // Do we need to do any pre-widening before converting?
11323 if (SrcEltSize == 1) {
11324 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11325 MVT XLenVT = Subtarget.getXLenVT();
11326 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11327 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11328 DAG.getUNDEF(IntVT), Zero, VL);
11329 SDValue One = DAG.getConstant(
11330 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11331 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11332 DAG.getUNDEF(IntVT), One, VL);
11333 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11334 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11335 } else if (DstEltSize > (2 * SrcEltSize)) {
11336 // Widen before converting.
11337 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11338 DstVT.getVectorElementCount());
11339 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11340 }
11341
11342 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11343 } else {
11344 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11345 "Wrong input/output vector types");
11346
11347 // Convert f16 to f32 then convert f32 to i64.
11348 if (DstEltSize > (2 * SrcEltSize)) {
11349 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11350 MVT InterimFVT =
11351 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11352 Src =
11353 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11354 }
11355
11356 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11357 }
11358 } else { // Narrowing + Conversion
11359 if (SrcVT.isInteger()) {
11360 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11361      // First do a narrowing conversion to an FP type half the size, then
11362      // round the FP type to a smaller FP type if needed.
11363
11364 MVT InterimFVT = DstVT;
11365 if (SrcEltSize > (2 * DstEltSize)) {
11366 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11367 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11368 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11369 }
11370
11371 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11372
11373 if (InterimFVT != DstVT) {
11374 Src = Result;
11375 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11376 }
11377 } else {
11378 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11379 "Wrong input/output vector types");
11380 // First do a narrowing conversion to an integer half the size, then
11381 // truncate if needed.
11382
11383 if (DstEltSize == 1) {
11384 // First convert to the same size integer, then convert to mask using
11385 // setcc.
11386 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11387 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11388 DstVT.getVectorElementCount());
11389 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11390
11391 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11392 // otherwise the conversion was undefined.
11393 MVT XLenVT = Subtarget.getXLenVT();
11394 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11395 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11396 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11397 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11398 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11399 DAG.getUNDEF(DstVT), Mask, VL});
11400 } else {
11401 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11402 DstVT.getVectorElementCount());
11403
11404 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11405
11406 while (InterimIVT != DstVT) {
11407 SrcEltSize /= 2;
11408 Src = Result;
11409 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11410 DstVT.getVectorElementCount());
11411 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11412 Src, Mask, VL);
11413 }
11414 }
11415 }
11416 }
11417
11418 MVT VT = Op.getSimpleValueType();
11419 if (!VT.isFixedLengthVector())
11420 return Result;
11421 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11422}
11423
11424SDValue
11425RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11426 SelectionDAG &DAG) const {
11427 SDLoc DL(Op);
11428
11429 SDValue Op1 = Op.getOperand(0);
11430 SDValue Op2 = Op.getOperand(1);
11431 SDValue Offset = Op.getOperand(2);
11432 SDValue Mask = Op.getOperand(3);
11433 SDValue EVL1 = Op.getOperand(4);
11434 SDValue EVL2 = Op.getOperand(5);
11435
11436 const MVT XLenVT = Subtarget.getXLenVT();
11437 MVT VT = Op.getSimpleValueType();
11438 MVT ContainerVT = VT;
11439 if (VT.isFixedLengthVector()) {
11440 ContainerVT = getContainerForFixedLengthVector(VT);
11441 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11442 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11443 MVT MaskVT = getMaskTypeFor(ContainerVT);
11444 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11445 }
11446
11447 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11448 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11449
11450 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11451 if (IsMaskVector) {
11452 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11453
11454 // Expand input operands
11455 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11456 DAG.getUNDEF(ContainerVT),
11457 DAG.getConstant(1, DL, XLenVT), EVL1);
11458 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11459 DAG.getUNDEF(ContainerVT),
11460 DAG.getConstant(0, DL, XLenVT), EVL1);
11461 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11462 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11463
11464 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11465 DAG.getUNDEF(ContainerVT),
11466 DAG.getConstant(1, DL, XLenVT), EVL2);
11467 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11468 DAG.getUNDEF(ContainerVT),
11469 DAG.getConstant(0, DL, XLenVT), EVL2);
11470 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11471 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11472 }
11473
11474 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11475 SDValue DownOffset, UpOffset;
11476 if (ImmValue >= 0) {
11477 // The operand is a TargetConstant, we need to rebuild it as a regular
11478 // constant.
11479 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11480 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11481 } else {
11482 // The operand is a TargetConstant, we need to rebuild it as a regular
11483 // constant rather than negating the original operand.
11484 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11485 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11486 }
11487
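  // The splice itself is a vslidedown of Op1 by DownOffset into an undef vector,
  // followed by a tail-agnostic vslideup of Op2 at position UpOffset, where
  // DownOffset + UpOffset == EVL1.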
11488 SDValue SlideDown =
11489 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11490 Op1, DownOffset, Mask, UpOffset);
11491 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11492 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11493
11494 if (IsMaskVector) {
11495 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11496 Result = DAG.getNode(
11497 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11498 {Result, DAG.getConstant(0, DL, ContainerVT),
11499 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11500 Mask, EVL2});
11501 }
11502
11503 if (!VT.isFixedLengthVector())
11504 return Result;
11505 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11506}
11507
11508SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
11509 SelectionDAG &DAG) const {
11510 SDLoc DL(Op);
11511 SDValue Val = Op.getOperand(0);
11512 SDValue Mask = Op.getOperand(1);
11513 SDValue VL = Op.getOperand(2);
11514 MVT VT = Op.getSimpleValueType();
11515
11516 MVT ContainerVT = VT;
11517 if (VT.isFixedLengthVector()) {
11518 ContainerVT = getContainerForFixedLengthVector(VT);
11519 MVT MaskVT = getMaskTypeFor(ContainerVT);
11520 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11521 }
11522
11523 SDValue Result =
11524 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
11525
11526 if (!VT.isFixedLengthVector())
11527 return Result;
11528 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11529}
11530
11531SDValue
11532RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11533 SelectionDAG &DAG) const {
11534 SDLoc DL(Op);
11535 MVT VT = Op.getSimpleValueType();
11536 MVT XLenVT = Subtarget.getXLenVT();
11537
11538 SDValue Op1 = Op.getOperand(0);
11539 SDValue Mask = Op.getOperand(1);
11540 SDValue EVL = Op.getOperand(2);
11541
11542 MVT ContainerVT = VT;
11543 if (VT.isFixedLengthVector()) {
11544 ContainerVT = getContainerForFixedLengthVector(VT);
11545 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11546 MVT MaskVT = getMaskTypeFor(ContainerVT);
11547 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11548 }
11549
11550 MVT GatherVT = ContainerVT;
11551 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11552 // Check if we are working with mask vectors
11553 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11554 if (IsMaskVector) {
11555 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11556
11557 // Expand input operand
11558 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11559 DAG.getUNDEF(IndicesVT),
11560 DAG.getConstant(1, DL, XLenVT), EVL);
11561 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11562 DAG.getUNDEF(IndicesVT),
11563 DAG.getConstant(0, DL, XLenVT), EVL);
11564 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11565 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11566 }
11567
11568 unsigned EltSize = GatherVT.getScalarSizeInBits();
11569 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11570 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11571 unsigned MaxVLMAX =
11572 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11573
11574 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11575 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11576 // to use vrgatherei16.vv.
11577 // TODO: It's also possible to use vrgatherei16.vv for other types to
11578 // decrease register width for the index calculation.
11579 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
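  // With SEW=8 an index can only address 256 elements, so once VLMAX may exceed
  // 256 the indices are either computed as i16 (doubling LMUL) or, at LMUL=8,
  // the reverse is performed on two halves which are then swapped.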
11580 if (MaxVLMAX > 256 && EltSize == 8) {
11581 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11582 // Split the vector in half and reverse each half using a full register
11583 // reverse.
11584 // Swap the halves and concatenate them.
11585 // Slide the concatenated result by (VLMax - VL).
11586 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11587 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11588 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11589
11590 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11591 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11592
11593 // Reassemble the low and high pieces reversed.
11594 // NOTE: this Result is unmasked (because we do not need masks for
11595 // shuffles). If in the future this has to change, we can use a SELECT_VL
11596 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11597 SDValue Result =
11598 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11599
11600 // Slide off any elements from past EVL that were reversed into the low
11601 // elements.
11602 unsigned MinElts = GatherVT.getVectorMinNumElements();
11603 SDValue VLMax =
11604 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11605 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11606
11607 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11608 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11609
11610 if (IsMaskVector) {
11611 // Truncate Result back to a mask vector
11612 Result =
11613 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11614 {Result, DAG.getConstant(0, DL, GatherVT),
11615                        DAG.getCondCode(ISD::SETNE),
11616                        DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11617 }
11618
11619 if (!VT.isFixedLengthVector())
11620 return Result;
11621 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11622 }
11623
11624 // Just promote the int type to i16 which will double the LMUL.
11625 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11626 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11627 }
11628
11629 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11630 SDValue VecLen =
11631 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11632 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11633 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11634 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11635 DAG.getUNDEF(IndicesVT), Mask, EVL);
11636 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11637 DAG.getUNDEF(GatherVT), Mask, EVL);
11638
11639 if (IsMaskVector) {
11640 // Truncate Result back to a mask vector
11641 Result = DAG.getNode(
11642 RISCVISD::SETCC_VL, DL, ContainerVT,
11643 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11644 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11645 }
11646
11647 if (!VT.isFixedLengthVector())
11648 return Result;
11649 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11650}
11651
11652SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11653 SelectionDAG &DAG) const {
11654 MVT VT = Op.getSimpleValueType();
11655 if (VT.getVectorElementType() != MVT::i1)
11656 return lowerVPOp(Op, DAG);
11657
11658 // It is safe to drop mask parameter as masked-off elements are undef.
11659 SDValue Op1 = Op->getOperand(0);
11660 SDValue Op2 = Op->getOperand(1);
11661 SDValue VL = Op->getOperand(3);
11662
11663 MVT ContainerVT = VT;
11664 const bool IsFixed = VT.isFixedLengthVector();
11665 if (IsFixed) {
11666 ContainerVT = getContainerForFixedLengthVector(VT);
11667 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11668 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11669 }
11670
11671 SDLoc DL(Op);
11672 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11673 if (!IsFixed)
11674 return Val;
11675 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11676}
11677
11678SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11679 SelectionDAG &DAG) const {
11680 SDLoc DL(Op);
11681 MVT XLenVT = Subtarget.getXLenVT();
11682 MVT VT = Op.getSimpleValueType();
11683 MVT ContainerVT = VT;
11684 if (VT.isFixedLengthVector())
11685 ContainerVT = getContainerForFixedLengthVector(VT);
11686
11687 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11688
11689 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11690 // Check if the mask is known to be all ones
11691 SDValue Mask = VPNode->getMask();
11692 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11693
11694 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11695 : Intrinsic::riscv_vlse_mask,
11696 DL, XLenVT);
11697 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11698 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11699 VPNode->getStride()};
11700 if (!IsUnmasked) {
11701 if (VT.isFixedLengthVector()) {
11702 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11703 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11704 }
11705 Ops.push_back(Mask);
11706 }
11707 Ops.push_back(VPNode->getVectorLength());
11708 if (!IsUnmasked) {
11709 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11710 Ops.push_back(Policy);
11711 }
11712
11713 SDValue Result =
11714      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11715                              VPNode->getMemoryVT(), VPNode->getMemOperand());
11716 SDValue Chain = Result.getValue(1);
11717
11718 if (VT.isFixedLengthVector())
11719 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11720
11721 return DAG.getMergeValues({Result, Chain}, DL);
11722}
11723
11724SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11725 SelectionDAG &DAG) const {
11726 SDLoc DL(Op);
11727 MVT XLenVT = Subtarget.getXLenVT();
11728
11729 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11730 SDValue StoreVal = VPNode->getValue();
11731 MVT VT = StoreVal.getSimpleValueType();
11732 MVT ContainerVT = VT;
11733 if (VT.isFixedLengthVector()) {
11734 ContainerVT = getContainerForFixedLengthVector(VT);
11735 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11736 }
11737
11738 // Check if the mask is known to be all ones
11739 SDValue Mask = VPNode->getMask();
11740 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11741
11742 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11743 : Intrinsic::riscv_vsse_mask,
11744 DL, XLenVT);
11745 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11746 VPNode->getBasePtr(), VPNode->getStride()};
11747 if (!IsUnmasked) {
11748 if (VT.isFixedLengthVector()) {
11749 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11750 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11751 }
11752 Ops.push_back(Mask);
11753 }
11754 Ops.push_back(VPNode->getVectorLength());
11755
11756 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11757 Ops, VPNode->getMemoryVT(),
11758 VPNode->getMemOperand());
11759}
11760
11761// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11762// matched to a RVV indexed load. The RVV indexed load instructions only
11763// support the "unsigned unscaled" addressing mode; indices are implicitly
11764// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11765// signed or scaled indexing is extended to the XLEN value type and scaled
11766// accordingly.
11767SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11768 SelectionDAG &DAG) const {
11769 SDLoc DL(Op);
11770 MVT VT = Op.getSimpleValueType();
11771
11772 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11773 EVT MemVT = MemSD->getMemoryVT();
11774 MachineMemOperand *MMO = MemSD->getMemOperand();
11775 SDValue Chain = MemSD->getChain();
11776 SDValue BasePtr = MemSD->getBasePtr();
11777
11778 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11779 SDValue Index, Mask, PassThru, VL;
11780
11781 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11782 Index = VPGN->getIndex();
11783 Mask = VPGN->getMask();
11784 PassThru = DAG.getUNDEF(VT);
11785 VL = VPGN->getVectorLength();
11786 // VP doesn't support extending loads.
11787    LoadExtType = ISD::NON_EXTLOAD;
11788  } else {
11789 // Else it must be a MGATHER.
11790 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11791 Index = MGN->getIndex();
11792 Mask = MGN->getMask();
11793 PassThru = MGN->getPassThru();
11794 LoadExtType = MGN->getExtensionType();
11795 }
11796
11797 MVT IndexVT = Index.getSimpleValueType();
11798 MVT XLenVT = Subtarget.getXLenVT();
11799
11800  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11801         "Unexpected VTs!");
11802 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11803 // Targets have to explicitly opt-in for extending vector loads.
11804 assert(LoadExtType == ISD::NON_EXTLOAD &&
11805 "Unexpected extending MGATHER/VP_GATHER");
11806
11807 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11808 // the selection of the masked intrinsics doesn't do this for us.
11809 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11810
11811 MVT ContainerVT = VT;
11812 if (VT.isFixedLengthVector()) {
11813 ContainerVT = getContainerForFixedLengthVector(VT);
11814 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11815 ContainerVT.getVectorElementCount());
11816
11817 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11818
11819 if (!IsUnmasked) {
11820 MVT MaskVT = getMaskTypeFor(ContainerVT);
11821 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11822 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11823 }
11824 }
11825
11826 if (!VL)
11827 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11828
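  // Index elements wider than XLEN can simply be truncated: as noted above, the
  // indexed load treats each index as an XLEN-bit byte offset, so the discarded
  // high bits never participate in the address calculation.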
11829 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11830 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11831 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11832 }
11833
11834 unsigned IntID =
11835 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11836 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11837 if (IsUnmasked)
11838 Ops.push_back(DAG.getUNDEF(ContainerVT));
11839 else
11840 Ops.push_back(PassThru);
11841 Ops.push_back(BasePtr);
11842 Ops.push_back(Index);
11843 if (!IsUnmasked)
11844 Ops.push_back(Mask);
11845 Ops.push_back(VL);
11846 if (!IsUnmasked)
11847    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11848
11849 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11850 SDValue Result =
11851 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11852 Chain = Result.getValue(1);
11853
11854 if (VT.isFixedLengthVector())
11855 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11856
11857 return DAG.getMergeValues({Result, Chain}, DL);
11858}
11859
11860// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11861// matched to a RVV indexed store. The RVV indexed store instructions only
11862// support the "unsigned unscaled" addressing mode; indices are implicitly
11863// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11864// signed or scaled indexing is extended to the XLEN value type and scaled
11865// accordingly.
11866SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11867 SelectionDAG &DAG) const {
11868 SDLoc DL(Op);
11869 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11870 EVT MemVT = MemSD->getMemoryVT();
11871 MachineMemOperand *MMO = MemSD->getMemOperand();
11872 SDValue Chain = MemSD->getChain();
11873 SDValue BasePtr = MemSD->getBasePtr();
11874
11875 [[maybe_unused]] bool IsTruncatingStore = false;
11876 SDValue Index, Mask, Val, VL;
11877
11878 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11879 Index = VPSN->getIndex();
11880 Mask = VPSN->getMask();
11881 Val = VPSN->getValue();
11882 VL = VPSN->getVectorLength();
11883 // VP doesn't support truncating stores.
11884 IsTruncatingStore = false;
11885 } else {
11886 // Else it must be a MSCATTER.
11887 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11888 Index = MSN->getIndex();
11889 Mask = MSN->getMask();
11890 Val = MSN->getValue();
11891 IsTruncatingStore = MSN->isTruncatingStore();
11892 }
11893
11894 MVT VT = Val.getSimpleValueType();
11895 MVT IndexVT = Index.getSimpleValueType();
11896 MVT XLenVT = Subtarget.getXLenVT();
11897
11898  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11899         "Unexpected VTs!");
11900 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11901 // Targets have to explicitly opt-in for extending vector loads and
11902 // truncating vector stores.
11903 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11904
11905 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11906 // the selection of the masked intrinsics doesn't do this for us.
11907 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11908
11909 MVT ContainerVT = VT;
11910 if (VT.isFixedLengthVector()) {
11911 ContainerVT = getContainerForFixedLengthVector(VT);
11912 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11913 ContainerVT.getVectorElementCount());
11914
11915 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11916 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11917
11918 if (!IsUnmasked) {
11919 MVT MaskVT = getMaskTypeFor(ContainerVT);
11920 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11921 }
11922 }
11923
11924 if (!VL)
11925 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11926
11927 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11928 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11929 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11930 }
11931
11932 unsigned IntID =
11933 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11934 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11935 Ops.push_back(Val);
11936 Ops.push_back(BasePtr);
11937 Ops.push_back(Index);
11938 if (!IsUnmasked)
11939 Ops.push_back(Mask);
11940 Ops.push_back(VL);
11941
11942  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11943                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11944}
11945
11946SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11947 SelectionDAG &DAG) const {
11948 const MVT XLenVT = Subtarget.getXLenVT();
11949 SDLoc DL(Op);
11950 SDValue Chain = Op->getOperand(0);
11951 SDValue SysRegNo = DAG.getTargetConstant(
11952 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11953 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11954 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11955
11956 // Encoding used for rounding mode in RISC-V differs from that used in
11957 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11958 // table, which consists of a sequence of 4-bit fields, each representing
11959  // the corresponding FLT_ROUNDS mode.
11960  static const int Table =
11961      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11962      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11963      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11964      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11965      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11966
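  // For example, reading FRM == 1 (RTZ) shifts the table right by 4 * 1 and masks
  // with 7, yielding int(RoundingMode::TowardZero) == 0, the value GET_ROUNDING
  // uses for round-toward-zero.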
11967 SDValue Shift =
11968 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11969 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11970 DAG.getConstant(Table, DL, XLenVT), Shift);
11971 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11972 DAG.getConstant(7, DL, XLenVT));
11973
11974 return DAG.getMergeValues({Masked, Chain}, DL);
11975}
11976
11977SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11978 SelectionDAG &DAG) const {
11979 const MVT XLenVT = Subtarget.getXLenVT();
11980 SDLoc DL(Op);
11981 SDValue Chain = Op->getOperand(0);
11982 SDValue RMValue = Op->getOperand(1);
11983 SDValue SysRegNo = DAG.getTargetConstant(
11984 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11985
11986 // Encoding used for rounding mode in RISC-V differs from that used in
11987 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11988 // a table, which consists of a sequence of 4-bit fields, each representing
11989  // the corresponding RISC-V mode.
11990  static const unsigned Table =
11991      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11992      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11993      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11994      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11995      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11996
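  // For example, FLT_ROUNDS mode 1 (to nearest) selects the nibble at bit 4,
  // which holds RISCVFPRndMode::RNE (0), the encoding then written to FRM.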
11997 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11998
11999 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12000 DAG.getConstant(2, DL, XLenVT));
12001 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12002 DAG.getConstant(Table, DL, XLenVT), Shift);
12003 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12004 DAG.getConstant(0x7, DL, XLenVT));
12005 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12006 RMValue);
12007}
12008
12009SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12010 SelectionDAG &DAG) const {
12011  MachineFunction &MF = DAG.getMachineFunction();
12012
12013 bool isRISCV64 = Subtarget.is64Bit();
12014 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12015
12016 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12017 return DAG.getFrameIndex(FI, PtrVT);
12018}
12019
12020// Returns the opcode of the target-specific SDNode that implements the 32-bit
12021// form of the given Opcode.
12022static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12023 switch (Opcode) {
12024 default:
12025 llvm_unreachable("Unexpected opcode");
12026 case ISD::SHL:
12027 return RISCVISD::SLLW;
12028 case ISD::SRA:
12029 return RISCVISD::SRAW;
12030 case ISD::SRL:
12031 return RISCVISD::SRLW;
12032 case ISD::SDIV:
12033 return RISCVISD::DIVW;
12034 case ISD::UDIV:
12035 return RISCVISD::DIVUW;
12036 case ISD::UREM:
12037 return RISCVISD::REMUW;
12038 case ISD::ROTL:
12039 return RISCVISD::ROLW;
12040 case ISD::ROTR:
12041 return RISCVISD::RORW;
12042 }
12043}
12044
12045// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12046// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12047// otherwise be promoted to i64, making it difficult to select the
12048// SLLW/DIVUW/.../*W later on, because the fact that the operation was
12049// originally of type i8/i16/i32 is lost.
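// For example, an i32 udiv on RV64 is rebuilt as
//   (trunc i32 (RISCVISD::DIVUW (any_extend i64 a), (any_extend i64 b)))
// so isel can still match divuw; any_extend is sufficient because the *W
// instructions only read the low 32 bits of their operands.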
12050static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12051                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
12052 SDLoc DL(N);
12053 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12054 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12055 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12056 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12057 // ReplaceNodeResults requires we maintain the same type for the return value.
12058 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12059}
12060
12061// Converts the given 32-bit operation to a i64 operation with signed extension
12062// semantic to reduce the signed extension instructions.
12063static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12064  SDLoc DL(N);
12065 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12066 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12067 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12068 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12069 DAG.getValueType(MVT::i32));
12070 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12071}
12072
12073void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12074                                             SmallVectorImpl<SDValue> &Results,
12075                                             SelectionDAG &DAG) const {
12076 SDLoc DL(N);
12077 switch (N->getOpcode()) {
12078 default:
12079 llvm_unreachable("Don't know how to custom type legalize this operation!");
12080  case ISD::STRICT_FP_TO_SINT:
12081  case ISD::STRICT_FP_TO_UINT:
12082  case ISD::FP_TO_SINT:
12083 case ISD::FP_TO_UINT: {
12084 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12085 "Unexpected custom legalisation");
12086 bool IsStrict = N->isStrictFPOpcode();
12087 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12088 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12089 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12090 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12091            TargetLowering::TypeSoftenFloat) {
12092      if (!isTypeLegal(Op0.getValueType()))
12093 return;
12094 if (IsStrict) {
12095 SDValue Chain = N->getOperand(0);
12096        // In the absence of Zfh, promote f16 to f32, then convert.
12097 if (Op0.getValueType() == MVT::f16 &&
12098 !Subtarget.hasStdExtZfhOrZhinx()) {
12099 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12100 {Chain, Op0});
12101 Chain = Op0.getValue(1);
12102 }
12103 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12104                                 : RISCVISD::STRICT_FCVT_WU_RV64;
12105        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12106 SDValue Res = DAG.getNode(
12107 Opc, DL, VTs, Chain, Op0,
12108 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12109 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12110 Results.push_back(Res.getValue(1));
12111 return;
12112 }
12113      // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12114 // convert.
12115 if ((Op0.getValueType() == MVT::f16 &&
12116 !Subtarget.hasStdExtZfhOrZhinx()) ||
12117 Op0.getValueType() == MVT::bf16)
12118 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12119
12120 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12121 SDValue Res =
12122 DAG.getNode(Opc, DL, MVT::i64, Op0,
12123 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12124 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12125 return;
12126 }
12127 // If the FP type needs to be softened, emit a library call using the 'si'
12128 // version. If we left it to default legalization we'd end up with 'di'. If
12129 // the FP type doesn't need to be softened just let generic type
12130 // legalization promote the result type.
12131 RTLIB::Libcall LC;
12132 if (IsSigned)
12133 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12134 else
12135 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12136 MakeLibCallOptions CallOptions;
12137 EVT OpVT = Op0.getValueType();
12138 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12139 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12140 SDValue Result;
12141 std::tie(Result, Chain) =
12142 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12143 Results.push_back(Result);
12144 if (IsStrict)
12145 Results.push_back(Chain);
12146 break;
12147 }
12148 case ISD::LROUND: {
12149 SDValue Op0 = N->getOperand(0);
12150 EVT Op0VT = Op0.getValueType();
12151 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12152            TargetLowering::TypeSoftenFloat) {
12153      if (!isTypeLegal(Op0VT))
12154 return;
12155
12156    // In the absence of Zfh, promote f16 to f32, then convert.
12157 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12158 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12159
12160 SDValue Res =
12161 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12162 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12163 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12164 return;
12165 }
12166 // If the FP type needs to be softened, emit a library call to lround. We'll
12167 // need to truncate the result. We assume any value that doesn't fit in i32
12168 // is allowed to return an unspecified value.
12169 RTLIB::Libcall LC =
12170 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12171 MakeLibCallOptions CallOptions;
12172 EVT OpVT = Op0.getValueType();
12173 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12174 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12175 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12176 Results.push_back(Result);
12177 break;
12178 }
12179  case ISD::READCYCLECOUNTER:
12180  case ISD::READSTEADYCOUNTER: {
12181    assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12182 "has custom type legalization on riscv32");
12183
12184 SDValue LoCounter, HiCounter;
12185 MVT XLenVT = Subtarget.getXLenVT();
12186 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12187 LoCounter = DAG.getTargetConstant(
12188 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12189 HiCounter = DAG.getTargetConstant(
12190 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12191 } else {
12192 LoCounter = DAG.getTargetConstant(
12193 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12194 HiCounter = DAG.getTargetConstant(
12195 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12196 }
12197 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12198    SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12199                              N->getOperand(0), LoCounter, HiCounter);
12200
12201 Results.push_back(
12202 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12203 Results.push_back(RCW.getValue(2));
12204 break;
12205 }
12206 case ISD::LOAD: {
12207 if (!ISD::isNON_EXTLoad(N))
12208 return;
12209
12210 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12211 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12212 LoadSDNode *Ld = cast<LoadSDNode>(N);
12213
12214 SDLoc dl(N);
12215 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12216 Ld->getBasePtr(), Ld->getMemoryVT(),
12217 Ld->getMemOperand());
12218 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12219 Results.push_back(Res.getValue(1));
12220 return;
12221 }
12222 case ISD::MUL: {
12223 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12224 unsigned XLen = Subtarget.getXLen();
12225 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12226 if (Size > XLen) {
12227 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12228 SDValue LHS = N->getOperand(0);
12229 SDValue RHS = N->getOperand(1);
12230 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12231
12232 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12233 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12234 // We need exactly one side to be unsigned.
12235 if (LHSIsU == RHSIsU)
12236 return;
12237
12238 auto MakeMULPair = [&](SDValue S, SDValue U) {
12239 MVT XLenVT = Subtarget.getXLenVT();
12240 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12241 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12242 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12243 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12244 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12245 };
12246
12247 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12248 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12249
12250 // The other operand should be signed, but still prefer MULH when
12251 // possible.
12252 if (RHSIsU && LHSIsS && !RHSIsS)
12253 Results.push_back(MakeMULPair(LHS, RHS));
12254 else if (LHSIsU && RHSIsS && !LHSIsS)
12255 Results.push_back(MakeMULPair(RHS, LHS));
12256
12257 return;
12258 }
12259 [[fallthrough]];
12260 }
12261 case ISD::ADD:
12262 case ISD::SUB:
12263 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12264 "Unexpected custom legalisation");
12265 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12266 break;
12267 case ISD::SHL:
12268 case ISD::SRA:
12269 case ISD::SRL:
12270 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12271 "Unexpected custom legalisation");
12272 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12273 // If we can use a BSET instruction, allow default promotion to apply.
12274 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12275 isOneConstant(N->getOperand(0)))
12276 break;
12277 Results.push_back(customLegalizeToWOp(N, DAG));
12278 break;
12279 }
12280
12281 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12282 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12283 // shift amount.
12284 if (N->getOpcode() == ISD::SHL) {
12285 SDLoc DL(N);
12286 SDValue NewOp0 =
12287 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12288 SDValue NewOp1 =
12289 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12290 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12291 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12292 DAG.getValueType(MVT::i32));
12293 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12294 }
12295
12296 break;
12297 case ISD::ROTL:
12298 case ISD::ROTR:
12299 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12300 "Unexpected custom legalisation");
12301 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12302 Subtarget.hasVendorXTHeadBb()) &&
12303 "Unexpected custom legalization");
12304 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12305 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12306 return;
12307 Results.push_back(customLegalizeToWOp(N, DAG));
12308 break;
12309 case ISD::CTTZ:
12310  case ISD::CTTZ_ZERO_UNDEF:
12311  case ISD::CTLZ:
12312 case ISD::CTLZ_ZERO_UNDEF: {
12313 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12314 "Unexpected custom legalisation");
12315
12316 SDValue NewOp0 =
12317 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12318 bool IsCTZ =
12319 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12320 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12321 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12322 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12323 return;
12324 }
12325 case ISD::SDIV:
12326 case ISD::UDIV:
12327 case ISD::UREM: {
12328 MVT VT = N->getSimpleValueType(0);
12329 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12330 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12331 "Unexpected custom legalisation");
12332 // Don't promote division/remainder by constant since we should expand those
12333 // to multiply by magic constant.
12334    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12335    if (N->getOperand(1).getOpcode() == ISD::Constant &&
12336 !isIntDivCheap(N->getValueType(0), Attr))
12337 return;
12338
12339 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12340 // the upper 32 bits. For other types we need to sign or zero extend
12341 // based on the opcode.
12342 unsigned ExtOpc = ISD::ANY_EXTEND;
12343 if (VT != MVT::i32)
12344 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12345                                           : ISD::ZERO_EXTEND;
12346
12347 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12348 break;
12349 }
12350 case ISD::SADDO: {
12351 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12352 "Unexpected custom legalisation");
12353
12354 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12355 // use the default legalization.
12356 if (!isa<ConstantSDNode>(N->getOperand(1)))
12357 return;
12358
12359 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12360 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12361 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12362 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12363 DAG.getValueType(MVT::i32));
12364
12365 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12366
12367 // For an addition, the result should be less than one of the operands (LHS)
12368 // if and only if the other operand (RHS) is negative, otherwise there will
12369 // be overflow.
12370 // For a subtraction, the result should be less than one of the operands
12371 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12372 // otherwise there will be overflow.
12373 EVT OType = N->getValueType(1);
12374 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12375 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12376
12377 SDValue Overflow =
12378 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12379 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12380 Results.push_back(Overflow);
12381 return;
12382 }
12383 case ISD::UADDO:
12384 case ISD::USUBO: {
12385 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12386 "Unexpected custom legalisation");
12387 bool IsAdd = N->getOpcode() == ISD::UADDO;
12388 // Create an ADDW or SUBW.
12389 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12390 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue Res =
12392 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12393 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12394 DAG.getValueType(MVT::i32));
12395
12396 SDValue Overflow;
12397 if (IsAdd && isOneConstant(RHS)) {
12398 // Special case uaddo X, 1 overflowed if the addition result is 0.
12399 // The general case (X + C) < C is not necessarily beneficial. Although we
12400 // reduce the live range of X, we may introduce the materialization of
12401      // constant C, especially when the setcc result is used by a branch. RISC-V
12402      // has no compare-with-constant-and-branch instructions.
12403 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12404 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12405 } else if (IsAdd && isAllOnesConstant(RHS)) {
12406 // Special case uaddo X, -1 overflowed if X != 0.
12407 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12408 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12409 } else {
12410 // Sign extend the LHS and perform an unsigned compare with the ADDW
12411 // result. Since the inputs are sign extended from i32, this is equivalent
12412 // to comparing the lower 32 bits.
12413 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12414 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12415 IsAdd ? ISD::SETULT : ISD::SETUGT);
12416 }
12417
12418 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12419 Results.push_back(Overflow);
12420 return;
12421 }
12422 case ISD::UADDSAT:
12423 case ISD::USUBSAT: {
12424 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12425 "Unexpected custom legalisation");
12426 if (Subtarget.hasStdExtZbb()) {
12427 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12428 // sign extend allows overflow of the lower 32 bits to be detected on
12429 // the promoted size.
12430 SDValue LHS =
12431 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12432 SDValue RHS =
12433 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12434 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12435 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12436 return;
12437 }
12438
12439 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12440 // promotion for UADDO/USUBO.
12441 Results.push_back(expandAddSubSat(N, DAG));
12442 return;
12443 }
12444 case ISD::SADDSAT:
12445 case ISD::SSUBSAT: {
12446 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12447 "Unexpected custom legalisation");
12448 Results.push_back(expandAddSubSat(N, DAG));
12449 return;
12450 }
12451 case ISD::ABS: {
12452 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12453 "Unexpected custom legalisation");
12454
12455 if (Subtarget.hasStdExtZbb()) {
12456 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12457 // This allows us to remember that the result is sign extended. Expanding
12458 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12459 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12460 N->getOperand(0));
12461 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12462 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12463 return;
12464 }
12465
12466 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12467 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12468
12469    // Freeze the source so we can increase its use count.
12470 Src = DAG.getFreeze(Src);
12471
12472 // Copy sign bit to all bits using the sraiw pattern.
12473 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12474 DAG.getValueType(MVT::i32));
12475 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12476 DAG.getConstant(31, DL, MVT::i64));
12477
12478 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12479 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12480
12481 // NOTE: The result is only required to be anyextended, but sext is
12482 // consistent with type legalization of sub.
12483 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12484 DAG.getValueType(MVT::i32));
12485 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12486 return;
12487 }
12488 case ISD::BITCAST: {
12489 EVT VT = N->getValueType(0);
12490 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12491 SDValue Op0 = N->getOperand(0);
12492 EVT Op0VT = Op0.getValueType();
12493 MVT XLenVT = Subtarget.getXLenVT();
12494 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12495 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12496 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12497 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12498 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12499 Subtarget.hasStdExtZfbfmin()) {
12500 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12501 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12502 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12503 Subtarget.hasStdExtFOrZfinx()) {
12504 SDValue FPConv =
12505 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12507 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12508 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12509 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12510 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12511 NewReg.getValue(0), NewReg.getValue(1));
12512 Results.push_back(RetReg);
12513 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12514 isTypeLegal(Op0VT)) {
12515 // Custom-legalize bitcasts from fixed-length vector types to illegal
12516 // scalar types in order to improve codegen. Bitcast the vector to a
12517 // one-element vector type whose element type is the same as the result
12518 // type, and extract the first element.
12519 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12520 if (isTypeLegal(BVT)) {
12521 SDValue BVec = DAG.getBitcast(BVT, Op0);
12522 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12523 DAG.getVectorIdxConstant(0, DL)));
12524 }
12525 }
12526 break;
12527 }
12528 case RISCVISD::BREV8:
12529 case RISCVISD::ORC_B: {
12530 MVT VT = N->getSimpleValueType(0);
12531 MVT XLenVT = Subtarget.getXLenVT();
12532 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12533 "Unexpected custom legalisation");
12534 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12535 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12536 "Unexpected extension");
12537 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12538 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12539 // ReplaceNodeResults requires we maintain the same type for the return
12540 // value.
12541 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12542 break;
12543 }
12544  case ISD::EXTRACT_VECTOR_ELT: {
12545 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12546 // type is illegal (currently only vXi64 RV32).
12547 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12548 // transferred to the destination register. We issue two of these from the
12549 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12550 // first element.
12551 SDValue Vec = N->getOperand(0);
12552 SDValue Idx = N->getOperand(1);
12553
12554 // The vector type hasn't been legalized yet so we can't issue target
12555 // specific nodes if it needs legalization.
12556 // FIXME: We would manually legalize if it's important.
12557 if (!isTypeLegal(Vec.getValueType()))
12558 return;
12559
12560 MVT VecVT = Vec.getSimpleValueType();
12561
12562 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12563 VecVT.getVectorElementType() == MVT::i64 &&
12564 "Unexpected EXTRACT_VECTOR_ELT legalization");
12565
12566 // If this is a fixed vector, we need to convert it to a scalable vector.
12567 MVT ContainerVT = VecVT;
12568 if (VecVT.isFixedLengthVector()) {
12569 ContainerVT = getContainerForFixedLengthVector(VecVT);
12570 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12571 }
12572
12573 MVT XLenVT = Subtarget.getXLenVT();
12574
12575 // Use a VL of 1 to avoid processing more elements than we need.
12576 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12577
12578 // Unless the index is known to be 0, we must slide the vector down to get
12579 // the desired element into index 0.
12580 if (!isNullConstant(Idx)) {
12581 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12582 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12583 }
12584
12585 // Extract the lower XLEN bits of the correct vector element.
12586 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12587
12588 // To extract the upper XLEN bits of the vector element, shift the first
12589 // element right by 32 bits and re-extract the lower XLEN bits.
12590 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12591 DAG.getUNDEF(ContainerVT),
12592 DAG.getConstant(32, DL, XLenVT), VL);
12593 SDValue LShr32 =
12594 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12595 DAG.getUNDEF(ContainerVT), Mask, VL);
12596
12597 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12598
12599 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12600 break;
12601 }
12602  case ISD::INTRINSIC_WO_CHAIN: {
12603 unsigned IntNo = N->getConstantOperandVal(0);
12604 switch (IntNo) {
12605 default:
12606      llvm_unreachable(
12607 "Don't know how to custom type legalize this intrinsic!");
12608 case Intrinsic::experimental_get_vector_length: {
12609 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12610 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12611 return;
12612 }
12613 case Intrinsic::experimental_cttz_elts: {
12614 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12615 Results.push_back(
12616 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12617 return;
12618 }
12619 case Intrinsic::riscv_orc_b:
12620 case Intrinsic::riscv_brev8:
12621 case Intrinsic::riscv_sha256sig0:
12622 case Intrinsic::riscv_sha256sig1:
12623 case Intrinsic::riscv_sha256sum0:
12624 case Intrinsic::riscv_sha256sum1:
12625 case Intrinsic::riscv_sm3p0:
12626 case Intrinsic::riscv_sm3p1: {
12627 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12628 return;
12629 unsigned Opc;
12630 switch (IntNo) {
12631 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12632 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12633 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12634 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12635 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12636 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12637 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12638 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12639 }
12640
12641 SDValue NewOp =
12642 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12643 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12644 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12645 return;
12646 }
12647 case Intrinsic::riscv_sm4ks:
12648 case Intrinsic::riscv_sm4ed: {
12649 unsigned Opc =
12650 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12651 SDValue NewOp0 =
12652 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12653 SDValue NewOp1 =
12654 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12655 SDValue Res =
12656 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12657 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12658 return;
12659 }
12660 case Intrinsic::riscv_mopr: {
12661 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12662 return;
12663 SDValue NewOp =
12664 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12665 SDValue Res = DAG.getNode(
12666 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12667 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12668 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12669 return;
12670 }
12671 case Intrinsic::riscv_moprr: {
12672 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12673 return;
12674 SDValue NewOp0 =
12675 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12676 SDValue NewOp1 =
12677 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12678 SDValue Res = DAG.getNode(
12679 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12680 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12681 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12682 return;
12683 }
12684 case Intrinsic::riscv_clmul: {
12685 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12686 return;
12687
12688 SDValue NewOp0 =
12689 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12690 SDValue NewOp1 =
12691 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12692 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12693 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12694 return;
12695 }
12696 case Intrinsic::riscv_clmulh:
12697 case Intrinsic::riscv_clmulr: {
12698 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12699 return;
12700
12701 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12702 // to the full 128-bit clmul result of multiplying two xlen values.
12703 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12704 // upper 32 bits.
12705 //
12706 // The alternative is to mask the inputs to 32 bits and use clmul, but
12707 // that requires two shifts to mask each input without zext.w.
12708 // FIXME: If the inputs are known zero extended or could be freely
12709 // zero extended, the mask form would be better.
12710 SDValue NewOp0 =
12711 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12712 SDValue NewOp1 =
12713 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12714 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12715 DAG.getConstant(32, DL, MVT::i64));
12716 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12717 DAG.getConstant(32, DL, MVT::i64));
12718 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12719                                                       : RISCVISD::CLMULR;
12720 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12721 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12722 DAG.getConstant(32, DL, MVT::i64));
12723 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12724 return;
12725 }
12726 case Intrinsic::riscv_vmv_x_s: {
12727 EVT VT = N->getValueType(0);
12728 MVT XLenVT = Subtarget.getXLenVT();
12729 if (VT.bitsLT(XLenVT)) {
12730      // Simple case: just extract using vmv.x.s and truncate.
12731 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12732 Subtarget.getXLenVT(), N->getOperand(1));
12733 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12734 return;
12735 }
12736
12737 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12738 "Unexpected custom legalization");
12739
12740 // We need to do the move in two steps.
12741 SDValue Vec = N->getOperand(1);
12742 MVT VecVT = Vec.getSimpleValueType();
12743
12744 // First extract the lower XLEN bits of the element.
12745 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12746
12747 // To extract the upper XLEN bits of the vector element, shift the first
12748 // element right by 32 bits and re-extract the lower XLEN bits.
12749 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12750
12751 SDValue ThirtyTwoV =
12752 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12753 DAG.getConstant(32, DL, XLenVT), VL);
12754 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12755 DAG.getUNDEF(VecVT), Mask, VL);
12756 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12757
12758 Results.push_back(
12759 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12760 break;
12761 }
12762 }
12763 break;
12764 }
12765 case ISD::VECREDUCE_ADD:
12766 case ISD::VECREDUCE_AND:
12767 case ISD::VECREDUCE_OR:
12768 case ISD::VECREDUCE_XOR:
12769  case ISD::VECREDUCE_SMAX:
12770  case ISD::VECREDUCE_UMAX:
12771  case ISD::VECREDUCE_SMIN:
12772  case ISD::VECREDUCE_UMIN:
12773 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12774 Results.push_back(V);
12775 break;
12776 case ISD::VP_REDUCE_ADD:
12777 case ISD::VP_REDUCE_AND:
12778 case ISD::VP_REDUCE_OR:
12779 case ISD::VP_REDUCE_XOR:
12780 case ISD::VP_REDUCE_SMAX:
12781 case ISD::VP_REDUCE_UMAX:
12782 case ISD::VP_REDUCE_SMIN:
12783 case ISD::VP_REDUCE_UMIN:
12784 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12785 Results.push_back(V);
12786 break;
12787 case ISD::GET_ROUNDING: {
12788 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12789 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12790 Results.push_back(Res.getValue(0));
12791 Results.push_back(Res.getValue(1));
12792 break;
12793 }
12794 }
12795}
12796
12797/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12798/// which corresponds to it.
12799static unsigned getVecReduceOpcode(unsigned Opc) {
12800 switch (Opc) {
12801 default:
12802    llvm_unreachable("Unhandled binary to transform reduction");
12803 case ISD::ADD:
12804 return ISD::VECREDUCE_ADD;
12805 case ISD::UMAX:
12806 return ISD::VECREDUCE_UMAX;
12807 case ISD::SMAX:
12808 return ISD::VECREDUCE_SMAX;
12809 case ISD::UMIN:
12810 return ISD::VECREDUCE_UMIN;
12811 case ISD::SMIN:
12812 return ISD::VECREDUCE_SMIN;
12813 case ISD::AND:
12814 return ISD::VECREDUCE_AND;
12815 case ISD::OR:
12816 return ISD::VECREDUCE_OR;
12817 case ISD::XOR:
12818 return ISD::VECREDUCE_XOR;
12819 case ISD::FADD:
12820 // Note: This is the associative form of the generic reduction opcode.
12821 return ISD::VECREDUCE_FADD;
12822 }
12823}
12824
12825/// Perform two related transforms whose purpose is to incrementally recognize
12826/// an explode_vector followed by scalar reduction as a vector reduction node.
12827/// This exists to recover from a deficiency in SLP which can't handle
12828/// forests with multiple roots sharing common nodes. In some cases, one
12829/// of the trees will be vectorized, and the other will remain (unprofitably)
12830/// scalarized.
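///
/// For illustration (not from the original source): the first transform turns
///   (add (extract_elt V, 0), (extract_elt V, 1))
/// into (vecreduce_add (extract_subvector V, [0,2))), and the second grows an
/// existing reduction by one element:
///   (add (vecreduce_add (extract_subvector V, [0,k))), (extract_elt V, k))
///     -> (vecreduce_add (extract_subvector V, [0,k+1)))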
12831static SDValue
12832combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12833 const RISCVSubtarget &Subtarget) {
12834
12835  // This transform needs to run before all integer types have been legalized
12836 // to i64 (so that the vector element type matches the add type), and while
12837 // it's safe to introduce odd sized vector types.
12838  if (DAG.NewNodesMustHaveLegalTypes)
12839 return SDValue();
12840
12841 // Without V, this transform isn't useful. We could form the (illegal)
12842 // operations and let them be scalarized again, but there's really no point.
12843 if (!Subtarget.hasVInstructions())
12844 return SDValue();
12845
12846 const SDLoc DL(N);
12847 const EVT VT = N->getValueType(0);
12848 const unsigned Opc = N->getOpcode();
12849
12850 // For FADD, we only handle the case with reassociation allowed. We
12851 // could handle strict reduction order, but at the moment, there's no
12852 // known reason to, and the complexity isn't worth it.
12853 // TODO: Handle fminnum and fmaxnum here
12854 if (!VT.isInteger() &&
12855 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12856 return SDValue();
12857
12858 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12859 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12860 "Inconsistent mappings");
12861 SDValue LHS = N->getOperand(0);
12862 SDValue RHS = N->getOperand(1);
12863
12864 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12865 return SDValue();
12866
12867 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12868 std::swap(LHS, RHS);
12869
12870 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12871 !isa<ConstantSDNode>(RHS.getOperand(1)))
12872 return SDValue();
12873
12874 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12875 SDValue SrcVec = RHS.getOperand(0);
12876 EVT SrcVecVT = SrcVec.getValueType();
12877 assert(SrcVecVT.getVectorElementType() == VT);
12878 if (SrcVecVT.isScalableVector())
12879 return SDValue();
12880
12881 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12882 return SDValue();
12883
12884 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12885 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12886 // root of our reduction tree. TODO: We could extend this to any two
12887 // adjacent aligned constant indices if desired.
12888 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12889 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12890 uint64_t LHSIdx =
12891 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12892 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12893 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12894 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12895 DAG.getVectorIdxConstant(0, DL));
12896 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12897 }
12898 }
12899
12900 // Match (binop (reduce (extract_subvector V, 0),
12901 // (extract_vector_elt V, sizeof(SubVec))))
12902 // into a reduction of one more element from the original vector V.
12903 if (LHS.getOpcode() != ReduceOpc)
12904 return SDValue();
12905
12906 SDValue ReduceVec = LHS.getOperand(0);
12907 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12908 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12909 isNullConstant(ReduceVec.getOperand(1)) &&
12910 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12911 // For illegal types (e.g. 3xi32), most will be combined again into a
12912 // wider (hopefully legal) type. If this is a terminal state, we are
12913 // relying on type legalization here to produce something reasonable
12914 // and this lowering quality could probably be improved. (TODO)
12915 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12916 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12917 DAG.getVectorIdxConstant(0, DL));
12918 auto Flags = ReduceVec->getFlags();
12919 Flags.intersectWith(N->getFlags());
12920 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12921 }
12922
12923 return SDValue();
12924}
12925
12926
12927// Try to fold (<bop> x, (reduction.<bop> vec, start))
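// For illustration (sketch, not from the original source): when the
// reduction's start value is the neutral element (e.g. 0 for add), x can be
// folded in as the new start value:
//   (add X, (extract_elt (reduce vec, start=0, ...), 0))
//     -> (extract_elt (reduce vec, start=X, ...), 0)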
12928static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12929                                    const RISCVSubtarget &Subtarget) {
12930 auto BinOpToRVVReduce = [](unsigned Opc) {
12931 switch (Opc) {
12932 default:
12933      llvm_unreachable("Unhandled binary to transform reduction");
12934    case ISD::ADD:
12935      return RISCVISD::VECREDUCE_ADD_VL;
12936    case ISD::UMAX:
12937      return RISCVISD::VECREDUCE_UMAX_VL;
12938    case ISD::SMAX:
12939      return RISCVISD::VECREDUCE_SMAX_VL;
12940    case ISD::UMIN:
12941      return RISCVISD::VECREDUCE_UMIN_VL;
12942    case ISD::SMIN:
12943      return RISCVISD::VECREDUCE_SMIN_VL;
12944    case ISD::AND:
12945      return RISCVISD::VECREDUCE_AND_VL;
12946    case ISD::OR:
12947      return RISCVISD::VECREDUCE_OR_VL;
12948    case ISD::XOR:
12949      return RISCVISD::VECREDUCE_XOR_VL;
12950    case ISD::FADD:
12951      return RISCVISD::VECREDUCE_FADD_VL;
12952    case ISD::FMAXNUM:
12953      return RISCVISD::VECREDUCE_FMAX_VL;
12954    case ISD::FMINNUM:
12955      return RISCVISD::VECREDUCE_FMIN_VL;
12956    }
12957 };
12958
12959 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12960 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12961 isNullConstant(V.getOperand(1)) &&
12962 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12963 };
12964
12965 unsigned Opc = N->getOpcode();
12966 unsigned ReduceIdx;
12967 if (IsReduction(N->getOperand(0), Opc))
12968 ReduceIdx = 0;
12969 else if (IsReduction(N->getOperand(1), Opc))
12970 ReduceIdx = 1;
12971 else
12972 return SDValue();
12973
12974  // Skip if FADD disallows reassociation but the combiner needs it.
12975 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12976 return SDValue();
12977
12978 SDValue Extract = N->getOperand(ReduceIdx);
12979 SDValue Reduce = Extract.getOperand(0);
12980 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12981 return SDValue();
12982
12983 SDValue ScalarV = Reduce.getOperand(2);
12984 EVT ScalarVT = ScalarV.getValueType();
12985 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12986 ScalarV.getOperand(0)->isUndef() &&
12987 isNullConstant(ScalarV.getOperand(2)))
12988 ScalarV = ScalarV.getOperand(1);
12989
12990 // Make sure that ScalarV is a splat with VL=1.
12991 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12992 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12993 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12994 return SDValue();
12995
12996 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12997 return SDValue();
12998
12999  // Check that the scalar of ScalarV is the neutral element.
13000 // TODO: Deal with value other than neutral element.
13001 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13002 0))
13003 return SDValue();
13004
13005 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13006 // FIXME: We might be able to improve this if operand 0 is undef.
13007 if (!isNonZeroAVL(Reduce.getOperand(5)))
13008 return SDValue();
13009
13010 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13011
13012 SDLoc DL(N);
13013 SDValue NewScalarV =
13014 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13015 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13016
13017 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13018 if (ScalarVT != ScalarV.getValueType())
13019 NewScalarV =
13020 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13021 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13022
13023 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13024 NewScalarV, Reduce.getOperand(3),
13025 Reduce.getOperand(4), Reduce.getOperand(5)};
13026 SDValue NewReduce =
13027 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13028 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13029 Extract.getOperand(1));
13030}
13031
13032// Optimize (add (shl x, c0), (shl y, c1)) ->
13033// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
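// For example (illustrative): (add (shl x, 5), (shl y, 7))
//   -> (shl (sh2add y, x), 5), because x<<5 + y<<7 == ((y<<2) + x) << 5.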
13034static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13035                                  const RISCVSubtarget &Subtarget) {
13036 // Perform this optimization only in the zba extension.
13037 if (!Subtarget.hasStdExtZba())
13038 return SDValue();
13039
13040 // Skip for vector types and larger types.
13041 EVT VT = N->getValueType(0);
13042 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13043 return SDValue();
13044
13045 // The two operand nodes must be SHL and have no other use.
13046 SDValue N0 = N->getOperand(0);
13047 SDValue N1 = N->getOperand(1);
13048 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13049 !N0->hasOneUse() || !N1->hasOneUse())
13050 return SDValue();
13051
13052 // Check c0 and c1.
13053 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13054 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13055 if (!N0C || !N1C)
13056 return SDValue();
13057 int64_t C0 = N0C->getSExtValue();
13058 int64_t C1 = N1C->getSExtValue();
13059 if (C0 <= 0 || C1 <= 0)
13060 return SDValue();
13061
13062 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13063 int64_t Bits = std::min(C0, C1);
13064 int64_t Diff = std::abs(C0 - C1);
13065 if (Diff != 1 && Diff != 2 && Diff != 3)
13066 return SDValue();
13067
13068 // Build nodes.
13069 SDLoc DL(N);
13070 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13071 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13072 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13073 DAG.getConstant(Diff, DL, VT), NS);
13074 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13075}
13076
13077// Combine a constant select operand into its use:
13078//
13079// (and (select cond, -1, c), x)
13080// -> (select cond, x, (and x, c)) [AllOnes=1]
13081// (or (select cond, 0, c), x)
13082// -> (select cond, x, (or x, c)) [AllOnes=0]
13083// (xor (select cond, 0, c), x)
13084// -> (select cond, x, (xor x, c)) [AllOnes=0]
13085// (add (select cond, 0, c), x)
13086// -> (select cond, x, (add x, c)) [AllOnes=0]
13087// (sub x, (select cond, 0, c))
13088// -> (select cond, x, (sub x, c)) [AllOnes=0]
13089static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13090 SelectionDAG &DAG, bool AllOnes,
13091 const RISCVSubtarget &Subtarget) {
13092 EVT VT = N->getValueType(0);
13093
13094 // Skip vectors.
13095 if (VT.isVector())
13096 return SDValue();
13097
13098 if (!Subtarget.hasConditionalMoveFusion()) {
13099 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13100 if ((!Subtarget.hasStdExtZicond() &&
13101 !Subtarget.hasVendorXVentanaCondOps()) ||
13102 N->getOpcode() != ISD::AND)
13103 return SDValue();
13104
13105    // Maybe harmful when the condition code has multiple uses.
13106 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13107 return SDValue();
13108
13109 // Maybe harmful when VT is wider than XLen.
13110 if (VT.getSizeInBits() > Subtarget.getXLen())
13111 return SDValue();
13112 }
13113
13114 if ((Slct.getOpcode() != ISD::SELECT &&
13115 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13116 !Slct.hasOneUse())
13117 return SDValue();
13118
13119 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13120    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13121 };
13122
13123 bool SwapSelectOps;
13124 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13125 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13126 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13127 SDValue NonConstantVal;
13128 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13129 SwapSelectOps = false;
13130 NonConstantVal = FalseVal;
13131 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13132 SwapSelectOps = true;
13133 NonConstantVal = TrueVal;
13134 } else
13135 return SDValue();
13136
13137  // Slct is now known to be the desired identity constant when CC is true.
13138 TrueVal = OtherOp;
13139 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13140 // Unless SwapSelectOps says the condition should be false.
13141 if (SwapSelectOps)
13142 std::swap(TrueVal, FalseVal);
13143
13144 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13145 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13146 {Slct.getOperand(0), Slct.getOperand(1),
13147 Slct.getOperand(2), TrueVal, FalseVal});
13148
13149 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13150 {Slct.getOperand(0), TrueVal, FalseVal});
13151}
13152
13153// Attempt combineSelectAndUse on each operand of a commutative operator N.
13154static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13155 bool AllOnes,
13156 const RISCVSubtarget &Subtarget) {
13157 SDValue N0 = N->getOperand(0);
13158 SDValue N1 = N->getOperand(1);
13159 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13160 return Result;
13161 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13162 return Result;
13163 return SDValue();
13164}
13165
13166// Transform (add (mul x, c0), c1) ->
13167// (add (mul (add x, c1/c0), c0), c1%c0).
13168// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13169// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13170// to an infinite loop in DAGCombine if transformed.
13171// Or transform (add (mul x, c0), c1) ->
13172// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13173// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13174// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13175// lead to an infinite loop in DAGCombine if transformed.
13176// Or transform (add (mul x, c0), c1) ->
13177// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13178// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13179// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13180// lead to an infinite loop in DAGCombine if transformed.
13181// Or transform (add (mul x, c0), c1) ->
13182// (mul (add x, c1/c0), c0).
13183// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
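// Worked example (illustrative): (add (mul x, 100), 4099), where 4099 is not
// simm12, becomes (add (mul (add x, 40), 100), 99), since
// 100 * (x + 40) + 99 == 100 * x + 4099 and both 40 and 99 are simm12.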
13184static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13185                                     const RISCVSubtarget &Subtarget) {
13186 // Skip for vector types and larger types.
13187 EVT VT = N->getValueType(0);
13188 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13189 return SDValue();
13190 // The first operand node must be a MUL and has no other use.
13191 SDValue N0 = N->getOperand(0);
13192 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13193 return SDValue();
13194 // Check if c0 and c1 match above conditions.
13195 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13196 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13197 if (!N0C || !N1C)
13198 return SDValue();
13199 // If N0C has multiple uses it's possible one of the cases in
13200 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13201 // in an infinite loop.
13202 if (!N0C->hasOneUse())
13203 return SDValue();
13204 int64_t C0 = N0C->getSExtValue();
13205 int64_t C1 = N1C->getSExtValue();
13206 int64_t CA, CB;
13207 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13208 return SDValue();
13209 // Search for proper CA (non-zero) and CB that both are simm12.
13210 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13211 !isInt<12>(C0 * (C1 / C0))) {
13212 CA = C1 / C0;
13213 CB = C1 % C0;
13214 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13215 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13216 CA = C1 / C0 + 1;
13217 CB = C1 % C0 - C0;
13218 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13219 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13220 CA = C1 / C0 - 1;
13221 CB = C1 % C0 + C0;
13222 } else
13223 return SDValue();
13224 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13225 SDLoc DL(N);
13226 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13227 DAG.getConstant(CA, DL, VT));
13228 SDValue New1 =
13229 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
13230 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
13231}
13232
13233// add (zext, zext) -> zext (add (zext, zext))
13234// sub (zext, zext) -> sext (sub (zext, zext))
13235// mul (zext, zext) -> zext (mul (zext, zext))
13236// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13237// udiv (zext, zext) -> zext (udiv (zext, zext))
13238// srem (zext, zext) -> zext (srem (zext, zext))
13239// urem (zext, zext) -> zext (urem (zext, zext))
13240//
13241// where the sum of the extend widths matches, and the range of the bin op
13242// fits inside the width of the narrower bin op. (For profitability on rvv, we
13243// use a power of two for both inner and outer extend.)
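// For example (illustrative):
//   (add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32))
//     -> (zext (add (zext a to v4i16), (zext b to v4i16)) to v4i32)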
13244static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13245
13246 EVT VT = N->getValueType(0);
13247 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13248 return SDValue();
13249
13250 SDValue N0 = N->getOperand(0);
13251 SDValue N1 = N->getOperand(1);
13252  if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13253 return SDValue();
13254 if (!N0.hasOneUse() || !N1.hasOneUse())
13255 return SDValue();
13256
13257 SDValue Src0 = N0.getOperand(0);
13258 SDValue Src1 = N1.getOperand(0);
13259 EVT SrcVT = Src0.getValueType();
13260 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13261 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13262 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13263 return SDValue();
13264
13265 LLVMContext &C = *DAG.getContext();
13266  EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13267 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13268
13269 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13270 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13271
13272 // Src0 and Src1 are zero extended, so they're always positive if signed.
13273 //
13274 // sub can produce a negative from two positive operands, so it needs sign
13275 // extended. Other nodes produce a positive from two positive operands, so
13276 // zero extend instead.
13277 unsigned OuterExtend =
13278 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13279
13280 return DAG.getNode(
13281 OuterExtend, SDLoc(N), VT,
13282 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13283}
13284
13285// Try to turn (add (xor bool, 1) -1) into (neg bool).
13286static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13287 SDValue N0 = N->getOperand(0);
13288 SDValue N1 = N->getOperand(1);
13289 EVT VT = N->getValueType(0);
13290 SDLoc DL(N);
13291
13292 // RHS should be -1.
13293 if (!isAllOnesConstant(N1))
13294 return SDValue();
13295
13296 // Look for (xor X, 1).
13297 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13298 return SDValue();
13299
13300 // First xor input should be 0 or 1.
13301  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13302 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13303 return SDValue();
13304
13305 // Emit a negate of the setcc.
13306 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13307 N0.getOperand(0));
13308}
13309
13310static SDValue performADDCombine(SDNode *N,
13311                                 TargetLowering::DAGCombinerInfo &DCI,
13312 const RISCVSubtarget &Subtarget) {
13313 SelectionDAG &DAG = DCI.DAG;
13314 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13315 return V;
13316 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13317 return V;
13318 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13319 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13320 return V;
13321 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13322 return V;
13323 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13324 return V;
13325 if (SDValue V = combineBinOpOfZExt(N, DAG))
13326 return V;
13327
13328 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13329 // (select lhs, rhs, cc, x, (add x, y))
13330 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13331}
13332
13333// Try to turn a sub boolean RHS and constant LHS into an addi.
13334static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13335 SDValue N0 = N->getOperand(0);
13336 SDValue N1 = N->getOperand(1);
13337 EVT VT = N->getValueType(0);
13338 SDLoc DL(N);
13339
13340 // Require a constant LHS.
13341 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13342 if (!N0C)
13343 return SDValue();
13344
13345 // All our optimizations involve subtracting 1 from the immediate and forming
13346 // an ADDI. Make sure the new immediate is valid for an ADDI.
13347 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13348 if (!ImmValMinus1.isSignedIntN(12))
13349 return SDValue();
13350
13351 SDValue NewLHS;
13352 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13353 // (sub constant, (setcc x, y, eq/neq)) ->
13354 // (add (setcc x, y, neq/eq), constant - 1)
13355 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13356 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13357 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13358 return SDValue();
13359 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13360 NewLHS =
13361 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13362 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13363 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13364 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13365 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13366 NewLHS = N1.getOperand(0);
13367 } else
13368 return SDValue();
13369
13370 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13371 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13372}
13373
13374// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13375// non-zero. Replace with orc.b.
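// For example (illustrative): X = 0x00010100 gives (X << 8) - X = 0x00ffff00,
// which is orc.b(X), since exactly the two middle bytes of X are non-zero.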
13376static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13377                                     const RISCVSubtarget &Subtarget) {
13378 if (!Subtarget.hasStdExtZbb())
13379 return SDValue();
13380
13381 EVT VT = N->getValueType(0);
13382
13383 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13384 return SDValue();
13385
13386 SDValue N0 = N->getOperand(0);
13387 SDValue N1 = N->getOperand(1);
13388
13389 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13390 return SDValue();
13391
13392 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13393 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13394 return SDValue();
13395
13396 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13397 if (!DAG.MaskedValueIsZero(N1, Mask))
13398 return SDValue();
13399
13400 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13401}
13402
13403static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13404 const RISCVSubtarget &Subtarget) {
13405 if (SDValue V = combineSubOfBoolean(N, DAG))
13406 return V;
13407
13408 EVT VT = N->getValueType(0);
13409 SDValue N0 = N->getOperand(0);
13410 SDValue N1 = N->getOperand(1);
13411 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13412 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13413 isNullConstant(N1.getOperand(1))) {
13414 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13415 if (CCVal == ISD::SETLT) {
13416 SDLoc DL(N);
13417 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13418 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13419 DAG.getConstant(ShAmt, DL, VT));
13420 }
13421 }
13422
13423 if (SDValue V = combineBinOpOfZExt(N, DAG))
13424 return V;
13425 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13426 return V;
13427
13428 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13429 // (select lhs, rhs, cc, x, (sub x, y))
13430 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13431}
13432
13433// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13434// Legalizing setcc can introduce xors like this. Doing this transform reduces
13435// the number of xors and may allow the xor to fold into a branch condition.
13436static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13437 SDValue N0 = N->getOperand(0);
13438 SDValue N1 = N->getOperand(1);
13439 bool IsAnd = N->getOpcode() == ISD::AND;
13440
13441 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13442 return SDValue();
13443
13444 if (!N0.hasOneUse() || !N1.hasOneUse())
13445 return SDValue();
13446
13447 SDValue N01 = N0.getOperand(1);
13448 SDValue N11 = N1.getOperand(1);
13449
13450 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13451 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13452 // operation is And, allow one of the Xors to use -1.
13453 if (isOneConstant(N01)) {
13454 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13455 return SDValue();
13456 } else if (isOneConstant(N11)) {
13457 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13458 if (!(IsAnd && isAllOnesConstant(N01)))
13459 return SDValue();
13460 } else
13461 return SDValue();
13462
13463 EVT VT = N->getValueType(0);
13464
13465 SDValue N00 = N0.getOperand(0);
13466 SDValue N10 = N1.getOperand(0);
13467
13468 // The LHS of the xors needs to be 0/1.
13469  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13470 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13471 return SDValue();
13472
13473 // Invert the opcode and insert a new xor.
13474 SDLoc DL(N);
13475 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13476 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13477 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13478}
13479
13480// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13481// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13482// value to an unsigned value. This will be lowered to vmax and series of
13483// vnclipu instructions later. This can be extended to other truncated types
13484// other than i8 by replacing 256 and 255 with the equivalent constants for the
13485// type.
13486static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
13487 EVT VT = N->getValueType(0);
13488 SDValue N0 = N->getOperand(0);
13489 EVT SrcVT = N0.getValueType();
13490
13491 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13492 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13493 return SDValue();
13494
13495 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13496 return SDValue();
13497
13498 SDValue Cond = N0.getOperand(0);
13499 SDValue True = N0.getOperand(1);
13500 SDValue False = N0.getOperand(2);
13501
13502 if (Cond.getOpcode() != ISD::SETCC)
13503 return SDValue();
13504
13505 // FIXME: Support the version of this pattern with the select operands
13506 // swapped.
13507 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13508 if (CCVal != ISD::SETULT)
13509 return SDValue();
13510
13511 SDValue CondLHS = Cond.getOperand(0);
13512 SDValue CondRHS = Cond.getOperand(1);
13513
13514 if (CondLHS != True)
13515 return SDValue();
13516
13517 unsigned ScalarBits = VT.getScalarSizeInBits();
13518
13519 // FIXME: Support other constants.
13520 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13521 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13522 return SDValue();
13523
13524 if (False.getOpcode() != ISD::SIGN_EXTEND)
13525 return SDValue();
13526
13527 False = False.getOperand(0);
13528
13529 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13530 return SDValue();
13531
13532 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13533 if (!FalseRHSC || !FalseRHSC->isZero())
13534 return SDValue();
13535
13536 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13537 if (CCVal2 != ISD::SETGT)
13538 return SDValue();
13539
13540 // Emit the signed to unsigned saturation pattern.
13541 SDLoc DL(N);
13542 SDValue Max =
13543 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13544 SDValue Min =
13545 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13546 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13547 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13548}
13549
13550static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13551 const RISCVSubtarget &Subtarget) {
13552 SDValue N0 = N->getOperand(0);
13553 EVT VT = N->getValueType(0);
13554
13555 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13556 // extending X. This is safe since we only need the LSB after the shift and
13557 // shift amounts larger than 31 would produce poison. If we wait until
13558 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13559 // to use a BEXT instruction.
13560 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13561 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13562 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13563 SDLoc DL(N0);
13564 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13565 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13566 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13567 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13568 }
13569
13570 return combineTruncSelectToSMaxUSat(N, DAG);
13571}
13572
13573// Combines two comparison operations and a logic operation into one selection
13574// operation (min, max) and logic operation. Returns the newly constructed node
13575// if the conditions for the optimization are satisfied.
13576static SDValue performANDCombine(SDNode *N,
13577                                 TargetLowering::DAGCombinerInfo &DCI,
13578                                 const RISCVSubtarget &Subtarget) {
13579 SelectionDAG &DAG = DCI.DAG;
13580
13581 SDValue N0 = N->getOperand(0);
13582 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13583 // extending X. This is safe since we only need the LSB after the shift and
13584 // shift amounts larger than 31 would produce poison. If we wait until
13585 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13586 // to use a BEXT instruction.
13587 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13588 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13589 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13590 N0.hasOneUse()) {
13591 SDLoc DL(N);
13592 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13593 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13594 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13595 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13596 DAG.getConstant(1, DL, MVT::i64));
13597 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13598 }
13599
13600 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13601 return V;
13602 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13603 return V;
13604
13605 if (DCI.isAfterLegalizeDAG())
13606 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13607 return V;
13608
13609 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13610 // (select lhs, rhs, cc, x, (and x, y))
13611 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13612}
13613
13614// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13615// FIXME: Generalize to other binary operators with same operand.
13616static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13617 SelectionDAG &DAG) {
13618 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13619
13620 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13621      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13622 !N0.hasOneUse() || !N1.hasOneUse())
13623 return SDValue();
13624
13625 // Should have the same condition.
13626 SDValue Cond = N0.getOperand(1);
13627 if (Cond != N1.getOperand(1))
13628 return SDValue();
13629
13630 SDValue TrueV = N0.getOperand(0);
13631 SDValue FalseV = N1.getOperand(0);
13632
13633 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13634 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13635 !isOneConstant(TrueV.getOperand(1)) ||
13636 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13637 return SDValue();
13638
13639 EVT VT = N->getValueType(0);
13640 SDLoc DL(N);
13641
13642 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13643 Cond);
13644 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13645 Cond);
13646 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13647 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13648}
13649
13650static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13651 const RISCVSubtarget &Subtarget) {
13652 SelectionDAG &DAG = DCI.DAG;
13653
13654 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13655 return V;
13656 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13657 return V;
13658
13659 if (DCI.isAfterLegalizeDAG())
13660 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13661 return V;
13662
13663 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13664 // We may be able to pull a common operation out of the true and false value.
13665 SDValue N0 = N->getOperand(0);
13666 SDValue N1 = N->getOperand(1);
13667 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13668 return V;
13669 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13670 return V;
13671
13672 // fold (or (select cond, 0, y), x) ->
13673 // (select cond, x, (or x, y))
13674 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13675}
13676
13677static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13678 const RISCVSubtarget &Subtarget) {
13679 SDValue N0 = N->getOperand(0);
13680 SDValue N1 = N->getOperand(1);
13681
13682 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13683// (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13684// RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13685 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13686 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13687 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13688 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13689 SDLoc DL(N);
13690 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13691 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13692 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13693 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13694 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13695 }
13696
13697 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13698 // NOTE: Assumes ROL being legal means ROLW is legal.
13699 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13700 if (N0.getOpcode() == RISCVISD::SLLW &&
13701      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13702 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13703 SDLoc DL(N);
13704 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13705 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13706 }
13707
13708 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13709 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13710 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13711 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13712 if (ConstN00 && CC == ISD::SETLT) {
13713 EVT VT = N0.getValueType();
13714 SDLoc DL(N0);
13715 const APInt &Imm = ConstN00->getAPIntValue();
13716 if ((Imm + 1).isSignedIntN(12))
13717 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13718 DAG.getConstant(Imm + 1, DL, VT), CC);
13719 }
13720 }
13721
13722 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13723 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13724 // would have been promoted to i32, but the setcc would have i64 result.
13725 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13726 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13727 SDValue N00 = N0.getOperand(0);
13728 SDLoc DL(N);
13729 SDValue LHS = N00.getOperand(0);
13730 SDValue RHS = N00.getOperand(1);
13731 SDValue CC = N00.getOperand(2);
13732 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13733 LHS.getValueType());
13734 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13735 LHS, RHS, NotCC);
13736 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13737 }
13738
13739 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13740 return V;
13741 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13742 return V;
13743
13744 // fold (xor (select cond, 0, y), x) ->
13745 // (select cond, x, (xor x, y))
13746 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13747}
13748
13749// Try to expand a scalar multiply to a faster sequence.
13750static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13751                         TargetLowering::DAGCombinerInfo &DCI,
13752 const RISCVSubtarget &Subtarget) {
13753
13754 EVT VT = N->getValueType(0);
13755
13756 // LI + MUL is usually smaller than the alternative sequence.
13757  if (DAG.getMachineFunction().getFunction().hasMinSize())
13758 return SDValue();
13759
13760 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13761 return SDValue();
13762
13763 if (VT != Subtarget.getXLenVT())
13764 return SDValue();
13765
13766 const bool HasShlAdd =
13767 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
13768
13769 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13770 if (!CNode)
13771 return SDValue();
13772 uint64_t MulAmt = CNode->getZExtValue();
13773
13774  // WARNING: The code below is knowingly incorrect with regard to undef semantics.
13775 // We're adding additional uses of X here, and in principle, we should be freezing
13776 // X before doing so. However, adding freeze here causes real regressions, and no
13777 // other target properly freezes X in these cases either.
13778 SDValue X = N->getOperand(0);
13779
13780 if (HasShlAdd) {
13781 for (uint64_t Divisor : {3, 5, 9}) {
13782 if (MulAmt % Divisor != 0)
13783 continue;
13784 uint64_t MulAmt2 = MulAmt / Divisor;
13785 // 3/5/9 * 2^N -> shl (shXadd X, X), N
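      // e.g. (illustrative) MulAmt = 20 = 5 * 4 -> (shl (sh2add X, X), 2).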
13786 if (isPowerOf2_64(MulAmt2)) {
13787 SDLoc DL(N);
13788 SDValue X = N->getOperand(0);
13789 // Put the shift first if we can fold a zext into the
13790 // shift forming a slli.uw.
13791 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13792 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13793 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13794 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13795 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13796 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
13797 Shl);
13798 }
13799        // Otherwise, put the shl second so that it can fold with following
13800 // instructions (e.g. sext or add).
13801 SDValue Mul359 =
13802 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13803 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13804 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13805 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13806 }
13807
13808 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13809 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13810 SDLoc DL(N);
13811 SDValue Mul359 =
13812 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13813 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13814 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13815 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13816 Mul359);
13817 }
13818 }
13819
13820    // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13821    // shXadd. First check whether this is a sum of two powers of 2, since that
13822    // is easy. Then count the trailing zeros to find the smaller power of 2.
13823 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13824 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13825 if (ScaleShift >= 1 && ScaleShift < 4) {
13826 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13827 SDLoc DL(N);
13828 SDValue Shift1 =
13829 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13830 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13831 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13832 }
13833 }
13834
13835 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13836    // This is the two-instruction form; there are also three-instruction
13837    // variants we could implement, e.g.
13838 // (2^(1,2,3) * 3,5,9 + 1) << C2
13839 // 2^(C1>3) * 3,5,9 +/- 1
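    // e.g. (illustrative) MulAmt = 11 = 2 * 5 + 1:
    //   (sh1add (sh2add X, X), X) == ((5 * X) << 1) + X == 11 * X.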
13840 for (uint64_t Divisor : {3, 5, 9}) {
13841 uint64_t C = MulAmt - 1;
13842 if (C <= Divisor)
13843 continue;
13844 unsigned TZ = llvm::countr_zero(C);
13845 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13846 SDLoc DL(N);
13847 SDValue Mul359 =
13848 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13849 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13850 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13851 DAG.getConstant(TZ, DL, VT), X);
13852 }
13853 }
13854
13855 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13856 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13857 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13858 if (ScaleShift >= 1 && ScaleShift < 4) {
13859 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13860 SDLoc DL(N);
13861 SDValue Shift1 =
13862 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13863 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13864 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13865 DAG.getConstant(ScaleShift, DL, VT), X));
13866 }
13867 }
13868
13869 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13870 for (uint64_t Offset : {3, 5, 9}) {
13871 if (isPowerOf2_64(MulAmt + Offset)) {
13872 SDLoc DL(N);
13873 SDValue Shift1 =
13874 DAG.getNode(ISD::SHL, DL, VT, X,
13875 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13876 SDValue Mul359 =
13877 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13878 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
13879 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13880 }
13881 }
13882 }
13883
13884 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
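  // e.g. (illustrative) MulAmt = 24 = 32 - 8 -> (sub (shl X, 5), (shl X, 3)).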
13885 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
13886 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
13887 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
13888 SDLoc DL(N);
13889 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13890 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
13891 SDValue Shift2 =
13892 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13893 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
13894 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
13895 }
13896
13897 return SDValue();
13898}
13899
13900// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
13901// (bitcast (sra (v2Xi16 (bitcast X)), 15))
13902// Same for other equivalent types with other equivalent constants.
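// For example (illustrative), for a v4i32 element holding 0x80000001:
//   ((0x80000001 >> 15) & 0x10001) * 0xffff == 0xffff0000, which is what an
//   arithmetic shift right by 15 of each 16-bit half produces.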
13903static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
13904  EVT VT = N->getValueType(0);
13905 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13906
13907 // Do this for legal vectors unless they are i1 or i8 vectors.
13908 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
13909 return SDValue();
13910
13911 if (N->getOperand(0).getOpcode() != ISD::AND ||
13912 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
13913 return SDValue();
13914
13915 SDValue And = N->getOperand(0);
13916 SDValue Srl = And.getOperand(0);
13917
13918 APInt V1, V2, V3;
13919 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
13920 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
13921      !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
13922 return SDValue();
13923
13924 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
13925 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
13926 V3 != (HalfSize - 1))
13927 return SDValue();
13928
13929 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
13930 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
13931 VT.getVectorElementCount() * 2);
13932 SDLoc DL(N);
13933 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
13934 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
13935 DAG.getConstant(HalfSize - 1, DL, HalfVT));
13936 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
13937}
13938
13939static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13940                                 TargetLowering::DAGCombinerInfo &DCI,
13941 const RISCVSubtarget &Subtarget) {
13942 EVT VT = N->getValueType(0);
13943 if (!VT.isVector())
13944 return expandMul(N, DAG, DCI, Subtarget);
13945
13946 SDLoc DL(N);
13947 SDValue N0 = N->getOperand(0);
13948 SDValue N1 = N->getOperand(1);
13949 SDValue MulOper;
13950 unsigned AddSubOpc;
13951
13952 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13953 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13954 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13955 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13956 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13957 AddSubOpc = V->getOpcode();
13958 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13959 SDValue Opnd = V->getOperand(1);
13960 MulOper = V->getOperand(0);
13961 if (AddSubOpc == ISD::SUB)
13962 std::swap(Opnd, MulOper);
13963 if (isOneOrOneSplat(Opnd))
13964 return true;
13965 }
13966 return false;
13967 };
13968
13969 if (IsAddSubWith1(N0)) {
13970 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13971 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13972 }
13973
13974 if (IsAddSubWith1(N1)) {
13975 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13976 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13977 }
13978
13979 if (SDValue V = combineBinOpOfZExt(N, DAG))
13980 return V;
13981
13982  if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
13983 return V;
13984
13985 return SDValue();
13986}
13987
13988/// According to the property that indexed load/store instructions zero-extend
13989/// their indices, try to narrow the type of the index operand.
13990static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13991 if (isIndexTypeSigned(IndexType))
13992 return false;
13993
13994 if (!N->hasOneUse())
13995 return false;
13996
13997 EVT VT = N.getValueType();
13998 SDLoc DL(N);
13999
14000 // In general, what we're doing here is seeing if we can sink a truncate to
14001 // a smaller element type into the expression tree building our index.
14002 // TODO: We can generalize this and handle a bunch more cases if useful.
14003
14004 // Narrow a buildvector to the narrowest element type. This requires less
14005 // work and less register pressure at high LMUL, and creates smaller constants
14006 // which may be cheaper to materialize.
14007 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14008 KnownBits Known = DAG.computeKnownBits(N);
14009 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14010 LLVMContext &C = *DAG.getContext();
14011 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14012 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14013 N = DAG.getNode(ISD::TRUNCATE, DL,
14014 VT.changeVectorElementType(ResultVT), N);
14015 return true;
14016 }
14017 }
14018
14019 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
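 // Illustrative sketch (not from the upstream source): for an index
 //   (shl (zext nxv2i8 %x to nxv2i64), splat(2))
 // only 8 + 2 = 10 bits are ever set, so the index is rebuilt as
 //   (shl (zext %x to nxv2i16), splat(2))
 // and the memory op can use a 16-bit rather than a 64-bit index type.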
14020 if (N.getOpcode() != ISD::SHL)
14021 return false;
14022
14023 SDValue N0 = N.getOperand(0);
14024 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14025 N0.getOpcode() != RISCVISD::VZEXT_VL)
14026 return false;
14027 if (!N0->hasOneUse())
14028 return false;
14029
14030 APInt ShAmt;
14031 SDValue N1 = N.getOperand(1);
14032 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14033 return false;
14034
14035 SDValue Src = N0.getOperand(0);
14036 EVT SrcVT = Src.getValueType();
14037 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14038 unsigned ShAmtV = ShAmt.getZExtValue();
14039 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14040 NewElen = std::max(NewElen, 8U);
14041
14042 // Skip if NewElen is not narrower than the original extended type.
14043 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14044 return false;
14045
14046 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14047 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14048
14049 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14050 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14051 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14052 return true;
14053}
14054
14055// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14056// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14057// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14058// can become a sext.w instead of a shift pair.
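// Illustrative example (not from the upstream source): with C1 = 0x80000000,
// (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the sext_inreg selects to a
// single sext.w and the sign-extended constant is a single lui on RV64.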
14059static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14060 const RISCVSubtarget &Subtarget) {
14061 SDValue N0 = N->getOperand(0);
14062 SDValue N1 = N->getOperand(1);
14063 EVT VT = N->getValueType(0);
14064 EVT OpVT = N0.getValueType();
14065
14066 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14067 return SDValue();
14068
14069 // RHS needs to be a constant.
14070 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14071 if (!N1C)
14072 return SDValue();
14073
14074 // LHS needs to be (and X, 0xffffffff).
14075 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14076 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14077 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14078 return SDValue();
14079
14080 // Looking for an equality compare.
14081 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14082 if (!isIntEqualitySetCC(Cond))
14083 return SDValue();
14084
14085 // Don't do this if the sign bit is provably zero, it will be turned back into
14086 // an AND.
14087 APInt SignMask = APInt::getOneBitSet(64, 31);
14088 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14089 return SDValue();
14090
14091 const APInt &C1 = N1C->getAPIntValue();
14092
14093 SDLoc dl(N);
14094 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14095 // to be equal.
14096 if (C1.getActiveBits() > 32)
14097 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14098
14099 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14100 N0.getOperand(0), DAG.getValueType(MVT::i32));
14101 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14102 dl, OpVT), Cond);
14103}
14104
14105static SDValue
14106performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14107 const RISCVSubtarget &Subtarget) {
14108 SDValue Src = N->getOperand(0);
14109 EVT VT = N->getValueType(0);
14110
14111 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14112 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14113 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
14114 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14115 Src.getOperand(0));
14116
14117 return SDValue();
14118}
14119
14120namespace {
14121// Forward declaration of the structure holding the necessary information to
14122// apply a combine.
14123struct CombineResult;
14124
14125enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14126/// Helper class for folding sign/zero extensions.
14127/// In particular, this class is used for the following combines:
14128/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14129/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14130/// mul | mul_vl -> vwmul(u) | vwmul_su
14131/// shl | shl_vl -> vwsll
14132/// fadd -> vfwadd | vfwadd_w
14133/// fsub -> vfwsub | vfwsub_w
14134/// fmul -> vfwmul
14135/// An object of this class represents an operand of the operation we want to
14136/// combine.
14137/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14138/// NodeExtensionHelper for `a` and one for `b`.
14139///
14140/// This class abstracts away how the extension is materialized and
14141/// how its number of users affects the combines.
14142///
14143/// In particular:
14144/// - VWADD_W is conceptually == add(op0, sext(op1))
14145/// - VWADDU_W == add(op0, zext(op1))
14146/// - VWSUB_W == sub(op0, sext(op1))
14147/// - VWSUBU_W == sub(op0, zext(op1))
14148/// - VFWADD_W == fadd(op0, fpext(op1))
14149/// - VFWSUB_W == fsub(op0, fpext(op1))
14150/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14151/// zext|sext(smaller_value).
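///
/// Illustrative example (not from the upstream source): for
///   mul_vl (vsext_vl a), (vzext_vl b)
/// the helper built for `a` reports SupportsSExt and the one for `b` reports
/// SupportsZExt, so canFoldToVW_SU can rebuild the root as vwmulsu_vl a, b.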
14152struct NodeExtensionHelper {
14153 /// Records if this operand is like being zero extended.
14154 bool SupportsZExt;
14155 /// Records if this operand is like being sign extended.
14156 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14157 /// instance, a splat constant (e.g., 3), would support being both sign and
14158 /// zero extended.
14159 bool SupportsSExt;
14160 /// Records if this operand is like being floating-point extended.
14161 bool SupportsFPExt;
14162 /// This boolean captures whether we care if this operand would still be
14163 /// around after the folding happens.
14164 bool EnforceOneUse;
14165 /// Original value that this NodeExtensionHelper represents.
14166 SDValue OrigOperand;
14167
14168 /// Get the value feeding the extension or the value itself.
14169 /// E.g., for zext(a), this would return a.
14170 SDValue getSource() const {
14171 switch (OrigOperand.getOpcode()) {
14172 case ISD::ZERO_EXTEND:
14173 case ISD::SIGN_EXTEND:
14174 case RISCVISD::VSEXT_VL:
14175 case RISCVISD::VZEXT_VL:
14176 case RISCVISD::FP_EXTEND_VL:
14177 return OrigOperand.getOperand(0);
14178 default:
14179 return OrigOperand;
14180 }
14181 }
14182
14183 /// Check if this instance represents a splat.
14184 bool isSplat() const {
14185 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14186 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14187 }
14188
14189 /// Get the extended opcode.
14190 unsigned getExtOpc(ExtKind SupportsExt) const {
14191 switch (SupportsExt) {
14192 case ExtKind::SExt:
14193 return RISCVISD::VSEXT_VL;
14194 case ExtKind::ZExt:
14195 return RISCVISD::VZEXT_VL;
14196 case ExtKind::FPExt:
14197 return RISCVISD::FP_EXTEND_VL;
14198 }
14199 llvm_unreachable("Unknown ExtKind enum");
14200 }
14201
14202 /// Get or create a value that can feed \p Root with the given extension \p
14203 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
14204 /// operand. \see ::getSource().
14205 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14206 const RISCVSubtarget &Subtarget,
14207 std::optional<ExtKind> SupportsExt) const {
14208 if (!SupportsExt.has_value())
14209 return OrigOperand;
14210
14211 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14212
14213 SDValue Source = getSource();
14214 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
14215 if (Source.getValueType() == NarrowVT)
14216 return Source;
14217
14218 unsigned ExtOpc = getExtOpc(*SupportsExt);
14219
14220 // If we need an extension, we should be changing the type.
14221 SDLoc DL(OrigOperand);
14222 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
14223 switch (OrigOperand.getOpcode()) {
14224 case ISD::ZERO_EXTEND:
14225 case ISD::SIGN_EXTEND:
14226 case RISCVISD::VSEXT_VL:
14227 case RISCVISD::VZEXT_VL:
14228 case RISCVISD::FP_EXTEND_VL:
14229 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
14230 case ISD::SPLAT_VECTOR:
14231 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14232 case RISCVISD::VMV_V_X_VL:
14233 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14234 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14235 case RISCVISD::VFMV_V_F_VL:
14236 Source = Source.getOperand(1);
14237 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
14238 Source = Source.getOperand(0);
14239 assert(Source.getValueType() == NarrowVT.getVectorElementType());
14240 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
14241 DAG.getUNDEF(NarrowVT), Source, VL);
14242 default:
14243 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
14244 // and that operand should already have the right NarrowVT so no
14245 // extension should be required at this point.
14246 llvm_unreachable("Unsupported opcode");
14247 }
14248 }
14249
14250 /// Helper function to get the narrow type for \p Root.
14251 /// The narrow type is the type of \p Root where we divided the size of each
14252 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14253 /// \pre Both the narrow type and the original type should be legal.
14254 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14255 MVT VT = Root->getSimpleValueType(0);
14256
14257 // Determine the narrow size.
14258 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14259
14260 MVT EltVT = SupportsExt == ExtKind::FPExt
14261 ? MVT::getFloatingPointVT(NarrowSize)
14262 : MVT::getIntegerVT(NarrowSize);
14263
14264 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14265 "Trying to extend something we can't represent");
14266 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14267 return NarrowVT;
14268 }
14269
14270 /// Get the opcode to materialize:
14271 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14272 static unsigned getSExtOpcode(unsigned Opcode) {
14273 switch (Opcode) {
14274 case ISD::ADD:
14275 case RISCVISD::ADD_VL:
14276 case RISCVISD::VWADD_W_VL:
14277 case RISCVISD::VWADDU_W_VL:
14278 case ISD::OR:
14279 return RISCVISD::VWADD_VL;
14280 case ISD::SUB:
14281 case RISCVISD::SUB_VL:
14282 case RISCVISD::VWSUB_W_VL:
14283 case RISCVISD::VWSUBU_W_VL:
14284 return RISCVISD::VWSUB_VL;
14285 case ISD::MUL:
14286 case RISCVISD::MUL_VL:
14287 return RISCVISD::VWMUL_VL;
14288 default:
14289 llvm_unreachable("Unexpected opcode");
14290 }
14291 }
14292
14293 /// Get the opcode to materialize:
14294 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14295 static unsigned getZExtOpcode(unsigned Opcode) {
14296 switch (Opcode) {
14297 case ISD::ADD:
14298 case RISCVISD::ADD_VL:
14299 case RISCVISD::VWADD_W_VL:
14300 case RISCVISD::VWADDU_W_VL:
14301 case ISD::OR:
14302 return RISCVISD::VWADDU_VL;
14303 case ISD::SUB:
14304 case RISCVISD::SUB_VL:
14305 case RISCVISD::VWSUB_W_VL:
14306 case RISCVISD::VWSUBU_W_VL:
14307 return RISCVISD::VWSUBU_VL;
14308 case ISD::MUL:
14309 case RISCVISD::MUL_VL:
14310 return RISCVISD::VWMULU_VL;
14311 case ISD::SHL:
14312 case RISCVISD::SHL_VL:
14313 return RISCVISD::VWSLL_VL;
14314 default:
14315 llvm_unreachable("Unexpected opcode");
14316 }
14317 }
14318
14319 /// Get the opcode to materialize:
14320 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14321 static unsigned getFPExtOpcode(unsigned Opcode) {
14322 switch (Opcode) {
14323 case RISCVISD::FADD_VL:
14324 case RISCVISD::VFWADD_W_VL:
14325 return RISCVISD::VFWADD_VL;
14326 case RISCVISD::FSUB_VL:
14327 case RISCVISD::VFWSUB_W_VL:
14328 return RISCVISD::VFWSUB_VL;
14329 case RISCVISD::FMUL_VL:
14330 return RISCVISD::VFWMUL_VL;
14331 default:
14332 llvm_unreachable("Unexpected opcode");
14333 }
14334 }
14335
14336 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14337 /// newOpcode(a, b).
14338 static unsigned getSUOpcode(unsigned Opcode) {
14339 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14340 "SU is only supported for MUL");
14341 return RISCVISD::VWMULSU_VL;
14342 }
14343
14344 /// Get the opcode to materialize
14345 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14346 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14347 switch (Opcode) {
14348 case ISD::ADD:
14349 case RISCVISD::ADD_VL:
14350 case ISD::OR:
14351 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14352 : RISCVISD::VWADDU_W_VL;
14353 case ISD::SUB:
14354 case RISCVISD::SUB_VL:
14355 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14356 : RISCVISD::VWSUBU_W_VL;
14357 case RISCVISD::FADD_VL:
14358 return RISCVISD::VFWADD_W_VL;
14359 case RISCVISD::FSUB_VL:
14360 return RISCVISD::VFWSUB_W_VL;
14361 default:
14362 llvm_unreachable("Unexpected opcode");
14363 }
14364 }
14365
14366 using CombineToTry = std::function<std::optional<CombineResult>(
14367 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14368 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14369 const RISCVSubtarget &)>;
14370
14371 /// Check if this node needs to be fully folded or extended for all users.
14372 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14373
14374 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14375 const RISCVSubtarget &Subtarget) {
14376 unsigned Opc = OrigOperand.getOpcode();
14377 MVT VT = OrigOperand.getSimpleValueType();
14378
14379 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14380 "Unexpected Opcode");
14381
14382 // The passthru must be undef for tail agnostic.
14383 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14384 return;
14385
14386 // Get the scalar value.
14387 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14388 : OrigOperand.getOperand(1);
14389
14390 // See if we have enough sign bits or zero bits in the scalar to use a
14391 // widening opcode by splatting to smaller element size.
14392 unsigned EltBits = VT.getScalarSizeInBits();
14393 unsigned ScalarBits = Op.getValueSizeInBits();
14394 // If we're not getting all bits from the element, we need special handling.
14395 if (ScalarBits < EltBits) {
14396 // This should only occur on RV32.
14397 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
14398 !Subtarget.is64Bit() && "Unexpected splat");
14399 // vmv.v.x sign extends narrow inputs.
14400 SupportsSExt = true;
14401
14402 // If the input is positive, then sign extend is also zero extend.
14403 if (DAG.SignBitIsZero(Op))
14404 SupportsZExt = true;
14405
14406 EnforceOneUse = false;
14407 return;
14408 }
14409
14410 unsigned NarrowSize = EltBits / 2;
14411 // If the narrow type cannot be expressed with a legal VMV,
14412 // this is not a valid candidate.
14413 if (NarrowSize < 8)
14414 return;
14415
14416 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14417 SupportsSExt = true;
14418
14419 if (DAG.MaskedValueIsZero(Op,
14420 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14421 SupportsZExt = true;
14422
14423 EnforceOneUse = false;
14424 }
14425
14426 /// Helper method to set the various fields of this struct based on the
14427 /// type of \p Root.
14428 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14429 const RISCVSubtarget &Subtarget) {
14430 SupportsZExt = false;
14431 SupportsSExt = false;
14432 SupportsFPExt = false;
14433 EnforceOneUse = true;
14434 unsigned Opc = OrigOperand.getOpcode();
14435 // For the nodes we handle below, we end up using their inputs directly: see
14436 // getSource(). However since they either don't have a passthru or we check
14437 // that their passthru is undef, we can safely ignore their mask and VL.
14438 switch (Opc) {
14439 case ISD::ZERO_EXTEND:
14440 case ISD::SIGN_EXTEND: {
14441 MVT VT = OrigOperand.getSimpleValueType();
14442 if (!VT.isVector())
14443 break;
14444
14445 SDValue NarrowElt = OrigOperand.getOperand(0);
14446 MVT NarrowVT = NarrowElt.getSimpleValueType();
14447 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14448 if (NarrowVT.getVectorElementType() == MVT::i1)
14449 break;
14450
14451 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14452 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14453 break;
14454 }
14455 case RISCVISD::VZEXT_VL:
14456 SupportsZExt = true;
14457 break;
14458 case RISCVISD::VSEXT_VL:
14459 SupportsSExt = true;
14460 break;
14461 case RISCVISD::FP_EXTEND_VL:
14462 SupportsFPExt = true;
14463 break;
14464 case ISD::SPLAT_VECTOR:
14465 case RISCVISD::VMV_V_X_VL:
14466 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14467 break;
14468 case RISCVISD::VFMV_V_F_VL: {
14469 MVT VT = OrigOperand.getSimpleValueType();
14470
14471 if (!OrigOperand.getOperand(0).isUndef())
14472 break;
14473
14474 SDValue Op = OrigOperand.getOperand(1);
14475 if (Op.getOpcode() != ISD::FP_EXTEND)
14476 break;
14477
14478 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14479 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
14480 if (NarrowSize != ScalarBits)
14481 break;
14482
14483 SupportsFPExt = true;
14484 break;
14485 }
14486 default:
14487 break;
14488 }
14489 }
14490
14491 /// Check if \p Root supports any extension folding combines.
14492 static bool isSupportedRoot(const SDNode *Root,
14493 const RISCVSubtarget &Subtarget) {
14494 switch (Root->getOpcode()) {
14495 case ISD::ADD:
14496 case ISD::SUB:
14497 case ISD::MUL: {
14498 return Root->getValueType(0).isScalableVector();
14499 }
14500 case ISD::OR: {
14501 return Root->getValueType(0).isScalableVector() &&
14502 Root->getFlags().hasDisjoint();
14503 }
14504 // Vector Widening Integer Add/Sub/Mul Instructions
14505 case RISCVISD::ADD_VL:
14506 case RISCVISD::MUL_VL:
14507 case RISCVISD::VWADD_W_VL:
14508 case RISCVISD::VWADDU_W_VL:
14509 case RISCVISD::SUB_VL:
14510 case RISCVISD::VWSUB_W_VL:
14511 case RISCVISD::VWSUBU_W_VL:
14512 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14513 case RISCVISD::FADD_VL:
14514 case RISCVISD::FSUB_VL:
14515 case RISCVISD::FMUL_VL:
14516 case RISCVISD::VFWADD_W_VL:
14517 case RISCVISD::VFWSUB_W_VL:
14518 return true;
14519 case ISD::SHL:
14520 return Root->getValueType(0).isScalableVector() &&
14521 Subtarget.hasStdExtZvbb();
14522 case RISCVISD::SHL_VL:
14523 return Subtarget.hasStdExtZvbb();
14524 default:
14525 return false;
14526 }
14527 }
14528
14529 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14530 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14531 const RISCVSubtarget &Subtarget) {
14532 assert(isSupportedRoot(Root, Subtarget) &&
14533 "Trying to build an helper with an "
14534 "unsupported root");
14535 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14537 OrigOperand = Root->getOperand(OperandIdx);
14538
14539 unsigned Opc = Root->getOpcode();
14540 switch (Opc) {
14541 // We consider
14542 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14543 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14544 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14545 case RISCVISD::VWADD_W_VL:
14546 case RISCVISD::VWADDU_W_VL:
14547 case RISCVISD::VWSUB_W_VL:
14548 case RISCVISD::VWSUBU_W_VL:
14549 case RISCVISD::VFWADD_W_VL:
14550 case RISCVISD::VFWSUB_W_VL:
14551 if (OperandIdx == 1) {
14552 SupportsZExt =
14553 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14554 SupportsSExt =
14555 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14556 SupportsFPExt =
14557 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14558 // There's no existing extension here, so we don't have to worry about
14559 // making sure it gets removed.
14560 EnforceOneUse = false;
14561 break;
14562 }
14563 [[fallthrough]];
14564 default:
14565 fillUpExtensionSupport(Root, DAG, Subtarget);
14566 break;
14567 }
14568 }
14569
14570 /// Helper function to get the Mask and VL from \p Root.
14571 static std::pair<SDValue, SDValue>
14572 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14573 const RISCVSubtarget &Subtarget) {
14574 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14575 switch (Root->getOpcode()) {
14576 case ISD::ADD:
14577 case ISD::SUB:
14578 case ISD::MUL:
14579 case ISD::OR:
14580 case ISD::SHL: {
14581 SDLoc DL(Root);
14582 MVT VT = Root->getSimpleValueType(0);
14583 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14584 }
14585 default:
14586 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14587 }
14588 }
14589
14590 /// Helper function to check if \p N is commutative with respect to the
14591 /// foldings that are supported by this class.
14592 static bool isCommutative(const SDNode *N) {
14593 switch (N->getOpcode()) {
14594 case ISD::ADD:
14595 case ISD::MUL:
14596 case ISD::OR:
14597 case RISCVISD::ADD_VL:
14598 case RISCVISD::MUL_VL:
14599 case RISCVISD::VWADD_W_VL:
14600 case RISCVISD::VWADDU_W_VL:
14601 case RISCVISD::FADD_VL:
14602 case RISCVISD::FMUL_VL:
14603 case RISCVISD::VFWADD_W_VL:
14604 return true;
14605 case ISD::SUB:
14606 case RISCVISD::SUB_VL:
14607 case RISCVISD::VWSUB_W_VL:
14608 case RISCVISD::VWSUBU_W_VL:
14609 case RISCVISD::FSUB_VL:
14610 case RISCVISD::VFWSUB_W_VL:
14611 case ISD::SHL:
14612 case RISCVISD::SHL_VL:
14613 return false;
14614 default:
14615 llvm_unreachable("Unexpected opcode");
14616 }
14617 }
14618
14619 /// Get a list of combine to try for folding extensions in \p Root.
14620 /// Note that each returned CombineToTry function doesn't actually modify
14621 /// anything. Instead they produce an optional CombineResult that if not None,
14622 /// need to be materialized for the combine to be applied.
14623 /// \see CombineResult::materialize.
14624 /// If the related CombineToTry function returns std::nullopt, that means the
14625 /// combine didn't match.
14626 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14627};
14628
14629/// Helper structure that holds all the necessary information to materialize a
14630/// combine that does some extension folding.
14631struct CombineResult {
14632 /// Opcode to be generated when materializing the combine.
14633 unsigned TargetOpcode;
14634 // No value means no extension is needed.
14635 std::optional<ExtKind> LHSExt;
14636 std::optional<ExtKind> RHSExt;
14637 /// Root of the combine.
14638 SDNode *Root;
14639 /// LHS of the TargetOpcode.
14640 NodeExtensionHelper LHS;
14641 /// RHS of the TargetOpcode.
14642 NodeExtensionHelper RHS;
14643
14644 CombineResult(unsigned TargetOpcode, SDNode *Root,
14645 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14646 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14647 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14648 LHS(LHS), RHS(RHS) {}
14649
14650 /// Return a value that uses TargetOpcode and that can be used to replace
14651 /// Root.
14652 /// The actual replacement is *not* done in that method.
14653 SDValue materialize(SelectionDAG &DAG,
14654 const RISCVSubtarget &Subtarget) const {
14655 SDValue Mask, VL, Merge;
14656 std::tie(Mask, VL) =
14657 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14658 switch (Root->getOpcode()) {
14659 default:
14660 Merge = Root->getOperand(2);
14661 break;
14662 case ISD::ADD:
14663 case ISD::SUB:
14664 case ISD::MUL:
14665 case ISD::OR:
14666 case ISD::SHL:
14667 Merge = DAG.getUNDEF(Root->getValueType(0));
14668 break;
14669 }
14670 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14671 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14672 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14673 Merge, Mask, VL);
14674 }
14675};
14676
14677/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14678/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14679/// are zext) and LHS and RHS can be folded into Root.
14680/// AllowExtMask defines which form `ext` can take in this pattern.
14681///
14682/// \note If the pattern can match with both zext and sext, the returned
14683/// CombineResult will feature the zext result.
14684///
14685/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14686/// can be used to apply the pattern.
14687static std::optional<CombineResult>
14688canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14689 const NodeExtensionHelper &RHS,
14690 uint8_t AllowExtMask, SelectionDAG &DAG,
14691 const RISCVSubtarget &Subtarget) {
14692 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14693 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14694 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14695 /*RHSExt=*/{ExtKind::ZExt});
14696 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14697 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14698 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14699 /*RHSExt=*/{ExtKind::SExt});
14700 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14701 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14702 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14703 /*RHSExt=*/{ExtKind::FPExt});
14704 return std::nullopt;
14705}
14706
14707/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14708/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14709/// are zext) and LHS and RHS can be folded into Root.
14710///
14711/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14712/// can be used to apply the pattern.
14713static std::optional<CombineResult>
14714canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14715 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14716 const RISCVSubtarget &Subtarget) {
14717 return canFoldToVWWithSameExtensionImpl(
14718 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14719 Subtarget);
14720}
14721
14722/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14723///
14724/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14725/// can be used to apply the pattern.
14726static std::optional<CombineResult>
14727canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14728 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14729 const RISCVSubtarget &Subtarget) {
14730 if (RHS.SupportsFPExt)
14731 return CombineResult(
14732 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14733 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14734
14735 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14736 // sext/zext?
14737 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14738 // purposes.
14739 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14740 return CombineResult(
14741 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14742 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14743 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14744 return CombineResult(
14745 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14746 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14747 return std::nullopt;
14748}
14749
14750/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14751///
14752/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14753/// can be used to apply the pattern.
14754static std::optional<CombineResult>
14755canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14756 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14757 const RISCVSubtarget &Subtarget) {
14758 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14759 Subtarget);
14760}
14761
14762/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14763///
14764/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14765/// can be used to apply the pattern.
14766static std::optional<CombineResult>
14767canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14768 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14769 const RISCVSubtarget &Subtarget) {
14770 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14771 Subtarget);
14772}
14773
14774/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14775///
14776/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14777/// can be used to apply the pattern.
14778static std::optional<CombineResult>
14779canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14780 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14781 const RISCVSubtarget &Subtarget) {
14782 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14783 Subtarget);
14784}
14785
14786/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14787///
14788/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14789/// can be used to apply the pattern.
14790static std::optional<CombineResult>
14791canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14792 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14793 const RISCVSubtarget &Subtarget) {
14794
14795 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14796 return std::nullopt;
14797 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14798 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14799 /*RHSExt=*/{ExtKind::ZExt});
14800}
14801
14801
14802SmallVector<NodeExtensionHelper::CombineToTry>
14803NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14804 SmallVector<CombineToTry> Strategies;
14805 switch (Root->getOpcode()) {
14806 case ISD::ADD:
14807 case ISD::SUB:
14808 case ISD::OR:
14809 case RISCVISD::ADD_VL:
14810 case RISCVISD::SUB_VL:
14811 case RISCVISD::FADD_VL:
14812 case RISCVISD::FSUB_VL:
14813 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14814 Strategies.push_back(canFoldToVWWithSameExtension);
14815 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
14816 Strategies.push_back(canFoldToVW_W);
14817 break;
14818 case RISCVISD::FMUL_VL:
14819 Strategies.push_back(canFoldToVWWithSameExtension);
14820 break;
14821 case ISD::MUL:
14822 case RISCVISD::MUL_VL:
14823 // mul -> vwmul(u)
14824 Strategies.push_back(canFoldToVWWithSameExtension);
14825 // mul -> vwmulsu
14826 Strategies.push_back(canFoldToVW_SU);
14827 break;
14828 case ISD::SHL:
14829 case RISCVISD::SHL_VL:
14830 // shl -> vwsll
14831 Strategies.push_back(canFoldToVWWithZEXT);
14832 break;
14833 case RISCVISD::VWADD_W_VL:
14834 case RISCVISD::VWSUB_W_VL:
14835 // vwadd_w|vwsub_w -> vwadd|vwsub
14836 Strategies.push_back(canFoldToVWWithSEXT);
14837 break;
14838 case RISCVISD::VWADDU_W_VL:
14839 case RISCVISD::VWSUBU_W_VL:
14840 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14841 Strategies.push_back(canFoldToVWWithZEXT);
14842 break;
14843 case RISCVISD::VFWADD_W_VL:
14844 case RISCVISD::VFWSUB_W_VL:
14845 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14846 Strategies.push_back(canFoldToVWWithFPEXT);
14847 break;
14848 default:
14849 llvm_unreachable("Unexpected opcode");
14850 }
14851 return Strategies;
14852}
14853} // End anonymous namespace.
14854
14855/// Combine a binary operation to its equivalent VW or VW_W form.
14856/// The supported combines are:
14857/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14858/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14859/// mul | mul_vl -> vwmul(u) | vwmul_su
14860/// shl | shl_vl -> vwsll
14861/// fadd_vl -> vfwadd | vfwadd_w
14862/// fsub_vl -> vfwsub | vfwsub_w
14863/// fmul_vl -> vfwmul
14864/// vwadd_w(u) -> vwadd(u)
14865/// vwsub_w(u) -> vwsub(u)
14866/// vfwadd_w -> vfwadd
14867/// vfwsub_w -> vfwsub
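///
/// Illustrative example (not from the upstream source):
///   add_vl (vsext_vl a), (vsext_vl b)   becomes   vwadd_vl a, b
/// and when only one operand is extended, e.g. add_vl x, (vzext_vl b),
/// the _w form is used instead: vwaddu_w_vl x, b.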
14868static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14869 TargetLowering::DAGCombinerInfo &DCI,
14870 const RISCVSubtarget &Subtarget) {
14871 SelectionDAG &DAG = DCI.DAG;
14872 if (DCI.isBeforeLegalize())
14873 return SDValue();
14874
14875 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14876 return SDValue();
14877
14878 SmallVector<SDNode *> Worklist;
14879 SmallSet<SDNode *, 8> Inserted;
14880 Worklist.push_back(N);
14881 Inserted.insert(N);
14882 SmallVector<CombineResult> CombinesToApply;
14883
14884 while (!Worklist.empty()) {
14885 SDNode *Root = Worklist.pop_back_val();
14886 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14887 return SDValue();
14888
14889 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
14890 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
14891 auto AppendUsersIfNeeded = [&Worklist,
14892 &Inserted](const NodeExtensionHelper &Op) {
14893 if (Op.needToPromoteOtherUsers()) {
14894 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14895 if (Inserted.insert(TheUse).second)
14896 Worklist.push_back(TheUse);
14897 }
14898 }
14899 };
14900
14901 // Control the compile time by limiting the number of nodes we look at in
14902 // total.
14903 if (Inserted.size() > ExtensionMaxWebSize)
14904 return SDValue();
14905
14906 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14907 NodeExtensionHelper::getSupportedFoldings(Root);
14908
14909 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14910 bool Matched = false;
14911 for (int Attempt = 0;
14912 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
14913 ++Attempt) {
14914
14915 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14916 FoldingStrategies) {
14917 std::optional<CombineResult> Res =
14918 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
14919 if (Res) {
14920 Matched = true;
14921 CombinesToApply.push_back(*Res);
14922 // All the inputs that are extended need to be folded, otherwise
14923 // we would be leaving the old input (since it may still be used),
14924 // and the new one.
14925 if (Res->LHSExt.has_value())
14926 AppendUsersIfNeeded(LHS);
14927 if (Res->RHSExt.has_value())
14928 AppendUsersIfNeeded(RHS);
14929 break;
14930 }
14931 }
14932 std::swap(LHS, RHS);
14933 }
14934 // Right now we do an all or nothing approach.
14935 if (!Matched)
14936 return SDValue();
14937 }
14938 // Store the value for the replacement of the input node separately.
14939 SDValue InputRootReplacement;
14940 // We do the RAUW after we materialize all the combines, because some replaced
14941 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14942 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14943 // yet-to-be-visited CombinesToApply roots.
14944 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14945 ValuesToReplace.reserve(CombinesToApply.size());
14946 for (CombineResult Res : CombinesToApply) {
14947 SDValue NewValue = Res.materialize(DAG, Subtarget);
14948 if (!InputRootReplacement) {
14949 assert(Res.Root == N &&
14950 "First element is expected to be the current node");
14951 InputRootReplacement = NewValue;
14952 } else {
14953 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14954 }
14955 }
14956 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14957 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14958 DCI.AddToWorklist(OldNewValues.second.getNode());
14959 }
14960 return InputRootReplacement;
14961}
14962
14963// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14964// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14965// y will be the Passthru and cond will be the Mask.
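// Illustrative example (not from the upstream source):
//   vwaddu.wv y, (vmerge cond, x, splat(0))
// becomes a masked vwaddu.wv with passthru y and mask cond: inactive lanes
// keep y, which matches adding the zero the vmerge would have selected.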
14966static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14967 unsigned Opc = N->getOpcode();
14968 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14969 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14970
14971 SDValue Y = N->getOperand(0);
14972 SDValue MergeOp = N->getOperand(1);
14973 unsigned MergeOpc = MergeOp.getOpcode();
14974
14975 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14976 return SDValue();
14977
14978 SDValue X = MergeOp->getOperand(1);
14979
14980 if (!MergeOp.hasOneUse())
14981 return SDValue();
14982
14983 // Passthru should be undef
14984 SDValue Passthru = N->getOperand(2);
14985 if (!Passthru.isUndef())
14986 return SDValue();
14987
14988 // Mask should be all ones
14989 SDValue Mask = N->getOperand(3);
14990 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14991 return SDValue();
14992
14993 // False value of MergeOp should be all zeros
14994 SDValue Z = MergeOp->getOperand(2);
14995
14996 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14997 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14998 Z = Z.getOperand(1);
14999
15000 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15001 return SDValue();
15002
15003 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15004 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15005 N->getFlags());
15006}
15007
15008static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15009 TargetLowering::DAGCombinerInfo &DCI,
15010 const RISCVSubtarget &Subtarget) {
15011 [[maybe_unused]] unsigned Opc = N->getOpcode();
15012 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15013 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15014
15015 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15016 return V;
15017
15018 return combineVWADDSUBWSelect(N, DCI.DAG);
15019}
15020
15021// Helper function for performMemPairCombine.
15022// Try to combine the memory loads/stores LSNode1 and LSNode2
15023// into a single memory pair operation.
15024static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15025 LSBaseSDNode *LSNode2, SDValue BasePtr,
15026 uint64_t Imm) {
15027 SmallPtrSet<const SDNode *, 32> Visited;
15028 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15029
15030 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15031 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15032 return SDValue();
15033
15034 MachineFunction &MF = DAG.getMachineFunction();
15035 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15036
15037 // The new operation has twice the width.
15038 MVT XLenVT = Subtarget.getXLenVT();
15039 EVT MemVT = LSNode1->getMemoryVT();
15040 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15041 MachineMemOperand *MMO = LSNode1->getMemOperand();
15042 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15043 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15044
15045 if (LSNode1->getOpcode() == ISD::LOAD) {
15046 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15047 unsigned Opcode;
15048 if (MemVT == MVT::i32)
15049 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15050 else
15051 Opcode = RISCVISD::TH_LDD;
15052
15053 SDValue Res = DAG.getMemIntrinsicNode(
15054 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15055 {LSNode1->getChain(), BasePtr,
15056 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15057 NewMemVT, NewMMO);
15058
15059 SDValue Node1 =
15060 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15061 SDValue Node2 =
15062 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15063
15064 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15065 return Node1;
15066 } else {
15067 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15068
15069 SDValue Res = DAG.getMemIntrinsicNode(
15070 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15071 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15072 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15073 NewMemVT, NewMMO);
15074
15075 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15076 return Res;
15077 }
15078}
15079
15080// Try to combine two adjacent loads/stores to a single pair instruction from
15081// the XTHeadMemPair vendor extension.
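// Illustrative example (not from the upstream source, assuming XTHeadMemPair):
// two chained loads lw a1, 8(a0) and lw a2, 12(a0) share base a0 and sit at
// adjacent, suitably aligned offsets, so they can be merged into a single
// th.lwd load-pair that writes both a1 and a2.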
15082static SDValue performMemPairCombine(SDNode *N,
15083 TargetLowering::DAGCombinerInfo &DCI) {
15084 SelectionDAG &DAG = DCI.DAG;
15085 MachineFunction &MF = DAG.getMachineFunction();
15086 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15087
15088 // Target does not support load/store pair.
15089 if (!Subtarget.hasVendorXTHeadMemPair())
15090 return SDValue();
15091
15092 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15093 EVT MemVT = LSNode1->getMemoryVT();
15094 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15095
15096 // No volatile, indexed or atomic loads/stores.
15097 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15098 return SDValue();
15099
15100 // Function to get a base + constant representation from a memory value.
15101 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15102 if (Ptr->getOpcode() == ISD::ADD)
15103 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15104 return {Ptr->getOperand(0), C1->getZExtValue()};
15105 return {Ptr, 0};
15106 };
15107
15108 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15109
15110 SDValue Chain = N->getOperand(0);
15111 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
15112 UI != UE; ++UI) {
15113 SDUse &Use = UI.getUse();
15114 if (Use.getUser() != N && Use.getResNo() == 0 &&
15115 Use.getUser()->getOpcode() == N->getOpcode()) {
15116 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
15117
15118 // No volatile, indexed or atomic loads/stores.
15119 if (!LSNode2->isSimple() || LSNode2->isIndexed())
15120 continue;
15121
15122 // Check if LSNode1 and LSNode2 have the same type and extension.
15123 if (LSNode1->getOpcode() == ISD::LOAD)
15124 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
15125 cast<LoadSDNode>(LSNode1)->getExtensionType())
15126 continue;
15127
15128 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
15129 continue;
15130
15131 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
15132
15133 // Check if the base pointer is the same for both instructions.
15134 if (Base1 != Base2)
15135 continue;
15136
15137 // Check if the offsets match the XTHeadMemPair encoding constraints.
15138 bool Valid = false;
15139 if (MemVT == MVT::i32) {
15140 // Check for adjacent i32 values and a 2-bit index.
15141 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
15142 Valid = true;
15143 } else if (MemVT == MVT::i64) {
15144 // Check for adjacent i64 values and a 2-bit index.
15145 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
15146 Valid = true;
15147 }
15148
15149 if (!Valid)
15150 continue;
15151
15152 // Try to combine.
15153 if (SDValue Res =
15154 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
15155 return Res;
15156 }
15157 }
15158
15159 return SDValue();
15160}
15161
15162// Fold
15163// (fp_to_int (froundeven X)) -> fcvt X, rne
15164// (fp_to_int (ftrunc X)) -> fcvt X, rtz
15165// (fp_to_int (ffloor X)) -> fcvt X, rdn
15166// (fp_to_int (fceil X)) -> fcvt X, rup
15167// (fp_to_int (fround X)) -> fcvt X, rmm
15168// (fp_to_int (frint X)) -> fcvt X
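// Illustrative example (not from the upstream source): for a scalar f64 in
// fa0 on RV64, (fp_to_sint (ffloor X)) becomes a single fcvt.l.d a0, fa0, rdn,
// folding the rounding into the conversion's static rounding mode instead of
// performing a separate floating-point rounding first.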
15169static SDValue performFP_TO_INTCombine(SDNode *N,
15170 TargetLowering::DAGCombinerInfo &DCI,
15171 const RISCVSubtarget &Subtarget) {
15172 SelectionDAG &DAG = DCI.DAG;
15173 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15174 MVT XLenVT = Subtarget.getXLenVT();
15175
15176 SDValue Src = N->getOperand(0);
15177
15178 // Don't do this for strict-fp Src.
15179 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15180 return SDValue();
15181
15182 // Ensure the FP type is legal.
15183 if (!TLI.isTypeLegal(Src.getValueType()))
15184 return SDValue();
15185
15186 // Don't do this for f16 with Zfhmin and not Zfh.
15187 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15188 return SDValue();
15189
15190 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15191 // If the result is invalid, we didn't find a foldable instruction.
15192 if (FRM == RISCVFPRndMode::Invalid)
15193 return SDValue();
15194
15195 SDLoc DL(N);
15196 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
15197 EVT VT = N->getValueType(0);
15198
15199 if (VT.isVector() && TLI.isTypeLegal(VT)) {
15200 MVT SrcVT = Src.getSimpleValueType();
15201 MVT SrcContainerVT = SrcVT;
15202 MVT ContainerVT = VT.getSimpleVT();
15203 SDValue XVal = Src.getOperand(0);
15204
15205 // For widening and narrowing conversions we just combine it into a
15206 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
15207 // end up getting lowered to their appropriate pseudo instructions based on
15208 // their operand types
15209 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
15210 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
15211 return SDValue();
15212
15213 // Make fixed-length vectors scalable first
15214 if (SrcVT.isFixedLengthVector()) {
15215 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
15216 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
15217 ContainerVT =
15218 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
15219 }
15220
15221 auto [Mask, VL] =
15222 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
15223
15224 SDValue FpToInt;
15225 if (FRM == RISCVFPRndMode::RTZ) {
15226 // Use the dedicated trunc static rounding mode if we're truncating so we
15227 // don't need to generate calls to fsrmi/fsrm
15228 unsigned Opc =
15229 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
15230 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15231 } else if (FRM == RISCVFPRndMode::DYN) {
15232 unsigned Opc =
15233 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
15234 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15235 } else {
15236 unsigned Opc =
15237 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
15238 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
15239 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
15240 }
15241
15242 // If converted from fixed-length to scalable, convert back
15243 if (VT.isFixedLengthVector())
15244 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
15245
15246 return FpToInt;
15247 }
15248
15249 // Only handle XLen or i32 types. Other types narrower than XLen will
15250 // eventually be legalized to XLenVT.
15251 if (VT != MVT::i32 && VT != XLenVT)
15252 return SDValue();
15253
15254 unsigned Opc;
15255 if (VT == XLenVT)
15256 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15257 else
15258 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15259
15260 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
15261 DAG.getTargetConstant(FRM, DL, XLenVT));
15262 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
15263}
15264
15265// Fold
15266// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
15267// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
15268// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
15269// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
15270// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
15271// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
15272static SDValue performFP_TO_INT_SATCombine(SDNode *N,
15273 TargetLowering::DAGCombinerInfo &DCI,
15274 const RISCVSubtarget &Subtarget) {
15275 SelectionDAG &DAG = DCI.DAG;
15276 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15277 MVT XLenVT = Subtarget.getXLenVT();
15278
15279 // Only handle XLen types. Other types narrower than XLen will eventually be
15280 // legalized to XLenVT.
15281 EVT DstVT = N->getValueType(0);
15282 if (DstVT != XLenVT)
15283 return SDValue();
15284
15285 SDValue Src = N->getOperand(0);
15286
15287 // Don't do this for strict-fp Src.
15288 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15289 return SDValue();
15290
15291 // Ensure the FP type is also legal.
15292 if (!TLI.isTypeLegal(Src.getValueType()))
15293 return SDValue();
15294
15295 // Don't do this for f16 with Zfhmin and not Zfh.
15296 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15297 return SDValue();
15298
15299 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15300
15301 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15302 if (FRM == RISCVFPRndMode::Invalid)
15303 return SDValue();
15304
15305 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15306
15307 unsigned Opc;
15308 if (SatVT == DstVT)
15309 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15310 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15311 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15312 else
15313 return SDValue();
15314 // FIXME: Support other SatVTs by clamping before or after the conversion.
15315
15316 Src = Src.getOperand(0);
15317
15318 SDLoc DL(N);
15319 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15320 DAG.getTargetConstant(FRM, DL, XLenVT));
15321
15322 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15323 // extend.
15324 if (Opc == RISCVISD::FCVT_WU_RV64)
15325 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15326
15327 // RISC-V FP-to-int conversions saturate to the destination register size, but
15328 // don't produce 0 for nan.
15329 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15330 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15331}
15332
15333// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15334// smaller than XLenVT.
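// Illustrative example (not from the upstream source): for an i16 value,
// bswap swaps the two bytes and bitreverse then reverses all 16 bits, so the
// composition reverses the bits within each byte while keeping byte order,
// which is exactly what a single brev8 (Zbkb) computes.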
15335static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15336 const RISCVSubtarget &Subtarget) {
15337 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15338
15339 SDValue Src = N->getOperand(0);
15340 if (Src.getOpcode() != ISD::BSWAP)
15341 return SDValue();
15342
15343 EVT VT = N->getValueType(0);
15344 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15345 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15346 return SDValue();
15347
15348 SDLoc DL(N);
15349 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15350}
15351
15352// Convert from one FMA opcode to another based on whether we are negating the
15353// multiply result and/or the accumulator.
15354// NOTE: Only supports RVV operations with VL.
15355static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15356 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15357 if (NegMul) {
15358 // clang-format off
15359 switch (Opcode) {
15360 default: llvm_unreachable("Unexpected opcode");
15361 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15362 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15363 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15364 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15365 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15366 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15367 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15368 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15369 }
15370 // clang-format on
15371 }
15372
15373 // Negating the accumulator changes ADD<->SUB.
15374 if (NegAcc) {
15375 // clang-format off
15376 switch (Opcode) {
15377 default: llvm_unreachable("Unexpected opcode");
15378 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15379 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15380 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15381 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15382 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15383 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15384 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15385 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15386 }
15387 // clang-format on
15388 }
15389
15390 return Opcode;
15391}
15392
15393static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15394 // Fold FNEG_VL into FMA opcodes.
15395 // The first operand of strict-fp is chain.
15396 unsigned Offset = N->isTargetStrictFPOpcode();
15397 SDValue A = N->getOperand(0 + Offset);
15398 SDValue B = N->getOperand(1 + Offset);
15399 SDValue C = N->getOperand(2 + Offset);
15400 SDValue Mask = N->getOperand(3 + Offset);
15401 SDValue VL = N->getOperand(4 + Offset);
15402
15403 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15404 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15405 V.getOperand(2) == VL) {
15406 // Return the negated input.
15407 V = V.getOperand(0);
15408 return true;
15409 }
15410
15411 return false;
15412 };
15413
15414 bool NegA = invertIfNegative(A);
15415 bool NegB = invertIfNegative(B);
15416 bool NegC = invertIfNegative(C);
15417
15418 // If no operands are negated, we're done.
15419 if (!NegA && !NegB && !NegC)
15420 return SDValue();
15421
15422 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15423 if (N->isTargetStrictFPOpcode())
15424 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15425 {N->getOperand(0), A, B, C, Mask, VL});
15426 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15427 VL);
15428}
15429
15430static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
15431 const RISCVSubtarget &Subtarget) {
15432 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15433 return V;
15434
15435 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
15436 !Subtarget.hasVInstructionsF16())
15437 return SDValue();
15438
15439 // FIXME: Ignore strict opcodes for now.
15440 if (N->isTargetStrictFPOpcode())
15441 return SDValue();
15442
15443 // Try to form widening FMA.
15444 SDValue Op0 = N->getOperand(0);
15445 SDValue Op1 = N->getOperand(1);
15446 SDValue Mask = N->getOperand(3);
15447 SDValue VL = N->getOperand(4);
15448
15449 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
15450 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
15451 return SDValue();
15452
15453 // TODO: Refactor to handle more complex cases similar to
15454 // combineBinOp_VLToVWBinOp_VL.
15455 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
15456 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
15457 return SDValue();
15458
15459 // Check the mask and VL are the same.
15460 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15461 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15462 return SDValue();
15463
15464 unsigned NewOpc;
15465 switch (N->getOpcode()) {
15466 default:
15467 llvm_unreachable("Unexpected opcode");
15468 case RISCVISD::VFMADD_VL:
15469 NewOpc = RISCVISD::VFWMADD_VL;
15470 break;
15471 case RISCVISD::VFNMSUB_VL:
15472 NewOpc = RISCVISD::VFWNMSUB_VL;
15473 break;
15474 case RISCVISD::VFNMADD_VL:
15475 NewOpc = RISCVISD::VFWNMADD_VL;
15476 break;
15477 case RISCVISD::VFMSUB_VL:
15478 NewOpc = RISCVISD::VFWMSUB_VL;
15479 break;
15480 }
15481
15482 Op0 = Op0.getOperand(0);
15483 Op1 = Op1.getOperand(0);
15484
15485 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15486 N->getOperand(2), Mask, VL);
15487}
15488
15489static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15490 const RISCVSubtarget &Subtarget) {
15491 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15492
15493 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15494 return SDValue();
15495
15496 if (!isa<ConstantSDNode>(N->getOperand(1)))
15497 return SDValue();
15498 uint64_t ShAmt = N->getConstantOperandVal(1);
15499 if (ShAmt > 32)
15500 return SDValue();
15501
15502 SDValue N0 = N->getOperand(0);
15503
15504 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15505 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15506 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
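 // Illustrative example (not from the upstream source): with C1 = 3, C2 = 4,
 //   (sra (sext_inreg (shl X, 3), i32), 4)
 // is rewritten to (sra (shl X, 35), 36); both forms yield bits [28:1] of X
 // sign-extended from bit 28, but slli/srai have 16-bit compressed encodings
 // while slliw/sraiw do not.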
15507 if (ShAmt < 32 &&
15508 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15509 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15510 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15511 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15512 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15513 if (LShAmt < 32) {
15514 SDLoc ShlDL(N0.getOperand(0));
15515 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15516 N0.getOperand(0).getOperand(0),
15517 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15518 SDLoc DL(N);
15519 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15520 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15521 }
15522 }
15523
15524 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15525 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15526 //
15527 // Also try these folds where an add or sub is in the middle.
15528 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15529 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
15530 SDValue Shl;
15531 ConstantSDNode *AddC = nullptr;
15532
15533 // We might have an ADD or SUB between the SRA and SHL.
15534 bool IsAdd = N0.getOpcode() == ISD::ADD;
15535 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15536 // Other operand needs to be a constant we can modify.
15537 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15538 if (!AddC)
15539 return SDValue();
15540
15541 // AddC needs to have at least 32 trailing zeros.
15542 if (AddC->getAPIntValue().countr_zero() < 32)
15543 return SDValue();
15544
15545 // All users should be a shift by constant less than or equal to 32. This
15546 // ensures we'll do this optimization for each of them to produce an
15547 // add/sub+sext_inreg they can all share.
15548 for (SDNode *U : N0->uses()) {
15549 if (U->getOpcode() != ISD::SRA ||
15550 !isa<ConstantSDNode>(U->getOperand(1)) ||
15551 U->getConstantOperandVal(1) > 32)
15552 return SDValue();
15553 }
15554
15555 Shl = N0.getOperand(IsAdd ? 0 : 1);
15556 } else {
15557 // Not an ADD or SUB.
15558 Shl = N0;
15559 }
15560
15561 // Look for a shift left by 32.
15562 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15563 Shl.getConstantOperandVal(1) != 32)
15564 return SDValue();
15565
15566 // If we didn't look through an add/sub, then the shl should have one use.
15567 // If we did look through an add/sub, the sext_inreg we create is free so
15568 // we're only creating 2 new instructions. It's enough to only remove the
15569 // original sra+add/sub.
15570 if (!AddC && !Shl.hasOneUse())
15571 return SDValue();
15572
15573 SDLoc DL(N);
15574 SDValue In = Shl.getOperand(0);
15575
15576 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15577 // constant.
15578 if (AddC) {
15579 SDValue ShiftedAddC =
15580 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15581 if (IsAdd)
15582 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15583 else
15584 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15585 }
15586
15587 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15588 DAG.getValueType(MVT::i32));
15589 if (ShAmt == 32)
15590 return SExt;
15591
15592 return DAG.getNode(
15593 ISD::SHL, DL, MVT::i64, SExt,
15594 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15595}
15596
15597// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
15598// the result is used as the condition of a br_cc or select_cc we can invert,
15599// inverting the setcc is free, and Z is 0/1. Caller will invert the
15600// br_cc/select_cc.
15601static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15602 bool IsAnd = Cond.getOpcode() == ISD::AND;
15603 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15604 return SDValue();
15605
15606 if (!Cond.hasOneUse())
15607 return SDValue();
15608
15609 SDValue Setcc = Cond.getOperand(0);
15610 SDValue Xor = Cond.getOperand(1);
15611 // Canonicalize setcc to LHS.
15612 if (Setcc.getOpcode() != ISD::SETCC)
15613 std::swap(Setcc, Xor);
15614 // LHS should be a setcc and RHS should be an xor.
15615 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15616 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15617 return SDValue();
15618
15619 // If the condition is an And, SimplifyDemandedBits may have changed
15620 // (xor Z, 1) to (not Z).
15621 SDValue Xor1 = Xor.getOperand(1);
15622 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15623 return SDValue();
15624
15625 EVT VT = Cond.getValueType();
15626 SDValue Xor0 = Xor.getOperand(0);
15627
15628 // The LHS of the xor needs to be 0/1.
15629 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15630 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15631 return SDValue();
15632
15633 // We can only invert integer setccs.
15634 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15635 if (!SetCCOpVT.isScalarInteger())
15636 return SDValue();
15637
15638 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15639 if (ISD::isIntEqualitySetCC(CCVal)) {
15640 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15641 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15642 Setcc.getOperand(1), CCVal);
15643 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15644 // Invert (setlt 0, X) by converting to (setlt X, 1).
15645 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15646 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15647 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15648 // Invert (setlt X, 1) by converting to (setlt 0, X).
15649 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15650 DAG.getConstant(0, SDLoc(Setcc), VT),
15651 Setcc.getOperand(0), CCVal);
15652 } else
15653 return SDValue();
15654
15655 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15656 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15657}
15658
15659// Perform common combines for BR_CC and SELECT_CC conditions.
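// A typical effect (illustrative, added annotation):
//   (br_cc (xor a, b), 0, eq, dest) -> (br_cc a, b, eq, dest)
// which typically selects to a single beq instead of an xor followed by beqz.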
15660static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15661 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15662 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15663
15664 // Since an arithmetic right shift always preserves the sign,
15665 // the shift can be omitted.
15666 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15667 // setge (sra X, N), 0 -> setge X, 0
15668 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15669 LHS.getOpcode() == ISD::SRA) {
15670 LHS = LHS.getOperand(0);
15671 return true;
15672 }
15673
15674 if (!ISD::isIntEqualitySetCC(CCVal))
15675 return false;
15676
15677 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15678 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15679 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15680 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15681 // If we're looking for eq 0 instead of ne 0, we need to invert the
15682 // condition.
15683 bool Invert = CCVal == ISD::SETEQ;
15684 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15685 if (Invert)
15686 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15687
15688 RHS = LHS.getOperand(1);
15689 LHS = LHS.getOperand(0);
15690 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15691
15692 CC = DAG.getCondCode(CCVal);
15693 return true;
15694 }
15695
15696 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15697 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15698 RHS = LHS.getOperand(1);
15699 LHS = LHS.getOperand(0);
15700 return true;
15701 }
15702
15703 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15704 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15705 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15706 SDValue LHS0 = LHS.getOperand(0);
15707 if (LHS0.getOpcode() == ISD::AND &&
15708 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15709 uint64_t Mask = LHS0.getConstantOperandVal(1);
15710 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15711 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15712 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15713 CC = DAG.getCondCode(CCVal);
15714
15715 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15716 LHS = LHS0.getOperand(0);
15717 if (ShAmt != 0)
15718 LHS =
15719 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15720 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15721 return true;
15722 }
15723 }
15724 }
15725
15726 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15727 // This can occur when legalizing some floating point comparisons.
15728 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15729 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15730 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15731 CC = DAG.getCondCode(CCVal);
15732 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15733 return true;
15734 }
15735
15736 if (isNullConstant(RHS)) {
15737 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15738 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15739 CC = DAG.getCondCode(CCVal);
15740 LHS = NewCond;
15741 return true;
15742 }
15743 }
15744
15745 return false;
15746}
15747
15748// Fold
15749// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15750// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15751// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15752// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
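// Illustrative effect (added annotation, not from the original source): the
// inner (select C, X, 0) can typically lower to a single czero.eqz/czero.nez
// on targets with Zicond, so the transform trades a conditional select of a
// binop result for one conditional-zero plus the binop itself.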
15754 SDValue TrueVal, SDValue FalseVal,
15755 bool Swapped) {
15756 bool Commutative = true;
15757 unsigned Opc = TrueVal.getOpcode();
15758 switch (Opc) {
15759 default:
15760 return SDValue();
15761 case ISD::SHL:
15762 case ISD::SRA:
15763 case ISD::SRL:
15764 case ISD::SUB:
15765 Commutative = false;
15766 break;
15767 case ISD::ADD:
15768 case ISD::OR:
15769 case ISD::XOR:
15770 break;
15771 }
15772
15773 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15774 return SDValue();
15775
15776 unsigned OpToFold;
15777 if (FalseVal == TrueVal.getOperand(0))
15778 OpToFold = 0;
15779 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15780 OpToFold = 1;
15781 else
15782 return SDValue();
15783
15784 EVT VT = N->getValueType(0);
15785 SDLoc DL(N);
15786 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15787 EVT OtherOpVT = OtherOp.getValueType();
15788 SDValue IdentityOperand =
15789 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15790 if (!Commutative)
15791 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15792 assert(IdentityOperand && "No identity operand!");
15793
15794 if (Swapped)
15795 std::swap(OtherOp, IdentityOperand);
15796 SDValue NewSel =
15797 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15798 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15799}
15800
15801// This tries to get rid of the `select` and `icmp` that are being used to
15802// handle targets that do not support `cttz(0)`/`ctlz(0)`.
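// Illustrative example (added annotation, not from the original source), for a
// 32-bit x:
//   (select (setcc x, 0, eq), 0, (cttz_zero_undef x)) -> (and (cttz x), 31)
// since cttz returns the bit width (32) for a zero input and 32 & 31 == 0.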
15804 SDValue Cond = N->getOperand(0);
15805
15806 // This represents either CTTZ or CTLZ instruction.
15807 SDValue CountZeroes;
15808
15809 SDValue ValOnZero;
15810
15811 if (Cond.getOpcode() != ISD::SETCC)
15812 return SDValue();
15813
15814 if (!isNullConstant(Cond->getOperand(1)))
15815 return SDValue();
15816
15817 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15818 if (CCVal == ISD::CondCode::SETEQ) {
15819 CountZeroes = N->getOperand(2);
15820 ValOnZero = N->getOperand(1);
15821 } else if (CCVal == ISD::CondCode::SETNE) {
15822 CountZeroes = N->getOperand(1);
15823 ValOnZero = N->getOperand(2);
15824 } else {
15825 return SDValue();
15826 }
15827
15828 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15829 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15830 CountZeroes = CountZeroes.getOperand(0);
15831
15832 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15833 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15834 CountZeroes.getOpcode() != ISD::CTLZ &&
15835 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15836 return SDValue();
15837
15838 if (!isNullConstant(ValOnZero))
15839 return SDValue();
15840
15841 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15842 if (Cond->getOperand(0) != CountZeroesArgument)
15843 return SDValue();
15844
15845 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15846 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15847 CountZeroes.getValueType(), CountZeroesArgument);
15848 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15849 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15850 CountZeroes.getValueType(), CountZeroesArgument);
15851 }
15852
15853 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15854 SDValue BitWidthMinusOne =
15855 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15856
15857 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15858 CountZeroes, BitWidthMinusOne);
15859 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15860}
15861
15863 const RISCVSubtarget &Subtarget) {
15864 SDValue Cond = N->getOperand(0);
15865 SDValue True = N->getOperand(1);
15866 SDValue False = N->getOperand(2);
15867 SDLoc DL(N);
15868 EVT VT = N->getValueType(0);
15869 EVT CondVT = Cond.getValueType();
15870
15871 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15872 return SDValue();
15873
15874 // Replace (setcc eq (and x, C), 0) with (setcc ne (and x, C), 0) to
15875 // generate BEXTI, where C is a power of 2.
15876 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15877 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15878 SDValue LHS = Cond.getOperand(0);
15879 SDValue RHS = Cond.getOperand(1);
15880 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15881 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15882 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15883 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15884 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15885 return DAG.getSelect(DL, VT,
15886 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15887 False, True);
15888 }
15889 }
15890 return SDValue();
15891}
15892
15894 const RISCVSubtarget &Subtarget) {
15895 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15896 return Folded;
15897
15898 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15899 return V;
15900
15901 if (Subtarget.hasConditionalMoveFusion())
15902 return SDValue();
15903
15904 SDValue TrueVal = N->getOperand(1);
15905 SDValue FalseVal = N->getOperand(2);
15906 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15907 return V;
15908 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15909}
15910
15911/// If we have a build_vector where each lane is binop X, C, where C
15912/// is a constant (but not necessarily the same constant on all lanes),
15913/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15914/// We assume that materializing a constant build vector will be no more
15915/// expensive than performing O(n) binops.
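/// Illustrative example (added annotation, not from the original source):
///   build_vector (add x0, 1), (add x1, 2)
///     -> add (build_vector x0, x1), (build_vector 1, 2)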
15917 const RISCVSubtarget &Subtarget,
15918 const RISCVTargetLowering &TLI) {
15919 SDLoc DL(N);
15920 EVT VT = N->getValueType(0);
15921
15922 assert(!VT.isScalableVector() && "unexpected build vector");
15923
15924 if (VT.getVectorNumElements() == 1)
15925 return SDValue();
15926
15927 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15928 if (!TLI.isBinOp(Opcode))
15929 return SDValue();
15930
15931 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15932 return SDValue();
15933
15934 // This BUILD_VECTOR involves an implicit truncation, and sinking
15935 // truncates through binops is non-trivial.
15936 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15937 return SDValue();
15938
15939 SmallVector<SDValue> LHSOps;
15940 SmallVector<SDValue> RHSOps;
15941 for (SDValue Op : N->ops()) {
15942 if (Op.isUndef()) {
15943 // We can't form a divide or remainder from undef.
15944 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15945 return SDValue();
15946
15947 LHSOps.push_back(Op);
15948 RHSOps.push_back(Op);
15949 continue;
15950 }
15951
15952 // TODO: We can handle operations which have a neutral rhs value
15953 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15954 // of profit in a more explicit manner.
15955 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15956 return SDValue();
15957
15958 LHSOps.push_back(Op.getOperand(0));
15959 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15960 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15961 return SDValue();
15962 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15963 // have different LHS and RHS types.
15964 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15965 return SDValue();
15966
15967 RHSOps.push_back(Op.getOperand(1));
15968 }
15969
15970 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15971 DAG.getBuildVector(VT, DL, RHSOps));
15972}
15973
15975 const RISCVSubtarget &Subtarget,
15976 const RISCVTargetLowering &TLI) {
15977 SDValue InVec = N->getOperand(0);
15978 SDValue InVal = N->getOperand(1);
15979 SDValue EltNo = N->getOperand(2);
15980 SDLoc DL(N);
15981
15982 EVT VT = InVec.getValueType();
15983 if (VT.isScalableVector())
15984 return SDValue();
15985
15986 if (!InVec.hasOneUse())
15987 return SDValue();
15988
15989 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15990 // move the insert_vector_elts into the arms of the binop. Note that
15991 // the new RHS must be a constant.
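  // Illustrative example (added annotation, not from the original source):
  //   insert_vector_elt (add V, VecC), (add S, C2), Elt
  //     -> add (insert_vector_elt V, S, Elt), (insert_vector_elt VecC, C2, Elt)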
15992 const unsigned InVecOpcode = InVec->getOpcode();
15993 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15994 InVal.hasOneUse()) {
15995 SDValue InVecLHS = InVec->getOperand(0);
15996 SDValue InVecRHS = InVec->getOperand(1);
15997 SDValue InValLHS = InVal->getOperand(0);
15998 SDValue InValRHS = InVal->getOperand(1);
15999
16001 return SDValue();
16002 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16003 return SDValue();
16004 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16005 // have different LHS and RHS types.
16006 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16007 return SDValue();
16009 InVecLHS, InValLHS, EltNo);
16011 InVecRHS, InValRHS, EltNo);
16012 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16013 }
16014
16015 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16016 // move the insert_vector_elt to the source operand of the concat_vector.
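  // Illustrative example (added annotation, not from the original source):
  // inserting into lane 5 of a concat of two v4i32 sources becomes an insert
  // into lane 1 of the second source, followed by re-concatenation.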
16017 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16018 return SDValue();
16019
16020 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16021 if (!IndexC)
16022 return SDValue();
16023 unsigned Elt = IndexC->getZExtValue();
16024
16025 EVT ConcatVT = InVec.getOperand(0).getValueType();
16026 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16027 return SDValue();
16028 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16029 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16030
16031 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16032 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16033 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16034 ConcatOp, InVal, NewIdx);
16035
16036 SmallVector<SDValue> ConcatOps;
16037 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16038 ConcatOps[ConcatOpIdx] = ConcatOp;
16039 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16040}
16041
16042// If we're concatenating a series of vector loads like
16043// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16044// then we can turn this into a strided load by widening the vector elements:
16045// vlse32 p, stride=n
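// Concretely (illustrative, added annotation): four v4i8 loads at p, p+n,
// p+2*n and p+3*n become a single v4i32 strided load with stride n, which is
// then bitcast back to the original v16i8 concat type.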
16047 const RISCVSubtarget &Subtarget,
16048 const RISCVTargetLowering &TLI) {
16049 SDLoc DL(N);
16050 EVT VT = N->getValueType(0);
16051
16052 // Only perform this combine on legal MVTs.
16053 if (!TLI.isTypeLegal(VT))
16054 return SDValue();
16055
16056 // TODO: Potentially extend this to scalable vectors
16057 if (VT.isScalableVector())
16058 return SDValue();
16059
16060 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16061 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16062 !SDValue(BaseLd, 0).hasOneUse())
16063 return SDValue();
16064
16065 EVT BaseLdVT = BaseLd->getValueType(0);
16066
16067 // Go through the loads and check that they're strided
16069 Lds.push_back(BaseLd);
16070 Align Align = BaseLd->getAlign();
16071 for (SDValue Op : N->ops().drop_front()) {
16072 auto *Ld = dyn_cast<LoadSDNode>(Op);
16073 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16074 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16075 Ld->getValueType(0) != BaseLdVT)
16076 return SDValue();
16077
16078 Lds.push_back(Ld);
16079
16080 // The common alignment is the most restrictive (smallest) of all the loads
16081 Align = std::min(Align, Ld->getAlign());
16082 }
16083
16084 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16085 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16086 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16087 // If the load ptrs can be decomposed into a common (Base + Index) with a
16088 // common constant stride, then return the constant stride.
16089 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16090 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16091 if (BIO1.equalBaseIndex(BIO2, DAG))
16092 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16093
16094 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16095 SDValue P1 = Ld1->getBasePtr();
16096 SDValue P2 = Ld2->getBasePtr();
16097 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16098 return {{P2.getOperand(1), false}};
16099 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16100 return {{P1.getOperand(1), true}};
16101
16102 return std::nullopt;
16103 };
16104
16105 // Get the distance between the first and second loads
16106 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16107 if (!BaseDiff)
16108 return SDValue();
16109
16110 // Check all the loads are the same distance apart
16111 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16112 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16113 return SDValue();
16114
16115 // TODO: At this point, we've successfully matched a generalized gather
16116 // load. Maybe we should emit that, and then move the specialized
16117 // matchers above and below into a DAG combine?
16118
16119 // Get the widened scalar type, e.g. v4i8 -> i64
16120 unsigned WideScalarBitWidth =
16121 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16122 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16123
16124 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
16125 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16126 if (!TLI.isTypeLegal(WideVecVT))
16127 return SDValue();
16128
16129 // Check that the operation is legal
16130 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16131 return SDValue();
16132
16133 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16134 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
16135 ? std::get<SDValue>(StrideVariant)
16136 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
16137 Lds[0]->getOffset().getValueType());
16138 if (MustNegateStride)
16139 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16140
16141 SDValue AllOneMask =
16142 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16143 DAG.getConstant(1, DL, MVT::i1));
16144
16145 uint64_t MemSize;
16146 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16147 ConstStride && ConstStride->getSExtValue() >= 0)
16148 // total size = (elsize * n) + (stride - elsize) * (n-1)
16149 // = elsize + stride * (n-1)
16150 MemSize = WideScalarVT.getSizeInBits() +
16151 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16152 else
16153 // If Stride isn't constant, then we can't know how much it will load
16155
16157 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
16158 Align);
16159
16160 SDValue StridedLoad = DAG.getStridedLoadVP(
16161 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
16162 AllOneMask,
16163 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
16164
16165 for (SDValue Ld : N->ops())
16166 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
16167
16168 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
16169}
16170
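// Added annotation (illustrative, not from the original source): fold
//   (add (vwmul_vl a, b, undef, m, vl), c) -> (vwmacc_vl a, b, c, m, vl)
// and likewise VWMULU_VL -> VWMACCU_VL and VWMULSU_VL -> VWMACCSU_VL, provided
// the mask and VL operands of the add and the multiply match.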
16172 const RISCVSubtarget &Subtarget) {
16173
16174 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16175
16176 if (N->getValueType(0).isFixedLengthVector())
16177 return SDValue();
16178
16179 SDValue Addend = N->getOperand(0);
16180 SDValue MulOp = N->getOperand(1);
16181
16182 if (N->getOpcode() == RISCVISD::ADD_VL) {
16183 SDValue AddMergeOp = N->getOperand(2);
16184 if (!AddMergeOp.isUndef())
16185 return SDValue();
16186 }
16187
16188 auto IsVWMulOpc = [](unsigned Opc) {
16189 switch (Opc) {
16190 case RISCVISD::VWMUL_VL:
16193 return true;
16194 default:
16195 return false;
16196 }
16197 };
16198
16199 if (!IsVWMulOpc(MulOp.getOpcode()))
16200 std::swap(Addend, MulOp);
16201
16202 if (!IsVWMulOpc(MulOp.getOpcode()))
16203 return SDValue();
16204
16205 SDValue MulMergeOp = MulOp.getOperand(2);
16206
16207 if (!MulMergeOp.isUndef())
16208 return SDValue();
16209
16210 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
16211 const RISCVSubtarget &Subtarget) {
16212 if (N->getOpcode() == ISD::ADD) {
16213 SDLoc DL(N);
16214 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
16215 Subtarget);
16216 }
16217 return std::make_pair(N->getOperand(3), N->getOperand(4));
16218 }(N, DAG, Subtarget);
16219
16220 SDValue MulMask = MulOp.getOperand(3);
16221 SDValue MulVL = MulOp.getOperand(4);
16222
16223 if (AddMask != MulMask || AddVL != MulVL)
16224 return SDValue();
16225
16226 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
16227 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
16228 "Unexpected opcode after VWMACC_VL");
16229 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
16230 "Unexpected opcode after VWMACC_VL!");
16231 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
16232 "Unexpected opcode after VWMUL_VL!");
16233 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
16234 "Unexpected opcode after VWMUL_VL!");
16235
16236 SDLoc DL(N);
16237 EVT VT = N->getValueType(0);
16238 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
16239 AddVL};
16240 return DAG.getNode(Opc, DL, VT, Ops);
16241}
16242
16244 ISD::MemIndexType &IndexType,
16246 if (!DCI.isBeforeLegalize())
16247 return false;
16248
16249 SelectionDAG &DAG = DCI.DAG;
16250 const MVT XLenVT =
16251 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
16252
16253 const EVT IndexVT = Index.getValueType();
16254
16255 // RISC-V indexed loads only support the "unsigned unscaled" addressing
16256 // mode, so anything else must be manually legalized.
16257 if (!isIndexTypeSigned(IndexType))
16258 return false;
16259
16260 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
16261 // Any index legalization should first promote to XLenVT, so we don't lose
16262 // bits when scaling. This may create an illegal index type so we let
16263 // LLVM's legalization take care of the splitting.
16264 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
16266 IndexVT.changeVectorElementType(XLenVT), Index);
16267 }
16268 IndexType = ISD::UNSIGNED_SCALED;
16269 return true;
16270}
16271
16272/// Match the index vector of a scatter or gather node as the shuffle mask
16273/// which performs the rearrangement if possible. Will only match if
16274/// all lanes are touched, and thus replacing the scatter or gather with
16275/// a unit strided access and shuffle is legal.
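/// For example (illustrative, added annotation): a v4i32 gather with constant
/// byte indices <4, 0, 12, 8> and an all-ones mask touches every lane and can
/// become a unit-strided load followed by a shuffle with mask <1, 0, 3, 2>.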
16277 SmallVector<int> &ShuffleMask) {
16278 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16279 return false;
16281 return false;
16282
16283 const unsigned ElementSize = VT.getScalarStoreSize();
16284 const unsigned NumElems = VT.getVectorNumElements();
16285
16286 // Create the shuffle mask and check all bits active
16287 assert(ShuffleMask.empty());
16288 BitVector ActiveLanes(NumElems);
16289 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16290 // TODO: We've found an active bit of UB, and could be
16291 // more aggressive here if desired.
16292 if (Index->getOperand(i)->isUndef())
16293 return false;
16294 uint64_t C = Index->getConstantOperandVal(i);
16295 if (C % ElementSize != 0)
16296 return false;
16297 C = C / ElementSize;
16298 if (C >= NumElems)
16299 return false;
16300 ShuffleMask.push_back(C);
16301 ActiveLanes.set(C);
16302 }
16303 return ActiveLanes.all();
16304}
16305
16306/// Match the index of a gather or scatter operation as an operation
16307/// with twice the element width and half the number of elements. This is
16308/// generally profitable (if legal) because these operations are linear
16309/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
16310/// come out ahead.
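/// For example (illustrative, added annotation): a v4i32 gather with constant
/// byte indices <0, 4, 16, 20> can be treated as a v2i64 gather with byte
/// indices <0, 16>, assuming the alignment and ELEN checks in the body succeed.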
16312 Align BaseAlign, const RISCVSubtarget &ST) {
16313 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16314 return false;
16316 return false;
16317
16318 // Attempt a doubling. If we can use an element type 4x or 8x in
16319 // size, this will happen via multiple iterations of the transform.
16320 const unsigned NumElems = VT.getVectorNumElements();
16321 if (NumElems % 2 != 0)
16322 return false;
16323
16324 const unsigned ElementSize = VT.getScalarStoreSize();
16325 const unsigned WiderElementSize = ElementSize * 2;
16326 if (WiderElementSize > ST.getELen()/8)
16327 return false;
16328
16329 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16330 return false;
16331
16332 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16333 // TODO: We've found an active bit of UB, and could be
16334 // more aggressive here if desired.
16335 if (Index->getOperand(i)->isUndef())
16336 return false;
16337 // TODO: This offset check is too strict if we support fully
16338 // misaligned memory operations.
16339 uint64_t C = Index->getConstantOperandVal(i);
16340 if (i % 2 == 0) {
16341 if (C % WiderElementSize != 0)
16342 return false;
16343 continue;
16344 }
16345 uint64_t Last = Index->getConstantOperandVal(i-1);
16346 if (C != Last + ElementSize)
16347 return false;
16348 }
16349 return true;
16350}
16351
16352// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16353// This is beneficial when X and Y are both low-precision vectors of the same
16354// value type. Since the truncate would be lowered into n levels of
16355// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such a
16356// pattern would otherwise be expanded into a series of "vsetvli" and "vnsrl"
16357// instructions before reaching this point.
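// Illustrative example (added annotation, not from the original source): for
// v4i8 X and Y,
//   trunc (sra (sext X to v4i32), (zext Y to v4i32)) -> sra (X, smin (Y, 7))
// which avoids the intermediate widening and narrowing entirely.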
16359 SDValue Mask = N->getOperand(1);
16360 SDValue VL = N->getOperand(2);
16361
16362 bool IsVLMAX = isAllOnesConstant(VL) ||
16363 (isa<RegisterSDNode>(VL) &&
16364 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16365 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16366 Mask.getOperand(0) != VL)
16367 return SDValue();
16368
16369 auto IsTruncNode = [&](SDValue V) {
16370 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16371 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16372 };
16373
16374 SDValue Op = N->getOperand(0);
16375
16376 // We first need to find the innermost TRUNCATE_VECTOR_VL node
16377 // to recognize such a pattern.
16378 while (IsTruncNode(Op)) {
16379 if (!Op.hasOneUse())
16380 return SDValue();
16381 Op = Op.getOperand(0);
16382 }
16383
16384 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16385 return SDValue();
16386
16387 SDValue N0 = Op.getOperand(0);
16388 SDValue N1 = Op.getOperand(1);
16389 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16390 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16391 return SDValue();
16392
16393 SDValue N00 = N0.getOperand(0);
16394 SDValue N10 = N1.getOperand(0);
16395 if (!N00.getValueType().isVector() ||
16396 N00.getValueType() != N10.getValueType() ||
16397 N->getValueType(0) != N10.getValueType())
16398 return SDValue();
16399
16400 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16401 SDValue SMin =
16402 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16403 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16404 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16405}
16406
16407// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16408// maximum value for the truncated type.
16409// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16410// is the signed maximum value for the truncated type and C2 is the signed
16411// minimum value.
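// Illustrative examples (added annotation, not from the original source),
// truncating i16 elements to i8:
//   (truncate_vector_vl (umin X, 255))              -> (vnclipu X)
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> (vnclip X)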
16413 const RISCVSubtarget &Subtarget) {
16414 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16415
16416 MVT VT = N->getSimpleValueType(0);
16417
16418 SDValue Mask = N->getOperand(1);
16419 SDValue VL = N->getOperand(2);
16420
16421 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16422 APInt &SplatVal) {
16423 if (V.getOpcode() != Opc &&
16424 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16425 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16426 return SDValue();
16427
16428 SDValue Op = V.getOperand(1);
16429
16430 // Peek through conversion between fixed and scalable vectors.
16431 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16432 isNullConstant(Op.getOperand(2)) &&
16433 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16434 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16435 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16436 isNullConstant(Op.getOperand(1).getOperand(1)))
16437 Op = Op.getOperand(1).getOperand(0);
16438
16439 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16440 return V.getOperand(0);
16441
16442 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16443 Op.getOperand(2) == VL) {
16444 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16445 SplatVal =
16446 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16447 return V.getOperand(0);
16448 }
16449 }
16450
16451 return SDValue();
16452 };
16453
16454 SDLoc DL(N);
16455
16456 auto DetectUSatPattern = [&](SDValue V) {
16457 APInt LoC, HiC;
16458
16459 // Simple case, V is a UMIN.
16460 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16461 if (HiC.isMask(VT.getScalarSizeInBits()))
16462 return UMinOp;
16463
16464 // If we have an SMAX that removes negative numbers first, then we can match
16465 // SMIN instead of UMIN.
16466 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16467 if (SDValue SMaxOp =
16468 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16469 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16470 return SMinOp;
16471
16472 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16473 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16474 // first.
16475 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16476 if (SDValue SMinOp =
16477 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16478 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16479 HiC.uge(LoC))
16480 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16481 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16482 Mask, VL);
16483
16484 return SDValue();
16485 };
16486
16487 auto DetectSSatPattern = [&](SDValue V) {
16488 unsigned NumDstBits = VT.getScalarSizeInBits();
16489 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16490 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16491 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16492
16493 APInt HiC, LoC;
16494 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16495 if (SDValue SMaxOp =
16496 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16497 if (HiC == SignedMax && LoC == SignedMin)
16498 return SMaxOp;
16499
16500 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16501 if (SDValue SMinOp =
16502 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16503 if (HiC == SignedMax && LoC == SignedMin)
16504 return SMinOp;
16505
16506 return SDValue();
16507 };
16508
16509 SDValue Src = N->getOperand(0);
16510
16511 // Look through multiple layers of truncates.
16512 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16513 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16514 Src.hasOneUse())
16515 Src = Src.getOperand(0);
16516
16517 SDValue Val;
16518 unsigned ClipOpc;
16519 if ((Val = DetectUSatPattern(Src)))
16521 else if ((Val = DetectSSatPattern(Src)))
16523 else
16524 return SDValue();
16525
16526 MVT ValVT = Val.getSimpleValueType();
16527
16528 do {
16529 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16530 ValVT = ValVT.changeVectorElementType(ValEltVT);
16531 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
16532 } while (ValVT != VT);
16533
16534 return Val;
16535}
16536
16538 DAGCombinerInfo &DCI) const {
16539 SelectionDAG &DAG = DCI.DAG;
16540 const MVT XLenVT = Subtarget.getXLenVT();
16541 SDLoc DL(N);
16542
16543 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16544 // bits are demanded. N will be added to the Worklist if it was not deleted.
16545 // Caller should return SDValue(N, 0) if this returns true.
16546 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16547 SDValue Op = N->getOperand(OpNo);
16548 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16549 if (!SimplifyDemandedBits(Op, Mask, DCI))
16550 return false;
16551
16552 if (N->getOpcode() != ISD::DELETED_NODE)
16553 DCI.AddToWorklist(N);
16554 return true;
16555 };
16556
16557 switch (N->getOpcode()) {
16558 default:
16559 break;
16560 case RISCVISD::SplitF64: {
16561 SDValue Op0 = N->getOperand(0);
16562 // If the input to SplitF64 is just BuildPairF64 then the operation is
16563 // redundant. Instead, use BuildPairF64's operands directly.
16564 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16565 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16566
16567 if (Op0->isUndef()) {
16568 SDValue Lo = DAG.getUNDEF(MVT::i32);
16569 SDValue Hi = DAG.getUNDEF(MVT::i32);
16570 return DCI.CombineTo(N, Lo, Hi);
16571 }
16572
16573 // It's cheaper to materialise two 32-bit integers than to load a double
16574 // from the constant pool and transfer it to integer registers through the
16575 // stack.
16576 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16577 APInt V = C->getValueAPF().bitcastToAPInt();
16578 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16579 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16580 return DCI.CombineTo(N, Lo, Hi);
16581 }
16582
16583 // This is a target-specific version of a DAGCombine performed in
16584 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16585 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16586 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16587 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16588 !Op0.getNode()->hasOneUse())
16589 break;
16590 SDValue NewSplitF64 =
16591 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16592 Op0.getOperand(0));
16593 SDValue Lo = NewSplitF64.getValue(0);
16594 SDValue Hi = NewSplitF64.getValue(1);
16595 APInt SignBit = APInt::getSignMask(32);
16596 if (Op0.getOpcode() == ISD::FNEG) {
16597 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16598 DAG.getConstant(SignBit, DL, MVT::i32));
16599 return DCI.CombineTo(N, Lo, NewHi);
16600 }
16601 assert(Op0.getOpcode() == ISD::FABS);
16602 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16603 DAG.getConstant(~SignBit, DL, MVT::i32));
16604 return DCI.CombineTo(N, Lo, NewHi);
16605 }
16606 case RISCVISD::SLLW:
16607 case RISCVISD::SRAW:
16608 case RISCVISD::SRLW:
16609 case RISCVISD::RORW:
16610 case RISCVISD::ROLW: {
16611 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16612 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16613 SimplifyDemandedLowBitsHelper(1, 5))
16614 return SDValue(N, 0);
16615
16616 break;
16617 }
16618 case RISCVISD::CLZW:
16619 case RISCVISD::CTZW: {
16620 // Only the lower 32 bits of the first operand are read
16621 if (SimplifyDemandedLowBitsHelper(0, 32))
16622 return SDValue(N, 0);
16623 break;
16624 }
16626 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16627 // conversion is unnecessary and can be replaced with the
16628 // FMV_X_ANYEXTW_RV64 operand.
16629 SDValue Op0 = N->getOperand(0);
16631 return Op0.getOperand(0);
16632 break;
16633 }
16636 SDLoc DL(N);
16637 SDValue Op0 = N->getOperand(0);
16638 MVT VT = N->getSimpleValueType(0);
16639 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16640 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16641 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16642 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16643 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16644 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16645 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16646 assert(Op0.getOperand(0).getValueType() == VT &&
16647 "Unexpected value type!");
16648 return Op0.getOperand(0);
16649 }
16650
16651 // This is a target-specific version of a DAGCombine performed in
16652 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16653 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16654 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16655 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16656 !Op0.getNode()->hasOneUse())
16657 break;
16658 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16659 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16660 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16661 if (Op0.getOpcode() == ISD::FNEG)
16662 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16663 DAG.getConstant(SignBit, DL, VT));
16664
16665 assert(Op0.getOpcode() == ISD::FABS);
16666 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16667 DAG.getConstant(~SignBit, DL, VT));
16668 }
16669 case ISD::ABS: {
16670 EVT VT = N->getValueType(0);
16671 SDValue N0 = N->getOperand(0);
16672 // abs (sext) -> zext (abs)
16673 // abs (zext) -> zext (handled elsewhere)
16674 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16675 SDValue Src = N0.getOperand(0);
16676 SDLoc DL(N);
16677 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16678 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16679 }
16680 break;
16681 }
16682 case ISD::ADD: {
16683 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16684 return V;
16685 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16686 return V;
16687 return performADDCombine(N, DCI, Subtarget);
16688 }
16689 case ISD::SUB: {
16690 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16691 return V;
16692 return performSUBCombine(N, DAG, Subtarget);
16693 }
16694 case ISD::AND:
16695 return performANDCombine(N, DCI, Subtarget);
16696 case ISD::OR: {
16697 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16698 return V;
16699 return performORCombine(N, DCI, Subtarget);
16700 }
16701 case ISD::XOR:
16702 return performXORCombine(N, DAG, Subtarget);
16703 case ISD::MUL:
16704 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16705 return V;
16706 return performMULCombine(N, DAG, DCI, Subtarget);
16707 case ISD::SDIV:
16708 case ISD::UDIV:
16709 case ISD::SREM:
16710 case ISD::UREM:
16711 if (SDValue V = combineBinOpOfZExt(N, DAG))
16712 return V;
16713 break;
16714 case ISD::FADD:
16715 case ISD::UMAX:
16716 case ISD::UMIN:
16717 case ISD::SMAX:
16718 case ISD::SMIN:
16719 case ISD::FMAXNUM:
16720 case ISD::FMINNUM: {
16721 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16722 return V;
16723 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16724 return V;
16725 return SDValue();
16726 }
16727 case ISD::SETCC:
16728 return performSETCCCombine(N, DAG, Subtarget);
16730 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16731 case ISD::ZERO_EXTEND:
16732 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16733 // type legalization. This is safe because fp_to_uint produces poison if
16734 // it overflows.
16735 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16736 SDValue Src = N->getOperand(0);
16737 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16738 isTypeLegal(Src.getOperand(0).getValueType()))
16739 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16740 Src.getOperand(0));
16741 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16742 isTypeLegal(Src.getOperand(1).getValueType())) {
16743 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16744 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16745 Src.getOperand(0), Src.getOperand(1));
16746 DCI.CombineTo(N, Res);
16747 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16748 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16749 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16750 }
16751 }
16752 return SDValue();
16754 if (SDValue V = combineTruncOfSraSext(N, DAG))
16755 return V;
16756 return combineTruncToVnclip(N, DAG, Subtarget);
16757 case ISD::TRUNCATE:
16758 return performTRUNCATECombine(N, DAG, Subtarget);
16759 case ISD::SELECT:
16760 return performSELECTCombine(N, DAG, Subtarget);
16762 case RISCVISD::CZERO_NEZ: {
16763 SDValue Val = N->getOperand(0);
16764 SDValue Cond = N->getOperand(1);
16765
16766 unsigned Opc = N->getOpcode();
16767
16768 // czero_eqz x, x -> x
16769 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16770 return Val;
16771
16772 unsigned InvOpc =
16774
16775 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16776 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16777 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16778 SDValue NewCond = Cond.getOperand(0);
16779 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16780 if (DAG.MaskedValueIsZero(NewCond, Mask))
16781 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16782 }
16783 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16784 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16785 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16786 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16787 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16788 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16789 if (ISD::isIntEqualitySetCC(CCVal))
16790 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16791 N->getValueType(0), Val, Cond.getOperand(0));
16792 }
16793 return SDValue();
16794 }
16795 case RISCVISD::SELECT_CC: {
16796 // Transform
16797 SDValue LHS = N->getOperand(0);
16798 SDValue RHS = N->getOperand(1);
16799 SDValue CC = N->getOperand(2);
16800 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16801 SDValue TrueV = N->getOperand(3);
16802 SDValue FalseV = N->getOperand(4);
16803 SDLoc DL(N);
16804 EVT VT = N->getValueType(0);
16805
16806 // If the True and False values are the same, we don't need a select_cc.
16807 if (TrueV == FalseV)
16808 return TrueV;
16809
16810 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16811 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
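    // e.g. (illustrative, added annotation, RV64): (select (x < 0), 3, 5)
    //   -> ((x >> 63) & (3 - 5)) + 5 = ((x >> 63) & -2) + 5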
16812 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16813 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16814 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16815 if (CCVal == ISD::CondCode::SETGE)
16816 std::swap(TrueV, FalseV);
16817
16818 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16819 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16820 // Only handle simm12; if it is not in this range, it can be considered
16821 // as a register.
16822 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16823 isInt<12>(TrueSImm - FalseSImm)) {
16824 SDValue SRA =
16825 DAG.getNode(ISD::SRA, DL, VT, LHS,
16826 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16827 SDValue AND =
16828 DAG.getNode(ISD::AND, DL, VT, SRA,
16829 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16830 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16831 }
16832
16833 if (CCVal == ISD::CondCode::SETGE)
16834 std::swap(TrueV, FalseV);
16835 }
16836
16837 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16838 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16839 {LHS, RHS, CC, TrueV, FalseV});
16840
16841 if (!Subtarget.hasConditionalMoveFusion()) {
16842 // (select c, -1, y) -> -c | y
16843 if (isAllOnesConstant(TrueV)) {
16844 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16845 SDValue Neg = DAG.getNegative(C, DL, VT);
16846 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16847 }
16848 // (select c, y, -1) -> -!c | y
16849 if (isAllOnesConstant(FalseV)) {
16850 SDValue C =
16851 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16852 SDValue Neg = DAG.getNegative(C, DL, VT);
16853 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16854 }
16855
16856 // (select c, 0, y) -> -!c & y
16857 if (isNullConstant(TrueV)) {
16858 SDValue C =
16859 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16860 SDValue Neg = DAG.getNegative(C, DL, VT);
16861 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16862 }
16863 // (select c, y, 0) -> -c & y
16864 if (isNullConstant(FalseV)) {
16865 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16866 SDValue Neg = DAG.getNegative(C, DL, VT);
16867 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16868 }
16869 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16870 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16871 if (((isOneConstant(FalseV) && LHS == TrueV &&
16872 CCVal == ISD::CondCode::SETNE) ||
16873 (isOneConstant(TrueV) && LHS == FalseV &&
16874 CCVal == ISD::CondCode::SETEQ)) &&
16876 // freeze it to be safe.
16877 LHS = DAG.getFreeze(LHS);
16879 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16880 }
16881 }
16882
16883 // If both true/false are an xor with 1, pull through the select.
16884 // This can occur after op legalization if both operands are setccs that
16885 // require an xor to invert.
16886 // FIXME: Generalize to other binary ops with identical operand?
16887 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16888 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16889 isOneConstant(TrueV.getOperand(1)) &&
16890 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16891 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16892 TrueV.getOperand(0), FalseV.getOperand(0));
16893 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16894 }
16895
16896 return SDValue();
16897 }
16898 case RISCVISD::BR_CC: {
16899 SDValue LHS = N->getOperand(1);
16900 SDValue RHS = N->getOperand(2);
16901 SDValue CC = N->getOperand(3);
16902 SDLoc DL(N);
16903
16904 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16905 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16906 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16907
16908 return SDValue();
16909 }
16910 case ISD::BITREVERSE:
16911 return performBITREVERSECombine(N, DAG, Subtarget);
16912 case ISD::FP_TO_SINT:
16913 case ISD::FP_TO_UINT:
16914 return performFP_TO_INTCombine(N, DCI, Subtarget);
16917 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16918 case ISD::FCOPYSIGN: {
16919 EVT VT = N->getValueType(0);
16920 if (!VT.isVector())
16921 break;
16922 // There is a form of VFSGNJ which injects the negated sign of its second
16923 // operand. Try to bubble any FNEG up after the extend/round to produce
16924 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16925 // TRUNC=1.
16926 SDValue In2 = N->getOperand(1);
16927 // Avoid cases where the extend/round has multiple uses, as duplicating
16928 // those is typically more expensive than removing a fneg.
16929 if (!In2.hasOneUse())
16930 break;
16931 if (In2.getOpcode() != ISD::FP_EXTEND &&
16932 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16933 break;
16934 In2 = In2.getOperand(0);
16935 if (In2.getOpcode() != ISD::FNEG)
16936 break;
16937 SDLoc DL(N);
16938 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16939 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16940 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16941 }
16942 case ISD::MGATHER: {
16943 const auto *MGN = cast<MaskedGatherSDNode>(N);
16944 const EVT VT = N->getValueType(0);
16945 SDValue Index = MGN->getIndex();
16946 SDValue ScaleOp = MGN->getScale();
16947 ISD::MemIndexType IndexType = MGN->getIndexType();
16948 assert(!MGN->isIndexScaled() &&
16949 "Scaled gather/scatter should not be formed");
16950
16951 SDLoc DL(N);
16952 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16953 return DAG.getMaskedGather(
16954 N->getVTList(), MGN->getMemoryVT(), DL,
16955 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16956 MGN->getBasePtr(), Index, ScaleOp},
16957 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16958
16959 if (narrowIndex(Index, IndexType, DAG))
16960 return DAG.getMaskedGather(
16961 N->getVTList(), MGN->getMemoryVT(), DL,
16962 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16963 MGN->getBasePtr(), Index, ScaleOp},
16964 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16965
16966 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16967 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16968 // The sequence will be XLenVT, not the type of Index. Tell
16969 // isSimpleVIDSequence this so we avoid overflow.
16970 if (std::optional<VIDSequence> SimpleVID =
16971 isSimpleVIDSequence(Index, Subtarget.getXLen());
16972 SimpleVID && SimpleVID->StepDenominator == 1) {
16973 const int64_t StepNumerator = SimpleVID->StepNumerator;
16974 const int64_t Addend = SimpleVID->Addend;
16975
16976 // Note: We don't need to check alignment here since (by assumption
16977 // from the existence of the gather), our offsets must be sufficiently
16978 // aligned.
16979
16980 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16981 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16982 assert(IndexType == ISD::UNSIGNED_SCALED);
16983 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16984 DAG.getConstant(Addend, DL, PtrVT));
16985
16986 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
16988 SDValue StridedLoad =
16989 DAG.getStridedLoadVP(VT, DL, MGN->getChain(), BasePtr,
16990 DAG.getConstant(StepNumerator, DL, XLenVT),
16991 MGN->getMask(), EVL, MGN->getMemOperand());
16992 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
16993 StridedLoad, MGN->getPassThru(), EVL);
16994 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
16995 DL);
16996 }
16997 }
16998
16999 SmallVector<int> ShuffleMask;
17000 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17001 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17002 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17003 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17004 MGN->getMask(), DAG.getUNDEF(VT),
17005 MGN->getMemoryVT(), MGN->getMemOperand(),
17007 SDValue Shuffle =
17008 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17009 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17010 }
17011
17012 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17013 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17014 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
17015 SmallVector<SDValue> NewIndices;
17016 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17017 NewIndices.push_back(Index.getOperand(i));
17018 EVT IndexVT = Index.getValueType()
17020 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17021
17022 unsigned ElementSize = VT.getScalarStoreSize();
17023 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17024 auto EltCnt = VT.getVectorElementCount();
17025 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17026 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17027 EltCnt.divideCoefficientBy(2));
17028 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17029 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17030 EltCnt.divideCoefficientBy(2));
17031 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17032
17033 SDValue Gather =
17034 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17035 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17036 Index, ScaleOp},
17037 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17038 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17039 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17040 }
17041 break;
17042 }
17043 case ISD::MSCATTER:{
17044 const auto *MSN = cast<MaskedScatterSDNode>(N);
17045 SDValue Index = MSN->getIndex();
17046 SDValue ScaleOp = MSN->getScale();
17047 ISD::MemIndexType IndexType = MSN->getIndexType();
17048 assert(!MSN->isIndexScaled() &&
17049 "Scaled gather/scatter should not be formed");
17050
17051 SDLoc DL(N);
17052 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17053 return DAG.getMaskedScatter(
17054 N->getVTList(), MSN->getMemoryVT(), DL,
17055 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17056 Index, ScaleOp},
17057 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17058
17059 if (narrowIndex(Index, IndexType, DAG))
17060 return DAG.getMaskedScatter(
17061 N->getVTList(), MSN->getMemoryVT(), DL,
17062 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17063 Index, ScaleOp},
17064 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17065
17066 EVT VT = MSN->getValue()->getValueType(0);
17067 SmallVector<int> ShuffleMask;
17068 if (!MSN->isTruncatingStore() &&
17069 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17070 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17071 DAG.getUNDEF(VT), ShuffleMask);
17072 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17073 DAG.getUNDEF(XLenVT), MSN->getMask(),
17074 MSN->getMemoryVT(), MSN->getMemOperand(),
17075 ISD::UNINDEXED, false);
17076 }
17077 break;
17078 }
17079 case ISD::VP_GATHER: {
17080 const auto *VPGN = cast<VPGatherSDNode>(N);
17081 SDValue Index = VPGN->getIndex();
17082 SDValue ScaleOp = VPGN->getScale();
17083 ISD::MemIndexType IndexType = VPGN->getIndexType();
17084 assert(!VPGN->isIndexScaled() &&
17085 "Scaled gather/scatter should not be formed");
17086
17087 SDLoc DL(N);
17088 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17089 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17090 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17091 ScaleOp, VPGN->getMask(),
17092 VPGN->getVectorLength()},
17093 VPGN->getMemOperand(), IndexType);
17094
17095 if (narrowIndex(Index, IndexType, DAG))
17096 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17097 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17098 ScaleOp, VPGN->getMask(),
17099 VPGN->getVectorLength()},
17100 VPGN->getMemOperand(), IndexType);
17101
17102 break;
17103 }
17104 case ISD::VP_SCATTER: {
17105 const auto *VPSN = cast<VPScatterSDNode>(N);
17106 SDValue Index = VPSN->getIndex();
17107 SDValue ScaleOp = VPSN->getScale();
17108 ISD::MemIndexType IndexType = VPSN->getIndexType();
17109 assert(!VPSN->isIndexScaled() &&
17110 "Scaled gather/scatter should not be formed");
17111
17112 SDLoc DL(N);
17113 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17114 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17115 {VPSN->getChain(), VPSN->getValue(),
17116 VPSN->getBasePtr(), Index, ScaleOp,
17117 VPSN->getMask(), VPSN->getVectorLength()},
17118 VPSN->getMemOperand(), IndexType);
17119
17120 if (narrowIndex(Index, IndexType, DAG))
17121 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17122 {VPSN->getChain(), VPSN->getValue(),
17123 VPSN->getBasePtr(), Index, ScaleOp,
17124 VPSN->getMask(), VPSN->getVectorLength()},
17125 VPSN->getMemOperand(), IndexType);
17126 break;
17127 }
17128 case RISCVISD::SHL_VL:
17129 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17130 return V;
17131 [[fallthrough]];
17132 case RISCVISD::SRA_VL:
17133 case RISCVISD::SRL_VL: {
17134 SDValue ShAmt = N->getOperand(1);
17136 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17137 SDLoc DL(N);
17138 SDValue VL = N->getOperand(4);
17139 EVT VT = N->getValueType(0);
17140 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17141 ShAmt.getOperand(1), VL);
17142 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
17143 N->getOperand(2), N->getOperand(3), N->getOperand(4));
17144 }
17145 break;
17146 }
17147 case ISD::SRA:
17148 if (SDValue V = performSRACombine(N, DAG, Subtarget))
17149 return V;
17150 [[fallthrough]];
17151 case ISD::SRL:
17152 case ISD::SHL: {
17153 if (N->getOpcode() == ISD::SHL) {
17154 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17155 return V;
17156 }
17157 SDValue ShAmt = N->getOperand(1);
17159 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17160 SDLoc DL(N);
17161 EVT VT = N->getValueType(0);
17162 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17163 ShAmt.getOperand(1),
17164 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
17165 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
17166 }
17167 break;
17168 }
17169 case RISCVISD::ADD_VL:
17170 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17171 return V;
17172 return combineToVWMACC(N, DAG, Subtarget);
17177 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
17178 case RISCVISD::SUB_VL:
17179 case RISCVISD::MUL_VL:
17180 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
17189 return performVFMADD_VLCombine(N, DAG, Subtarget);
17190 case RISCVISD::FADD_VL:
17191 case RISCVISD::FSUB_VL:
17192 case RISCVISD::FMUL_VL:
17194 case RISCVISD::VFWSUB_W_VL: {
17195 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
17196 !Subtarget.hasVInstructionsF16())
17197 return SDValue();
17198 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
17199 }
17200 case ISD::LOAD:
17201 case ISD::STORE: {
17202 if (DCI.isAfterLegalizeDAG())
17203 if (SDValue V = performMemPairCombine(N, DCI))
17204 return V;
17205
17206 if (N->getOpcode() != ISD::STORE)
17207 break;
17208
17209 auto *Store = cast<StoreSDNode>(N);
17210 SDValue Chain = Store->getChain();
17211 EVT MemVT = Store->getMemoryVT();
17212 SDValue Val = Store->getValue();
17213 SDLoc DL(N);
17214
17215 bool IsScalarizable =
17216 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
17217 Store->isSimple() &&
17218 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
17219 isPowerOf2_64(MemVT.getSizeInBits()) &&
17220 MemVT.getSizeInBits() <= Subtarget.getXLen();
17221
17222 // If sufficiently aligned we can scalarize stores of constant vectors of
17223 // any power-of-two size up to XLen bits, provided that they aren't too
17224 // expensive to materialize.
17225 // vsetivli zero, 2, e8, m1, ta, ma
17226 // vmv.v.i v8, 4
17227 // vse64.v v8, (a0)
17228 // ->
17229 // li a1, 1028
17230 // sh a1, 0(a0)
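// (Worked example for the rewrite above: the two e8 elements of value 4 pack
//  into (4 << 8) | 4 = 0x0404 = 1028, which is cheap to materialise with a
//  single li and fits one 16-bit 'sh' store.)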
17231 if (DCI.isBeforeLegalize() && IsScalarizable &&
17233 // Get the constant vector bits
17234 APInt NewC(Val.getValueSizeInBits(), 0);
17235 uint64_t EltSize = Val.getScalarValueSizeInBits();
17236 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
17237 if (Val.getOperand(i).isUndef())
17238 continue;
17239 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
17240 i * EltSize);
17241 }
17242 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17243
17244 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
17245 true) <= 2 &&
17247 NewVT, *Store->getMemOperand())) {
17248 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
17249 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
17250 Store->getPointerInfo(), Store->getOriginalAlign(),
17251 Store->getMemOperand()->getFlags());
17252 }
17253 }
17254
17255 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
17256 // vsetivli zero, 2, e16, m1, ta, ma
17257 // vle16.v v8, (a0)
17258 // vse16.v v8, (a1)
17259 if (auto *L = dyn_cast<LoadSDNode>(Val);
17260 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
17261 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
17262 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
17263 L->getMemoryVT() == MemVT) {
17264 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17266 NewVT, *Store->getMemOperand()) &&
17268 NewVT, *L->getMemOperand())) {
17269 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17270 L->getPointerInfo(), L->getOriginalAlign(),
17271 L->getMemOperand()->getFlags());
17272 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17273 Store->getPointerInfo(), Store->getOriginalAlign(),
17274 Store->getMemOperand()->getFlags());
17275 }
17276 }
17277
17278 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17279 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17280 // any illegal types.
17281 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17282 (DCI.isAfterLegalizeDAG() &&
17284 isNullConstant(Val.getOperand(1)))) {
17285 SDValue Src = Val.getOperand(0);
17286 MVT VecVT = Src.getSimpleValueType();
17287 // VecVT should be scalable and memory VT should match the element type.
17288 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17289 MemVT == VecVT.getVectorElementType()) {
17290 SDLoc DL(N);
17291 MVT MaskVT = getMaskTypeFor(VecVT);
17292 return DAG.getStoreVP(
17293 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17294 DAG.getConstant(1, DL, MaskVT),
17295 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17296 Store->getMemOperand(), Store->getAddressingMode(),
17297 Store->isTruncatingStore(), /*IsCompress*/ false);
17298 }
17299 }
17300
17301 break;
17302 }
17303 case ISD::SPLAT_VECTOR: {
17304 EVT VT = N->getValueType(0);
17305 // Only perform this combine on legal MVT types.
17306 if (!isTypeLegal(VT))
17307 break;
17308 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17309 DAG, Subtarget))
17310 return Gather;
17311 break;
17312 }
17313 case ISD::BUILD_VECTOR:
17314 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17315 return V;
17316 break;
17318 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17319 return V;
17320 break;
17322 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17323 return V;
17324 break;
17325 case RISCVISD::VFMV_V_F_VL: {
17326 const MVT VT = N->getSimpleValueType(0);
17327 SDValue Passthru = N->getOperand(0);
17328 SDValue Scalar = N->getOperand(1);
17329 SDValue VL = N->getOperand(2);
17330
17331 // If VL is 1, we can use vfmv.s.f.
17332 if (isOneConstant(VL))
17333 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17334 break;
17335 }
17336 case RISCVISD::VMV_V_X_VL: {
17337 const MVT VT = N->getSimpleValueType(0);
17338 SDValue Passthru = N->getOperand(0);
17339 SDValue Scalar = N->getOperand(1);
17340 SDValue VL = N->getOperand(2);
17341
17342 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17343 // scalar input.
17344 unsigned ScalarSize = Scalar.getValueSizeInBits();
17345 unsigned EltWidth = VT.getScalarSizeInBits();
17346 if (ScalarSize > EltWidth && Passthru.isUndef())
17347 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17348 return SDValue(N, 0);
17349
17350 // If VL is 1 and the scalar value won't benefit from an immediate, we can
17351 // use vmv.s.x.
17352 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17353 if (isOneConstant(VL) &&
17354 (!Const || Const->isZero() ||
17355 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17356 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17357
17358 break;
17359 }
17360 case RISCVISD::VFMV_S_F_VL: {
17361 SDValue Src = N->getOperand(1);
17362 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17363 // into an undef vector.
17364 // TODO: Could use a vslide or vmv.v.v for non-undef.
17365 if (N->getOperand(0).isUndef() &&
17366 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17367 isNullConstant(Src.getOperand(1)) &&
17368 Src.getOperand(0).getValueType().isScalableVector()) {
17369 EVT VT = N->getValueType(0);
17370 EVT SrcVT = Src.getOperand(0).getValueType();
17372 // Widths match, just return the original vector.
17373 if (SrcVT == VT)
17374 return Src.getOperand(0);
17375 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17376 }
17377 [[fallthrough]];
17378 }
17379 case RISCVISD::VMV_S_X_VL: {
17380 const MVT VT = N->getSimpleValueType(0);
17381 SDValue Passthru = N->getOperand(0);
17382 SDValue Scalar = N->getOperand(1);
17383 SDValue VL = N->getOperand(2);
17384
17385 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17386 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17387 return Scalar.getOperand(0);
17388
17389 // Use M1 or smaller to avoid over-constraining register allocation.
17390 const MVT M1VT = getLMUL1VT(VT);
17391 if (M1VT.bitsLT(VT)) {
17392 SDValue M1Passthru =
17393 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17394 DAG.getVectorIdxConstant(0, DL));
17395 SDValue Result =
17396 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17397 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17398 DAG.getVectorIdxConstant(0, DL));
17399 return Result;
17400 }
17401
17402 // We use a vmv.v.i if possible. We limit this to LMUL1; LMUL2 or
17403 // higher would overly constrain the register allocator for no
17404 // purpose.
17405 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17406 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17407 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17408 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17409
17410 break;
17411 }
17412 case RISCVISD::VMV_X_S: {
17413 SDValue Vec = N->getOperand(0);
17414 MVT VecVT = N->getOperand(0).getSimpleValueType();
17415 const MVT M1VT = getLMUL1VT(VecVT);
17416 if (M1VT.bitsLT(VecVT)) {
17417 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17418 DAG.getVectorIdxConstant(0, DL));
17419 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17420 }
17421 break;
17422 }
17426 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17427 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17428 switch (IntNo) {
17429 // By default we do not combine any intrinsic.
17430 default:
17431 return SDValue();
17432 case Intrinsic::riscv_vcpop:
17433 case Intrinsic::riscv_vcpop_mask:
17434 case Intrinsic::riscv_vfirst:
17435 case Intrinsic::riscv_vfirst_mask: {
17436 SDValue VL = N->getOperand(2);
17437 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17438 IntNo == Intrinsic::riscv_vfirst_mask)
17439 VL = N->getOperand(3);
17440 if (!isNullConstant(VL))
17441 return SDValue();
17442 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17443 SDLoc DL(N);
17444 EVT VT = N->getValueType(0);
17445 if (IntNo == Intrinsic::riscv_vfirst ||
17446 IntNo == Intrinsic::riscv_vfirst_mask)
17447 return DAG.getConstant(-1, DL, VT);
17448 return DAG.getConstant(0, DL, VT);
17449 }
17450 }
17451 }
17452 case ISD::BITCAST: {
17454 SDValue N0 = N->getOperand(0);
17455 EVT VT = N->getValueType(0);
17456 EVT SrcVT = N0.getValueType();
17457 // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
17458 // type, widen both sides to avoid a trip through memory.
17459 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17460 VT.isScalarInteger()) {
17461 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17462 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17463 Ops[0] = N0;
17464 SDLoc DL(N);
17465 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17466 N0 = DAG.getBitcast(MVT::i8, N0);
17467 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17468 }
17469
17470 return SDValue();
17471 }
17472 }
17473
17474 return SDValue();
17475}
17476
17478 EVT XVT, unsigned KeptBits) const {
17479 // For vectors, we don't have a preference.
17480 if (XVT.isVector())
17481 return false;
17482
17483 if (XVT != MVT::i32 && XVT != MVT::i64)
17484 return false;
17485
17486 // We can use sext.w for RV64 or an srai 31 on RV32.
17487 if (KeptBits == 32 || KeptBits == 64)
17488 return true;
17489
17490 // With Zbb we can use sext.h/sext.b.
17491 return Subtarget.hasStdExtZbb() &&
17492 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17493 KeptBits == 16);
17494}
17495
17497 const SDNode *N, CombineLevel Level) const {
17498 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17499 N->getOpcode() == ISD::SRL) &&
17500 "Expected shift op");
17501
17502 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17503 // materialised in fewer instructions than `(OP _, c1)`:
17504 //
17505 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17506 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
17507 SDValue N0 = N->getOperand(0);
17508 EVT Ty = N0.getValueType();
17509 if (Ty.isScalarInteger() &&
17510 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17511 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17512 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17513 if (C1 && C2) {
17514 const APInt &C1Int = C1->getAPIntValue();
17515 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17516
17517 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17518 // and the combine should happen, to potentially allow further combines
17519 // later.
17520 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17521 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17522 return true;
17523
17524 // We can materialise `c1` in an add immediate, so it's "free", and the
17525 // combine should be prevented.
17526 if (C1Int.getSignificantBits() <= 64 &&
17528 return false;
17529
17530 // Neither constant will fit into an immediate, so find materialisation
17531 // costs.
17532 int C1Cost =
17533 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17534 /*CompressionCost*/ true);
17535 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17536 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17537 /*CompressionCost*/ true);
17538
17539 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17540 // combine should be prevented.
17541 if (C1Cost < ShiftedC1Cost)
17542 return false;
17543 }
17544 }
17545 return true;
17546}
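// Illustrative sketch, not part of the original file: the fold considered
// above rewrites
//   (x + c1) << c2  ==>  (x << c2) + (c1 << c2)
// and is only desirable when `c1 << c2` remains cheap to materialise, e.g.
// c1 = 1023 and c2 = 1 give 2046, which still fits an ADDI simm12.
static_assert(((5 + 1023) << 1) == ((5 << 1) + (1023 << 1)),
              "shifting distributes over the added constant");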
17547
17549 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17550 TargetLoweringOpt &TLO) const {
17551 // Delay this optimization as late as possible.
17552 if (!TLO.LegalOps)
17553 return false;
17554
17555 EVT VT = Op.getValueType();
17556 if (VT.isVector())
17557 return false;
17558
17559 unsigned Opcode = Op.getOpcode();
17560 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17561 return false;
17562
17563 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17564 if (!C)
17565 return false;
17566
17567 const APInt &Mask = C->getAPIntValue();
17568
17569 // Clear all non-demanded bits initially.
17570 APInt ShrunkMask = Mask & DemandedBits;
17571
17572 // Try to make a smaller immediate by setting undemanded bits.
17573
17574 APInt ExpandedMask = Mask | ~DemandedBits;
17575
17576 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17577 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17578 };
17579 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17580 if (NewMask == Mask)
17581 return true;
17582 SDLoc DL(Op);
17583 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17584 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17585 Op.getOperand(0), NewC);
17586 return TLO.CombineTo(Op, NewOp);
17587 };
17588
17589 // If the shrunk mask fits in a sign-extended 12-bit immediate, let the
17590 // target-independent code apply it.
17591 if (ShrunkMask.isSignedIntN(12))
17592 return false;
17593
17594 // AND has a few special cases for zext.
17595 if (Opcode == ISD::AND) {
17596 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17597 // otherwise use SLLI + SRLI.
17598 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17599 if (IsLegalMask(NewMask))
17600 return UseMask(NewMask);
17601
17602 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17603 if (VT == MVT::i64) {
17604 APInt NewMask = APInt(64, 0xffffffff);
17605 if (IsLegalMask(NewMask))
17606 return UseMask(NewMask);
17607 }
17608 }
17609
17610 // For the remaining optimizations, we need to be able to make a negative
17611 // number through a combination of mask and undemanded bits.
17612 if (!ExpandedMask.isNegative())
17613 return false;
17614
17615 // What is the fewest number of bits needed to represent the negative number?
17616 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17617
17618 // Try to make a 12-bit negative immediate. If that fails, try to make a
17619 // 32-bit negative immediate unless the shrunk immediate already fits in 32
17620 // bits. If we can't create a simm12, we shouldn't change opaque constants.
17621 APInt NewMask = ShrunkMask;
17622 if (MinSignedBits <= 12)
17623 NewMask.setBitsFrom(11);
17624 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17625 NewMask.setBitsFrom(31);
17626 else
17627 return false;
17628
17629 // Check that our new mask is a subset of the demanded mask.
17630 assert(IsLegalMask(NewMask));
17631 return UseMask(NewMask);
17632}
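// Worked example for targetShrinkDemandedConstant (illustrative, assuming only
// the low 16 bits are demanded): for (and X, 0x1ffff) we get
//   ShrunkMask   = 0x1ffff & 0xffff  = 0xffff   (not a simm12, so keep going)
//   ExpandedMask = 0x1ffff | ~0xffff = all ones
// NewMask = 0xffff contains ShrunkMask and is contained in ExpandedMask, so the
// node can be rewritten to (and X, 0xffff), i.e. zext.h or SLLI+SRLI.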
17633
17634static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17635 static const uint64_t GREVMasks[] = {
17636 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17637 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17638
17639 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17640 unsigned Shift = 1 << Stage;
17641 if (ShAmt & Shift) {
17642 uint64_t Mask = GREVMasks[Stage];
17643 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17644 if (IsGORC)
17645 Res |= x;
17646 x = Res;
17647 }
17648 }
17649
17650 return x;
17651}
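// Illustrative values (not part of the original file), matching the brev8 and
// orc.b semantics that a control value of 7 models byte-wise:
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80  // brev8: reverse bits in each byte
//   computeGREVOrGORC(0x10, 7, /*IsGORC=*/true)  == 0xff  // orc.b: 0xff for each non-zero byte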
17652
17654 KnownBits &Known,
17655 const APInt &DemandedElts,
17656 const SelectionDAG &DAG,
17657 unsigned Depth) const {
17658 unsigned BitWidth = Known.getBitWidth();
17659 unsigned Opc = Op.getOpcode();
17660 assert((Opc >= ISD::BUILTIN_OP_END ||
17661 Opc == ISD::INTRINSIC_WO_CHAIN ||
17662 Opc == ISD::INTRINSIC_W_CHAIN ||
17663 Opc == ISD::INTRINSIC_VOID) &&
17664 "Should use MaskedValueIsZero if you don't know whether Op"
17665 " is a target node!");
17666
17667 Known.resetAll();
17668 switch (Opc) {
17669 default: break;
17670 case RISCVISD::SELECT_CC: {
17671 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17672 // If we don't know any bits, early out.
17673 if (Known.isUnknown())
17674 break;
17675 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17676
17677 // Only known if known in both the LHS and RHS.
17678 Known = Known.intersectWith(Known2);
17679 break;
17680 }
17683 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17684 // Result is either all zero or operand 0. We can propagate zeros, but not
17685 // ones.
17686 Known.One.clearAllBits();
17687 break;
17688 case RISCVISD::REMUW: {
17689 KnownBits Known2;
17690 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17691 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17692 // We only care about the lower 32 bits.
17693 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17694 // Restore the original width by sign extending.
17695 Known = Known.sext(BitWidth);
17696 break;
17697 }
17698 case RISCVISD::DIVUW: {
17699 KnownBits Known2;
17700 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17701 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17702 // We only care about the lower 32 bits.
17703 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17704 // Restore the original width by sign extending.
17705 Known = Known.sext(BitWidth);
17706 break;
17707 }
17708 case RISCVISD::SLLW: {
17709 KnownBits Known2;
17710 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17711 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17712 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17713 // Restore the original width by sign extending.
17714 Known = Known.sext(BitWidth);
17715 break;
17716 }
17717 case RISCVISD::CTZW: {
17718 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17719 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17720 unsigned LowBits = llvm::bit_width(PossibleTZ);
17721 Known.Zero.setBitsFrom(LowBits);
17722 break;
17723 }
17724 case RISCVISD::CLZW: {
17725 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17726 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17727 unsigned LowBits = llvm::bit_width(PossibleLZ);
17728 Known.Zero.setBitsFrom(LowBits);
17729 break;
17730 }
17731 case RISCVISD::BREV8:
17732 case RISCVISD::ORC_B: {
17733 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17734 // control value of 7 is equivalent to brev8 and orc.b.
17735 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17736 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17737 // To compute zeros, we need to invert the value and invert it back after.
17738 Known.Zero =
17739 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17740 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17741 break;
17742 }
17743 case RISCVISD::READ_VLENB: {
17744 // We can use the minimum and maximum VLEN values to bound VLENB. We
17745 // know VLEN must be a power of two.
17746 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17747 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17748 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17749 Known.Zero.setLowBits(Log2_32(MinVLenB));
17750 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17751 if (MaxVLenB == MinVLenB)
17752 Known.One.setBit(Log2_32(MinVLenB));
17753 break;
17754 }
17755 case RISCVISD::FCLASS: {
17756 // fclass will only set one of the low 10 bits.
17757 Known.Zero.setBitsFrom(10);
17758 break;
17759 }
17762 unsigned IntNo =
17763 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17764 switch (IntNo) {
17765 default:
17766 // We can't do anything for most intrinsics.
17767 break;
17768 case Intrinsic::riscv_vsetvli:
17769 case Intrinsic::riscv_vsetvlimax: {
17770 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17771 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17772 RISCVII::VLMUL VLMUL =
17773 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17774 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17775 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17776 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17777 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
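// For example (illustrative): with a maximum VLEN of 512, SEW=32 and LMUL=2,
// MaxVL = (512 / 32) * 2 = 32, so bits 6 and above of the vsetvli result are
// known to be zero.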
17778
17779 // The result of vsetvli must not be larger than AVL.
17780 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17781 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17782
17783 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17784 if (BitWidth > KnownZeroFirstBit)
17785 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17786 break;
17787 }
17788 }
17789 break;
17790 }
17791 }
17792}
17793
17795 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17796 unsigned Depth) const {
17797 switch (Op.getOpcode()) {
17798 default:
17799 break;
17800 case RISCVISD::SELECT_CC: {
17801 unsigned Tmp =
17802 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17803 if (Tmp == 1) return 1; // Early out.
17804 unsigned Tmp2 =
17805 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17806 return std::min(Tmp, Tmp2);
17807 }
17810 // Output is either all zero or operand 0. We can propagate sign bit count
17811 // from operand 0.
17812 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17813 case RISCVISD::ABSW: {
17814 // We expand this at isel to negw+max. The result will have 33 sign bits
17815 // if the input has at least 33 sign bits.
17816 unsigned Tmp =
17817 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17818 if (Tmp < 33) return 1;
17819 return 33;
17820 }
17821 case RISCVISD::SLLW:
17822 case RISCVISD::SRAW:
17823 case RISCVISD::SRLW:
17824 case RISCVISD::DIVW:
17825 case RISCVISD::DIVUW:
17826 case RISCVISD::REMUW:
17827 case RISCVISD::ROLW:
17828 case RISCVISD::RORW:
17833 // TODO: As the result is sign-extended, this is conservatively correct. A
17834 // more precise answer could be calculated for SRAW depending on known
17835 // bits in the shift amount.
17836 return 33;
17837 case RISCVISD::VMV_X_S: {
17838 // The number of sign bits of the scalar result is computed by obtaining the
17839 // element type of the input vector operand, subtracting its width from the
17840 // XLEN, and then adding one (sign bit within the element type). If the
17841 // element type is wider than XLen, the least-significant XLEN bits are
17842 // taken.
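// For example, extracting from a vector of i16 elements on RV64 gives
// 64 - 16 + 1 = 49 known sign bits.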
17843 unsigned XLen = Subtarget.getXLen();
17844 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17845 if (EltBits <= XLen)
17846 return XLen - EltBits + 1;
17847 break;
17848 }
17850 unsigned IntNo = Op.getConstantOperandVal(1);
17851 switch (IntNo) {
17852 default:
17853 break;
17854 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17855 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17856 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17857 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17858 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17859 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17860 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17861 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17862 case Intrinsic::riscv_masked_cmpxchg_i64:
17863 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17864 // narrow atomic operation. These are implemented using atomic
17865 // operations at the minimum supported atomicrmw/cmpxchg width whose
17866 // result is then sign extended to XLEN. With +A, the minimum width is
17867 // 32 for both RV64 and RV32.
17868 assert(Subtarget.getXLen() == 64);
17870 assert(Subtarget.hasStdExtA());
17871 return 33;
17872 }
17873 break;
17874 }
17875 }
17876
17877 return 1;
17878}
17879
17881 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17882 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17883
17884 // TODO: Add more target nodes.
17885 switch (Op.getOpcode()) {
17887 // Integer select_cc cannot create poison.
17888 // TODO: What are the FP poison semantics?
17889 // TODO: This instruction blocks poison from the unselected operand, can
17890 // we do anything with that?
17891 return !Op.getValueType().isInteger();
17892 }
17894 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17895}
17896
17897const Constant *
17899 assert(Ld && "Unexpected null LoadSDNode");
17900 if (!ISD::isNormalLoad(Ld))
17901 return nullptr;
17902
17903 SDValue Ptr = Ld->getBasePtr();
17904
17905 // Only constant pools with no offset are supported.
17906 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17907 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17908 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17909 CNode->getOffset() != 0)
17910 return nullptr;
17911
17912 return CNode;
17913 };
17914
17915 // Simple case, LLA.
17916 if (Ptr.getOpcode() == RISCVISD::LLA) {
17917 auto *CNode = GetSupportedConstantPool(Ptr);
17918 if (!CNode || CNode->getTargetFlags() != 0)
17919 return nullptr;
17920
17921 return CNode->getConstVal();
17922 }
17923
17924 // Look for a HI and ADD_LO pair.
17925 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17926 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17927 return nullptr;
17928
17929 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17930 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17931
17932 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17933 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17934 return nullptr;
17935
17936 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17937 return nullptr;
17938
17939 return CNodeLo->getConstVal();
17940}
17941
17943 MachineBasicBlock *BB) {
17944 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17945
17946 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17947 // Should the count have wrapped while it was being read, we need to try
17948 // again.
17949 // For example:
17950 // ```
17951 // read:
17952 // csrrs x3, counterh # load high word of counter
17953 // csrrs x2, counter # load low word of counter
17954 // csrrs x4, counterh # load high word of counter
17955 // bne x3, x4, read # check if high word reads match, otherwise try again
17956 // ```
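// A C-level sketch of the same retry loop (illustrative only; read_csr is a
// placeholder for a CSR read, not a real helper in this file):
//   uint32_t hi, lo, hi2;
//   do {
//     hi  = read_csr(counterh);
//     lo  = read_csr(counter);
//     hi2 = read_csr(counterh);
//   } while (hi != hi2);
//   uint64_t value = ((uint64_t)hi << 32) | lo;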
17957
17958 MachineFunction &MF = *BB->getParent();
17959 const BasicBlock *LLVMBB = BB->getBasicBlock();
17961
17962 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17963 MF.insert(It, LoopMBB);
17964
17965 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17966 MF.insert(It, DoneMBB);
17967
17968 // Transfer the remainder of BB and its successor edges to DoneMBB.
17969 DoneMBB->splice(DoneMBB->begin(), BB,
17970 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17972
17973 BB->addSuccessor(LoopMBB);
17974
17976 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17977 Register LoReg = MI.getOperand(0).getReg();
17978 Register HiReg = MI.getOperand(1).getReg();
17979 int64_t LoCounter = MI.getOperand(2).getImm();
17980 int64_t HiCounter = MI.getOperand(3).getImm();
17981 DebugLoc DL = MI.getDebugLoc();
17982
17984 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17985 .addImm(HiCounter)
17986 .addReg(RISCV::X0);
17987 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17988 .addImm(LoCounter)
17989 .addReg(RISCV::X0);
17990 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17991 .addImm(HiCounter)
17992 .addReg(RISCV::X0);
17993
17994 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17995 .addReg(HiReg)
17996 .addReg(ReadAgainReg)
17997 .addMBB(LoopMBB);
17998
17999 LoopMBB->addSuccessor(LoopMBB);
18000 LoopMBB->addSuccessor(DoneMBB);
18001
18002 MI.eraseFromParent();
18003
18004 return DoneMBB;
18005}
18006
18009 const RISCVSubtarget &Subtarget) {
18010 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18011
18012 MachineFunction &MF = *BB->getParent();
18013 DebugLoc DL = MI.getDebugLoc();
18016 Register LoReg = MI.getOperand(0).getReg();
18017 Register HiReg = MI.getOperand(1).getReg();
18018 Register SrcReg = MI.getOperand(2).getReg();
18019
18020 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18021 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18022
18023 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
18024 RI, Register());
18026 MachineMemOperand *MMOLo =
18030 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
18031 .addFrameIndex(FI)
18032 .addImm(0)
18033 .addMemOperand(MMOLo);
18034 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
18035 .addFrameIndex(FI)
18036 .addImm(4)
18037 .addMemOperand(MMOHi);
18038 MI.eraseFromParent(); // The pseudo instruction is gone now.
18039 return BB;
18040}
18041
18044 const RISCVSubtarget &Subtarget) {
18045 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
18046 "Unexpected instruction");
18047
18048 MachineFunction &MF = *BB->getParent();
18049 DebugLoc DL = MI.getDebugLoc();
18052 Register DstReg = MI.getOperand(0).getReg();
18053 Register LoReg = MI.getOperand(1).getReg();
18054 Register HiReg = MI.getOperand(2).getReg();
18055
18056 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
18057 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18058
18060 MachineMemOperand *MMOLo =
18064 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18065 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
18066 .addFrameIndex(FI)
18067 .addImm(0)
18068 .addMemOperand(MMOLo);
18069 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18070 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
18071 .addFrameIndex(FI)
18072 .addImm(4)
18073 .addMemOperand(MMOHi);
18074 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
18075 MI.eraseFromParent(); // The pseudo instruction is gone now.
18076 return BB;
18077}
18078
18080 switch (MI.getOpcode()) {
18081 default:
18082 return false;
18083 case RISCV::Select_GPR_Using_CC_GPR:
18084 case RISCV::Select_GPR_Using_CC_Imm:
18085 case RISCV::Select_FPR16_Using_CC_GPR:
18086 case RISCV::Select_FPR16INX_Using_CC_GPR:
18087 case RISCV::Select_FPR32_Using_CC_GPR:
18088 case RISCV::Select_FPR32INX_Using_CC_GPR:
18089 case RISCV::Select_FPR64_Using_CC_GPR:
18090 case RISCV::Select_FPR64INX_Using_CC_GPR:
18091 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18092 return true;
18093 }
18094}
18095
18097 unsigned RelOpcode, unsigned EqOpcode,
18098 const RISCVSubtarget &Subtarget) {
18099 DebugLoc DL = MI.getDebugLoc();
18100 Register DstReg = MI.getOperand(0).getReg();
18101 Register Src1Reg = MI.getOperand(1).getReg();
18102 Register Src2Reg = MI.getOperand(2).getReg();
18104 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18106
18107 // Save the current FFLAGS.
18108 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
18109
18110 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
18111 .addReg(Src1Reg)
18112 .addReg(Src2Reg);
18115
18116 // Restore the FFLAGS.
18117 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18118 .addReg(SavedFFlags, RegState::Kill);
18119
18120 // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
18121 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
18122 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
18123 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
18126
18127 // Erase the pseudoinstruction.
18128 MI.eraseFromParent();
18129 return BB;
18130}
18131
18132static MachineBasicBlock *
18134 MachineBasicBlock *ThisMBB,
18135 const RISCVSubtarget &Subtarget) {
18136 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18137 // Without this, custom-inserter would have generated:
18138 //
18139 // A
18140 // | \
18141 // | B
18142 // | /
18143 // C
18144 // | \
18145 // | D
18146 // | /
18147 // E
18148 //
18149 // A: X = ...; Y = ...
18150 // B: empty
18151 // C: Z = PHI [X, A], [Y, B]
18152 // D: empty
18153 // E: PHI [X, C], [Z, D]
18154 //
18155 // If we lower both Select_FPRX_ in a single step, we can instead generate:
18156 //
18157 // A
18158 // | \
18159 // | C
18160 // | /|
18161 // |/ |
18162 // | |
18163 // | D
18164 // | /
18165 // E
18166 //
18167 // A: X = ...; Y = ...
18168 // D: empty
18169 // E: PHI [X, A], [X, C], [Y, D]
18170
18171 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18172 const DebugLoc &DL = First.getDebugLoc();
18173 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
18174 MachineFunction *F = ThisMBB->getParent();
18175 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
18176 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
18177 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
18178 MachineFunction::iterator It = ++ThisMBB->getIterator();
18179 F->insert(It, FirstMBB);
18180 F->insert(It, SecondMBB);
18181 F->insert(It, SinkMBB);
18182
18183 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
18184 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
18186 ThisMBB->end());
18187 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
18188
18189 // Fallthrough block for ThisMBB.
18190 ThisMBB->addSuccessor(FirstMBB);
18191 // Fallthrough block for FirstMBB.
18192 FirstMBB->addSuccessor(SecondMBB);
18193 ThisMBB->addSuccessor(SinkMBB);
18194 FirstMBB->addSuccessor(SinkMBB);
18195 // This is fallthrough.
18196 SecondMBB->addSuccessor(SinkMBB);
18197
18198 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
18199 Register FLHS = First.getOperand(1).getReg();
18200 Register FRHS = First.getOperand(2).getReg();
18201 // Insert appropriate branch.
18202 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
18203 .addReg(FLHS)
18204 .addReg(FRHS)
18205 .addMBB(SinkMBB);
18206
18207 Register SLHS = Second.getOperand(1).getReg();
18208 Register SRHS = Second.getOperand(2).getReg();
18209 Register Op1Reg4 = First.getOperand(4).getReg();
18210 Register Op1Reg5 = First.getOperand(5).getReg();
18211
18212 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
18213 // Insert appropriate branch.
18214 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
18215 .addReg(SLHS)
18216 .addReg(SRHS)
18217 .addMBB(SinkMBB);
18218
18219 Register DestReg = Second.getOperand(0).getReg();
18220 Register Op2Reg4 = Second.getOperand(4).getReg();
18221 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
18222 .addReg(Op2Reg4)
18223 .addMBB(ThisMBB)
18224 .addReg(Op1Reg4)
18225 .addMBB(FirstMBB)
18226 .addReg(Op1Reg5)
18227 .addMBB(SecondMBB);
18228
18229 // Now remove the Select_FPRX_s.
18230 First.eraseFromParent();
18231 Second.eraseFromParent();
18232 return SinkMBB;
18233}
18234
18237 const RISCVSubtarget &Subtarget) {
18238 // To "insert" Select_* instructions, we actually have to insert the triangle
18239 // control-flow pattern. The incoming instructions know the destination vreg
18240 // to set, the condition code register to branch on, the true/false values to
18241 // select between, and the condcode to use to select the appropriate branch.
18242 //
18243 // We produce the following control flow:
18244 // HeadMBB
18245 // | \
18246 // | IfFalseMBB
18247 // | /
18248 // TailMBB
18249 //
18250 // When we find a sequence of selects we attempt to optimize their emission
18251 // by sharing the control flow. Currently we only handle cases where we have
18252 // multiple selects with the exact same condition (same LHS, RHS and CC).
18253 // The selects may be interleaved with other instructions if the other
18254 // instructions meet some requirements we deem safe:
18255 // - They are not pseudo instructions.
18256 // - They are debug instructions; otherwise,
18257 // - They do not have side-effects, do not access memory, and their inputs
18258 // do not depend on the results of the select pseudo-instructions.
18259 // The TrueV/FalseV operands of the selects cannot depend on the result of
18260 // previous selects in the sequence.
18261 // These conditions could be further relaxed. See the X86 target for a
18262 // related approach and more information.
18263 //
18264 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18265 // is checked here and handled by a separate function -
18266 // EmitLoweredCascadedSelect.
18267
18268 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
18269 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
18270 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
18271 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
18272 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
18273 Next->getOperand(5).isKill())
18274 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
18275
18276 Register LHS = MI.getOperand(1).getReg();
18277 Register RHS;
18278 if (MI.getOperand(2).isReg())
18279 RHS = MI.getOperand(2).getReg();
18280 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
18281
18282 SmallVector<MachineInstr *, 4> SelectDebugValues;
18283 SmallSet<Register, 4> SelectDests;
18284 SelectDests.insert(MI.getOperand(0).getReg());
18285
18286 MachineInstr *LastSelectPseudo = &MI;
18287 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
18288 SequenceMBBI != E; ++SequenceMBBI) {
18289 if (SequenceMBBI->isDebugInstr())
18290 continue;
18291 if (isSelectPseudo(*SequenceMBBI)) {
18292 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
18293 !SequenceMBBI->getOperand(2).isReg() ||
18294 SequenceMBBI->getOperand(2).getReg() != RHS ||
18295 SequenceMBBI->getOperand(3).getImm() != CC ||
18296 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
18297 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
18298 break;
18299 LastSelectPseudo = &*SequenceMBBI;
18300 SequenceMBBI->collectDebugValues(SelectDebugValues);
18301 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
18302 continue;
18303 }
18304 if (SequenceMBBI->hasUnmodeledSideEffects() ||
18305 SequenceMBBI->mayLoadOrStore() ||
18306 SequenceMBBI->usesCustomInsertionHook())
18307 break;
18308 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
18309 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
18310 }))
18311 break;
18312 }
18313
18314 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18315 const BasicBlock *LLVM_BB = BB->getBasicBlock();
18316 DebugLoc DL = MI.getDebugLoc();
18318
18319 MachineBasicBlock *HeadMBB = BB;
18320 MachineFunction *F = BB->getParent();
18321 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
18322 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
18323
18324 F->insert(I, IfFalseMBB);
18325 F->insert(I, TailMBB);
18326
18327 // Set the call frame size on entry to the new basic blocks.
18328 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
18329 IfFalseMBB->setCallFrameSize(CallFrameSize);
18330 TailMBB->setCallFrameSize(CallFrameSize);
18331
18332 // Transfer debug instructions associated with the selects to TailMBB.
18333 for (MachineInstr *DebugInstr : SelectDebugValues) {
18334 TailMBB->push_back(DebugInstr->removeFromParent());
18335 }
18336
18337 // Move all instructions after the sequence to TailMBB.
18338 TailMBB->splice(TailMBB->end(), HeadMBB,
18339 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
18340 // Update machine-CFG edges by transferring all successors of the current
18341 // block to the new block which will contain the Phi nodes for the selects.
18342 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
18343 // Set the successors for HeadMBB.
18344 HeadMBB->addSuccessor(IfFalseMBB);
18345 HeadMBB->addSuccessor(TailMBB);
18346
18347 // Insert appropriate branch.
18348 if (MI.getOperand(2).isImm())
18349 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
18350 .addReg(LHS)
18351 .addImm(MI.getOperand(2).getImm())
18352 .addMBB(TailMBB);
18353 else
18354 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
18355 .addReg(LHS)
18356 .addReg(RHS)
18357 .addMBB(TailMBB);
18358
18359 // IfFalseMBB just falls through to TailMBB.
18360 IfFalseMBB->addSuccessor(TailMBB);
18361
18362 // Create PHIs for all of the select pseudo-instructions.
18363 auto SelectMBBI = MI.getIterator();
18364 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
18365 auto InsertionPoint = TailMBB->begin();
18366 while (SelectMBBI != SelectEnd) {
18367 auto Next = std::next(SelectMBBI);
18368 if (isSelectPseudo(*SelectMBBI)) {
18369 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
18370 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
18371 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
18372 .addReg(SelectMBBI->getOperand(4).getReg())
18373 .addMBB(HeadMBB)
18374 .addReg(SelectMBBI->getOperand(5).getReg())
18375 .addMBB(IfFalseMBB);
18376 SelectMBBI->eraseFromParent();
18377 }
18378 SelectMBBI = Next;
18379 }
18380
18381 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18382 return TailMBB;
18383}
18384
18385// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18386static const RISCV::RISCVMaskedPseudoInfo *
18387lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18389 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18390 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18392 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18393 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18394 return Masked;
18395}
18396
18399 unsigned CVTXOpc) {
18400 DebugLoc DL = MI.getDebugLoc();
18401
18403
18405 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18406
18407 // Save the old value of FFLAGS.
18408 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18409
18410 assert(MI.getNumOperands() == 7);
18411
18412 // Emit a VFCVT_X_F
18413 const TargetRegisterInfo *TRI =
18415 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18416 Register Tmp = MRI.createVirtualRegister(RC);
18417 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18418 .add(MI.getOperand(1))
18419 .add(MI.getOperand(2))
18420 .add(MI.getOperand(3))
18421 .add(MachineOperand::CreateImm(7)) // frm = DYN
18422 .add(MI.getOperand(4))
18423 .add(MI.getOperand(5))
18424 .add(MI.getOperand(6))
18425 .add(MachineOperand::CreateReg(RISCV::FRM,
18426 /*IsDef*/ false,
18427 /*IsImp*/ true));
18428
18429 // Emit a VFCVT_F_X
18430 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18431 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18432 // There is no E8 variant for VFCVT_F_X.
18433 assert(Log2SEW >= 4);
18434 unsigned CVTFOpc =
18435 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18436 ->MaskedPseudo;
18437
18438 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18439 .add(MI.getOperand(0))
18440 .add(MI.getOperand(1))
18441 .addReg(Tmp)
18442 .add(MI.getOperand(3))
18443 .add(MachineOperand::CreateImm(7)) // frm = DYN
18444 .add(MI.getOperand(4))
18445 .add(MI.getOperand(5))
18446 .add(MI.getOperand(6))
18447 .add(MachineOperand::CreateReg(RISCV::FRM,
18448 /*IsDef*/ false,
18449 /*IsImp*/ true));
18450
18451 // Restore FFLAGS.
18452 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18453 .addReg(SavedFFLAGS, RegState::Kill);
18454
18455 // Erase the pseudoinstruction.
18456 MI.eraseFromParent();
18457 return BB;
18458}
18459
18461 const RISCVSubtarget &Subtarget) {
18462 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18463 const TargetRegisterClass *RC;
18464 switch (MI.getOpcode()) {
18465 default:
18466 llvm_unreachable("Unexpected opcode");
18467 case RISCV::PseudoFROUND_H:
18468 CmpOpc = RISCV::FLT_H;
18469 F2IOpc = RISCV::FCVT_W_H;
18470 I2FOpc = RISCV::FCVT_H_W;
18471 FSGNJOpc = RISCV::FSGNJ_H;
18472 FSGNJXOpc = RISCV::FSGNJX_H;
18473 RC = &RISCV::FPR16RegClass;
18474 break;
18475 case RISCV::PseudoFROUND_H_INX:
18476 CmpOpc = RISCV::FLT_H_INX;
18477 F2IOpc = RISCV::FCVT_W_H_INX;
18478 I2FOpc = RISCV::FCVT_H_W_INX;
18479 FSGNJOpc = RISCV::FSGNJ_H_INX;
18480 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18481 RC = &RISCV::GPRF16RegClass;
18482 break;
18483 case RISCV::PseudoFROUND_S:
18484 CmpOpc = RISCV::FLT_S;
18485 F2IOpc = RISCV::FCVT_W_S;
18486 I2FOpc = RISCV::FCVT_S_W;
18487 FSGNJOpc = RISCV::FSGNJ_S;
18488 FSGNJXOpc = RISCV::FSGNJX_S;
18489 RC = &RISCV::FPR32RegClass;
18490 break;
18491 case RISCV::PseudoFROUND_S_INX:
18492 CmpOpc = RISCV::FLT_S_INX;
18493 F2IOpc = RISCV::FCVT_W_S_INX;
18494 I2FOpc = RISCV::FCVT_S_W_INX;
18495 FSGNJOpc = RISCV::FSGNJ_S_INX;
18496 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18497 RC = &RISCV::GPRF32RegClass;
18498 break;
18499 case RISCV::PseudoFROUND_D:
18500 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18501 CmpOpc = RISCV::FLT_D;
18502 F2IOpc = RISCV::FCVT_L_D;
18503 I2FOpc = RISCV::FCVT_D_L;
18504 FSGNJOpc = RISCV::FSGNJ_D;
18505 FSGNJXOpc = RISCV::FSGNJX_D;
18506 RC = &RISCV::FPR64RegClass;
18507 break;
18508 case RISCV::PseudoFROUND_D_INX:
18509 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18510 CmpOpc = RISCV::FLT_D_INX;
18511 F2IOpc = RISCV::FCVT_L_D_INX;
18512 I2FOpc = RISCV::FCVT_D_L_INX;
18513 FSGNJOpc = RISCV::FSGNJ_D_INX;
18514 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18515 RC = &RISCV::GPRRegClass;
18516 break;
18517 }
18518
18519 const BasicBlock *BB = MBB->getBasicBlock();
18520 DebugLoc DL = MI.getDebugLoc();
18522
18524 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18525 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18526
18527 F->insert(I, CvtMBB);
18528 F->insert(I, DoneMBB);
18529 // Move all instructions after the sequence to DoneMBB.
18530 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18531 MBB->end());
18532 // Update machine-CFG edges by transferring all successors of the current
18533 // block to the new block which will contain the Phi nodes for the selects.
18535 // Set the successors for MBB.
18536 MBB->addSuccessor(CvtMBB);
18537 MBB->addSuccessor(DoneMBB);
18538
18539 Register DstReg = MI.getOperand(0).getReg();
18540 Register SrcReg = MI.getOperand(1).getReg();
18541 Register MaxReg = MI.getOperand(2).getReg();
18542 int64_t FRM = MI.getOperand(3).getImm();
18543
18544 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18546
18547 Register FabsReg = MRI.createVirtualRegister(RC);
18548 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18549
18550 // Compare the FP value to the max value.
18551 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18552 auto MIB =
18553 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18556
18557 // Insert branch.
18558 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18559 .addReg(CmpReg)
18560 .addReg(RISCV::X0)
18561 .addMBB(DoneMBB);
18562
18563 CvtMBB->addSuccessor(DoneMBB);
18564
18565 // Convert to integer.
18566 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18567 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18570
18571 // Convert back to FP.
18572 Register I2FReg = MRI.createVirtualRegister(RC);
18573 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18576
18577 // Restore the sign bit.
18578 Register CvtReg = MRI.createVirtualRegister(RC);
18579 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18580
18581 // Merge the results.
18582 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18583 .addReg(SrcReg)
18584 .addMBB(MBB)
18585 .addReg(CvtReg)
18586 .addMBB(CvtMBB);
18587
18588 MI.eraseFromParent();
18589 return DoneMBB;
18590}
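// Illustrative C-level sketch of the sequence emitted above (not part of the
// original file; convert_with_frm stands in for the fcvt pair that honours the
// FRM operand, which a plain C cast would not):
//   float fround(float x, float max /* e.g. 0x1p23f for f32 */) {
//     if (!(fabsf(x) < max))             // NaN or already integral: keep x
//       return x;
//     float r = convert_with_frm(x);     // fcvt.w.s then fcvt.s.w with FRM
//     return copysignf(r, x);            // fsgnj: restore the original sign
//   }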
18591
18594 MachineBasicBlock *BB) const {
18595 switch (MI.getOpcode()) {
18596 default:
18597 llvm_unreachable("Unexpected instr type to insert");
18598 case RISCV::ReadCounterWide:
18599 assert(!Subtarget.is64Bit() &&
18600 "ReadCounterWide is only to be used on riscv32");
18601 return emitReadCounterWidePseudo(MI, BB);
18602 case RISCV::Select_GPR_Using_CC_GPR:
18603 case RISCV::Select_GPR_Using_CC_Imm:
18604 case RISCV::Select_FPR16_Using_CC_GPR:
18605 case RISCV::Select_FPR16INX_Using_CC_GPR:
18606 case RISCV::Select_FPR32_Using_CC_GPR:
18607 case RISCV::Select_FPR32INX_Using_CC_GPR:
18608 case RISCV::Select_FPR64_Using_CC_GPR:
18609 case RISCV::Select_FPR64INX_Using_CC_GPR:
18610 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18611 return emitSelectPseudo(MI, BB, Subtarget);
18612 case RISCV::BuildPairF64Pseudo:
18613 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18614 case RISCV::SplitF64Pseudo:
18615 return emitSplitF64Pseudo(MI, BB, Subtarget);
18616 case RISCV::PseudoQuietFLE_H:
18617 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18618 case RISCV::PseudoQuietFLE_H_INX:
18619 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18620 case RISCV::PseudoQuietFLT_H:
18621 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18622 case RISCV::PseudoQuietFLT_H_INX:
18623 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18624 case RISCV::PseudoQuietFLE_S:
18625 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18626 case RISCV::PseudoQuietFLE_S_INX:
18627 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18628 case RISCV::PseudoQuietFLT_S:
18629 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18630 case RISCV::PseudoQuietFLT_S_INX:
18631 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18632 case RISCV::PseudoQuietFLE_D:
18633 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18634 case RISCV::PseudoQuietFLE_D_INX:
18635 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18636 case RISCV::PseudoQuietFLE_D_IN32X:
18637 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18638 Subtarget);
18639 case RISCV::PseudoQuietFLT_D:
18640 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18641 case RISCV::PseudoQuietFLT_D_INX:
18642 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18643 case RISCV::PseudoQuietFLT_D_IN32X:
18644 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18645 Subtarget);
18646
18647 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18648 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18649 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18650 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18651 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18652 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18653 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18654 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18655 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18656 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18657 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18658 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18659 case RISCV::PseudoFROUND_H:
18660 case RISCV::PseudoFROUND_H_INX:
18661 case RISCV::PseudoFROUND_S:
18662 case RISCV::PseudoFROUND_S_INX:
18663 case RISCV::PseudoFROUND_D:
18664 case RISCV::PseudoFROUND_D_INX:
18665 case RISCV::PseudoFROUND_D_IN32X:
18666 return emitFROUND(MI, BB, Subtarget);
18667 case TargetOpcode::STATEPOINT:
18668 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
18669 // while the jal call instruction (to which the statepoint is lowered at
18670 // the end) has an implicit def. This def is early-clobber, as it is set
18671 // at the moment of the call, before any use is read.
18672 // Add this implicit dead def here as a workaround.
18673 MI.addOperand(*MI.getMF(),
18675 RISCV::X1, /*isDef*/ true,
18676 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18677 /*isUndef*/ false, /*isEarlyClobber*/ true));
18678 [[fallthrough]];
18679 case TargetOpcode::STACKMAP:
18680 case TargetOpcode::PATCHPOINT:
18681 if (!Subtarget.is64Bit())
18682 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18683 "supported on 64-bit targets");
18684 return emitPatchPoint(MI, BB);
18685 }
18686}
18687
18689 SDNode *Node) const {
18690 // Add FRM dependency to any instructions with dynamic rounding mode.
18691 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18692 if (Idx < 0) {
18693 // Vector pseudos have FRM index indicated by TSFlags.
18694 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18695 if (Idx < 0)
18696 return;
18697 }
18698 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18699 return;
18700 // If the instruction already reads FRM, don't add another read.
18701 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18702 return;
18703 MI.addOperand(
18704 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18705}
18706
18707// Calling Convention Implementation.
18708// The expectations for frontend ABI lowering vary from target to target.
18709// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18710// details, but this is a longer term goal. For now, we simply try to keep the
18711// role of the frontend as simple and well-defined as possible. The rules can
18712// be summarised as:
18713// * Never split up large scalar arguments. We handle them here.
18714// * If a hardfloat calling convention is being used, and the struct may be
18715// passed in a pair of registers (fp+fp, int+fp), and both registers are
18716// available, then pass as two separate arguments. If either the GPRs or FPRs
18717// are exhausted, then pass according to the rule below.
18718// * If a struct could never be passed in registers or directly in a stack
18719// slot (as it is larger than 2*XLEN and the floating point rules don't
18720// apply), then pass it using a pointer with the byval attribute.
18721// * If a struct is less than 2*XLEN, then coerce to either a two-element
18722// word-sized array or a 2*XLEN scalar (depending on alignment).
18723// * The frontend can determine whether a struct is returned by reference or
18724// not based on its size and fields. If it will be returned by reference, the
18725// frontend must modify the prototype so a pointer with the sret annotation is
18726// passed as the first argument. This is not necessary for large scalar
18727// returns.
18728// * Struct return values and varargs should be coerced to structs containing
18729// register-size fields in the same situations they would be for fixed
18730// arguments.
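//
// Worked example (added for illustration, not part of the original comment):
// under the lp64d ABI a C argument 'struct S { double d; int i; };' is split
// by the frontend into a double and an i32 while FPRs and GPRs remain, so it
// is passed in fa0 + a0 per the fp+int rule above; once the FPRs are
// exhausted it is instead coerced to a 2*XLEN integer payload and passed in
// a GPR pair or on the stack.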
18731
18732static const MCPhysReg ArgFPR16s[] = {
18733 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18734 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18735};
18736static const MCPhysReg ArgFPR32s[] = {
18737 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18738 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18739};
18740static const MCPhysReg ArgFPR64s[] = {
18741 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18742 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18743};
18744// This is an interim calling convention and it may be changed in the future.
18745static const MCPhysReg ArgVRs[] = {
18746 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18747 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18748 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18749static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18750 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18751 RISCV::V20M2, RISCV::V22M2};
18752static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18753 RISCV::V20M4};
18754static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
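// Note (added for clarity, not in the original): the LMUL>1 entries above are
// register groups that alias the base vector registers, e.g. V8M2 covers
// v8-v9, V8M4 covers v8-v11 and V8M8 covers v8-v15, which is why the M2/M4/M8
// lists only contain the suitably aligned group leaders.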
18755
18756ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
18757 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18758 // the ILP32E ABI.
18759 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18760 RISCV::X13, RISCV::X14, RISCV::X15,
18761 RISCV::X16, RISCV::X17};
18762 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18763 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18764 RISCV::X13, RISCV::X14, RISCV::X15};
18765
18766 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18767 return ArrayRef(ArgEGPRs);
18768
18769 return ArrayRef(ArgIGPRs);
18770}
18771
18772static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18773 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be
18774 // used for the save-restore libcall, so we don't use them.
18775 // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
18776 static const MCPhysReg FastCCIGPRs[] = {
18777 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
18778 RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
18779
18780 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18781 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18782 RISCV::X13, RISCV::X14, RISCV::X15};
18783
18784 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18785 return ArrayRef(FastCCEGPRs);
18786
18787 return ArrayRef(FastCCIGPRs);
18788}
18789
18790// Pass a 2*XLEN argument that has been split into two XLEN values through
18791// registers or the stack as necessary.
18792static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18793 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18794 MVT ValVT2, MVT LocVT2,
18795 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18796 unsigned XLenInBytes = XLen / 8;
18797 const RISCVSubtarget &STI =
18798 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18799 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());
18800
18801 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18802 // At least one half can be passed via register.
18803 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18804 VA1.getLocVT(), CCValAssign::Full));
18805 } else {
18806 // Both halves must be passed on the stack, with proper alignment.
18807 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18808 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18809 Align StackAlign(XLenInBytes);
18810 if (!EABI || XLen != 32)
18811 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18812 State.addLoc(
18813 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18814 State.AllocateStack(XLenInBytes, StackAlign),
18815 VA1.getLocVT(), CCValAssign::Full));
18816 State.addLoc(CCValAssign::getMem(
18817 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18818 LocVT2, CCValAssign::Full));
18819 return false;
18820 }
18821
18822 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18823 // The second half can also be passed via register.
18824 State.addLoc(
18825 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18826 } else {
18827 // The second half is passed via the stack, without additional alignment.
18828 State.addLoc(CCValAssign::getMem(
18829 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18830 LocVT2, CCValAssign::Full));
18831 }
18832
18833 return false;
18834}
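// For example (illustrative, not from the source): on RV32 an i64 argument is
// split into two i32 halves here. If two argument GPRs are free both halves
// go in registers; if only one remains, the low half takes that register and
// the high half takes a stack slot; if none remain, both halves go on the
// stack with the stack alignment handled above.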
18835
18836// Implements the RISC-V calling convention. Returns true upon failure.
18837bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18838 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18839 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18840 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18841 RVVArgDispatcher &RVVDispatcher) {
18842 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18843 assert(XLen == 32 || XLen == 64);
18844 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18845
18846 // Static chain parameter must not be passed in normal argument registers,
18847 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18848 if (ArgFlags.isNest()) {
18849 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18850 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18851 return false;
18852 }
18853 }
18854
18855 // Any return value split into more than two values can't be returned
18856 // directly. Vectors are returned via the available vector registers.
18857 if (!LocVT.isVector() && IsRet && ValNo > 1)
18858 return true;
18859
18860 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18861 // variadic argument, or if no F16/F32 argument registers are available.
18862 bool UseGPRForF16_F32 = true;
18863 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18864 // variadic argument, or if no F64 argument registers are available.
18865 bool UseGPRForF64 = true;
18866
18867 switch (ABI) {
18868 default:
18869 llvm_unreachable("Unexpected ABI");
18870 case RISCVABI::ABI_ILP32:
18871 case RISCVABI::ABI_ILP32E:
18872 case RISCVABI::ABI_LP64:
18873 case RISCVABI::ABI_LP64E:
18874 break;
18875 case RISCVABI::ABI_ILP32F:
18876 case RISCVABI::ABI_LP64F:
18877 UseGPRForF16_F32 = !IsFixed;
18878 break;
18879 case RISCVABI::ABI_ILP32D:
18880 case RISCVABI::ABI_LP64D:
18881 UseGPRForF16_F32 = !IsFixed;
18882 UseGPRForF64 = !IsFixed;
18883 break;
18884 }
18885
18886 // FPR16, FPR32, and FPR64 alias each other.
18887 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18888 UseGPRForF16_F32 = true;
18889 UseGPRForF64 = true;
18890 }
18891
18892 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18893 // similar local variables rather than directly checking against the target
18894 // ABI.
18895
18896 if (UseGPRForF16_F32 &&
18897 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18898 LocVT = XLenVT;
18899 LocInfo = CCValAssign::BCvt;
18900 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18901 LocVT = MVT::i64;
18902 LocInfo = CCValAssign::BCvt;
18903 }
18904
18905 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
18906
18907 // If this is a variadic argument, the RISC-V calling convention requires
18908 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18909 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18910 // be used regardless of whether the original argument was split during
18911 // legalisation or not. The argument will not be passed by registers if the
18912 // original type is larger than 2*XLEN, so the register alignment rule does
18913 // not apply.
18914 // TODO: To be compatible with GCC's behaviors, we don't align registers
18915 // currently if we are using ILP32E calling convention. This behavior may be
18916 // changed when RV32E/ILP32E is ratified.
18917 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18918 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18919 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18920 ABI != RISCVABI::ABI_ILP32E) {
18921 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18922 // Skip 'odd' register if necessary.
18923 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18924 State.AllocateReg(ArgGPRs);
18925 }
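 // Illustrative example (added, not from the source): on RV32, a variadic
 // double such as the 1.0 in printf("%f", 1.0) has 8-byte size and alignment,
 // so if a1 would be the next free GPR it is skipped and the value ends up in
 // the aligned pair a2/a3.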
18926
18927 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18928 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18929 State.getPendingArgFlags();
18930
18931 assert(PendingLocs.size() == PendingArgFlags.size() &&
18932 "PendingLocs and PendingArgFlags out of sync");
18933
18934 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18935 // registers are exhausted.
18936 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18937 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18938 // Depending on available argument GPRS, f64 may be passed in a pair of
18939 // GPRs, split between a GPR and the stack, or passed completely on the
18940 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18941 // cases.
18942 Register Reg = State.AllocateReg(ArgGPRs);
18943 if (!Reg) {
18944 unsigned StackOffset = State.AllocateStack(8, Align(8));
18945 State.addLoc(
18946 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18947 return false;
18948 }
18949 LocVT = MVT::i32;
18950 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18951 Register HiReg = State.AllocateReg(ArgGPRs);
18952 if (HiReg) {
18953 State.addLoc(
18954 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18955 } else {
18956 unsigned StackOffset = State.AllocateStack(4, Align(4));
18957 State.addLoc(
18958 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18959 }
18960 return false;
18961 }
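 // Illustrative example (added, not from the source): with the ilp32 ABI, if
 // a3 is the next free GPR an f64 argument is split so its low 32 bits land
 // in a3 and its high 32 bits in a4; if a7 were the last free GPR the high
 // half would go to a 4-byte stack slot instead, and with no GPRs left the
 // whole value would take an 8-byte, 8-byte-aligned stack slot.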
18962
18963 // Fixed-length vectors are located in the corresponding scalable-vector
18964 // container types.
18965 if (ValVT.isFixedLengthVector())
18966 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18967
18968 // Split arguments might be passed indirectly, so keep track of the pending
18969 // values. Split vectors are passed via a mix of registers and indirectly, so
18970 // treat them as we would any other argument.
18971 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18972 LocVT = XLenVT;
18973 LocInfo = CCValAssign::Indirect;
18974 PendingLocs.push_back(
18975 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18976 PendingArgFlags.push_back(ArgFlags);
18977 if (!ArgFlags.isSplitEnd()) {
18978 return false;
18979 }
18980 }
18981
18982 // If the split argument only had two elements, it should be passed directly
18983 // in registers or on the stack.
18984 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18985 PendingLocs.size() <= 2) {
18986 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18987 // Apply the normal calling convention rules to the first half of the
18988 // split argument.
18989 CCValAssign VA = PendingLocs[0];
18990 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18991 PendingLocs.clear();
18992 PendingArgFlags.clear();
18993 return CC_RISCVAssign2XLen(
18994 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18995 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18996 }
18997
18998 // Allocate to a register if possible, or else a stack slot.
18999 Register Reg;
19000 unsigned StoreSizeBytes = XLen / 8;
19001 Align StackAlign = Align(XLen / 8);
19002
19003 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
19004 Reg = State.AllocateReg(ArgFPR16s);
19005 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
19006 Reg = State.AllocateReg(ArgFPR32s);
19007 else if (ValVT == MVT::f64 && !UseGPRForF64)
19008 Reg = State.AllocateReg(ArgFPR64s);
19009 else if (ValVT.isVector()) {
19010 Reg = RVVDispatcher.getNextPhysReg();
19011 if (!Reg) {
19012 // For return values, the vector must be passed fully via registers or
19013 // via the stack.
19014 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
19015 // but we're using all of them.
19016 if (IsRet)
19017 return true;
19018 // Try using a GPR to pass the address
19019 if ((Reg = State.AllocateReg(ArgGPRs))) {
19020 LocVT = XLenVT;
19021 LocInfo = CCValAssign::Indirect;
19022 } else if (ValVT.isScalableVector()) {
19023 LocVT = XLenVT;
19024 LocInfo = CCValAssign::Indirect;
19025 } else {
19026 // Pass fixed-length vectors on the stack.
19027 LocVT = ValVT;
19028 StoreSizeBytes = ValVT.getStoreSize();
19029 // Align vectors to their element sizes, being careful for vXi1
19030 // vectors.
19031 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19032 }
19033 }
19034 } else {
19035 Reg = State.AllocateReg(ArgGPRs);
19036 }
19037
19038 unsigned StackOffset =
19039 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
19040
19041 // If we reach this point and PendingLocs is non-empty, we must be at the
19042 // end of a split argument that must be passed indirectly.
19043 if (!PendingLocs.empty()) {
19044 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
19045 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
19046
19047 for (auto &It : PendingLocs) {
19048 if (Reg)
19049 It.convertToReg(Reg);
19050 else
19051 It.convertToMem(StackOffset);
19052 State.addLoc(It);
19053 }
19054 PendingLocs.clear();
19055 PendingArgFlags.clear();
19056 return false;
19057 }
19058
19059 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
19060 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
19061 "Expected an XLenVT or vector types at this stage");
19062
19063 if (Reg) {
19064 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19065 return false;
19066 }
19067
19068 // When a scalar floating-point value is passed on the stack, no
19069 // bit-conversion is needed.
19070 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
19071 assert(!ValVT.isVector());
19072 LocVT = ValVT;
19073 LocInfo = CCValAssign::Full;
19074 }
19075 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19076 return false;
19077}
19078
19079template <typename ArgTy>
19080static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
19081 for (const auto &ArgIdx : enumerate(Args)) {
19082 MVT ArgVT = ArgIdx.value().VT;
19083 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
19084 return ArgIdx.index();
19085 }
19086 return std::nullopt;
19087}
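// Illustrative note (added): for a function taking
// (<vscale x 8 x i32> %v, <vscale x 8 x i1> %m), preAssignMask returns 1, the
// index of the first i1-element vector argument, so the RVV argument
// dispatcher can route that mask argument to v0.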
19088
19089void RISCVTargetLowering::analyzeInputArgs(
19090 MachineFunction &MF, CCState &CCInfo,
19091 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19092 RISCVCCAssignFn Fn) const {
19093 unsigned NumArgs = Ins.size();
19094 FunctionType *FType = MF.getFunction().getFunctionType();
19095
19096 RVVArgDispatcher Dispatcher;
19097 if (IsRet) {
19098 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
19099 } else {
19100 SmallVector<Type *, 4> TypeList;
19101 for (const Argument &Arg : MF.getFunction().args())
19102 TypeList.push_back(Arg.getType());
19103 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
19104 }
19105
19106 for (unsigned i = 0; i != NumArgs; ++i) {
19107 MVT ArgVT = Ins[i].VT;
19108 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19109
19110 Type *ArgTy = nullptr;
19111 if (IsRet)
19112 ArgTy = FType->getReturnType();
19113 else if (Ins[i].isOrigArg())
19114 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19115
19116 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19117 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
19118 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
19119 Dispatcher)) {
19120 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19121 << ArgVT << '\n');
19122 llvm_unreachable(nullptr);
19123 }
19124 }
19125}
19126
19127void RISCVTargetLowering::analyzeOutputArgs(
19128 MachineFunction &MF, CCState &CCInfo,
19129 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19130 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19131 unsigned NumArgs = Outs.size();
19132
19133 SmallVector<Type *, 4> TypeList;
19134 if (IsRet)
19135 TypeList.push_back(MF.getFunction().getReturnType());
19136 else if (CLI)
19137 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
19138 TypeList.push_back(Arg.Ty);
19139 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
19140
19141 for (unsigned i = 0; i != NumArgs; i++) {
19142 MVT ArgVT = Outs[i].VT;
19143 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19144 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19145
19146 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19147 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
19148 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
19149 Dispatcher)) {
19150 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19151 << ArgVT << "\n");
19152 llvm_unreachable(nullptr);
19153 }
19154 }
19155}
19156
19157// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19158// values.
19159static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19160 const CCValAssign &VA, const SDLoc &DL,
19161 const RISCVSubtarget &Subtarget) {
19162 switch (VA.getLocInfo()) {
19163 default:
19164 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19165 case CCValAssign::Full:
19166 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19167 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19168 break;
19169 case CCValAssign::BCvt:
19170 if (VA.getLocVT().isInteger() &&
19171 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19172 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19173 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
19174 if (RV64LegalI32) {
19175 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
19176 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
19177 } else {
19178 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19179 }
19180 } else {
19181 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19182 }
19183 break;
19184 }
19185 return Val;
19186}
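// For example (illustrative, added note): an f32 value that was assigned an
// i64 location on RV64 (soft-float ABI, variadic, or exhausted FPRs) is
// rebuilt here with FMV_W_X_RV64, and an f16/bf16 held in a GPR is rebuilt
// with FMV_H_X; CCValAssign::Full values pass through, with fixed-length
// vectors extracted from their scalable containers.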
19187
19188// The caller is responsible for loading the full value if the argument is
19189// passed with CCValAssign::Indirect.
19190static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19191 const CCValAssign &VA, const SDLoc &DL,
19192 const ISD::InputArg &In,
19193 const RISCVTargetLowering &TLI) {
19194 MachineFunction &MF = DAG.getMachineFunction();
19195 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19196 EVT LocVT = VA.getLocVT();
19197 SDValue Val;
19198 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19199 Register VReg = RegInfo.createVirtualRegister(RC);
19200 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19201 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19202
19203 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19204 if (In.isOrigArg()) {
19205 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19206 if (OrigArg->getType()->isIntegerTy()) {
19207 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19208 // An input zero extended from i31 can also be considered sign extended.
19209 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19210 (BitWidth < 32 && In.Flags.isZExt())) {
19211 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19212 RVFI->addSExt32Register(VReg);
19213 }
19214 }
19215 }
19216
19217 if (VA.getLocInfo() == CCValAssign::Indirect)
19218 return Val;
19219
19220 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19221}
19222
19224 const CCValAssign &VA, const SDLoc &DL,
19225 const RISCVSubtarget &Subtarget) {
19226 EVT LocVT = VA.getLocVT();
19227
19228 switch (VA.getLocInfo()) {
19229 default:
19230 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19231 case CCValAssign::Full:
19232 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19233 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
19234 break;
19235 case CCValAssign::BCvt:
19236 if (LocVT.isInteger() &&
19237 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19238 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19239 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
19240 if (RV64LegalI32) {
19241 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
19242 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
19243 } else {
19244 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19245 }
19246 } else {
19247 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19248 }
19249 break;
19250 }
19251 return Val;
19252}
19253
19254// The caller is responsible for loading the full value if the argument is
19255// passed with CCValAssign::Indirect.
19256static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19257 const CCValAssign &VA, const SDLoc &DL) {
19258 MachineFunction &MF = DAG.getMachineFunction();
19259 MachineFrameInfo &MFI = MF.getFrameInfo();
19260 EVT LocVT = VA.getLocVT();
19261 EVT ValVT = VA.getValVT();
19262 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
19263 if (ValVT.isScalableVector()) {
19264 // When the value is a scalable vector, we save the pointer which points to
19265 // the scalable vector value in the stack. The ValVT will be the pointer
19266 // type, instead of the scalable vector type.
19267 ValVT = LocVT;
19268 }
19269 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19270 /*IsImmutable=*/true);
19271 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19272 SDValue Val;
19273
19274 ISD::LoadExtType ExtType;
19275 switch (VA.getLocInfo()) {
19276 default:
19277 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19278 case CCValAssign::Full:
19279 case CCValAssign::Indirect:
19280 case CCValAssign::BCvt:
19281 ExtType = ISD::NON_EXTLOAD;
19282 break;
19283 }
19284 Val = DAG.getExtLoad(
19285 ExtType, DL, LocVT, Chain, FIN,
19286 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
19287 return Val;
19288}
19289
19290static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19291 const CCValAssign &VA,
19292 const CCValAssign &HiVA,
19293 const SDLoc &DL) {
19294 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19295 "Unexpected VA");
19296 MachineFunction &MF = DAG.getMachineFunction();
19297 MachineFrameInfo &MFI = MF.getFrameInfo();
19298 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19299
19300 assert(VA.isRegLoc() && "Expected register VA assignment");
19301
19302 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19303 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19304 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19305 SDValue Hi;
19306 if (HiVA.isMemLoc()) {
19307 // Second half of f64 is passed on the stack.
19308 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19309 /*IsImmutable=*/true);
19310 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19311 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19312 MachinePointerInfo::getFixedStack(MF, FI));
19313 } else {
19314 // Second half of f64 is passed in another GPR.
19315 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19316 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19317 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19318 }
19319 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19320}
19321
19322// FastCC has less than 1% performance improvement for some particular
19323// benchmark. But theoretically, it may have benefits in some cases.
19324bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
19325 unsigned ValNo, MVT ValVT, MVT LocVT,
19326 CCValAssign::LocInfo LocInfo,
19327 ISD::ArgFlagsTy ArgFlags, CCState &State,
19328 bool IsFixed, bool IsRet, Type *OrigTy,
19329 const RISCVTargetLowering &TLI,
19330 RVVArgDispatcher &RVVDispatcher) {
19331 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19332 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19333 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19334 return false;
19335 }
19336 }
19337
19338 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
19339
19340 if (LocVT == MVT::f16 &&
19341 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
19342 static const MCPhysReg FPR16List[] = {
19343 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
19344 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
19345 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
19346 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
19347 if (unsigned Reg = State.AllocateReg(FPR16List)) {
19348 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19349 return false;
19350 }
19351 }
19352
19353 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19354 static const MCPhysReg FPR32List[] = {
19355 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
19356 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
19357 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
19358 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
19359 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19360 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19361 return false;
19362 }
19363 }
19364
19365 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19366 static const MCPhysReg FPR64List[] = {
19367 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
19368 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
19369 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
19370 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
19371 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19372 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19373 return false;
19374 }
19375 }
19376
19377 // Check if there is an available GPR before hitting the stack.
19378 if ((LocVT == MVT::f16 &&
19379 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19380 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19381 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19382 Subtarget.hasStdExtZdinx())) {
19383 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19384 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19385 return false;
19386 }
19387 }
19388
19389 if (LocVT == MVT::f16) {
19390 unsigned Offset2 = State.AllocateStack(2, Align(2));
19391 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19392 return false;
19393 }
19394
19395 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19396 unsigned Offset4 = State.AllocateStack(4, Align(4));
19397 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19398 return false;
19399 }
19400
19401 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19402 unsigned Offset5 = State.AllocateStack(8, Align(8));
19403 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19404 return false;
19405 }
19406
19407 if (LocVT.isVector()) {
19408 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19409 if (AllocatedVReg) {
19410 // Fixed-length vectors are located in the corresponding scalable-vector
19411 // container types.
19412 if (ValVT.isFixedLengthVector())
19413 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19414 State.addLoc(
19415 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19416 } else {
19417 // Try and pass the address via a "fast" GPR.
19418 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19419 LocInfo = CCValAssign::Indirect;
19420 LocVT = TLI.getSubtarget().getXLenVT();
19421 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19422 } else if (ValVT.isFixedLengthVector()) {
19423 auto StackAlign =
19424 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19425 unsigned StackOffset =
19426 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19427 State.addLoc(
19428 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19429 } else {
19430 // Can't pass scalable vectors on the stack.
19431 return true;
19432 }
19433 }
19434
19435 return false;
19436 }
19437
19438 return true; // CC didn't match.
19439}
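// Illustrative difference from the standard convention (added note): fastcc
// also hands out temporaries, e.g. an f32 can land in ft0-ft7 or ft8-ft11
// (F0_F-F7_F, F28_F-F31_F) once fa0-fa7 are used up, and integer arguments
// can use t3-t6 (x28-x31) before anything is spilled to the stack.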
19440
19441bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19442 CCValAssign::LocInfo LocInfo,
19443 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19444 if (ArgFlags.isNest()) {
19445 report_fatal_error(
19446 "Attribute 'nest' is not supported in GHC calling convention");
19447 }
19448
19449 static const MCPhysReg GPRList[] = {
19450 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19451 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19452
19453 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19454 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19455 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19456 if (unsigned Reg = State.AllocateReg(GPRList)) {
19457 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19458 return false;
19459 }
19460 }
19461
19462 const RISCVSubtarget &Subtarget =
19463 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
19464
19465 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19466 // Pass in STG registers: F1, ..., F6
19467 // fs0 ... fs5
19468 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19469 RISCV::F18_F, RISCV::F19_F,
19470 RISCV::F20_F, RISCV::F21_F};
19471 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19472 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19473 return false;
19474 }
19475 }
19476
19477 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19478 // Pass in STG registers: D1, ..., D6
19479 // fs6 ... fs11
19480 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19481 RISCV::F24_D, RISCV::F25_D,
19482 RISCV::F26_D, RISCV::F27_D};
19483 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19484 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19485 return false;
19486 }
19487 }
19488
19489 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19490 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19491 Subtarget.is64Bit())) {
19492 if (unsigned Reg = State.AllocateReg(GPRList)) {
19493 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19494 return false;
19495 }
19496 }
19497
19498 report_fatal_error("No registers left in GHC calling convention");
19499 return true;
19500}
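// Note (added, illustrative): the GHC convention maps the STG machine
// registers onto callee-saved RISC-V registers (s1-s11 and fs0-fs11, or the
// GPR list again under Zfinx/Zdinx), never onto the stack, which is why
// running out of the lists above is a fatal error and why GHC functions do
// not return values.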
19501
19502// Transform physical registers into virtual registers.
19503SDValue RISCVTargetLowering::LowerFormalArguments(
19504 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19505 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19506 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19507
19508 MachineFunction &MF = DAG.getMachineFunction();
19509
19510 switch (CallConv) {
19511 default:
19512 report_fatal_error("Unsupported calling convention");
19513 case CallingConv::C:
19514 case CallingConv::Fast:
19515 case CallingConv::SPIR_KERNEL:
19516 case CallingConv::GRAAL:
19517 case CallingConv::RISCV_VectorCall:
19518 break;
19519 case CallingConv::GHC:
19520 if (Subtarget.hasStdExtE())
19521 report_fatal_error("GHC calling convention is not supported on RVE!");
19522 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19523 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19524 "(Zdinx/D) instruction set extensions");
19525 }
19526
19527 const Function &Func = MF.getFunction();
19528 if (Func.hasFnAttribute("interrupt")) {
19529 if (!Func.arg_empty())
19530 report_fatal_error(
19531 "Functions with the interrupt attribute cannot have arguments!");
19532
19533 StringRef Kind =
19534 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19535
19536 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19537 report_fatal_error(
19538 "Function interrupt attribute argument not supported!");
19539 }
19540
19541 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19542 MVT XLenVT = Subtarget.getXLenVT();
19543 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19544 // Used with varargs to accumulate store chains.
19545 std::vector<SDValue> OutChains;
19546
19547 // Assign locations to all of the incoming arguments.
19548 SmallVector<CCValAssign, 16> ArgLocs;
19549 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19550
19551 if (CallConv == CallingConv::GHC)
19552 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19553 else
19554 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19555 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19556 : RISCV::CC_RISCV);
19557
19558 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19559 CCValAssign &VA = ArgLocs[i];
19560 SDValue ArgValue;
19561 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19562 // case.
19563 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19564 assert(VA.needsCustom());
19565 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19566 } else if (VA.isRegLoc())
19567 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19568 else
19569 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19570
19571 if (VA.getLocInfo() == CCValAssign::Indirect) {
19572 // If the original argument was split and passed by reference (e.g. i128
19573 // on RV32), we need to load all parts of it here (using the same
19574 // address). Vectors may be partly split to registers and partly to the
19575 // stack, in which case the base address is partly offset and subsequent
19576 // stores are relative to that.
19577 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19578 MachinePointerInfo()));
19579 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19580 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19581 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19582 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19583 CCValAssign &PartVA = ArgLocs[i + 1];
19584 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19585 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19586 if (PartVA.getValVT().isScalableVector())
19587 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19588 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19589 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19590 MachinePointerInfo()));
19591 ++i;
19592 ++InsIdx;
19593 }
19594 continue;
19595 }
19596 InVals.push_back(ArgValue);
19597 }
19598
19599 if (any_of(ArgLocs,
19600 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19601 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19602
19603 if (IsVarArg) {
19604 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19605 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19606 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19607 MachineFrameInfo &MFI = MF.getFrameInfo();
19608 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19609 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19610
19611 // Size of the vararg save area. For now, the varargs save area is either
19612 // zero or large enough to hold a0-a7.
19613 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19614 int FI;
19615
19616 // If all registers are allocated, then all varargs must be passed on the
19617 // stack and we don't need to save any argregs.
19618 if (VarArgsSaveSize == 0) {
19619 int VaArgOffset = CCInfo.getStackSize();
19620 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19621 } else {
19622 int VaArgOffset = -VarArgsSaveSize;
19623 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19624
19625 // If saving an odd number of registers then create an extra stack slot to
19626 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19627 // offsets to even-numbered registers remain 2*XLEN-aligned.
19628 if (Idx % 2) {
19629 MFI.CreateFixedObject(
19630 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19631 VarArgsSaveSize += XLenInBytes;
19632 }
19633
19634 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19635
19636 // Copy the integer registers that may have been used for passing varargs
19637 // to the vararg save area.
19638 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19639 const Register Reg = RegInfo.createVirtualRegister(RC);
19640 RegInfo.addLiveIn(ArgRegs[I], Reg);
19641 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19642 SDValue Store = DAG.getStore(
19643 Chain, DL, ArgValue, FIN,
19644 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19645 OutChains.push_back(Store);
19646 FIN =
19647 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19648 }
19649 }
19650
19651 // Record the frame index of the first variable argument,
19652 // which is needed by VASTART.
19653 RVFI->setVarArgsFrameIndex(FI);
19654 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19655 }
19656
19657 // All stores are grouped in one node to allow the matching between
19658 // the size of Ins and InVals. This only happens for vararg functions.
19659 if (!OutChains.empty()) {
19660 OutChains.push_back(Chain);
19661 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19662 }
19663
19664 return Chain;
19665}
19666
19667/// isEligibleForTailCallOptimization - Check whether the call is eligible
19668/// for tail call optimization.
19669/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19670bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19671 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19672 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19673
19674 auto CalleeCC = CLI.CallConv;
19675 auto &Outs = CLI.Outs;
19676 auto &Caller = MF.getFunction();
19677 auto CallerCC = Caller.getCallingConv();
19678
19679 // Exception-handling functions need a special set of instructions to
19680 // indicate a return to the hardware. Tail-calling another function would
19681 // probably break this.
19682 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19683 // should be expanded as new function attributes are introduced.
19684 if (Caller.hasFnAttribute("interrupt"))
19685 return false;
19686
19687 // Do not tail call opt if the stack is used to pass parameters.
19688 if (CCInfo.getStackSize() != 0)
19689 return false;
19690
19691 // Do not tail call opt if any parameters need to be passed indirectly.
19692 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19693 // passed indirectly. So the address of the value will be passed in a
19694 // register, or if not available, then the address is put on the stack. In
19695 // order to pass indirectly, space on the stack often needs to be allocated
19696 // in order to store the value. In this case the CCInfo.getStackSize() != 0
19697 // check is not enough and we need to check if any CCValAssign ArgLocs are
19698 // passed CCValAssign::Indirect.
19699 for (auto &VA : ArgLocs)
19700 if (VA.getLocInfo() == CCValAssign::Indirect)
19701 return false;
19702
19703 // Do not tail call opt if either caller or callee uses struct return
19704 // semantics.
19705 auto IsCallerStructRet = Caller.hasStructRetAttr();
19706 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19707 if (IsCallerStructRet || IsCalleeStructRet)
19708 return false;
19709
19710 // The callee has to preserve all registers the caller needs to preserve.
19711 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19712 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19713 if (CalleeCC != CallerCC) {
19714 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19715 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19716 return false;
19717 }
19718
19719 // Byval parameters hand the function a pointer directly into the stack area
19720 // we want to reuse during a tail call. Working around this *is* possible
19721 // but less efficient and uglier in LowerCall.
19722 for (auto &Arg : Outs)
19723 if (Arg.Flags.isByVal())
19724 return false;
19725
19726 return true;
19727}
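// For example (illustrative, added note): a call whose outgoing arguments all
// fit in a0-a7/fa0-fa7 can be turned into a tail call, while a call that
// passes an i128 (indirect), takes a byval struct, or needs any stack bytes
// is rejected by the checks above.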
19728
19729static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19730 return DAG.getDataLayout().getPrefTypeAlign(
19731 VT.getTypeForEVT(*DAG.getContext()));
19732}
19733
19734// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19735// and output parameter nodes.
19736SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19737 SmallVectorImpl<SDValue> &InVals) const {
19738 SelectionDAG &DAG = CLI.DAG;
19739 SDLoc &DL = CLI.DL;
19740 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19741 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19742 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19743 SDValue Chain = CLI.Chain;
19744 SDValue Callee = CLI.Callee;
19745 bool &IsTailCall = CLI.IsTailCall;
19746 CallingConv::ID CallConv = CLI.CallConv;
19747 bool IsVarArg = CLI.IsVarArg;
19748 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19749 MVT XLenVT = Subtarget.getXLenVT();
19750
19751 MachineFunction &MF = DAG.getMachineFunction();
19752
19753 // Analyze the operands of the call, assigning locations to each operand.
19754 SmallVector<CCValAssign, 16> ArgLocs;
19755 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19756
19757 if (CallConv == CallingConv::GHC) {
19758 if (Subtarget.hasStdExtE())
19759 report_fatal_error("GHC calling convention is not supported on RVE!");
19760 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19761 } else
19762 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19763 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19764 : RISCV::CC_RISCV);
19765
19766 // Check if it's really possible to do a tail call.
19767 if (IsTailCall)
19768 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19769
19770 if (IsTailCall)
19771 ++NumTailCalls;
19772 else if (CLI.CB && CLI.CB->isMustTailCall())
19773 report_fatal_error("failed to perform tail call elimination on a call "
19774 "site marked musttail");
19775
19776 // Get a count of how many bytes are to be pushed on the stack.
19777 unsigned NumBytes = ArgCCInfo.getStackSize();
19778
19779 // Create local copies for byval args
19780 SmallVector<SDValue, 8> ByValArgs;
19781 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19782 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19783 if (!Flags.isByVal())
19784 continue;
19785
19786 SDValue Arg = OutVals[i];
19787 unsigned Size = Flags.getByValSize();
19788 Align Alignment = Flags.getNonZeroByValAlign();
19789
19790 int FI =
19791 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19792 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19793 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19794
19795 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19796 /*IsVolatile=*/false,
19797 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
19798 MachinePointerInfo(), MachinePointerInfo());
19799 ByValArgs.push_back(FIPtr);
19800 }
19801
19802 if (!IsTailCall)
19803 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19804
19805 // Copy argument values to their designated locations.
19806 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19807 SmallVector<SDValue, 8> MemOpChains;
19808 SDValue StackPtr;
19809 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19810 ++i, ++OutIdx) {
19811 CCValAssign &VA = ArgLocs[i];
19812 SDValue ArgValue = OutVals[OutIdx];
19813 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19814
19815 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19816 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19817 assert(VA.isRegLoc() && "Expected register VA assignment");
19818 assert(VA.needsCustom());
19819 SDValue SplitF64 = DAG.getNode(
19820 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19821 SDValue Lo = SplitF64.getValue(0);
19822 SDValue Hi = SplitF64.getValue(1);
19823
19824 Register RegLo = VA.getLocReg();
19825 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19826
19827 // Get the CCValAssign for the Hi part.
19828 CCValAssign &HiVA = ArgLocs[++i];
19829
19830 if (HiVA.isMemLoc()) {
19831 // Second half of f64 is passed on the stack.
19832 if (!StackPtr.getNode())
19833 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19834 SDValue Address =
19835 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19836 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19837 // Emit the store.
19838 MemOpChains.push_back(
19839 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19840 } else {
19841 // Second half of f64 is passed in another GPR.
19842 Register RegHigh = HiVA.getLocReg();
19843 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19844 }
19845 continue;
19846 }
19847
19848 // Promote the value if needed.
19849 // For now, only handle fully promoted and indirect arguments.
19850 if (VA.getLocInfo() == CCValAssign::Indirect) {
19851 // Store the argument in a stack slot and pass its address.
19852 Align StackAlign =
19853 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19854 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19855 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19856 // If the original argument was split (e.g. i128), we need
19857 // to store the required parts of it here (and pass just one address).
19858 // Vectors may be partly split to registers and partly to the stack, in
19859 // which case the base address is partly offset and subsequent stores are
19860 // relative to that.
19861 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19862 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19863 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19864 // Calculate the total size to store. We don't have access to what we're
19865 // actually storing other than performing the loop and collecting the
19866 // info.
19867 SmallVector<std::pair<SDValue, SDValue>> Parts;
19868 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19869 SDValue PartValue = OutVals[OutIdx + 1];
19870 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19871 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19872 EVT PartVT = PartValue.getValueType();
19873 if (PartVT.isScalableVector())
19874 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19875 StoredSize += PartVT.getStoreSize();
19876 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19877 Parts.push_back(std::make_pair(PartValue, Offset));
19878 ++i;
19879 ++OutIdx;
19880 }
19881 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19882 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19883 MemOpChains.push_back(
19884 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19886 for (const auto &Part : Parts) {
19887 SDValue PartValue = Part.first;
19888 SDValue PartOffset = Part.second;
19889 SDValue Address =
19890 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19891 MemOpChains.push_back(
19892 DAG.getStore(Chain, DL, PartValue, Address,
19893 MachinePointerInfo::getFixedStack(MF, FI)));
19894 }
19895 ArgValue = SpillSlot;
19896 } else {
19897 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19898 }
19899
19900 // Use local copy if it is a byval arg.
19901 if (Flags.isByVal())
19902 ArgValue = ByValArgs[j++];
19903
19904 if (VA.isRegLoc()) {
19905 // Queue up the argument copies and emit them at the end.
19906 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19907 } else {
19908 assert(VA.isMemLoc() && "Argument not register or memory");
19909 assert(!IsTailCall && "Tail call not allowed if stack is used "
19910 "for passing parameters");
19911
19912 // Work out the address of the stack slot.
19913 if (!StackPtr.getNode())
19914 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19915 SDValue Address =
19916 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19917 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19918
19919 // Emit the store.
19920 MemOpChains.push_back(
19921 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19922 }
19923 }
19924
19925 // Join the stores, which are independent of one another.
19926 if (!MemOpChains.empty())
19927 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19928
19929 SDValue Glue;
19930
19931 // Build a sequence of copy-to-reg nodes, chained and glued together.
19932 for (auto &Reg : RegsToPass) {
19933 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19934 Glue = Chain.getValue(1);
19935 }
19936
19937 // Validate that none of the argument registers have been marked as
19938 // reserved; if so, report an error. Do the same for the return address if this
19939 // is not a tailcall.
19940 validateCCReservedRegs(RegsToPass, MF);
19941 if (!IsTailCall &&
19942 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19943 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19944 MF.getFunction(),
19945 "Return address register required, but has been reserved."});
19946
19947 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19948 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19949 // split it and then direct call can be matched by PseudoCALL.
19950 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19951 const GlobalValue *GV = S->getGlobal();
19952 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19953 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19954 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19955 }
19956
19957 // The first call operand is the chain and the second is the target address.
19958 SmallVector<SDValue, 8> Ops;
19959 Ops.push_back(Chain);
19960 Ops.push_back(Callee);
19961
19962 // Add argument registers to the end of the list so that they are
19963 // known live into the call.
19964 for (auto &Reg : RegsToPass)
19965 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19966
19967 if (!IsTailCall) {
19968 // Add a register mask operand representing the call-preserved registers.
19969 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19970 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19971 assert(Mask && "Missing call preserved mask for calling convention");
19972 Ops.push_back(DAG.getRegisterMask(Mask));
19973 }
19974
19975 // Glue the call to the argument copies, if any.
19976 if (Glue.getNode())
19977 Ops.push_back(Glue);
19978
19979 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19980 "Unexpected CFI type for a direct call");
19981
19982 // Emit the call.
19983 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19984
19985 if (IsTailCall) {
19986 MF.getFrameInfo().setHasTailCall();
19987 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19988 if (CLI.CFIType)
19989 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19990 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19991 return Ret;
19992 }
19993
19994 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19995 if (CLI.CFIType)
19996 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19997 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19998 Glue = Chain.getValue(1);
19999
20000 // Mark the end of the call, which is glued to the call itself.
20001 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20002 Glue = Chain.getValue(1);
20003
20004 // Assign locations to each value returned by this call.
20005 SmallVector<CCValAssign, 16> RVLocs;
20006 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20007 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
20008
20009 // Copy all of the result registers out of their specified physreg.
20010 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20011 auto &VA = RVLocs[i];
20012 // Copy the value out
20013 SDValue RetValue =
20014 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20015 // Glue the RetValue to the end of the call sequence
20016 Chain = RetValue.getValue(1);
20017 Glue = RetValue.getValue(2);
20018
20019 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20020 assert(VA.needsCustom());
20021 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20022 MVT::i32, Glue);
20023 Chain = RetValue2.getValue(1);
20024 Glue = RetValue2.getValue(2);
20025 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20026 RetValue2);
20027 }
20028
20029 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20030
20031 InVals.push_back(RetValue);
20032 }
20033
20034 return Chain;
20035}
20036
20037bool RISCVTargetLowering::CanLowerReturn(
20038 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20039 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20040 SmallVector<CCValAssign, 16> RVLocs;
20041 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20042
20043 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
20044
20045 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20046 MVT VT = Outs[i].VT;
20047 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20048 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
20049 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
20050 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
20051 nullptr, *this, Dispatcher))
20052 return false;
20053 }
20054 return true;
20055}
20056
20057SDValue
20058RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20059 bool IsVarArg,
20060 const SmallVectorImpl<ISD::OutputArg> &Outs,
20061 const SmallVectorImpl<SDValue> &OutVals,
20062 const SDLoc &DL, SelectionDAG &DAG) const {
20063 MachineFunction &MF = DAG.getMachineFunction();
20064 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20065
20066 // Stores the assignment of the return value to a location.
20067 SmallVector<CCValAssign, 16> RVLocs;
20068
20069 // Info about the registers and stack slot.
20070 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20071 *DAG.getContext());
20072
20073 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20074 nullptr, RISCV::CC_RISCV);
20075
20076 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20077 report_fatal_error("GHC functions return void only");
20078
20079 SDValue Glue;
20080 SmallVector<SDValue, 4> RetOps(1, Chain);
20081
20082 // Copy the result values into the output registers.
20083 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20084 SDValue Val = OutVals[OutIdx];
20085 CCValAssign &VA = RVLocs[i];
20086 assert(VA.isRegLoc() && "Can only return in registers!");
20087
20088 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20089 // Handle returning f64 on RV32D with a soft float ABI.
20090 assert(VA.isRegLoc() && "Expected return via registers");
20091 assert(VA.needsCustom());
20092 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20093 DAG.getVTList(MVT::i32, MVT::i32), Val);
20094 SDValue Lo = SplitF64.getValue(0);
20095 SDValue Hi = SplitF64.getValue(1);
20096 Register RegLo = VA.getLocReg();
20097 Register RegHi = RVLocs[++i].getLocReg();
20098
20099 if (STI.isRegisterReservedByUser(RegLo) ||
20100 STI.isRegisterReservedByUser(RegHi))
20101 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20102 MF.getFunction(),
20103 "Return value register required, but has been reserved."});
20104
20105 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20106 Glue = Chain.getValue(1);
20107 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20108 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20109 Glue = Chain.getValue(1);
20110 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20111 } else {
20112 // Handle a 'normal' return.
20113 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20114 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20115
20116 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20117 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20118 MF.getFunction(),
20119 "Return value register required, but has been reserved."});
20120
20121 // Guarantee that all emitted copies are stuck together.
20122 Glue = Chain.getValue(1);
20123 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20124 }
20125 }
20126
20127 RetOps[0] = Chain; // Update chain.
20128
20129 // Add the glue node if we have it.
20130 if (Glue.getNode()) {
20131 RetOps.push_back(Glue);
20132 }
20133
20134 if (any_of(RVLocs,
20135 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20136 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20137
20138 unsigned RetOpc = RISCVISD::RET_GLUE;
20139 // Interrupt service routines use different return instructions.
20140 const Function &Func = DAG.getMachineFunction().getFunction();
20141 if (Func.hasFnAttribute("interrupt")) {
20142 if (!Func.getReturnType()->isVoidTy())
20143 report_fatal_error(
20144 "Functions with the interrupt attribute must have void return type!");
20145
20147 StringRef Kind =
20148 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20149
20150 if (Kind == "supervisor")
20151 RetOpc = RISCVISD::SRET_GLUE;
20152 else
20153 RetOpc = RISCVISD::MRET_GLUE;
20154 }
20155
20156 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20157}
20158
20159void RISCVTargetLowering::validateCCReservedRegs(
20160 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20161 MachineFunction &MF) const {
20162 const Function &F = MF.getFunction();
20163 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20164
20165 if (llvm::any_of(Regs, [&STI](auto Reg) {
20166 return STI.isRegisterReservedByUser(Reg.first);
20167 }))
20168 F.getContext().diagnose(DiagnosticInfoUnsupported{
20169 F, "Argument register required, but has been reserved."});
20170}
20171
20172// Check if the result of the node is only used as a return value, as
20173// otherwise we can't perform a tail-call.
20174bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20175 if (N->getNumValues() != 1)
20176 return false;
20177 if (!N->hasNUsesOfValue(1, 0))
20178 return false;
20179
20180 SDNode *Copy = *N->use_begin();
20181
20182 if (Copy->getOpcode() == ISD::BITCAST) {
20183 return isUsedByReturnOnly(Copy, Chain);
20184 }
20185
20186 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20187 // with soft float ABIs.
20188 if (Copy->getOpcode() != ISD::CopyToReg) {
20189 return false;
20190 }
20191
20192 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20193 // isn't safe to perform a tail call.
20194 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20195 return false;
20196
20197 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20198 bool HasRet = false;
20199 for (SDNode *Node : Copy->uses()) {
20200 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20201 return false;
20202 HasRet = true;
20203 }
20204 if (!HasRet)
20205 return false;
20206
20207 Chain = Copy->getOperand(0);
20208 return true;
20209}
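// For example (illustrative, added note): a soft-float libcall result that is
// returned directly produces the shape
//   libcall node -> CopyToReg(a0) -> RISCVISD::RET_GLUE
// which this predicate accepts; any other user of the CopyToReg, or a glue
// operand on it, makes the node ineligible for tail-calling the libcall.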
20210
20211bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20212 return CI->isTailCall();
20213}
20214
20215const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20216#define NODE_NAME_CASE(NODE) \
20217 case RISCVISD::NODE: \
20218 return "RISCVISD::" #NODE;
20219 // clang-format off
20220 switch ((RISCVISD::NodeType)Opcode) {
20221 case RISCVISD::FIRST_NUMBER:
20222 break;
20223 NODE_NAME_CASE(RET_GLUE)
20224 NODE_NAME_CASE(SRET_GLUE)
20225 NODE_NAME_CASE(MRET_GLUE)
20226 NODE_NAME_CASE(CALL)
20227 NODE_NAME_CASE(SELECT_CC)
20228 NODE_NAME_CASE(BR_CC)
20229 NODE_NAME_CASE(BuildPairF64)
20230 NODE_NAME_CASE(SplitF64)
20231 NODE_NAME_CASE(TAIL)
20232 NODE_NAME_CASE(ADD_LO)
20233 NODE_NAME_CASE(HI)
20234 NODE_NAME_CASE(LLA)
20235 NODE_NAME_CASE(ADD_TPREL)
20236 NODE_NAME_CASE(MULHSU)
20237 NODE_NAME_CASE(SHL_ADD)
20238 NODE_NAME_CASE(SLLW)
20239 NODE_NAME_CASE(SRAW)
20240 NODE_NAME_CASE(SRLW)
20241 NODE_NAME_CASE(DIVW)
20242 NODE_NAME_CASE(DIVUW)
20243 NODE_NAME_CASE(REMUW)
20244 NODE_NAME_CASE(ROLW)
20245 NODE_NAME_CASE(RORW)
20246 NODE_NAME_CASE(CLZW)
20247 NODE_NAME_CASE(CTZW)
20248 NODE_NAME_CASE(ABSW)
20249 NODE_NAME_CASE(FMV_H_X)
20250 NODE_NAME_CASE(FMV_X_ANYEXTH)
20251 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20252 NODE_NAME_CASE(FMV_W_X_RV64)
20253 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20254 NODE_NAME_CASE(FCVT_X)
20255 NODE_NAME_CASE(FCVT_XU)
20256 NODE_NAME_CASE(FCVT_W_RV64)
20257 NODE_NAME_CASE(FCVT_WU_RV64)
20258 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20259 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20260 NODE_NAME_CASE(FP_ROUND_BF16)
20261 NODE_NAME_CASE(FP_EXTEND_BF16)
20262 NODE_NAME_CASE(FROUND)
20263 NODE_NAME_CASE(FCLASS)
20264 NODE_NAME_CASE(FMAX)
20265 NODE_NAME_CASE(FMIN)
20266 NODE_NAME_CASE(READ_COUNTER_WIDE)
20267 NODE_NAME_CASE(BREV8)
20268 NODE_NAME_CASE(ORC_B)
20269 NODE_NAME_CASE(ZIP)
20270 NODE_NAME_CASE(UNZIP)
20271 NODE_NAME_CASE(CLMUL)
20272 NODE_NAME_CASE(CLMULH)
20273 NODE_NAME_CASE(CLMULR)
20274 NODE_NAME_CASE(MOPR)
20275 NODE_NAME_CASE(MOPRR)
20276 NODE_NAME_CASE(SHA256SIG0)
20277 NODE_NAME_CASE(SHA256SIG1)
20278 NODE_NAME_CASE(SHA256SUM0)
20279 NODE_NAME_CASE(SHA256SUM1)
20280 NODE_NAME_CASE(SM4KS)
20281 NODE_NAME_CASE(SM4ED)
20282 NODE_NAME_CASE(SM3P0)
20283 NODE_NAME_CASE(SM3P1)
20284 NODE_NAME_CASE(TH_LWD)
20285 NODE_NAME_CASE(TH_LWUD)
20286 NODE_NAME_CASE(TH_LDD)
20287 NODE_NAME_CASE(TH_SWD)
20288 NODE_NAME_CASE(TH_SDD)
20289 NODE_NAME_CASE(VMV_V_V_VL)
20290 NODE_NAME_CASE(VMV_V_X_VL)
20291 NODE_NAME_CASE(VFMV_V_F_VL)
20292 NODE_NAME_CASE(VMV_X_S)
20293 NODE_NAME_CASE(VMV_S_X_VL)
20294 NODE_NAME_CASE(VFMV_S_F_VL)
20295 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20296 NODE_NAME_CASE(READ_VLENB)
20297 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20298 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20299 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20300 NODE_NAME_CASE(VSLIDEUP_VL)
20301 NODE_NAME_CASE(VSLIDE1UP_VL)
20302 NODE_NAME_CASE(VSLIDEDOWN_VL)
20303 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20304 NODE_NAME_CASE(VFSLIDE1UP_VL)
20305 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20306 NODE_NAME_CASE(VID_VL)
20307 NODE_NAME_CASE(VFNCVT_ROD_VL)
20308 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20309 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20310 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20311 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20312 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20313 NODE_NAME_CASE(VECREDUCE_AND_VL)
20314 NODE_NAME_CASE(VECREDUCE_OR_VL)
20315 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20316 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20317 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20318 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20319 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20320 NODE_NAME_CASE(ADD_VL)
20321 NODE_NAME_CASE(AND_VL)
20322 NODE_NAME_CASE(MUL_VL)
20323 NODE_NAME_CASE(OR_VL)
20324 NODE_NAME_CASE(SDIV_VL)
20325 NODE_NAME_CASE(SHL_VL)
20326 NODE_NAME_CASE(SREM_VL)
20327 NODE_NAME_CASE(SRA_VL)
20328 NODE_NAME_CASE(SRL_VL)
20329 NODE_NAME_CASE(ROTL_VL)
20330 NODE_NAME_CASE(ROTR_VL)
20331 NODE_NAME_CASE(SUB_VL)
20332 NODE_NAME_CASE(UDIV_VL)
20333 NODE_NAME_CASE(UREM_VL)
20334 NODE_NAME_CASE(XOR_VL)
20335 NODE_NAME_CASE(AVGFLOORS_VL)
20336 NODE_NAME_CASE(AVGFLOORU_VL)
20337 NODE_NAME_CASE(AVGCEILS_VL)
20338 NODE_NAME_CASE(AVGCEILU_VL)
20339 NODE_NAME_CASE(SADDSAT_VL)
20340 NODE_NAME_CASE(UADDSAT_VL)
20341 NODE_NAME_CASE(SSUBSAT_VL)
20342 NODE_NAME_CASE(USUBSAT_VL)
20343 NODE_NAME_CASE(FADD_VL)
20344 NODE_NAME_CASE(FSUB_VL)
20345 NODE_NAME_CASE(FMUL_VL)
20346 NODE_NAME_CASE(FDIV_VL)
20347 NODE_NAME_CASE(FNEG_VL)
20348 NODE_NAME_CASE(FABS_VL)
20349 NODE_NAME_CASE(FSQRT_VL)
20350 NODE_NAME_CASE(FCLASS_VL)
20351 NODE_NAME_CASE(VFMADD_VL)
20352 NODE_NAME_CASE(VFNMADD_VL)
20353 NODE_NAME_CASE(VFMSUB_VL)
20354 NODE_NAME_CASE(VFNMSUB_VL)
20355 NODE_NAME_CASE(VFWMADD_VL)
20356 NODE_NAME_CASE(VFWNMADD_VL)
20357 NODE_NAME_CASE(VFWMSUB_VL)
20358 NODE_NAME_CASE(VFWNMSUB_VL)
20359 NODE_NAME_CASE(FCOPYSIGN_VL)
20360 NODE_NAME_CASE(SMIN_VL)
20361 NODE_NAME_CASE(SMAX_VL)
20362 NODE_NAME_CASE(UMIN_VL)
20363 NODE_NAME_CASE(UMAX_VL)
20364 NODE_NAME_CASE(BITREVERSE_VL)
20365 NODE_NAME_CASE(BSWAP_VL)
20366 NODE_NAME_CASE(CTLZ_VL)
20367 NODE_NAME_CASE(CTTZ_VL)
20368 NODE_NAME_CASE(CTPOP_VL)
20369 NODE_NAME_CASE(VFMIN_VL)
20370 NODE_NAME_CASE(VFMAX_VL)
20371 NODE_NAME_CASE(MULHS_VL)
20372 NODE_NAME_CASE(MULHU_VL)
20373 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20374 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20375 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20376 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20377 NODE_NAME_CASE(VFCVT_X_F_VL)
20378 NODE_NAME_CASE(VFCVT_XU_F_VL)
20379 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20380 NODE_NAME_CASE(SINT_TO_FP_VL)
20381 NODE_NAME_CASE(UINT_TO_FP_VL)
20382 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20383 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20384 NODE_NAME_CASE(FP_EXTEND_VL)
20385 NODE_NAME_CASE(FP_ROUND_VL)
20386 NODE_NAME_CASE(STRICT_FADD_VL)
20387 NODE_NAME_CASE(STRICT_FSUB_VL)
20388 NODE_NAME_CASE(STRICT_FMUL_VL)
20389 NODE_NAME_CASE(STRICT_FDIV_VL)
20390 NODE_NAME_CASE(STRICT_FSQRT_VL)
20391 NODE_NAME_CASE(STRICT_VFMADD_VL)
20392 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20393 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20394 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20395 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20396 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20397 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20398 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20399 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20400 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20401 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20402 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20403 NODE_NAME_CASE(STRICT_FSETCC_VL)
20404 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20405 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20406 NODE_NAME_CASE(VWMUL_VL)
20407 NODE_NAME_CASE(VWMULU_VL)
20408 NODE_NAME_CASE(VWMULSU_VL)
20409 NODE_NAME_CASE(VWADD_VL)
20410 NODE_NAME_CASE(VWADDU_VL)
20411 NODE_NAME_CASE(VWSUB_VL)
20412 NODE_NAME_CASE(VWSUBU_VL)
20413 NODE_NAME_CASE(VWADD_W_VL)
20414 NODE_NAME_CASE(VWADDU_W_VL)
20415 NODE_NAME_CASE(VWSUB_W_VL)
20416 NODE_NAME_CASE(VWSUBU_W_VL)
20417 NODE_NAME_CASE(VWSLL_VL)
20418 NODE_NAME_CASE(VFWMUL_VL)
20419 NODE_NAME_CASE(VFWADD_VL)
20420 NODE_NAME_CASE(VFWSUB_VL)
20421 NODE_NAME_CASE(VFWADD_W_VL)
20422 NODE_NAME_CASE(VFWSUB_W_VL)
20423 NODE_NAME_CASE(VWMACC_VL)
20424 NODE_NAME_CASE(VWMACCU_VL)
20425 NODE_NAME_CASE(VWMACCSU_VL)
20426 NODE_NAME_CASE(VNSRL_VL)
20427 NODE_NAME_CASE(SETCC_VL)
20428 NODE_NAME_CASE(VMERGE_VL)
20429 NODE_NAME_CASE(VMAND_VL)
20430 NODE_NAME_CASE(VMOR_VL)
20431 NODE_NAME_CASE(VMXOR_VL)
20432 NODE_NAME_CASE(VMCLR_VL)
20433 NODE_NAME_CASE(VMSET_VL)
20434 NODE_NAME_CASE(VRGATHER_VX_VL)
20435 NODE_NAME_CASE(VRGATHER_VV_VL)
20436 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20437 NODE_NAME_CASE(VSEXT_VL)
20438 NODE_NAME_CASE(VZEXT_VL)
20439 NODE_NAME_CASE(VCPOP_VL)
20440 NODE_NAME_CASE(VFIRST_VL)
20441 NODE_NAME_CASE(READ_CSR)
20442 NODE_NAME_CASE(WRITE_CSR)
20443 NODE_NAME_CASE(SWAP_CSR)
20444 NODE_NAME_CASE(CZERO_EQZ)
20445 NODE_NAME_CASE(CZERO_NEZ)
20446 NODE_NAME_CASE(SW_GUARDED_BRIND)
20447 NODE_NAME_CASE(SF_VC_XV_SE)
20448 NODE_NAME_CASE(SF_VC_IV_SE)
20449 NODE_NAME_CASE(SF_VC_VV_SE)
20450 NODE_NAME_CASE(SF_VC_FV_SE)
20451 NODE_NAME_CASE(SF_VC_XVV_SE)
20452 NODE_NAME_CASE(SF_VC_IVV_SE)
20453 NODE_NAME_CASE(SF_VC_VVV_SE)
20454 NODE_NAME_CASE(SF_VC_FVV_SE)
20455 NODE_NAME_CASE(SF_VC_XVW_SE)
20456 NODE_NAME_CASE(SF_VC_IVW_SE)
20457 NODE_NAME_CASE(SF_VC_VVW_SE)
20458 NODE_NAME_CASE(SF_VC_FVW_SE)
20459 NODE_NAME_CASE(SF_VC_V_X_SE)
20460 NODE_NAME_CASE(SF_VC_V_I_SE)
20461 NODE_NAME_CASE(SF_VC_V_XV_SE)
20462 NODE_NAME_CASE(SF_VC_V_IV_SE)
20463 NODE_NAME_CASE(SF_VC_V_VV_SE)
20464 NODE_NAME_CASE(SF_VC_V_FV_SE)
20465 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20466 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20467 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20468 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20469 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20470 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20471 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20472 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20473 }
20474 // clang-format on
20475 return nullptr;
20476#undef NODE_NAME_CASE
20477}
20478
20479/// getConstraintType - Given a constraint letter, return the type of
20480/// constraint it is for this target.
20481 RISCVTargetLowering::ConstraintType
20482 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20483 if (Constraint.size() == 1) {
20484 switch (Constraint[0]) {
20485 default:
20486 break;
20487 case 'f':
20488 return C_RegisterClass;
20489 case 'I':
20490 case 'J':
20491 case 'K':
20492 return C_Immediate;
20493 case 'A':
20494 return C_Memory;
20495 case 's':
20496 case 'S': // A symbolic address
20497 return C_Other;
20498 }
20499 } else {
20500 if (Constraint == "vr" || Constraint == "vm")
20501 return C_RegisterClass;
20502 }
20503 return TargetLowering::getConstraintType(Constraint);
20504}
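// Editorial sketch of how these constraints appear in C inline assembly
// (illustrative only; variable names are hypothetical):
//   int out, in = 1;
//   asm volatile("addi %0, %1, %2" : "=r"(out) : "r"(in), "I"(42));
// Here 'r' selects a GPR, 'I' accepts a 12-bit signed immediate, and an 'A'
// operand supplies an address for memory/atomic instructions.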
20505
20506std::pair<unsigned, const TargetRegisterClass *>
20507 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20508 StringRef Constraint,
20509 MVT VT) const {
20510 // First, see if this is a constraint that directly corresponds to a RISC-V
20511 // register class.
20512 if (Constraint.size() == 1) {
20513 switch (Constraint[0]) {
20514 case 'r':
20515 // TODO: Support fixed vectors up to XLen for P extension?
20516 if (VT.isVector())
20517 break;
20518 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20519 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20520 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20521 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20522 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20523 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20524 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20525 case 'f':
20526 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20527 return std::make_pair(0U, &RISCV::FPR16RegClass);
20528 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20529 return std::make_pair(0U, &RISCV::FPR32RegClass);
20530 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20531 return std::make_pair(0U, &RISCV::FPR64RegClass);
20532 break;
20533 default:
20534 break;
20535 }
20536 } else if (Constraint == "vr") {
20537 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20538 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20539 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20540 return std::make_pair(0U, RC);
20541 }
20542 } else if (Constraint == "vm") {
20543 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20544 return std::make_pair(0U, &RISCV::VMV0RegClass);
20545 }
20546
20547 // Clang will correctly decode the usage of register name aliases into their
20548 // official names. However, other frontends like `rustc` do not. This allows
20549 // users of these frontends to use the ABI names for registers in LLVM-style
20550 // register constraints.
20551 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20552 .Case("{zero}", RISCV::X0)
20553 .Case("{ra}", RISCV::X1)
20554 .Case("{sp}", RISCV::X2)
20555 .Case("{gp}", RISCV::X3)
20556 .Case("{tp}", RISCV::X4)
20557 .Case("{t0}", RISCV::X5)
20558 .Case("{t1}", RISCV::X6)
20559 .Case("{t2}", RISCV::X7)
20560 .Cases("{s0}", "{fp}", RISCV::X8)
20561 .Case("{s1}", RISCV::X9)
20562 .Case("{a0}", RISCV::X10)
20563 .Case("{a1}", RISCV::X11)
20564 .Case("{a2}", RISCV::X12)
20565 .Case("{a3}", RISCV::X13)
20566 .Case("{a4}", RISCV::X14)
20567 .Case("{a5}", RISCV::X15)
20568 .Case("{a6}", RISCV::X16)
20569 .Case("{a7}", RISCV::X17)
20570 .Case("{s2}", RISCV::X18)
20571 .Case("{s3}", RISCV::X19)
20572 .Case("{s4}", RISCV::X20)
20573 .Case("{s5}", RISCV::X21)
20574 .Case("{s6}", RISCV::X22)
20575 .Case("{s7}", RISCV::X23)
20576 .Case("{s8}", RISCV::X24)
20577 .Case("{s9}", RISCV::X25)
20578 .Case("{s10}", RISCV::X26)
20579 .Case("{s11}", RISCV::X27)
20580 .Case("{t3}", RISCV::X28)
20581 .Case("{t4}", RISCV::X29)
20582 .Case("{t5}", RISCV::X30)
20583 .Case("{t6}", RISCV::X31)
20584 .Default(RISCV::NoRegister);
20585 if (XRegFromAlias != RISCV::NoRegister)
20586 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20587
20588 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20589 // TableGen record rather than the AsmName to choose registers for InlineAsm
20590 // constraints, plus we want to match those names to the widest floating point
20591 // register type available, manually select floating point registers here.
20592 //
20593 // The second case is the ABI name of the register, so that frontends can also
20594 // use the ABI names in register constraint lists.
20595 if (Subtarget.hasStdExtF()) {
20596 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20597 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20598 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20599 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20600 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20601 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20602 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20603 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20604 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20605 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20606 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20607 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20608 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20609 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20610 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20611 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20612 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20613 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20614 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20615 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20616 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20617 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20618 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20619 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20620 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20621 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20622 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20623 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20624 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20625 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20626 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20627 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20628 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20629 .Default(RISCV::NoRegister);
20630 if (FReg != RISCV::NoRegister) {
20631 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20632 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20633 unsigned RegNo = FReg - RISCV::F0_F;
20634 unsigned DReg = RISCV::F0_D + RegNo;
20635 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20636 }
20637 if (VT == MVT::f32 || VT == MVT::Other)
20638 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20639 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20640 unsigned RegNo = FReg - RISCV::F0_F;
20641 unsigned HReg = RISCV::F0_H + RegNo;
20642 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20643 }
20644 }
20645 }
20646
20647 if (Subtarget.hasVInstructions()) {
20648 Register VReg = StringSwitch<Register>(Constraint.lower())
20649 .Case("{v0}", RISCV::V0)
20650 .Case("{v1}", RISCV::V1)
20651 .Case("{v2}", RISCV::V2)
20652 .Case("{v3}", RISCV::V3)
20653 .Case("{v4}", RISCV::V4)
20654 .Case("{v5}", RISCV::V5)
20655 .Case("{v6}", RISCV::V6)
20656 .Case("{v7}", RISCV::V7)
20657 .Case("{v8}", RISCV::V8)
20658 .Case("{v9}", RISCV::V9)
20659 .Case("{v10}", RISCV::V10)
20660 .Case("{v11}", RISCV::V11)
20661 .Case("{v12}", RISCV::V12)
20662 .Case("{v13}", RISCV::V13)
20663 .Case("{v14}", RISCV::V14)
20664 .Case("{v15}", RISCV::V15)
20665 .Case("{v16}", RISCV::V16)
20666 .Case("{v17}", RISCV::V17)
20667 .Case("{v18}", RISCV::V18)
20668 .Case("{v19}", RISCV::V19)
20669 .Case("{v20}", RISCV::V20)
20670 .Case("{v21}", RISCV::V21)
20671 .Case("{v22}", RISCV::V22)
20672 .Case("{v23}", RISCV::V23)
20673 .Case("{v24}", RISCV::V24)
20674 .Case("{v25}", RISCV::V25)
20675 .Case("{v26}", RISCV::V26)
20676 .Case("{v27}", RISCV::V27)
20677 .Case("{v28}", RISCV::V28)
20678 .Case("{v29}", RISCV::V29)
20679 .Case("{v30}", RISCV::V30)
20680 .Case("{v31}", RISCV::V31)
20681 .Default(RISCV::NoRegister);
20682 if (VReg != RISCV::NoRegister) {
20683 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20684 return std::make_pair(VReg, &RISCV::VMRegClass);
20685 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20686 return std::make_pair(VReg, &RISCV::VRRegClass);
20687 for (const auto *RC :
20688 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20689 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20690 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20691 return std::make_pair(VReg, RC);
20692 }
20693 }
20694 }
20695 }
20696
20697 std::pair<Register, const TargetRegisterClass *> Res =
20698 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20699
20700 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20701 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20702 // Subtarget into account.
20703 if (Res.second == &RISCV::GPRF16RegClass ||
20704 Res.second == &RISCV::GPRF32RegClass ||
20705 Res.second == &RISCV::GPRPairRegClass)
20706 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20707
20708 return Res;
20709}
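// Editorial note with a hedged example: because of the alias tables above,
// LLVM IR produced by frontends that emit ABI names still resolves, e.g.
//   call void asm sideeffect "", "{a0}"(i64 %value)   ; illustrative IR
// binds %value to X10 exactly as the "{x10}" spelling would, and "{fa0}"
// maps to the widest FP register class available for F/D/Zfhmin.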
20710
20711 InlineAsm::ConstraintCode
20712 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20713 // Currently only support length 1 constraints.
20714 if (ConstraintCode.size() == 1) {
20715 switch (ConstraintCode[0]) {
20716 case 'A':
20717 return InlineAsm::ConstraintCode::A;
20718 default:
20719 break;
20720 }
20721 }
20722
20723 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20724}
20725
20726 void RISCVTargetLowering::LowerAsmOperandForConstraint(
20727 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20728 SelectionDAG &DAG) const {
20729 // Currently only support length 1 constraints.
20730 if (Constraint.size() == 1) {
20731 switch (Constraint[0]) {
20732 case 'I':
20733 // Validate & create a 12-bit signed immediate operand.
20734 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20735 uint64_t CVal = C->getSExtValue();
20736 if (isInt<12>(CVal))
20737 Ops.push_back(
20738 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20739 }
20740 return;
20741 case 'J':
20742 // Validate & create an integer zero operand.
20743 if (isNullConstant(Op))
20744 Ops.push_back(
20745 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20746 return;
20747 case 'K':
20748 // Validate & create a 5-bit unsigned immediate operand.
20749 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20750 uint64_t CVal = C->getZExtValue();
20751 if (isUInt<5>(CVal))
20752 Ops.push_back(
20753 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20754 }
20755 return;
20756 case 'S':
20757 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20758 return;
20759 default:
20760 break;
20761 }
20762 }
20763 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20764}
20765
20766 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20767 Instruction *Inst,
20768 AtomicOrdering Ord) const {
20769 if (Subtarget.hasStdExtZtso()) {
20770 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20771 return Builder.CreateFence(Ord);
20772 return nullptr;
20773 }
20774
20775 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20776 return Builder.CreateFence(Ord);
20777 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20778 return Builder.CreateFence(AtomicOrdering::Release);
20779 return nullptr;
20780}
20781
20782 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20783 Instruction *Inst,
20784 AtomicOrdering Ord) const {
20785 if (Subtarget.hasStdExtZtso()) {
20786 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20787 return Builder.CreateFence(Ord);
20788 return nullptr;
20789 }
20790
20791 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20792 return Builder.CreateFence(AtomicOrdering::Acquire);
20793 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20794 Ord == AtomicOrdering::SequentiallyConsistent)
20795 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20796 return nullptr;
20797}
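// Editorial summary (hedged): under the default WMO mapping these two hooks
// bracket atomic loads/stores with fences, e.g.
//   load seq_cst  -> fence rw,rw ; lw ; fence r,rw
//   store release -> fence rw,w  ; sw
// while with Ztso only the seq_cst cases keep a fence.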
20798
20799 TargetLowering::AtomicExpansionKind
20800 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20801 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20802 // point operations can't be used in an lr/sc sequence without breaking the
20803 // forward-progress guarantee.
20804 if (AI->isFloatingPointOperation() ||
20805 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20806 AI->getOperation() == AtomicRMWInst::UDecWrap)
20807 return AtomicExpansionKind::CmpXChg;
20808
20809 // Don't expand forced atomics, we want to have __sync libcalls instead.
20810 if (Subtarget.hasForcedAtomics())
20811 return AtomicExpansionKind::None;
20812
20813 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20814 if (AI->getOperation() == AtomicRMWInst::Nand) {
20815 if (Subtarget.hasStdExtZacas() &&
20816 (Size >= 32 || Subtarget.hasStdExtZabha()))
20817 return AtomicExpansionKind::CmpXChg;
20818 if (Size < 32)
20819 return AtomicExpansionKind::MaskedIntrinsic;
20820 }
20821
20822 if (Size < 32 && !Subtarget.hasStdExtZabha())
20823 return AtomicExpansionKind::MaskedIntrinsic;
20824
20825 return AtomicExpansionKind::None;
20826}
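// Editorial sketch of the resulting decisions (assuming no forced atomics):
// i8/i16 operations without Zabha take the masked-intrinsic LR/SC path
// below, `atomicrmw nand` on i32/i64 with Zacas is expanded to a cmpxchg
// loop, and 32/64-bit integer operations are left to the native AMOs.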
20827
20828static Intrinsic::ID
20830 if (XLen == 32) {
20831 switch (BinOp) {
20832 default:
20833 llvm_unreachable("Unexpected AtomicRMW BinOp");
20834 case AtomicRMWInst::Xchg:
20835 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20836 case AtomicRMWInst::Add:
20837 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20838 case AtomicRMWInst::Sub:
20839 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20840 case AtomicRMWInst::Nand:
20841 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20842 case AtomicRMWInst::Max:
20843 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20844 case AtomicRMWInst::Min:
20845 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20846 case AtomicRMWInst::UMax:
20847 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20848 case AtomicRMWInst::UMin:
20849 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20850 }
20851 }
20852
20853 if (XLen == 64) {
20854 switch (BinOp) {
20855 default:
20856 llvm_unreachable("Unexpected AtomicRMW BinOp");
20857 case AtomicRMWInst::Xchg:
20858 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20859 case AtomicRMWInst::Add:
20860 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20861 case AtomicRMWInst::Sub:
20862 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20863 case AtomicRMWInst::Nand:
20864 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20865 case AtomicRMWInst::Max:
20866 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20867 case AtomicRMWInst::Min:
20868 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20869 case AtomicRMWInst::UMax:
20870 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20871 case AtomicRMWInst::UMin:
20872 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20873 }
20874 }
20875
20876 llvm_unreachable("Unexpected XLen\n");
20877}
20878
20879 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20880 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20881 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20882 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20883 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20884 // mask, as this produces better code than the LR/SC loop emitted by
20885 // int_riscv_masked_atomicrmw_xchg.
20886 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20887 isa<ConstantInt>(AI->getValOperand())) {
20888 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20889 if (CVal->isZero())
20890 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20891 Builder.CreateNot(Mask, "Inv_Mask"),
20892 AI->getAlign(), Ord);
20893 if (CVal->isMinusOne())
20894 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20895 AI->getAlign(), Ord);
20896 }
20897
20898 unsigned XLen = Subtarget.getXLen();
20899 Value *Ordering =
20900 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20901 Type *Tys[] = {AlignedAddr->getType()};
20902 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20903 AI->getModule(),
20904 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20905
20906 if (XLen == 64) {
20907 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20908 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20909 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20910 }
20911
20912 Value *Result;
20913
20914 // Must pass the shift amount needed to sign extend the loaded value prior
20915 // to performing a signed comparison for min/max. ShiftAmt is the number of
20916 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20917 // is the number of bits to left+right shift the value in order to
20918 // sign-extend.
20919 if (AI->getOperation() == AtomicRMWInst::Min ||
20920 AI->getOperation() == AtomicRMWInst::Max) {
20921 const DataLayout &DL = AI->getDataLayout();
20922 unsigned ValWidth =
20923 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20924 Value *SextShamt =
20925 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20926 Result = Builder.CreateCall(LrwOpScwLoop,
20927 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20928 } else {
20929 Result =
20930 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20931 }
20932
20933 if (XLen == 64)
20934 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20935 return Result;
20936}
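// Editorial sketch of the IR this builds on RV64 (names are illustrative;
// AlignedAddr/Mask/ShiftAmt come from the AtomicExpand pass):
//   %res = call i64 @llvm.riscv.masked.atomicrmw.add.i64(
//              ptr %aligned.addr, i64 %incr, i64 %mask, i64 %ordering)
//   %old = trunc i64 %res to i32
// For min/max the extra sign-extension shift amount is passed as well.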
20937
20938 TargetLowering::AtomicExpansionKind
20939 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20940 AtomicCmpXchgInst *CI) const {
20941 // Don't expand forced atomics, we want to have __sync libcalls instead.
20942 if (Subtarget.hasForcedAtomics())
20943 return AtomicExpansionKind::None;
20944
20945 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20946 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20947 (Size == 8 || Size == 16))
20948 return AtomicExpansionKind::MaskedIntrinsic;
20949 return AtomicExpansionKind::None;
20950}
20951
20952 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20953 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20954 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20955 unsigned XLen = Subtarget.getXLen();
20956 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20957 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20958 if (XLen == 64) {
20959 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20960 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20961 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20962 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20963 }
20964 Type *Tys[] = {AlignedAddr->getType()};
20965 Function *MaskedCmpXchg =
20966 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20967 Value *Result = Builder.CreateCall(
20968 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20969 if (XLen == 64)
20970 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20971 return Result;
20972}
20973
20974 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20975 EVT DataVT) const {
20976 // We have indexed loads for all supported EEW types. Indices are always
20977 // zero extended.
20978 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20979 isTypeLegal(Extend.getValueType()) &&
20980 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20981 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20982}
20983
20984 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20985 EVT VT) const {
20986 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20987 return false;
20988
20989 switch (FPVT.getSimpleVT().SimpleTy) {
20990 case MVT::f16:
20991 return Subtarget.hasStdExtZfhmin();
20992 case MVT::f32:
20993 return Subtarget.hasStdExtF();
20994 case MVT::f64:
20995 return Subtarget.hasStdExtD();
20996 default:
20997 return false;
20998 }
20999}
21000
21001 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
21002 // If we are using the small code model, we can reduce the size of each
21003 // jump table entry to 4 bytes.
21004 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21005 getTargetMachine().getCodeModel() == CodeModel::Small) {
21006 return MachineJumpTableInfo::EK_Custom32;
21007 }
21008 return TargetLowering::getJumpTableEncoding();
21009}
21010
21011 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21012 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21013 unsigned uid, MCContext &Ctx) const {
21014 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21016 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21017}
21018
21019 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
21020 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21021 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21022 // a power of two as well.
21023 // FIXME: This doesn't work for zve32, but that's already broken
21024 // elsewhere for the same reason.
21025 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21026 static_assert(RISCV::RVVBitsPerBlock == 64,
21027 "RVVBitsPerBlock changed, audit needed");
21028 return true;
21029}
21030
21031 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
21032 SDValue &Offset,
21033 ISD::MemIndexedMode &AM,
21034 SelectionDAG &DAG) const {
21035 // Target does not support indexed loads.
21036 if (!Subtarget.hasVendorXTHeadMemIdx())
21037 return false;
21038
21039 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21040 return false;
21041
21042 Base = Op->getOperand(0);
21043 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21044 int64_t RHSC = RHS->getSExtValue();
21045 if (Op->getOpcode() == ISD::SUB)
21046 RHSC = -(uint64_t)RHSC;
21047
21048 // The constants that can be encoded in the THeadMemIdx instructions
21049 // are of the form (sign_extend(imm5) << imm2).
21050 bool isLegalIndexedOffset = false;
21051 for (unsigned i = 0; i < 4; i++)
21052 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21053 isLegalIndexedOffset = true;
21054 break;
21055 }
21056
21057 if (!isLegalIndexedOffset)
21058 return false;
21059
21060 Offset = Op->getOperand(1);
21061 return true;
21062 }
21063
21064 return false;
21065}
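// Worked example (editorial): an offset is legal when it can be written as
// sign_extend(imm5) << imm2 with imm2 in [0, 3]. 96 = 12 << 3 qualifies
// (12 fits in a signed 5-bit immediate and the low bits are zero), while 17
// does not, since no shift in that range leaves a simm5 with zero remainder.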
21066
21067 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21068 SDValue &Offset,
21069 ISD::MemIndexedMode &AM,
21070 SelectionDAG &DAG) const {
21071 EVT VT;
21072 SDValue Ptr;
21073 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21074 VT = LD->getMemoryVT();
21075 Ptr = LD->getBasePtr();
21076 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21077 VT = ST->getMemoryVT();
21078 Ptr = ST->getBasePtr();
21079 } else
21080 return false;
21081
21082 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21083 return false;
21084
21085 AM = ISD::PRE_INC;
21086 return true;
21087}
21088
21089 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
21090 SDValue &Base,
21091 SDValue &Offset,
21092 ISD::MemIndexedMode &AM,
21093 SelectionDAG &DAG) const {
21094 if (Subtarget.hasVendorXCVmem()) {
21095 if (Op->getOpcode() != ISD::ADD)
21096 return false;
21097
21098 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21099 Base = LS->getBasePtr();
21100 else
21101 return false;
21102
21103 if (Base == Op->getOperand(0))
21104 Offset = Op->getOperand(1);
21105 else if (Base == Op->getOperand(1))
21106 Offset = Op->getOperand(0);
21107 else
21108 return false;
21109
21110 AM = ISD::POST_INC;
21111 return true;
21112 }
21113
21114 EVT VT;
21115 SDValue Ptr;
21116 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21117 VT = LD->getMemoryVT();
21118 Ptr = LD->getBasePtr();
21119 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21120 VT = ST->getMemoryVT();
21121 Ptr = ST->getBasePtr();
21122 } else
21123 return false;
21124
21125 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21126 return false;
21127 // Post-indexing updates the base, so it's not a valid transform
21128 // if that's not the same as the load's pointer.
21129 if (Ptr != Base)
21130 return false;
21131
21132 AM = ISD::POST_INC;
21133 return true;
21134}
21135
21136 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
21137 EVT VT) const {
21138 EVT SVT = VT.getScalarType();
21139
21140 if (!SVT.isSimple())
21141 return false;
21142
21143 switch (SVT.getSimpleVT().SimpleTy) {
21144 case MVT::f16:
21145 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21146 : Subtarget.hasStdExtZfhOrZhinx();
21147 case MVT::f32:
21148 return Subtarget.hasStdExtFOrZfinx();
21149 case MVT::f64:
21150 return Subtarget.hasStdExtDOrZdinx();
21151 default:
21152 break;
21153 }
21154
21155 return false;
21156}
21157
21158 ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
21159 // Zacas will use amocas.w which does not require extension.
21160 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21161}
21162
21163 Register RISCVTargetLowering::getExceptionPointerRegister(
21164 const Constant *PersonalityFn) const {
21165 return RISCV::X10;
21166}
21167
21168 Register RISCVTargetLowering::getExceptionSelectorRegister(
21169 const Constant *PersonalityFn) const {
21170 return RISCV::X11;
21171}
21172
21173 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
21174 // Return false to suppress the unnecessary extensions if the LibCall
21175 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21176 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21177 Type.getSizeInBits() < Subtarget.getXLen()))
21178 return false;
21179
21180 return true;
21181}
21182
21183 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
21184 if (Subtarget.is64Bit() && Type == MVT::i32)
21185 return true;
21186
21187 return IsSigned;
21188}
21189
21190 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
21191 SDValue C) const {
21192 // Check integral scalar types.
21193 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21194 if (!VT.isScalarInteger())
21195 return false;
21196
21197 // Omit the optimization if the sub target has the M extension and the data
21198 // size exceeds XLen.
21199 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21200 return false;
21201
21202 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
21203 // Break the MUL to a SLLI and an ADD/SUB.
21204 const APInt &Imm = ConstNode->getAPIntValue();
21205 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21206 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21207 return true;
21208
21209 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
21210 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21211 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21212 (Imm - 8).isPowerOf2()))
21213 return true;
21214
21215 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21216 // a pair of LUI/ADDI.
21217 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21218 ConstNode->hasOneUse()) {
21219 APInt ImmS = Imm.ashr(Imm.countr_zero());
21220 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21221 (1 - ImmS).isPowerOf2())
21222 return true;
21223 }
21224 }
21225
21226 return false;
21227}
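// Worked examples (editorial): Imm = 9 is accepted because 9 - 1 = 8 is a
// power of two, so the multiply becomes (x << 3) + x. With Zba, Imm = 2050
// (not a simm12) is accepted because 2050 - 2 = 2048 is a power of two,
// giving sh1add x, (slli x, 11), i.e. (x << 1) + (x << 11).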
21228
21229 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
21230 SDValue ConstNode) const {
21231 // Let the DAGCombiner decide for vectors.
21232 EVT VT = AddNode.getValueType();
21233 if (VT.isVector())
21234 return true;
21235
21236 // Let the DAGCombiner decide for larger types.
21237 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21238 return true;
21239
21240 // It is worse if c1 is simm12 while c1*c2 is not.
21241 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21242 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21243 const APInt &C1 = C1Node->getAPIntValue();
21244 const APInt &C2 = C2Node->getAPIntValue();
21245 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21246 return false;
21247
21248 // Default to true and let the DAGCombiner decide.
21249 return true;
21250}
21251
21252 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
21253 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21254 unsigned *Fast) const {
21255 if (!VT.isVector()) {
21256 if (Fast)
21257 *Fast = Subtarget.enableUnalignedScalarMem();
21258 return Subtarget.enableUnalignedScalarMem();
21259 }
21260
21261 // All vector implementations must support element alignment
21262 EVT ElemVT = VT.getVectorElementType();
21263 if (Alignment >= ElemVT.getStoreSize()) {
21264 if (Fast)
21265 *Fast = 1;
21266 return true;
21267 }
21268
21269 // Note: We lower an unmasked unaligned vector access to an equally sized
21270 // e8 element type access. Given this, we effectively support all unmasked
21271 // misaligned accesses. TODO: Work through the codegen implications of
21272 // allowing such accesses to be formed, and considered fast.
21273 if (Fast)
21274 *Fast = Subtarget.enableUnalignedVectorMem();
21275 return Subtarget.enableUnalignedVectorMem();
21276}
21277
21278
21279 EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
21280 const AttributeList &FuncAttributes) const {
21281 if (!Subtarget.hasVInstructions())
21282 return MVT::Other;
21283
21284 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21285 return MVT::Other;
21286
21287 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21288 // has an expansion threshold, and we want the number of hardware memory
21289 // operations to correspond roughly to that threshold. LMUL>1 operations
21290 // are typically expanded linearly internally, and thus correspond to more
21291 // than one actual memory operation. Note that store merging and load
21292 // combining will typically form larger LMUL operations from the LMUL1
21293 // operations emitted here, and that's okay because combining isn't
21294 // introducing new memory operations; it's just merging existing ones.
21295 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21296 if (Op.size() < MinVLenInBytes)
21297 // TODO: Figure out short memops. For the moment, do the default thing
21298 // which ends up using scalar sequences.
21299 return MVT::Other;
21300
21301 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21302 // a large scalar constant and instead use vmv.v.x/i to do the
21303 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21304 // maximize the chance we can encode the size in the vsetvli.
21305 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21306 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21307
21308 // Do we have sufficient alignment for our preferred VT? If not, revert
21309 // to largest size allowed by our alignment criteria.
21310 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21311 Align RequiredAlign(PreferredVT.getStoreSize());
21312 if (Op.isFixedDstAlign())
21313 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21314 if (Op.isMemcpy())
21315 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21316 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21317 }
21318 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21319}
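// Worked example (editorial, assuming VLEN = 128 so MinVLenInBytes = 16 and
// ELEN = 64): a 32-byte zeroing memset prefers i64 elements and returns
// v2i64, while a non-zero memset returns v16i8 so the byte can be splatted
// with vmv.v.x/vmv.v.i; operations smaller than 16 bytes fall back to
// MVT::Other (scalar lowering).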
21320
21321 bool RISCVTargetLowering::splitValueIntoRegisterParts(
21322 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21323 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21324 bool IsABIRegCopy = CC.has_value();
21325 EVT ValueVT = Val.getValueType();
21326 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21327 PartVT == MVT::f32) {
21328 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21329 // nan, and cast to f32.
21330 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21331 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21332 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21333 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21334 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
21335 Parts[0] = Val;
21336 return true;
21337 }
21338
21339 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21340 LLVMContext &Context = *DAG.getContext();
21341 EVT ValueEltVT = ValueVT.getVectorElementType();
21342 EVT PartEltVT = PartVT.getVectorElementType();
21343 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21344 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21345 if (PartVTBitSize % ValueVTBitSize == 0) {
21346 assert(PartVTBitSize >= ValueVTBitSize);
21347 // If the element types are different, bitcast to the same element type of
21348 // PartVT first.
21349 // For example, to copy a <vscale x 1 x i8> value into a
21350 // <vscale x 4 x i16> part,
21351 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
21352 // insert_subvector, and then bitcast the result to <vscale x 4 x i16>.
21353 if (ValueEltVT != PartEltVT) {
21354 if (PartVTBitSize > ValueVTBitSize) {
21355 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21356 assert(Count != 0 && "The number of element should not be zero.");
21357 EVT SameEltTypeVT =
21358 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21359 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21360 DAG.getUNDEF(SameEltTypeVT), Val,
21361 DAG.getVectorIdxConstant(0, DL));
21362 }
21363 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21364 } else {
21365 Val =
21366 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21367 Val, DAG.getVectorIdxConstant(0, DL));
21368 }
21369 Parts[0] = Val;
21370 return true;
21371 }
21372 }
21373 return false;
21374}
21375
21376 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
21377 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21378 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21379 bool IsABIRegCopy = CC.has_value();
21380 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21381 PartVT == MVT::f32) {
21382 SDValue Val = Parts[0];
21383
21384 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21385 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21386 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21387 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21388 return Val;
21389 }
21390
21391 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21392 LLVMContext &Context = *DAG.getContext();
21393 SDValue Val = Parts[0];
21394 EVT ValueEltVT = ValueVT.getVectorElementType();
21395 EVT PartEltVT = PartVT.getVectorElementType();
21396 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21397 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21398 if (PartVTBitSize % ValueVTBitSize == 0) {
21399 assert(PartVTBitSize >= ValueVTBitSize);
21400 EVT SameEltTypeVT = ValueVT;
21401 // If the element types are different, convert it to the same element type
21402 // of PartVT.
21403 // For example, to copy a <vscale x 1 x i8> value out of a
21404 // <vscale x 4 x i16> part,
21405 // we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>, and then
21406 // extract the <vscale x 1 x i8> subvector.
21407 if (ValueEltVT != PartEltVT) {
21408 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21409 assert(Count != 0 && "The number of element should not be zero.");
21410 SameEltTypeVT =
21411 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21412 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21413 }
21414 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21415 DAG.getVectorIdxConstant(0, DL));
21416 return Val;
21417 }
21418 }
21419 return SDValue();
21420}
21421
21422 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21423 // When aggressively optimizing for code size, we prefer to use a div
21424 // instruction, as it is usually smaller than the alternative sequence.
21425 // TODO: Add vector division?
21426 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21427 return OptSize && !VT.isVector();
21428}
21429
21430 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21431 // Scalarizing zero_ext and sign_ext might stop them from matching widening
21432 // instructions in some situations.
21433 unsigned Opc = N->getOpcode();
21434 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21435 return false;
21436 return true;
21437}
21438
21439static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21440 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21441 Function *ThreadPointerFunc =
21442 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21443 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21444 IRB.CreateCall(ThreadPointerFunc), Offset);
21445}
21446
21447 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21448 // Fuchsia provides a fixed TLS slot for the stack cookie.
21449 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21450 if (Subtarget.isTargetFuchsia())
21451 return useTpOffset(IRB, -0x10);
21452
21453 // Android provides a fixed TLS slot for the stack cookie. See the definition
21454 // of TLS_SLOT_STACK_GUARD in
21455 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21456 if (Subtarget.isTargetAndroid())
21457 return useTpOffset(IRB, -0x18);
21458
21459 return TargetLowering::getIRStackGuard(IRB);
21460}
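// Editorial illustration of the IR produced by useTpOffset on Fuchsia:
//   %tp   = call ptr @llvm.thread.pointer()
//   %slot = getelementptr i8, ptr %tp, i32 -16
// Android uses offset -24 (-0x18); all other targets defer to the generic
// TargetLowering::getIRStackGuard implementation.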
21461
21462 bool RISCVTargetLowering::isLegalInterleavedAccessType(
21463 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21464 const DataLayout &DL) const {
21465 EVT VT = getValueType(DL, VTy);
21466 // Don't lower vlseg/vsseg for vector types that can't be split.
21467 if (!isTypeLegal(VT))
21468 return false;
21469
21470 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
21471 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21472 Alignment))
21473 return false;
21474
21475 MVT ContainerVT = VT.getSimpleVT();
21476
21477 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21478 if (!Subtarget.useRVVForFixedLengthVectors())
21479 return false;
21480 // Sometimes the interleaved access pass picks up splats as interleaves of
21481 // one element. Don't lower these.
21482 if (FVTy->getNumElements() < 2)
21483 return false;
21484
21485 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
21486 } else {
21487 // The intrinsics for scalable vectors are not overloaded on pointer type
21488 // and can only handle the default address space.
21489 if (AddrSpace)
21490 return false;
21491 }
21492
21493 // Need to make sure that EMUL * NFIELDS ≤ 8
21494 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21495 if (Fractional)
21496 return true;
21497 return Factor * LMUL <= 8;
21498}
21499
21501 Align Alignment) const {
21502 if (!Subtarget.hasVInstructions())
21503 return false;
21504
21505 // Only support fixed vectors if we know the minimum vector size.
21506 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21507 return false;
21508
21509 EVT ScalarType = DataType.getScalarType();
21510 if (!isLegalElementTypeForRVV(ScalarType))
21511 return false;
21512
21513 if (!Subtarget.enableUnalignedVectorMem() &&
21514 Alignment < ScalarType.getStoreSize())
21515 return false;
21516
21517 return true;
21518}
21519
21520 static const Intrinsic::ID FixedVlsegIntrIds[] = {
21521 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21522 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21523 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21524 Intrinsic::riscv_seg8_load};
21525
21526/// Lower an interleaved load into a vlsegN intrinsic.
21527///
21528/// E.g. Lower an interleaved load (Factor = 2):
21529/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21530/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21531/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21532///
21533/// Into:
21534/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21535/// %ptr, i64 4)
21536/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21537/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21538 bool RISCVTargetLowering::lowerInterleavedLoad(
21539 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21540 ArrayRef<unsigned> Indices, unsigned Factor) const {
21541 IRBuilder<> Builder(LI);
21542
21543 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21544 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21545 LI->getPointerAddressSpace(),
21546 LI->getDataLayout()))
21547 return false;
21548
21549 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21550
21551 Function *VlsegNFunc =
21552 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21553 {VTy, LI->getPointerOperandType(), XLenTy});
21554
21555 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21556
21557 CallInst *VlsegN =
21558 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21559
21560 for (unsigned i = 0; i < Shuffles.size(); i++) {
21561 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21562 Shuffles[i]->replaceAllUsesWith(SubVec);
21563 }
21564
21565 return true;
21566}
21567
21568 static const Intrinsic::ID FixedVssegIntrIds[] = {
21569 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21570 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21571 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21572 Intrinsic::riscv_seg8_store};
21573
21574/// Lower an interleaved store into a vssegN intrinsic.
21575///
21576/// E.g. Lower an interleaved store (Factor = 3):
21577/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21578/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21579/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21580///
21581/// Into:
21582/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21583/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21584/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21585/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21586/// %ptr, i32 4)
21587///
21588/// Note that the new shufflevectors will be removed and we'll only generate one
21589/// vsseg3 instruction in CodeGen.
21590 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21591 ShuffleVectorInst *SVI,
21592 unsigned Factor) const {
21593 IRBuilder<> Builder(SI);
21594 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21595 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21596 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21597 ShuffleVTy->getNumElements() / Factor);
21598 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21599 SI->getPointerAddressSpace(),
21600 SI->getDataLayout()))
21601 return false;
21602
21603 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21604
21605 Function *VssegNFunc =
21606 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21607 {VTy, SI->getPointerOperandType(), XLenTy});
21608
21609 auto Mask = SVI->getShuffleMask();
21611
21612 for (unsigned i = 0; i < Factor; i++) {
21613 Value *Shuffle = Builder.CreateShuffleVector(
21614 SVI->getOperand(0), SVI->getOperand(1),
21615 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21616 Ops.push_back(Shuffle);
21617 }
21618 // This VL should be OK (should be executable in one vsseg instruction,
21619 // potentially under larger LMULs) because we checked that the fixed vector
21620 // type fits in isLegalInterleavedAccessType
21621 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21622 Ops.append({SI->getPointerOperand(), VL});
21623
21624 Builder.CreateCall(VssegNFunc, Ops);
21625
21626 return true;
21627}
21628
21629 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21630 LoadInst *LI) const {
21631 assert(LI->isSimple());
21632 IRBuilder<> Builder(LI);
21633
21634 // Only deinterleave2 supported at present.
21635 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21636 return false;
21637
21638 unsigned Factor = 2;
21639
21640 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21641 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21642
21643 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21644 LI->getPointerAddressSpace(),
21645 LI->getDataLayout()))
21646 return false;
21647
21648 Function *VlsegNFunc;
21649 Value *VL;
21650 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21652
21653 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21654 VlsegNFunc = Intrinsic::getDeclaration(
21655 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21656 {ResVTy, LI->getPointerOperandType(), XLenTy});
21657 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21658 } else {
21659 static const Intrinsic::ID IntrIds[] = {
21660 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21661 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21662 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21663 Intrinsic::riscv_vlseg8};
21664
21665 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21666 {ResVTy, XLenTy});
21667 VL = Constant::getAllOnesValue(XLenTy);
21668 Ops.append(Factor, PoisonValue::get(ResVTy));
21669 }
21670
21671 Ops.append({LI->getPointerOperand(), VL});
21672
21673 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21674 DI->replaceAllUsesWith(Vlseg);
21675
21676 return true;
21677}
21678
21679 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21680 StoreInst *SI) const {
21681 assert(SI->isSimple());
21682 IRBuilder<> Builder(SI);
21683
21684 // Only interleave2 supported at present.
21685 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21686 return false;
21687
21688 unsigned Factor = 2;
21689
21690 VectorType *VTy = cast<VectorType>(II->getType());
21691 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21692
21693 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21694 SI->getPointerAddressSpace(),
21695 SI->getDataLayout()))
21696 return false;
21697
21698 Function *VssegNFunc;
21699 Value *VL;
21700 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21701
21702 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21703 VssegNFunc = Intrinsic::getDeclaration(
21704 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21705 {InVTy, SI->getPointerOperandType(), XLenTy});
21706 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21707 } else {
21708 static const Intrinsic::ID IntrIds[] = {
21709 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21710 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21711 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21712 Intrinsic::riscv_vsseg8};
21713
21714 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21715 {InVTy, XLenTy});
21716 VL = Constant::getAllOnesValue(XLenTy);
21717 }
21718
21719 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21720 SI->getPointerOperand(), VL});
21721
21722 return true;
21723}
21724
21725 MachineInstr *
21726 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21727 MachineBasicBlock::instr_iterator &MBBI,
21728 const TargetInstrInfo *TII) const {
21729 assert(MBBI->isCall() && MBBI->getCFIType() &&
21730 "Invalid call instruction for a KCFI check");
21731 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21732 MBBI->getOpcode()));
21733
21734 MachineOperand &Target = MBBI->getOperand(0);
21735 Target.setIsRenamable(false);
21736
21737 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21738 .addReg(Target.getReg())
21739 .addImm(MBBI->getCFIType())
21740 .getInstr();
21741}
21742
21743#define GET_REGISTER_MATCHER
21744#include "RISCVGenAsmMatcher.inc"
21745
21746 Register
21747 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21748 const MachineFunction &MF) const {
21749 Register Reg = MatchRegisterAltName(RegName);
21750 if (Reg == RISCV::NoRegister)
21751 Reg = MatchRegisterName(RegName);
21752 if (Reg == RISCV::NoRegister)
21753 report_fatal_error(
21754 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21755 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21756 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21757 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21758 StringRef(RegName) + "\"."));
21759 return Reg;
21760}
21761
21762 MachineMemOperand::Flags
21763 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21764 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21765
21766 if (NontemporalInfo == nullptr)
21767 return MachineMemOperand::MONone;
21768
21769 // 1 is the default value and works as __RISCV_NTLH_ALL
21770 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21771 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21772 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21773 // 5 -> __RISCV_NTLH_ALL
21774 int NontemporalLevel = 5;
21775 const MDNode *RISCVNontemporalInfo =
21776 I.getMetadata("riscv-nontemporal-domain");
21777 if (RISCVNontemporalInfo != nullptr)
21778 NontemporalLevel =
21779 cast<ConstantInt>(
21780 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21781 ->getValue())
21782 ->getZExtValue();
21783
21784 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21785 "RISC-V target doesn't support this non-temporal domain.");
21786
21787 NontemporalLevel -= 2;
21788 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21789 if (NontemporalLevel & 0b1)
21790 Flags |= MONontemporalBit0;
21791 if (NontemporalLevel & 0b10)
21792 Flags |= MONontemporalBit1;
21793
21794 return Flags;
21795}
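// Worked example (editorial): !riscv-nontemporal-domain metadata holding
// i32 2 (__RISCV_NTLH_INNERMOST_PRIVATE) maps to level 2 - 2 = 0, so neither
// bit is set, while the default level 5 (__RISCV_NTLH_ALL) maps to 3 and
// sets both MONontemporalBit0 and MONontemporalBit1.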
21796
21797 MachineMemOperand::Flags
21798 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21799
21800 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21801 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21802 TargetFlags |= (NodeFlags & MONontemporalBit0);
21803 TargetFlags |= (NodeFlags & MONontemporalBit1);
21804 return TargetFlags;
21805}
21806
21807 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21808 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21809 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21810}
21811
21812 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21813 if (VT.isScalableVector())
21814 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21815 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21816 return true;
21817 return Subtarget.hasStdExtZbb() &&
21818 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21819}
21820
21821 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21822 ISD::CondCode Cond) const {
21823 return isCtpopFast(VT) ? 0 : 1;
21824}
21825
21826 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21827
21828 // GISel support is in progress or complete for these opcodes.
21829 unsigned Op = Inst.getOpcode();
21830 if (Op == Instruction::Add || Op == Instruction::Sub ||
21831 Op == Instruction::And || Op == Instruction::Or ||
21832 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21833 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
21834 Op == Instruction::Freeze || Op == Instruction::Store)
21835 return false;
21836
21837 if (Inst.getType()->isScalableTy())
21838 return true;
21839
21840 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21841 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21842 !isa<ReturnInst>(&Inst))
21843 return true;
21844
21845 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21846 if (AI->getAllocatedType()->isScalableTy())
21847 return true;
21848 }
21849
21850 return false;
21851}
21852
21853SDValue
21854RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21855 SelectionDAG &DAG,
21856 SmallVectorImpl<SDNode *> &Created) const {
21857 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21858 if (isIntDivCheap(N->getValueType(0), Attr))
21859 return SDValue(N, 0); // Lower SDIV as SDIV
21860
21861 // Only perform this transform if short forward branch opt is supported.
21862 if (!Subtarget.hasShortForwardBranchOpt())
21863 return SDValue();
21864 EVT VT = N->getValueType(0);
21865 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21866 return SDValue();
21867
21868 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21869 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21870 return SDValue();
21871 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21872}
21873
21874bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21875 EVT VT, const APInt &AndMask) const {
21876 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21877 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21878 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21879}
21880
21881unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21882 return Subtarget.getMinimumJumpTableEntries();
21883}
21884
21885// Handle single arg such as return value.
21886template <typename Arg>
21887void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21888 // This lambda determines whether the list of argument types consists of
21889 // homogeneous scalable vector types.
21890 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21891 // First, extract the first element in the argument type.
21892 auto It = ArgList.begin();
21893 MVT FirstArgRegType = It->VT;
21894
21895 // Return if there is no return or the type needs split.
21896 if (It == ArgList.end() || It->Flags.isSplit())
21897 return false;
21898
21899 ++It;
21900
21901 // Return if this argument type contains only 1 element, or it's not a
21902 // vector type.
21903 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21904 return false;
21905
21906 // Second, check if the following elements in this argument type are all the
21907 // same.
21908 for (; It != ArgList.end(); ++It)
21909 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21910 return false;
21911
21912 return true;
21913 };
21914
21915 if (isHomogeneousScalableVectorType(ArgList)) {
21916 // Handle as tuple type
21917 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21918 } else {
21919 // Handle as normal vector type
21920 bool FirstVMaskAssigned = false;
21921 for (const auto &OutArg : ArgList) {
21922 MVT RegisterVT = OutArg.VT;
21923
21924 // Skip non-RVV register type
21925 if (!RegisterVT.isVector())
21926 continue;
21927
21928 if (RegisterVT.isFixedLengthVector())
21929 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21930
21931 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21932 RVVArgInfos.push_back({1, RegisterVT, true});
21933 FirstVMaskAssigned = true;
21934 continue;
21935 }
21936
21937 RVVArgInfos.push_back({1, RegisterVT, false});
21938 }
21939 }
21940}
21941
21942// Handle multiple args.
21943template <>
21944void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21945 const DataLayout &DL = MF->getDataLayout();
21946 const Function &F = MF->getFunction();
21947 LLVMContext &Context = F.getContext();
21948
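// Only the first vector mask argument may be assigned to V0; remember
// whether it has already been handed out.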
21949 bool FirstVMaskAssigned = false;
21950 for (Type *Ty : TypeList) {
21951 StructType *STy = dyn_cast<StructType>(Ty);
21952 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21953 Type *ElemTy = STy->getTypeAtIndex(0U);
21954 EVT VT = TLI->getValueType(DL, ElemTy);
21955 MVT RegisterVT =
21956 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21957 unsigned NumRegs =
21958 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21959
21960 RVVArgInfos.push_back(
21961 {NumRegs * STy->getNumElements(), RegisterVT, false});
21962 } else {
21963 SmallVector<EVT, 4> ValueVTs;
21964 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21965
21966 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21967 ++Value) {
21968 EVT VT = ValueVTs[Value];
21969 MVT RegisterVT =
21970 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21971 unsigned NumRegs =
21972 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21973
21974 // Skip non-RVV register type
21975 if (!RegisterVT.isVector())
21976 continue;
21977
21978 if (RegisterVT.isFixedLengthVector())
21979 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21980
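// Assign the first vector mask argument to V0 and allocate the remaining
// registers for this value as ordinary vector arguments below.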
21981 if (!FirstVMaskAssigned &&
21982 RegisterVT.getVectorElementType() == MVT::i1) {
21983 RVVArgInfos.push_back({1, RegisterVT, true});
21984 FirstVMaskAssigned = true;
21985 --NumRegs;
21986 }
21987
21988 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21989 }
21990 }
21991 }
21992}
21993
21994void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21995 unsigned StartReg) {
21996 assert((StartReg % LMul) == 0 &&
21997 "Start register number should be multiple of lmul");
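// Pick the register table that matches the requested LMUL grouping of the
// RVV argument registers (v8 and up).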
21998 const MCPhysReg *VRArrays;
21999 switch (LMul) {
22000 default:
22001 report_fatal_error("Invalid lmul");
22002 case 1:
22003 VRArrays = ArgVRs;
22004 break;
22005 case 2:
22006 VRArrays = ArgVRM2s;
22007 break;
22008 case 4:
22009 VRArrays = ArgVRM4s;
22010 break;
22011 case 8:
22012 VRArrays = ArgVRM8s;
22013 break;
22014 }
22015
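// A StartReg of 0 means no register block was available; record an invalid
// MCPhysReg for each field so the argument is known to be unassigned.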
22016 for (unsigned i = 0; i < NF; ++i)
22017 if (StartReg)
22018 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
22019 else
22020 AllocatedPhysRegs.push_back(MCPhysReg());
22021}
22022
22023/// This function determines whether each RVV argument is passed by register.
22024/// If the argument can be assigned to a VR, give it a specific register;
22025/// otherwise, assign it 0, which is an invalid MCPhysReg.
22026void RVVArgDispatcher::compute() {
22027 uint32_t AssignedMap = 0;
22028 auto allocate = [&](const RVVArgInfo &ArgInfo) {
22029 // Allocate first vector mask argument to V0.
22030 if (ArgInfo.FirstVMask) {
22031 AllocatedPhysRegs.push_back(RISCV::V0);
22032 return;
22033 }
22034
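// Each field of the argument needs ceil(size / RVVBitsPerBlock) registers,
// i.e. its LMUL. AssignedMap tracks which argument VRs are already taken,
// one bit per register starting at v8.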
22035 unsigned RegsNeeded = divideCeil(
22036 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
22037 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
22038 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
22039 StartReg += RegsNeeded) {
22040 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
22041 if ((AssignedMap & Map) == 0) {
22042 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
22043 AssignedMap |= Map;
22044 return;
22045 }
22046 }
22047
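// No contiguous, suitably aligned block of free registers fits this
// argument; mark it as not register-allocated.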
22048 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
22049 };
22050
22051 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
22052 allocate(RVVArgInfos[i]);
22053}
22054
22055MCPhysReg RVVArgDispatcher::getNextPhysReg() {
22056 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
22057 return AllocatedPhysRegs[CurIdx++];
22058}
22059
22060SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
22061 SDValue Value, SDValue Addr,
22062 int JTI,
22063 SelectionDAG &DAG) const {
22064 if (Subtarget.hasStdExtZicfilp()) {
22065 // When Zicfilp is enabled, we need to use a software-guarded branch for
22066 // the jump table branch.
22067 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
22068 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
22069 Addr);
22070 }
22071 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22072}
22073
22074namespace llvm::RISCVVIntrinsicsTable {
22075
22076#define GET_RISCVVIntrinsicsTable_IMPL
22077#include "RISCVGenSearchableTables.inc"
22078
22079} // namespace llvm::RISCVVIntrinsicsTable
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if this node is an UNDEF node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
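A small sketch of the SDValue accessors listed above; V is an assumed in-scope SDValue whose defining node happens to have a ConstantSDNode as operand 1, so the example is illustrative only.

unsigned Opc = V.getOpcode();              // opcode of the node defining V
EVT VT = V.getValueType();                 // type of the referenced result value
SDValue Op0 = V.getOperand(0);             // first operand of that node
bool Single = V.hasOneUse();               // exactly one user of this value
uint64_t Imm = V.getConstantOperandVal(1); // operand 1, assumed to be a ConstantSDNode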
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:494
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:391
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:747
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:843
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:488
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:676
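A hedged sketch combining several of the node-construction helpers listed in this section (getSetCC, getNOT, getConstant, getAllOnesConstant). DAG, DL, X and Y are assumed to be in scope inside some lowering or combine routine, and the setcc result type is simplified to MVT::i1 purely for illustration.

EVT VT = X.getValueType();
SDValue Zero = DAG.getConstant(0, DL, VT);        // constant 0 of type VT
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); // constant -1 of type VT
SDValue NotX = DAG.getNOT(DL, X, VT);             // built as (xor X, -1)
SDValue Cmp = DAG.getSetCC(DL, MVT::i1, NotX, Y, ISD::SETEQ); // (setcc eq NotX, Y)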
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:877
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:489
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:788
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:691
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:783
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:483
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:814
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:860
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
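A sketch of how the known-bits queries above are typically combined, assuming DAG and a 64-bit integer SDValue Op are in scope; the mask choice is arbitrary.

KnownBits Known = DAG.computeKnownBits(Op);
// Check whether the upper 32 bits of the 64-bit value are provably zero.
if (DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32))) {
  // A zero-extension from i32 would be redundant here.
}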
LLVMContext * getContext() const
Definition: SelectionDAG.h:501
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:754
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:571
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:893
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
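A tiny sketch of the static mask classifiers listed above, using only the signatures shown here; the concrete masks are made up for illustration.

int Ident[] = {0, 1, 2, 3};
int Rev[] = {3, 2, 1, 0};
bool IsIdent = ShuffleVectorInst::isIdentityMask(Ident, /*NumSrcElts=*/4); // true
bool IsRev = ShuffleVectorInst::isReverseMask(Rev, /*NumSrcElts=*/4);      // true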
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
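A minimal usage sketch of the SmallSet operations listed above; the element type and inline size are arbitrary.

#include "llvm/ADT/SmallSet.h"

llvm::SmallSet<unsigned, 4> Seen;
bool Inserted = Seen.insert(5).second; // true: the element was not present before
bool Again = Seen.insert(5).second;    // false: already in the set
unsigned Present = Seen.count(5);      // 1 if the element is in the set, 0 otherwise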
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
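A usage sketch of the StringSwitch pattern above; the strings and return values are hypothetical and only illustrate the chained Case()/Cases()/Default() style.

#include "llvm/ADT/StringSwitch.h"

static int classifyExt(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("f", 1)
      .Cases("d", "zdinx", 2)
      .Default(0); // unrecognized extension name
}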
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
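A hypothetical sketch of how a target's TargetLowering constructor might invoke the configuration hooks listed in this section (setOperationAction, setTruncStoreAction, setCondCodeAction, setTargetDAGCombine, setLoadExtAction); the opcodes, types and actions are illustrative and do not describe any particular target.

// Inside the constructor of some hypothetical TargetLowering subclass:
setOperationAction(ISD::CTPOP, MVT::i64, Expand);           // no native popcount
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);       // lowered by hand
setTruncStoreAction(MVT::f64, MVT::f32, Expand);            // no truncating f64->f32 store
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); // no extending f32->f64 load
setCondCodeAction(ISD::SETUGT, MVT::i64, Expand);           // rewrite this condition code
setTargetDAGCombine(ISD::AND);                              // request PerformDAGCombine callbacks on AND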
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
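A small sketch of the Type queries listed above, assuming an in-scope LLVMContext &Ctx; the chosen bit width is arbitrary.

#include "llvm/IR/DerivedTypes.h"

llvm::Type *I32 = llvm::Type::getIntNTy(Ctx, 32); // an i32 type
bool IsInt = I32->isIntegerTy();                  // true
unsigned Bits = I32->getIntegerBitWidth();        // 32
llvm::Type *Scalar = I32->getScalarType();        // i32 itself (not a vector)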
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
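A short sketch of the size arithmetic above; the values are arbitrary.

#include "llvm/Support/TypeSize.h"

llvm::TypeSize Fixed = llvm::TypeSize::getFixed(128);  // exactly 128 bits
llvm::TypeSize Scal = llvm::TypeSize::getScalable(64); // 64 x vscale bits
uint64_t Min = Scal.getKnownMinValue();                // 64
llvm::TypeSize Twice = Scal.multiplyCoefficientBy(2);  // 128 x vscale bits
bool Empty = Fixed.isZero();                           // false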
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1169
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1165
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1382
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1330
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1415
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1312
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1198
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1314
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1315
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1400
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1404
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1271
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1276
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1414
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1310
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1311
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1455
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:915
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1231
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1397
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1264
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1401
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1031
@ STRICT_LROUND
Definition: ISDOpcodes.h:445
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1313
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1099
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:600
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1416
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1194
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1409
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:910
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1308
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1254
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:886
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1372
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1291
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1316
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ SMULO
Like [SU]ADDO, but for multiplication: RESULT, BOOL = [SU]MULO(LHS, RHS).
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1084
@ STRICT_LRINT
Definition: ISDOpcodes.h:447
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:605
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1342
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1417
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:443
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:952
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1306
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1027
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1307
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1225
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1251
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:650
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:448
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1305
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ STRICT_LLROUND
Definition: ISDOpcodes.h:446
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:885
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1405
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1189
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:594
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
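A brief usage sketch of the two index helpers above (not code from this file; N is an assumed SDNode* for a VP node, and for a typical binary VP opcode the operands are lhs, rhs, mask, evl, so the indices are 2 and 3):
  if (std::optional<unsigned> MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) {
    SDValue Mask = N->getOperand(*MaskIdx);   // the mask operand, if the opcode has one
    (void)Mask;
  }
  if (std::optional<unsigned> EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) {
    SDValue EVL = N->getOperand(*EVLIdx);     // the explicit vector length operand
    (void)EVL;
  }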
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
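A minimal illustration of the two condition-code helpers above (assuming an i64 comparison and using namespace llvm; not taken from this file):
  ISD::CondCode CC      = ISD::SETLT;
  ISD::CondCode Inv     = ISD::getSetCCInverse(CC, MVT::i64);   // SETGE, i.e. !(X < Y)
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);     // SETGT, i.e. Y > X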
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1540
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1540
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a BUILD_VECTOR or SPLAT_VECTOR whose elements are all the same constant (or undef), storing that constant in SplatValue.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1461
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
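For illustration only (a sketch, not from this file), the helper maps a reduction opcode to its scalar arithmetic opcode:
  ISD::NodeType BaseOpc = ISD::getVecReduceBaseOpcode(ISD::VECREDUCE_FADD);  // ISD::FADD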
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1623
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
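A hedged sketch of the call above; M is an assumed llvm::Module*, and the overload type list selects the i32 flavour of a generic overloaded intrinsic:
  llvm::Function *Ctlz =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::ctlz,
                                      {llvm::Type::getInt32Ty(M->getContext())});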
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
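A hedged sketch of how these matchers compose; V is an assumed llvm::Value*, and the pattern matches the shape of the canonical IR splat idiom (insertelement into lane 0 followed by a shufflevector; the shuffle mask itself is not constrained by m_Shuffle):
  using namespace llvm::PatternMatch;
  llvm::Value *Scalar = nullptr;
  bool IsSplatShape = match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar),
                                                     m_ZeroInt()),
                                         m_Undef()));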
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
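A hedged sketch of the SEW/LMUL helpers above (assuming they live in the llvm::RISCVVType namespace and the usual RVV encoding SEW = 2^(VSEW+3)):
  unsigned SEW  = RISCVVType::decodeVSEW(/*VSEW=*/2);                          // 32
  unsigned VSEW = RISCVVType::encodeSEW(/*SEW=*/32);                           // 2
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(RISCVII::VLMUL::LMUL_F2);  // {2, true}
  RISCVII::VLMUL VLMul = RISCVVType::encodeLMUL(/*LMUL=*/2, /*Fractional=*/true); // LMUL_F2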
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
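A short usage sketch (not from this file), selecting the runtime routine for a signed f64-to-i32 conversion, with using namespace llvm assumed:
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "expected a libcall for this conversion");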
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
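A hedged sketch of how such predicates are typically used inside an assumed DAG-combine helper that returns SDValue (N is an assumed ISD::ADD node; using namespace llvm assumed):
  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
  if (isNullConstant(RHS))   // fold (add X, 0) -> X
    return LHS;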
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1528
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
Definition: bit.h:215
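Worked values for some of the bit-manipulation helpers referenced above (MathExtras.h / bit.h); purely illustrative:
  bool     P  = llvm::isPowerOf2_64(64);    // true
  unsigned L  = llvm::Log2_64(64);          // 6 (floor log2)
  uint64_t C  = llvm::PowerOf2Ceil(17);     // 32
  int      TZ = llvm::countr_zero(0x50u);   // 4 trailing zero bits (0x50 == 0b1010000)
  int      BW = llvm::bit_width(17u);       // 5 bits needed (17 == 0b10001)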
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
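A hedged sketch of the call above; TLI, DL and Ty are assumed to be a TargetLowering reference, a DataLayout and an aggregate IR Type already in scope:
  llvm::SmallVector<llvm::EVT, 4> ValueVTs;
  llvm::ComputeValueVTs(TLI, DL, Ty, ValueVTs);   // one EVT per scalar leaf of Ty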
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
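A small illustrative sketch of the range helpers referenced above (STLExtras.h):
  llvm::SmallVector<int, 4> Vals = {1, 2, 3, 4};
  bool HasOdd   = llvm::any_of(Vals, [](int V) { return V % 2 != 0; });    // true
  auto NumOdd   = llvm::count_if(Vals, [](int V) { return V % 2 != 0; });  // 2
  bool HasThree = llvm::is_contained(Vals, 3);                             // true
  for (auto [Idx, V] : llvm::enumerate(Vals))
    (void)Idx;  // Idx runs 0..3 in step with each element V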
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
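A few more illustrative values for the helpers above:
  bool     M = llvm::isMask_64(0xFF);          // true: 0b1111'1111
  uint64_t D = llvm::divideCeil(10, 4);        // 3
  int64_t  S = llvm::SignExtend64<12>(0x800);  // -2048 (sign bit of the 12-bit field is set)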
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
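For example (a sketch, not from this file), a mask of four sequential lanes starting at 2, padded with two undef (-1) lanes:
  llvm::SmallVector<int, 16> Mask =
      llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
  // Mask == {2, 3, 4, 5, -1, -1}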
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:323
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
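A brief sketch of the EVT factory helpers above (Ctx is an assumed llvm::LLVMContext):
  llvm::EVT I8     = llvm::EVT::getIntegerVT(Ctx, 8);                          // i8
  llvm::EVT V4I8   = llvm::EVT::getVectorVT(Ctx, I8, 4);                       // v4i8
  llvm::EVT NxV4I8 = llvm::EVT::getVectorVT(Ctx, I8, 4, /*IsScalable=*/true);  // nxv4i8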
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1042
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1002
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
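A hedged sketch of the KnownBits queries above: a 16-bit value whose low four bits are known to be zero and whose other bits are unknown.
  llvm::KnownBits Known(16);
  Known.Zero.setLowBits(4);                          // bits [3:0] are known 0
  unsigned MaxTZ = Known.countMaxTrailingZeros();    // 16: the unknown bits could all be 0
  llvm::KnownBits Wide = Known.zext(32);             // the 16 new high bits become known 0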
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)