1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55 static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
67 static cl::opt<unsigned> NumRepeatedDivisors(
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73 static cl::opt<int>
74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
253 if (!Subtarget.hasVendorXCValu())
257 if (!Subtarget.hasVendorXCValu())
261
262 if (RV64LegalI32 && Subtarget.is64Bit())
264
266
269 if (RV64LegalI32 && Subtarget.is64Bit())
271
273
275
276 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
277 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
278
279 if (Subtarget.is64Bit()) {
281
282 if (!RV64LegalI32) {
285 MVT::i32, Custom);
287 MVT::i32, Custom);
288 if (!Subtarget.hasStdExtZbb())
290 } else {
292 if (Subtarget.hasStdExtZbb()) {
295 }
296 }
298 }
299 if (!Subtarget.hasStdExtZmmul()) {
301 if (RV64LegalI32 && Subtarget.is64Bit())
303 } else if (Subtarget.is64Bit()) {
305 if (!RV64LegalI32)
307 else
309 } else {
311 }
312
313 if (!Subtarget.hasStdExtM()) {
315 XLenVT, Expand);
316 if (RV64LegalI32 && Subtarget.is64Bit())
318 Promote);
319 } else if (Subtarget.is64Bit()) {
320 if (!RV64LegalI32)
322 {MVT::i8, MVT::i16, MVT::i32}, Custom);
323 }
324
325 if (RV64LegalI32 && Subtarget.is64Bit()) {
329 Expand);
330 }
331
334 Expand);
335
337 Custom);
338
339 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
340 if (!RV64LegalI32 && Subtarget.is64Bit())
342 } else if (Subtarget.hasVendorXTHeadBb()) {
343 if (Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXCVbitmanip()) {
348 } else {
350 if (RV64LegalI32 && Subtarget.is64Bit())
352 }
353
354 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
355 // pattern match it directly in isel.
357 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
358 Subtarget.hasVendorXTHeadBb())
359 ? Legal
360 : Expand);
361 if (RV64LegalI32 && Subtarget.is64Bit())
363 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
364 Subtarget.hasVendorXTHeadBb())
365 ? Promote
366 : Expand);
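// Illustrative sketch (assumed lowering, not taken from this file): with Zbb
// or Zbkb on RV64, an XLen-wide byte swap selects to a single rev8, e.g.
//   %r = call i64 @llvm.bswap.i64(i64 %x)   ; -> rev8 a0, a0
// A 32-bit swap on RV64 roughly needs an extra shift to discard the low
// bytes, which is why the i32 form above is only Promote/Expand, not Legal.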
367
368
369 if (Subtarget.hasVendorXCVbitmanip()) {
371 } else {
372 // Zbkb can use rev8+brev8 to implement bitreverse.
374 Subtarget.hasStdExtZbkb() ? Custom : Expand);
375 }
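// Illustrative sketch of the Zbkb path above (assumed sequence): brev8
// reverses the bits within each byte and rev8 reverses the bytes, so a full
// XLen bitreverse can be built from the pair, e.g. on RV64:
//   rev8  a0, a0
//   brev8 a0, a0
// The two operations commute, so either order works.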
376
377 if (Subtarget.hasStdExtZbb()) {
379 Legal);
380 if (RV64LegalI32 && Subtarget.is64Bit())
382 Promote);
383
384 if (Subtarget.is64Bit()) {
385 if (RV64LegalI32)
387 else
389 }
390 } else if (!Subtarget.hasVendorXCVbitmanip()) {
392 if (RV64LegalI32 && Subtarget.is64Bit())
394 }
395
396 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
397 Subtarget.hasVendorXCVbitmanip()) {
398 // We need the custom lowering to make sure that the resulting sequence
399 // for the 32-bit case is efficient on 64-bit targets.
400 if (Subtarget.is64Bit()) {
401 if (RV64LegalI32) {
403 Subtarget.hasStdExtZbb() ? Legal : Promote);
404 if (!Subtarget.hasStdExtZbb())
406 } else
408 }
409 } else {
411 if (RV64LegalI32 && Subtarget.is64Bit())
413 }
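// Illustrative sketch (assumed selection): the custom hook above lets a
// 32-bit leading-zero count on RV64 use the W-form directly, e.g. with Zbb:
//   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)   ; -> clzw a0, a0
// rather than zero-extending and counting at 64 bits and then adjusting.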
414
415 if (!RV64LegalI32 && Subtarget.is64Bit() &&
416 !Subtarget.hasShortForwardBranchOpt())
418
419 // We can use PseudoCCSUB to implement ABS.
420 if (Subtarget.hasShortForwardBranchOpt())
422
423 if (!Subtarget.hasVendorXTHeadCondMov()) {
425 if (RV64LegalI32 && Subtarget.is64Bit())
427 }
428
429 static const unsigned FPLegalNodeTypes[] = {
436
437 static const ISD::CondCode FPCCToExpand[] = {
441
442 static const unsigned FPOpToExpand[] = {
444 ISD::FREM};
445
446 static const unsigned FPRndMode[] = {
449
450 if (Subtarget.hasStdExtZfhminOrZhinxmin())
452
453 static const unsigned ZfhminZfbfminPromoteOps[] = {
463
464 if (Subtarget.hasStdExtZfbfmin()) {
473 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
475 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
476 // DAGCombiner::visitFP_ROUND probably needs improvements first.
478 }
479
480 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
481 if (Subtarget.hasStdExtZfhOrZhinx()) {
482 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
483 setOperationAction(FPRndMode, MVT::f16,
484 Subtarget.hasStdExtZfa() ? Legal : Custom);
487 } else {
488 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
491 MVT::f16, Legal);
492 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
493 // DAGCombiner::visitFP_ROUND probably needs improvements first.
495 }
496
499 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
502
504 Subtarget.hasStdExtZfa() ? Legal : Promote);
509 MVT::f16, Promote);
510
511 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
512 // complete support for all operations in LegalizeDAG.
517 MVT::f16, Promote);
518
519 // We need to custom promote this.
520 if (Subtarget.is64Bit())
522
524 Subtarget.hasStdExtZfa() ? Legal : Custom);
525 }
526
527 if (Subtarget.hasStdExtFOrZfinx()) {
528 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
529 setOperationAction(FPRndMode, MVT::f32,
530 Subtarget.hasStdExtZfa() ? Legal : Custom);
531 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
535 setOperationAction(FPOpToExpand, MVT::f32, Expand);
536 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
537 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
538 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
539 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
543 Subtarget.isSoftFPABI() ? LibCall : Custom);
546
547 if (Subtarget.hasStdExtZfa()) {
550 } else {
552 }
553 }
554
555 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtDOrZdinx()) {
559 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
560
561 if (!Subtarget.is64Bit())
563
564 if (Subtarget.hasStdExtZfa()) {
565 setOperationAction(FPRndMode, MVT::f64, Legal);
568 } else {
569 if (Subtarget.is64Bit())
570 setOperationAction(FPRndMode, MVT::f64, Custom);
571
573 }
574
577 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
582 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
583 setOperationAction(FPOpToExpand, MVT::f64, Expand);
584 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
585 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
586 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
587 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
591 Subtarget.isSoftFPABI() ? LibCall : Custom);
594 }
595
596 if (Subtarget.is64Bit()) {
599 MVT::i32, Custom);
601 }
602
603 if (Subtarget.hasStdExtFOrZfinx()) {
605 Custom);
606
609 XLenVT, Legal);
610
611 if (RV64LegalI32 && Subtarget.is64Bit())
614 MVT::i32, Legal);
615
618 }
619
622 XLenVT, Custom);
623
625
626 if (Subtarget.is64Bit())
628
629 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
630 // Unfortunately this can't be determined just from the ISA naming string.
632 Subtarget.is64Bit() ? Legal : Custom);
634 Subtarget.is64Bit() ? Legal : Custom);
635
638 if (Subtarget.is64Bit())
640
641 if (Subtarget.hasStdExtZicbop()) {
643 }
644
645 if (Subtarget.hasStdExtA()) {
647 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649 else
651 } else if (Subtarget.hasForcedAtomics()) {
653 } else {
655 }
656
658
660
661 if (getTargetMachine().getTargetTriple().isOSLinux()) {
662 // Custom lowering of llvm.clear_cache.
664 }
665
666 if (Subtarget.hasVInstructions()) {
668
670 if (RV64LegalI32 && Subtarget.is64Bit())
672
673 // RVV intrinsics may have illegal operands.
674 // We also need to custom legalize vmv.x.s.
677 {MVT::i8, MVT::i16}, Custom);
678 if (Subtarget.is64Bit())
680 MVT::i32, Custom);
681 else
683 MVT::i64, Custom);
684
686 MVT::Other, Custom);
687
688 static const unsigned IntegerVPOps[] = {
689 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
690 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
691 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
692 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
693 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
694 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
695 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
696 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
697 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
698 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
699 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
700 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
701 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
702 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
703 ISD::EXPERIMENTAL_VP_SPLAT};
704
705 static const unsigned FloatingPointVPOps[] = {
706 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
707 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
708 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
709 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
710 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
711 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
712 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
713 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
714 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
715 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
716 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
717 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
718 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
719 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
720
721 static const unsigned IntegerVecReduceOps[] = {
725
726 static const unsigned FloatingPointVecReduceOps[] = {
729
730 if (!Subtarget.is64Bit()) {
731 // We must custom-lower certain vXi64 operations on RV32 due to the vector
732 // element type being illegal.
734 MVT::i64, Custom);
735
736 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
737
738 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
739 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
740 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
741 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
742 MVT::i64, Custom);
743 }
744
745 for (MVT VT : BoolVecVTs) {
746 if (!isTypeLegal(VT))
747 continue;
748
750
751 // Mask VTs are custom-expanded into a series of standard nodes
755 VT, Custom);
756
758 Custom);
759
762 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
763 Expand);
764
765 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
766 Custom);
767
768 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
769
772 Custom);
773
775 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
776 Custom);
777
778 // RVV has native int->float & float->int conversions where the
779 // element type sizes are within one power-of-two of each other. Any
780 // wider distances between type sizes have to be lowered as sequences
781 // which progressively narrow the gap in stages.
786 VT, Custom);
788 Custom);
789
790 // Expand all extending loads to types larger than this, and truncating
791 // stores from types larger than this.
793 setTruncStoreAction(VT, OtherVT, Expand);
795 OtherVT, Expand);
796 }
797
798 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
799 ISD::VP_TRUNCATE, ISD::VP_SETCC},
800 VT, Custom);
801
804
806
807 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
808 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
809
812 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
813 }
814
815 for (MVT VT : IntVecVTs) {
816 if (!isTypeLegal(VT))
817 continue;
818
821
822 // Vectors implement MULHS/MULHU.
824
825 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
826 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
828
830 Legal);
831
833
834 // Custom-lower extensions and truncations from/to mask types.
836 VT, Custom);
837
838 // RVV has native int->float & float->int conversions where the
839 // element type sizes are within one power-of-two of each other. Any
840 // wider distances between type sizes have to be lowered as sequences
841 // which progressively narrow the gap in stages.
846 VT, Custom);
848 Custom);
852 VT, Legal);
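// Illustrative sketch of the staging described above (assumed lowering): a
// conversion such as nxv2i8 -> nxv2f64 spans more than one power of two, so
// it is emitted as a chain that narrows the gap step by step, e.g. a widening
// integer extend i8 -> i32 followed by a single widening convert
// (vfwcvt.f.x.v) from i32 -> f64.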
853
854 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
855 // nodes which truncate by one power of two at a time.
857
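// Illustrative sketch (assumed lowering): truncating nxv2i64 to nxv2i8 is not
// one node; it becomes a chain of RISCVISD::TRUNCATE_VECTOR_VL steps,
// i64 -> i32 -> i16 -> i8, each of which maps onto a narrowing shift such as
// vnsrl.wi v8, v8, 0.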
858 // Custom-lower insert/extract operations to simplify patterns.
860 Custom);
861
862 // Custom-lower reduction operations to set up the corresponding custom
863 // nodes' operands.
864 setOperationAction(IntegerVecReduceOps, VT, Custom);
865
866 setOperationAction(IntegerVPOps, VT, Custom);
867
869
871 VT, Custom);
872
874 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
875 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
876 VT, Custom);
877
880 VT, Custom);
881
884
886
888 setTruncStoreAction(VT, OtherVT, Expand);
890 OtherVT, Expand);
891 }
892
895
896 // Splice
898
899 if (Subtarget.hasStdExtZvkb()) {
901 setOperationAction(ISD::VP_BSWAP, VT, Custom);
902 } else {
903 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
905 }
906
907 if (Subtarget.hasStdExtZvbb()) {
909 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
910 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
911 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
912 VT, Custom);
913 } else {
914 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
916 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
917 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
918 VT, Expand);
919
920 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
921 // in the range of f32.
922 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
923 if (isTypeLegal(FloatVT)) {
925 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
926 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
927 VT, Custom);
928 }
929 }
930 }
931
932 // Expand various CCs to best match the RVV ISA, which natively supports UNE
933 // but no other unordered comparisons, and supports all ordered comparisons
934 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
935 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
936 // and we pattern-match those back to the "original", swapping operands once
937 // more. This way we catch both operations and both "vf" and "fv" forms with
938 // fewer patterns.
939 static const ISD::CondCode VFPCCToExpand[] = {
943 };
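// Illustrative sketch of the swap described above (assumed selection): a
// SETOGT comparison is expanded to SETOLT with its operands swapped, and isel
// then matches that back to the greater-than form, so vmfgt.vv/vmflt.vv and
// the scalar "vf"/"fv" variants are all reachable from one canonical node.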
944
945 // TODO: support more ops.
946 static const unsigned ZvfhminPromoteOps[] = {
954
955 // TODO: support more vp ops.
956 static const unsigned ZvfhminPromoteVPOps[] = {
957 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
958 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
959 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
960 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
961 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
962 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
963 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
964 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
965 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
966
967 // Sets common operation actions on RVV floating-point vector types.
968 const auto SetCommonVFPActions = [&](MVT VT) {
970 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
971 // sizes are within one power-of-two of each other. Therefore conversions
972 // between vXf16 and vXf64 must be lowered as sequences which convert via
973 // vXf32.
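// Illustrative sketch (assumed lowering): fpext nxv2f16 -> nxv2f64 is not a
// single widening convert; it goes through nxv2f32, e.g. two vfwcvt.f.f.v
// steps. The truncating direction similarly chains two narrowing converts,
// using the round-towards-odd form for the intermediate step where needed to
// avoid double rounding.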
976 // Custom-lower insert/extract operations to simplify patterns.
978 Custom);
979 // Expand various condition codes (explained above).
980 setCondCodeAction(VFPCCToExpand, VT, Expand);
981
984
988 VT, Custom);
989
990 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
991
992 // Expand FP operations that need libcalls.
1004
1006
1008
1010 VT, Custom);
1011
1013 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1014 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1015 VT, Custom);
1016
1019
1022 VT, Custom);
1023
1026
1028
1029 setOperationAction(FloatingPointVPOps, VT, Custom);
1030
1032 Custom);
1035 VT, Legal);
1040 VT, Custom);
1041 };
1042
1043 // Sets common extload/truncstore actions on RVV floating-point vector
1044 // types.
1045 const auto SetCommonVFPExtLoadTruncStoreActions =
1046 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1047 for (auto SmallVT : SmallerVTs) {
1048 setTruncStoreAction(VT, SmallVT, Expand);
1049 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1050 }
1051 };
1052
1053 if (Subtarget.hasVInstructionsF16()) {
1054 for (MVT VT : F16VecVTs) {
1055 if (!isTypeLegal(VT))
1056 continue;
1057 SetCommonVFPActions(VT);
1058 }
1059 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1060 for (MVT VT : F16VecVTs) {
1061 if (!isTypeLegal(VT))
1062 continue;
1065 Custom);
1066 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1067 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1068 Custom);
1071 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1072 VT, Custom);
1075 VT, Custom);
1076 if (Subtarget.hasStdExtZfhmin())
1078 // load/store
1080
1081 // Custom split nxv32f16 since nxv32f32 is not legal.
1082 if (VT == MVT::nxv32f16) {
1083 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1084 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1085 continue;
1086 }
1087 // Add more promote ops.
1088 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1089 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1090 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1091 }
1092 }
1093
1094 // TODO: Could we merge some code with zvfhmin?
1095 if (Subtarget.hasVInstructionsBF16()) {
1096 for (MVT VT : BF16VecVTs) {
1097 if (!isTypeLegal(VT))
1098 continue;
1100 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1102 Custom);
1105 VT, Custom);
1107 if (Subtarget.hasStdExtZfbfmin())
1109 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1110 Custom);
1112 // TODO: Promote to fp32.
1113 }
1114 }
1115
1116 if (Subtarget.hasVInstructionsF32()) {
1117 for (MVT VT : F32VecVTs) {
1118 if (!isTypeLegal(VT))
1119 continue;
1120 SetCommonVFPActions(VT);
1121 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1122 }
1123 }
1124
1125 if (Subtarget.hasVInstructionsF64()) {
1126 for (MVT VT : F64VecVTs) {
1127 if (!isTypeLegal(VT))
1128 continue;
1129 SetCommonVFPActions(VT);
1130 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1131 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1132 }
1133 }
1134
1135 if (Subtarget.useRVVForFixedLengthVectors()) {
1137 if (!useRVVForFixedLengthVectorVT(VT))
1138 continue;
1139
1140 // By default everything must be expanded.
1141 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1144 setTruncStoreAction(VT, OtherVT, Expand);
1146 OtherVT, Expand);
1147 }
1148
1149 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1150 // expansion to a build_vector of 0s.
1152
1153 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1155 Custom);
1156
1158 Custom);
1159
1161 VT, Custom);
1162
1164
1166
1168
1170
1172
1174
1177 Custom);
1178
1180 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1181 Custom);
1182
1184 {
1193 },
1194 VT, Custom);
1196 Custom);
1197
1199
1200 // Operations below are different between masks and other vectors.
1201 if (VT.getVectorElementType() == MVT::i1) {
1202 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1203 ISD::OR, ISD::XOR},
1204 VT, Custom);
1205
1206 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1207 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1208 VT, Custom);
1209
1210 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1211 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1212 continue;
1213 }
1214
1215 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1216 // it before type legalization for i64 vectors on RV32. It will then be
1217 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1218 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1219 // improvements first.
1220 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1223 }
1224
1227
1228 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1229 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1230 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1231 ISD::VP_SCATTER},
1232 VT, Custom);
1233
1237 VT, Custom);
1238
1241
1243
1244 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1245 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1247
1251 VT, Custom);
1252
1254
1257
1258 // Custom-lower reduction operations to set up the corresponding custom
1259 // nodes' operands.
1263 VT, Custom);
1264
1265 setOperationAction(IntegerVPOps, VT, Custom);
1266
1267 if (Subtarget.hasStdExtZvkb())
1269
1270 if (Subtarget.hasStdExtZvbb()) {
1273 VT, Custom);
1274 } else {
1275 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1276 // in the range of f32.
1277 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1278 if (isTypeLegal(FloatVT))
1281 Custom);
1282 }
1283 }
1284
1286 // There are no extending loads or truncating stores.
1287 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1288 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1289 setTruncStoreAction(VT, InnerVT, Expand);
1290 }
1291
1292 if (!useRVVForFixedLengthVectorVT(VT))
1293 continue;
1294
1295 // By default everything must be expanded.
1296 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1298
1299 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1300 // expansion to a build_vector of 0s.
1302
1305 VT, Custom);
1306
1307 // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
1308 // vp_stride_load/store, vp_gather/scatter can be hoisted to here.
1310
1313 Custom);
1314
1315 if (VT.getVectorElementType() == MVT::f16 &&
1316 !Subtarget.hasVInstructionsF16()) {
1317 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1319 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1320 Custom);
1322 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1323 VT, Custom);
1325 if (Subtarget.hasStdExtZfhmin()) {
1326 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1328 } else {
1329 // We need to custom legalize f16 build vectors if Zfhmin isn't
1330 // available.
1332 }
1333 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1334 // Don't promote f16 vector operations to f32 if f32 vector type is
1335 // not legal.
1336 // TODO: could split the f16 vector into two vectors and do promotion.
1337 if (!isTypeLegal(F32VecVT))
1338 continue;
1339 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1340 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1341 continue;
1342 }
1343
1344 if (VT.getVectorElementType() == MVT::bf16) {
1345 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1346 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1349 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1350 Custom);
1351 // TODO: Promote to fp32.
1352 continue;
1353 }
1354
1357 VT, Custom);
1358
1361
1362 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1363 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1364 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1365 ISD::VP_SCATTER},
1366 VT, Custom);
1367
1372 VT, Custom);
1373
1376 VT, Custom);
1377
1378 setCondCodeAction(VFPCCToExpand, VT, Expand);
1379
1382
1384
1385 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1386
1387 setOperationAction(FloatingPointVPOps, VT, Custom);
1388
1395 VT, Custom);
1396 }
1397
1398 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1399 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1400 Custom);
1401 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1403 if (Subtarget.hasStdExtFOrZfinx())
1405 if (Subtarget.hasStdExtDOrZdinx())
1407 }
1408 }
1409
1410 if (Subtarget.hasStdExtA()) {
1412 if (RV64LegalI32 && Subtarget.is64Bit())
1414 }
1415
1416 if (Subtarget.hasForcedAtomics()) {
1417 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1423 XLenVT, LibCall);
1424 }
1425
1426 if (Subtarget.hasVendorXTHeadMemIdx()) {
1427 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1428 setIndexedLoadAction(im, MVT::i8, Legal);
1429 setIndexedStoreAction(im, MVT::i8, Legal);
1430 setIndexedLoadAction(im, MVT::i16, Legal);
1431 setIndexedStoreAction(im, MVT::i16, Legal);
1432 setIndexedLoadAction(im, MVT::i32, Legal);
1433 setIndexedStoreAction(im, MVT::i32, Legal);
1434
1435 if (Subtarget.is64Bit()) {
1436 setIndexedLoadAction(im, MVT::i64, Legal);
1437 setIndexedStoreAction(im, MVT::i64, Legal);
1438 }
1439 }
1440 }
1441
1442 if (Subtarget.hasVendorXCVmem()) {
1446
1450 }
1451
1452 if (Subtarget.hasVendorXCValu()) {
1460 }
1461
1462 // Function alignments.
1463 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1464 setMinFunctionAlignment(FunctionAlignment);
1465 // Set preferred alignments.
1468
1472 if (Subtarget.is64Bit())
1474
1475 if (Subtarget.hasStdExtFOrZfinx())
1477
1478 if (Subtarget.hasStdExtZbb())
1480
1481 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1482 Subtarget.hasStdExtV())
1484
1485 if (Subtarget.hasStdExtZbkb())
1487 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1489 if (Subtarget.hasStdExtFOrZfinx())
1492 if (Subtarget.hasVInstructions())
1494 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1497 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1500 if (Subtarget.hasVendorXTHeadMemPair())
1502 if (Subtarget.useRVVForFixedLengthVectors())
1504
1505 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1506 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1507
1508 // Disable strict node mutation.
1509 IsStrictFPEnabled = true;
1510
1511 // Let the subtarget decide if a predictable select is more expensive than the
1512 // corresponding branch. This information is used in CGP/SelectOpt to decide
1513 // when to convert selects into branches.
1514 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1515}
1516
1518 LLVMContext &Context,
1519 EVT VT) const {
1520 if (!VT.isVector())
1521 return getPointerTy(DL);
1522 if (Subtarget.hasVInstructions() &&
1523 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1524 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1526}
1527
1528MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1529 return Subtarget.getXLenVT();
1530}
1531
1532// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1533bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1534 unsigned VF,
1535 bool IsScalable) const {
1536 if (!Subtarget.hasVInstructions())
1537 return true;
1538
1539 if (!IsScalable)
1540 return true;
1541
1542 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1543 return true;
1544
1545 // Don't allow VF=1 if those types aren't legal.
1546 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1547 return true;
1548
1549 // VLEN=32 support is incomplete.
1550 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1551 return true;
1552
1553 // The maximum VF is for the smallest element width with LMUL=8.
1554 // VF must be a power of 2.
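// A worked example of the bound below (assuming the usual value of
// RISCV::RVVBitsPerBlock, 64): the smallest element width is 8 bits, so with
// LMUL=8 the cap is MaxVF = (64 / 8) * 8 = 64, and only power-of-two VFs up
// to that are handled here.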
1555 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1556 return VF > MaxVF || !isPowerOf2_32(VF);
1557}
1558
1560 return !Subtarget.hasVInstructions() ||
1561 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1562}
1563
1565 const CallInst &I,
1566 MachineFunction &MF,
1567 unsigned Intrinsic) const {
1568 auto &DL = I.getDataLayout();
1569
1570 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1571 bool IsUnitStrided, bool UsePtrVal = false) {
1573 // We can't use ptrVal if the intrinsic can access memory before the
1574 // pointer. This means we can't use it for strided or indexed intrinsics.
1575 if (UsePtrVal)
1576 Info.ptrVal = I.getArgOperand(PtrOp);
1577 else
1578 Info.fallbackAddressSpace =
1579 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1580 Type *MemTy;
1581 if (IsStore) {
1582 // Store value is the first operand.
1583 MemTy = I.getArgOperand(0)->getType();
1584 } else {
1585 // Use the return type. If it's a segment load, the return type is a struct.
1586 MemTy = I.getType();
1587 if (MemTy->isStructTy())
1588 MemTy = MemTy->getStructElementType(0);
1589 }
1590 if (!IsUnitStrided)
1591 MemTy = MemTy->getScalarType();
1592
1593 Info.memVT = getValueType(DL, MemTy);
1594 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1596 Info.flags |=
1598 return true;
1599 };
1600
1601 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1603
1605 switch (Intrinsic) {
1606 default:
1607 return false;
1608 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1609 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1610 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1611 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1612 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1613 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1614 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1615 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1616 case Intrinsic::riscv_masked_cmpxchg_i32:
1618 Info.memVT = MVT::i32;
1619 Info.ptrVal = I.getArgOperand(0);
1620 Info.offset = 0;
1621 Info.align = Align(4);
1624 return true;
1625 case Intrinsic::riscv_masked_strided_load:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1627 /*IsUnitStrided*/ false);
1628 case Intrinsic::riscv_masked_strided_store:
1629 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1630 /*IsUnitStrided*/ false);
1631 case Intrinsic::riscv_seg2_load:
1632 case Intrinsic::riscv_seg3_load:
1633 case Intrinsic::riscv_seg4_load:
1634 case Intrinsic::riscv_seg5_load:
1635 case Intrinsic::riscv_seg6_load:
1636 case Intrinsic::riscv_seg7_load:
1637 case Intrinsic::riscv_seg8_load:
1638 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1639 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1640 case Intrinsic::riscv_seg2_store:
1641 case Intrinsic::riscv_seg3_store:
1642 case Intrinsic::riscv_seg4_store:
1643 case Intrinsic::riscv_seg5_store:
1644 case Intrinsic::riscv_seg6_store:
1645 case Intrinsic::riscv_seg7_store:
1646 case Intrinsic::riscv_seg8_store:
1647 // Operands are (vec, ..., vec, ptr, vl)
1648 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1649 /*IsStore*/ true,
1650 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1651 case Intrinsic::riscv_vle:
1652 case Intrinsic::riscv_vle_mask:
1653 case Intrinsic::riscv_vleff:
1654 case Intrinsic::riscv_vleff_mask:
1655 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1656 /*IsStore*/ false,
1657 /*IsUnitStrided*/ true,
1658 /*UsePtrVal*/ true);
1659 case Intrinsic::riscv_vse:
1660 case Intrinsic::riscv_vse_mask:
1661 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1662 /*IsStore*/ true,
1663 /*IsUnitStrided*/ true,
1664 /*UsePtrVal*/ true);
1665 case Intrinsic::riscv_vlse:
1666 case Intrinsic::riscv_vlse_mask:
1667 case Intrinsic::riscv_vloxei:
1668 case Intrinsic::riscv_vloxei_mask:
1669 case Intrinsic::riscv_vluxei:
1670 case Intrinsic::riscv_vluxei_mask:
1671 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1672 /*IsStore*/ false,
1673 /*IsUnitStrided*/ false);
1674 case Intrinsic::riscv_vsse:
1675 case Intrinsic::riscv_vsse_mask:
1676 case Intrinsic::riscv_vsoxei:
1677 case Intrinsic::riscv_vsoxei_mask:
1678 case Intrinsic::riscv_vsuxei:
1679 case Intrinsic::riscv_vsuxei_mask:
1680 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1681 /*IsStore*/ true,
1682 /*IsUnitStrided*/ false);
1683 case Intrinsic::riscv_vlseg2:
1684 case Intrinsic::riscv_vlseg3:
1685 case Intrinsic::riscv_vlseg4:
1686 case Intrinsic::riscv_vlseg5:
1687 case Intrinsic::riscv_vlseg6:
1688 case Intrinsic::riscv_vlseg7:
1689 case Intrinsic::riscv_vlseg8:
1690 case Intrinsic::riscv_vlseg2ff:
1691 case Intrinsic::riscv_vlseg3ff:
1692 case Intrinsic::riscv_vlseg4ff:
1693 case Intrinsic::riscv_vlseg5ff:
1694 case Intrinsic::riscv_vlseg6ff:
1695 case Intrinsic::riscv_vlseg7ff:
1696 case Intrinsic::riscv_vlseg8ff:
1697 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1698 /*IsStore*/ false,
1699 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1700 case Intrinsic::riscv_vlseg2_mask:
1701 case Intrinsic::riscv_vlseg3_mask:
1702 case Intrinsic::riscv_vlseg4_mask:
1703 case Intrinsic::riscv_vlseg5_mask:
1704 case Intrinsic::riscv_vlseg6_mask:
1705 case Intrinsic::riscv_vlseg7_mask:
1706 case Intrinsic::riscv_vlseg8_mask:
1707 case Intrinsic::riscv_vlseg2ff_mask:
1708 case Intrinsic::riscv_vlseg3ff_mask:
1709 case Intrinsic::riscv_vlseg4ff_mask:
1710 case Intrinsic::riscv_vlseg5ff_mask:
1711 case Intrinsic::riscv_vlseg6ff_mask:
1712 case Intrinsic::riscv_vlseg7ff_mask:
1713 case Intrinsic::riscv_vlseg8ff_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1715 /*IsStore*/ false,
1716 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1717 case Intrinsic::riscv_vlsseg2:
1718 case Intrinsic::riscv_vlsseg3:
1719 case Intrinsic::riscv_vlsseg4:
1720 case Intrinsic::riscv_vlsseg5:
1721 case Intrinsic::riscv_vlsseg6:
1722 case Intrinsic::riscv_vlsseg7:
1723 case Intrinsic::riscv_vlsseg8:
1724 case Intrinsic::riscv_vloxseg2:
1725 case Intrinsic::riscv_vloxseg3:
1726 case Intrinsic::riscv_vloxseg4:
1727 case Intrinsic::riscv_vloxseg5:
1728 case Intrinsic::riscv_vloxseg6:
1729 case Intrinsic::riscv_vloxseg7:
1730 case Intrinsic::riscv_vloxseg8:
1731 case Intrinsic::riscv_vluxseg2:
1732 case Intrinsic::riscv_vluxseg3:
1733 case Intrinsic::riscv_vluxseg4:
1734 case Intrinsic::riscv_vluxseg5:
1735 case Intrinsic::riscv_vluxseg6:
1736 case Intrinsic::riscv_vluxseg7:
1737 case Intrinsic::riscv_vluxseg8:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1739 /*IsStore*/ false,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vlsseg2_mask:
1742 case Intrinsic::riscv_vlsseg3_mask:
1743 case Intrinsic::riscv_vlsseg4_mask:
1744 case Intrinsic::riscv_vlsseg5_mask:
1745 case Intrinsic::riscv_vlsseg6_mask:
1746 case Intrinsic::riscv_vlsseg7_mask:
1747 case Intrinsic::riscv_vlsseg8_mask:
1748 case Intrinsic::riscv_vloxseg2_mask:
1749 case Intrinsic::riscv_vloxseg3_mask:
1750 case Intrinsic::riscv_vloxseg4_mask:
1751 case Intrinsic::riscv_vloxseg5_mask:
1752 case Intrinsic::riscv_vloxseg6_mask:
1753 case Intrinsic::riscv_vloxseg7_mask:
1754 case Intrinsic::riscv_vloxseg8_mask:
1755 case Intrinsic::riscv_vluxseg2_mask:
1756 case Intrinsic::riscv_vluxseg3_mask:
1757 case Intrinsic::riscv_vluxseg4_mask:
1758 case Intrinsic::riscv_vluxseg5_mask:
1759 case Intrinsic::riscv_vluxseg6_mask:
1760 case Intrinsic::riscv_vluxseg7_mask:
1761 case Intrinsic::riscv_vluxseg8_mask:
1762 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1763 /*IsStore*/ false,
1764 /*IsUnitStrided*/ false);
1765 case Intrinsic::riscv_vsseg2:
1766 case Intrinsic::riscv_vsseg3:
1767 case Intrinsic::riscv_vsseg4:
1768 case Intrinsic::riscv_vsseg5:
1769 case Intrinsic::riscv_vsseg6:
1770 case Intrinsic::riscv_vsseg7:
1771 case Intrinsic::riscv_vsseg8:
1772 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1773 /*IsStore*/ true,
1774 /*IsUnitStrided*/ false);
1775 case Intrinsic::riscv_vsseg2_mask:
1776 case Intrinsic::riscv_vsseg3_mask:
1777 case Intrinsic::riscv_vsseg4_mask:
1778 case Intrinsic::riscv_vsseg5_mask:
1779 case Intrinsic::riscv_vsseg6_mask:
1780 case Intrinsic::riscv_vsseg7_mask:
1781 case Intrinsic::riscv_vsseg8_mask:
1782 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1783 /*IsStore*/ true,
1784 /*IsUnitStrided*/ false);
1785 case Intrinsic::riscv_vssseg2:
1786 case Intrinsic::riscv_vssseg3:
1787 case Intrinsic::riscv_vssseg4:
1788 case Intrinsic::riscv_vssseg5:
1789 case Intrinsic::riscv_vssseg6:
1790 case Intrinsic::riscv_vssseg7:
1791 case Intrinsic::riscv_vssseg8:
1792 case Intrinsic::riscv_vsoxseg2:
1793 case Intrinsic::riscv_vsoxseg3:
1794 case Intrinsic::riscv_vsoxseg4:
1795 case Intrinsic::riscv_vsoxseg5:
1796 case Intrinsic::riscv_vsoxseg6:
1797 case Intrinsic::riscv_vsoxseg7:
1798 case Intrinsic::riscv_vsoxseg8:
1799 case Intrinsic::riscv_vsuxseg2:
1800 case Intrinsic::riscv_vsuxseg3:
1801 case Intrinsic::riscv_vsuxseg4:
1802 case Intrinsic::riscv_vsuxseg5:
1803 case Intrinsic::riscv_vsuxseg6:
1804 case Intrinsic::riscv_vsuxseg7:
1805 case Intrinsic::riscv_vsuxseg8:
1806 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1807 /*IsStore*/ true,
1808 /*IsUnitStrided*/ false);
1809 case Intrinsic::riscv_vssseg2_mask:
1810 case Intrinsic::riscv_vssseg3_mask:
1811 case Intrinsic::riscv_vssseg4_mask:
1812 case Intrinsic::riscv_vssseg5_mask:
1813 case Intrinsic::riscv_vssseg6_mask:
1814 case Intrinsic::riscv_vssseg7_mask:
1815 case Intrinsic::riscv_vssseg8_mask:
1816 case Intrinsic::riscv_vsoxseg2_mask:
1817 case Intrinsic::riscv_vsoxseg3_mask:
1818 case Intrinsic::riscv_vsoxseg4_mask:
1819 case Intrinsic::riscv_vsoxseg5_mask:
1820 case Intrinsic::riscv_vsoxseg6_mask:
1821 case Intrinsic::riscv_vsoxseg7_mask:
1822 case Intrinsic::riscv_vsoxseg8_mask:
1823 case Intrinsic::riscv_vsuxseg2_mask:
1824 case Intrinsic::riscv_vsuxseg3_mask:
1825 case Intrinsic::riscv_vsuxseg4_mask:
1826 case Intrinsic::riscv_vsuxseg5_mask:
1827 case Intrinsic::riscv_vsuxseg6_mask:
1828 case Intrinsic::riscv_vsuxseg7_mask:
1829 case Intrinsic::riscv_vsuxseg8_mask:
1830 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1831 /*IsStore*/ true,
1832 /*IsUnitStrided*/ false);
1833 }
1834}
1835
1837 const AddrMode &AM, Type *Ty,
1838 unsigned AS,
1839 Instruction *I) const {
1840 // No global is ever allowed as a base.
1841 if (AM.BaseGV)
1842 return false;
1843
1844 // RVV instructions only support register addressing.
1845 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1846 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1847
1848 // Require a 12-bit signed offset.
1849 if (!isInt<12>(AM.BaseOffs))
1850 return false;
1851
1852 switch (AM.Scale) {
1853 case 0: // "r+i" or just "i", depending on HasBaseReg.
1854 break;
1855 case 1:
1856 if (!AM.HasBaseReg) // allow "r+i".
1857 break;
1858 return false; // disallow "r+r" or "r+r+i".
1859 default:
1860 return false;
1861 }
1862
1863 return true;
1864}
1865
1867 return isInt<12>(Imm);
1868}
1869
1871 return isInt<12>(Imm);
1872}
1873
1874// On RV32, 64-bit integers are split into their high and low parts and held
1875// in two different registers, so the trunc is free since the low register can
1876// just be used.
1877// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1878// isTruncateFree?
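// Illustrative sketch: on RV32 an i64 value occupies a register pair, so
//   %lo = trunc i64 %x to i32
// simply continues using the low register of the pair and emits no
// instruction.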
1880 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1881 return false;
1882 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1883 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1884 return (SrcBits == 64 && DestBits == 32);
1885}
1886
1888 // We consider i64->i32 free on RV64 since we have good selection of W
1889 // instructions that make promoting operations back to i64 free in many cases.
1890 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1891 !DstVT.isInteger())
1892 return false;
1893 unsigned SrcBits = SrcVT.getSizeInBits();
1894 unsigned DestBits = DstVT.getSizeInBits();
1895 return (SrcBits == 64 && DestBits == 32);
1896}
1897
1899 EVT SrcVT = Val.getValueType();
1900 // free truncate from vnsrl and vnsra
1901 if (Subtarget.hasStdExtV() &&
1902 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1903 SrcVT.isVector() && VT2.isVector()) {
1904 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1905 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1906 if (SrcBits == DestBits * 2) {
1907 return true;
1908 }
1909 }
1910 return TargetLowering::isTruncateFree(Val, VT2);
1911}
1912
1914 // Zexts are free if they can be combined with a load.
1915 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1916 // poorly with type legalization of compares preferring sext.
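// Illustrative sketch (assumed selection): a zext fed by a narrow load folds
// into the unsigned load itself, e.g.
//   %b = load i8, ptr %p
//   %w = zext i8 %b to i32     ; -> a single lbu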
1917 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1918 EVT MemVT = LD->getMemoryVT();
1919 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1920 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1921 LD->getExtensionType() == ISD::ZEXTLOAD))
1922 return true;
1923 }
1924
1925 return TargetLowering::isZExtFree(Val, VT2);
1926}
1927
1929 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1930}
1931
1933 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1934}
1935
1937 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1938}
1939
1941 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1942 Subtarget.hasVendorXCVbitmanip();
1943}
1944
1946 const Instruction &AndI) const {
1947 // We expect to be able to match a bit extraction instruction if the Zbs
1948 // extension is supported and the mask is a power of two. However, we
1949 // conservatively return false if the mask would fit in an ANDI instruction,
1950 // on the basis that it's possible the sinking+duplication of the AND in
1951 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1952 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1953 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1954 return false;
1955 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1956 if (!Mask)
1957 return false;
1958 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1959}
1960
1962 EVT VT = Y.getValueType();
1963
1964 // FIXME: Support vectors once we have tests.
1965 if (VT.isVector())
1966 return false;
1967
1968 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1969 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1970}
1971
1973 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1974 if (Subtarget.hasStdExtZbs())
1975 return X.getValueType().isScalarInteger();
1976 auto *C = dyn_cast<ConstantSDNode>(Y);
1977 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1978 if (Subtarget.hasVendorXTHeadBs())
1979 return C != nullptr;
1980 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1981 return C && C->getAPIntValue().ule(10);
1982}
1983
1985 EVT VT) const {
1986 // Only enable for rvv.
1987 if (!VT.isVector() || !Subtarget.hasVInstructions())
1988 return false;
1989
1990 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1991 return false;
1992
1993 return true;
1994}
1995
1997 Type *Ty) const {
1998 assert(Ty->isIntegerTy());
1999
2000 unsigned BitSize = Ty->getIntegerBitWidth();
2001 if (BitSize > Subtarget.getXLen())
2002 return false;
2003
2004 // Fast path, assume 32-bit immediates are cheap.
2005 int64_t Val = Imm.getSExtValue();
2006 if (isInt<32>(Val))
2007 return true;
2008
2009 // A constant pool entry may be more aligned than the load we're trying to
2010 // replace. If we don't support unaligned scalar mem, prefer the constant
2011 // pool.
2012 // TODO: Can the caller pass down the alignment?
2013 if (!Subtarget.enableUnalignedScalarMem())
2014 return true;
2015
2016 // Prefer to keep the load if it would require many instructions.
2017 // This uses the same threshold we use for constant pools but doesn't
2018 // check useConstantPoolForLargeInts.
2019 // TODO: Should we keep the load only when we're definitely going to emit a
2020 // constant pool?
2021
2023 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2024}
2025
2029 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2030 SelectionDAG &DAG) const {
2031 // One interesting pattern that we'd want to form is 'bit extract':
2032 // ((1 >> Y) & 1) ==/!= 0
2033 // But we also need to be careful not to try to reverse that fold.
2034
2035 // Is this '((1 >> Y) & 1)'?
2036 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2037 return false; // Keep the 'bit extract' pattern.
2038
2039 // Will this be '((1 >> Y) & 1)' after the transform?
2040 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2041 return true; // Do form the 'bit extract' pattern.
2042
2043 // If 'X' is a constant, and we transform, then we will immediately
2044 // try to undo the fold, thus causing endless combine loop.
2045 // So only do the transform if X is not a constant. This matches the default
2046 // implementation of this function.
2047 return !XC;
2048}
2049
2050bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2051 switch (Opcode) {
2052 case Instruction::Add:
2053 case Instruction::Sub:
2054 case Instruction::Mul:
2055 case Instruction::And:
2056 case Instruction::Or:
2057 case Instruction::Xor:
2058 case Instruction::FAdd:
2059 case Instruction::FSub:
2060 case Instruction::FMul:
2061 case Instruction::FDiv:
2062 case Instruction::ICmp:
2063 case Instruction::FCmp:
2064 return true;
2065 case Instruction::Shl:
2066 case Instruction::LShr:
2067 case Instruction::AShr:
2068 case Instruction::UDiv:
2069 case Instruction::SDiv:
2070 case Instruction::URem:
2071 case Instruction::SRem:
2072 case Instruction::Select:
2073 return Operand == 1;
2074 default:
2075 return false;
2076 }
2077}
2078
2079
2081 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2082 return false;
2083
2084 if (canSplatOperand(I->getOpcode(), Operand))
2085 return true;
2086
2087 auto *II = dyn_cast<IntrinsicInst>(I);
2088 if (!II)
2089 return false;
2090
2091 switch (II->getIntrinsicID()) {
2092 case Intrinsic::fma:
2093 case Intrinsic::vp_fma:
2094 return Operand == 0 || Operand == 1;
2095 case Intrinsic::vp_shl:
2096 case Intrinsic::vp_lshr:
2097 case Intrinsic::vp_ashr:
2098 case Intrinsic::vp_udiv:
2099 case Intrinsic::vp_sdiv:
2100 case Intrinsic::vp_urem:
2101 case Intrinsic::vp_srem:
2102 case Intrinsic::ssub_sat:
2103 case Intrinsic::vp_ssub_sat:
2104 case Intrinsic::usub_sat:
2105 case Intrinsic::vp_usub_sat:
2106 return Operand == 1;
2107 // These intrinsics are commutative.
2108 case Intrinsic::vp_add:
2109 case Intrinsic::vp_mul:
2110 case Intrinsic::vp_and:
2111 case Intrinsic::vp_or:
2112 case Intrinsic::vp_xor:
2113 case Intrinsic::vp_fadd:
2114 case Intrinsic::vp_fmul:
2115 case Intrinsic::vp_icmp:
2116 case Intrinsic::vp_fcmp:
2117 case Intrinsic::smin:
2118 case Intrinsic::vp_smin:
2119 case Intrinsic::umin:
2120 case Intrinsic::vp_umin:
2121 case Intrinsic::smax:
2122 case Intrinsic::vp_smax:
2123 case Intrinsic::umax:
2124 case Intrinsic::vp_umax:
2125 case Intrinsic::sadd_sat:
2126 case Intrinsic::vp_sadd_sat:
2127 case Intrinsic::uadd_sat:
2128 case Intrinsic::vp_uadd_sat:
2129 // These intrinsics have 'vr' versions.
2130 case Intrinsic::vp_sub:
2131 case Intrinsic::vp_fsub:
2132 case Intrinsic::vp_fdiv:
2133 return Operand == 0 || Operand == 1;
2134 default:
2135 return false;
2136 }
2137}
2138
2139/// Check if sinking \p I's operands to I's basic block is profitable, because
2140/// the operands can be folded into a target instruction, e.g.
2141/// splats of scalars can fold into vector instructions.
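/// A sketch of the pattern this targets (illustrative IR, not from a test):
///   %h = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
///   %s = shufflevector <vscale x 4 x i32> %h, <vscale x 4 x i32> poison,
///                      <vscale x 4 x i32> zeroinitializer
///   %r = add <vscale x 4 x i32> %y, %s
/// Sinking the splat next to the add lets isel fold it into vadd.vx instead
/// of keeping a live splatted vector register across blocks.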
2143 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2144 using namespace llvm::PatternMatch;
2145
2146 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2147 return false;
2148
2149 // Don't sink splat operands if the target prefers not to. Some targets
2150 // require S2V transfer buffers, and we can run out of them copying the same
2151 // value repeatedly.
2152 // FIXME: It could still be worth doing if it would improve vector register
2153 // pressure and prevent a vector spill.
2154 if (!Subtarget.sinkSplatOperands())
2155 return false;
2156
2157 for (auto OpIdx : enumerate(I->operands())) {
2158 if (!canSplatOperand(I, OpIdx.index()))
2159 continue;
2160
2161 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2162 // Make sure we are not already sinking this operand
2163 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2164 continue;
2165
2166 // We are looking for a splat that can be sunk.
2168 m_Undef(), m_ZeroMask())))
2169 continue;
2170
2171 // Don't sink i1 splats.
2172 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2173 continue;
2174
2175 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2176 // and vector registers
2177 for (Use &U : Op->uses()) {
2178 Instruction *Insn = cast<Instruction>(U.getUser());
2179 if (!canSplatOperand(Insn, U.getOperandNo()))
2180 return false;
2181 }
2182
2183 Ops.push_back(&Op->getOperandUse(0));
2184 Ops.push_back(&OpIdx.value());
2185 }
2186 return true;
2187}
2188
2190 unsigned Opc = VecOp.getOpcode();
2191
2192 // Assume target opcodes can't be scalarized.
2193 // TODO - do we have any exceptions?
2194 if (Opc >= ISD::BUILTIN_OP_END)
2195 return false;
2196
2197 // If the vector op is not supported, try to convert to scalar.
2198 EVT VecVT = VecOp.getValueType();
2199 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2200 return true;
2201
2202 // If the vector op is supported, but the scalar op is not, the transform may
2203 // not be worthwhile.
2204 // Permit a vector binary operation to be converted to a scalar binary
2205 // operation that is custom lowered with an illegal type.
2206 EVT ScalarVT = VecVT.getScalarType();
2207 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2208 isOperationCustom(Opc, ScalarVT);
2209}
2210
2212 const GlobalAddressSDNode *GA) const {
2213 // In order to maximise the opportunity for common subexpression elimination,
2214 // keep a separate ADD node for the global address offset instead of folding
2215 // it in the global address node. Later peephole optimisations may choose to
2216 // fold it back in when profitable.
2217 return false;
2218}
2219
2220 // Return one of the following:
2221 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2222 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2223 // positive counterpart, which will be materialized from the first returned
2224 // element. The second returned element indicates that an FNEG should be
2225 // emitted afterwards.
2226// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2227std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2228 EVT VT) const {
2229 if (!Subtarget.hasStdExtZfa())
2230 return std::make_pair(-1, false);
2231
2232 bool IsSupportedVT = false;
2233 if (VT == MVT::f16) {
2234 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2235 } else if (VT == MVT::f32) {
2236 IsSupportedVT = true;
2237 } else if (VT == MVT::f64) {
2238 assert(Subtarget.hasStdExtD() && "Expect D extension");
2239 IsSupportedVT = true;
2240 }
2241
2242 if (!IsSupportedVT)
2243 return std::make_pair(-1, false);
2244
2246 if (Index < 0 && Imm.isNegative())
2247 // Try the combination of its positive counterpart + FNEG.
2248 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2249 else
2250 return std::make_pair(Index, false);
2251}
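// Illustrative examples for getLegalZfaFPImm (assuming Zfa and a supported VT):
// +0.5 appears in the FLI load table, so {index-of-0.5, false} is returned;
// -0.5 does not, but its positive counterpart does, so {index-of-0.5, true} is
// returned and the caller is expected to emit an FNEG afterwards; a value such
// as 0.3 has no FLI encoding at all, so {-1, false} is returned.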
2252
2254 bool ForCodeSize) const {
2255 bool IsLegalVT = false;
2256 if (VT == MVT::f16)
2257 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2258 else if (VT == MVT::f32)
2259 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2260 else if (VT == MVT::f64)
2261 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2262 else if (VT == MVT::bf16)
2263 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2264
2265 if (!IsLegalVT)
2266 return false;
2267
2268 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2269 return true;
2270
2271 // Cannot create a 64-bit floating-point immediate value for RV32.
2272 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2273 // td can handle +0.0 or -0.0 already.
2274 // -0.0 can be created by fmv + fneg.
2275 return Imm.isZero();
2276 }
2277
2278 // Special case: fmv + fneg
2279 if (Imm.isNegZero())
2280 return true;
2281
2282 // Building an integer and then converting requires a fmv at the end of
2283 // the integer sequence.
2284 const int Cost =
2285 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2286 Subtarget);
2287 return Cost <= FPImmCost;
2288}
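// Rough illustration of the cost check above: with the default
// -riscv-fpimm-cost=2, an f64 constant whose bit pattern can be materialized
// into a GPR with a single instruction costs 1 (integer sequence) + 1 (fmv)
// = 2 and is accepted, while a constant needing a longer integer sequence is
// rejected and is typically loaded from the constant pool instead.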
2289
2290// TODO: This is very conservative.
2292 unsigned Index) const {
2294 return false;
2295
2296 // Only support extracting a fixed-length vector from a fixed-length vector
2297 // for now.
2297 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2298 return false;
2299
2300 EVT EltVT = ResVT.getVectorElementType();
2301 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2302
2303 // The smallest type we can slide is i8.
2304 // TODO: We can extract index 0 from a mask vector without a slide.
2305 if (EltVT == MVT::i1)
2306 return false;
2307
2308 unsigned ResElts = ResVT.getVectorNumElements();
2309 unsigned SrcElts = SrcVT.getVectorNumElements();
2310
2311 unsigned MinVLen = Subtarget.getRealMinVLen();
2312 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2313
2314 // If we're extracting only data from the first VLEN bits of the source
2315 // then we can always do this with an m1 vslidedown.vx. Restricting the
2316 // Index ensures we can use a vslidedown.vi.
2317 // TODO: We can generalize this when the exact VLEN is known.
2318 if (Index + ResElts <= MinVLMAX && Index < 31)
2319 return true;
2320
2321 // Conservatively only handle extracting half of a vector.
2322 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2323 // a cheap extract. However, this case is important in practice for
2324 // shuffled extracts of longer vectors. How should we resolve this?
2325 if ((ResElts * 2) != SrcElts)
2326 return false;
2327
2328 // A slide can support an arbitrary index, but we only treat vslidedown.vi as
2329 // cheap.
2330 if (Index >= 32)
2331 return false;
2332
2333 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2334 // the upper half of a vector until we have more test coverage.
2335 return Index == 0 || Index == ResElts;
2336}
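// Example of the rules above, assuming the minimum VLEN is 128 (so MinVLMAX is
// 4 for i32 elements): extracting a v4i32 from a v8i32 at index 0 or index 4
// is reported cheap (the low half via the first check, the high half via the
// half-vector special case), while an extract at index 2 is not.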
2337
2340 EVT VT) const {
2341 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2342 // We might still end up using a GPR but that will be decided based on ABI.
2343 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2344 !Subtarget.hasStdExtZfhminOrZhinxmin())
2345 return MVT::f32;
2346
2348
2349 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2350 return MVT::i64;
2351
2352 return PartVT;
2353}
2354
2357 EVT VT) const {
2358 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2359 // We might still end up using a GPR but that will be decided based on ABI.
2360 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2361 !Subtarget.hasStdExtZfhminOrZhinxmin())
2362 return 1;
2363
2365}
2366
2368 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2369 unsigned &NumIntermediates, MVT &RegisterVT) const {
2371 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2372
2373 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2374 IntermediateVT = MVT::i64;
2375
2376 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2377 RegisterVT = MVT::i64;
2378
2379 return NumRegs;
2380}
2381
2382// Changes the condition code and swaps operands if necessary, so the SetCC
2383// operation matches one of the comparisons supported directly by branches
2384// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2385// with 1/-1.
2386static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2387 ISD::CondCode &CC, SelectionDAG &DAG) {
2388 // If this is a single bit test that can't be handled by ANDI, shift the
2389 // bit to be tested to the MSB and perform a signed compare with 0.
2390 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2391 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2392 isa<ConstantSDNode>(LHS.getOperand(1))) {
2393 uint64_t Mask = LHS.getConstantOperandVal(1);
2394 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2395 unsigned ShAmt = 0;
2396 if (isPowerOf2_64(Mask)) {
2398 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2399 } else {
2400 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2401 }
2402
2403 LHS = LHS.getOperand(0);
2404 if (ShAmt != 0)
2405 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2406 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2407 return;
2408 }
2409 }
2410
2411 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2412 int64_t C = RHSC->getSExtValue();
2413 switch (CC) {
2414 default: break;
2415 case ISD::SETGT:
2416 // Convert X > -1 to X >= 0.
2417 if (C == -1) {
2418 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2419 CC = ISD::SETGE;
2420 return;
2421 }
2422 break;
2423 case ISD::SETLT:
2424 // Convert X < 1 to 0 >= X.
2425 if (C == 1) {
2426 RHS = LHS;
2427 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2428 CC = ISD::SETGE;
2429 return;
2430 }
2431 break;
2432 }
2433 }
2434
2435 switch (CC) {
2436 default:
2437 break;
2438 case ISD::SETGT:
2439 case ISD::SETLE:
2440 case ISD::SETUGT:
2441 case ISD::SETULE:
2443 std::swap(LHS, RHS);
2444 break;
2445 }
2446}
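// Illustrative rewrites performed by translateSetCCForBranch:
//   (setcc X, -1, setgt)  ->  (setcc X, 0, setge)
//   (setcc X, 1,  setlt)  ->  (setcc 0, X, setge)
//   (setcc X, Y,  setule) ->  operands swapped so the branch can use the
//                             directly supported bgeu form.
// Single-bit equality tests whose mask does not fit an ANDI immediate are, as
// described at the top of the function, rewritten as a shift of the tested bit
// into the sign position followed by a signed compare against zero.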
2447
2449 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2450 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2451 if (VT.getVectorElementType() == MVT::i1)
2452 KnownSize *= 8;
2453
2454 switch (KnownSize) {
2455 default:
2456 llvm_unreachable("Invalid LMUL.");
2457 case 8:
2459 case 16:
2461 case 32:
2463 case 64:
2465 case 128:
2467 case 256:
2469 case 512:
2471 }
2472}
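// For illustration (RVVBitsPerBlock is 64 bits): nxv2i32 has a known minimum
// size of 64 bits and maps to LMUL_1, nxv8i32 (256 bits) maps to LMUL_4, and
// an nxv4i1 mask (4 bits, scaled by 8 above) maps to LMUL_F2.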
2473
2475 switch (LMul) {
2476 default:
2477 llvm_unreachable("Invalid LMUL.");
2482 return RISCV::VRRegClassID;
2484 return RISCV::VRM2RegClassID;
2486 return RISCV::VRM4RegClassID;
2488 return RISCV::VRM8RegClassID;
2489 }
2490}
2491
2493 RISCVII::VLMUL LMUL = getLMUL(VT);
2494 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2495 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2496 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2497 LMUL == RISCVII::VLMUL::LMUL_1) {
2498 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2499 "Unexpected subreg numbering");
2500 return RISCV::sub_vrm1_0 + Index;
2501 }
2502 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2503 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2504 "Unexpected subreg numbering");
2505 return RISCV::sub_vrm2_0 + Index;
2506 }
2507 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2508 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2509 "Unexpected subreg numbering");
2510 return RISCV::sub_vrm4_0 + Index;
2511 }
2512 llvm_unreachable("Invalid vector type.");
2513}
2514
2516 if (VT.getVectorElementType() == MVT::i1)
2517 return RISCV::VRRegClassID;
2518 return getRegClassIDForLMUL(getLMUL(VT));
2519}
2520
2521// Attempt to decompose a subvector insert/extract between VecVT and
2522// SubVecVT via subregister indices. Returns the subregister index that
2523// can perform the subvector insert/extract with the given element index, as
2524// well as the index corresponding to any leftover subvectors that must be
2525// further inserted/extracted within the register class for SubVecVT.
2526std::pair<unsigned, unsigned>
2528 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2529 const RISCVRegisterInfo *TRI) {
2530 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2531 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2532 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2533 "Register classes not ordered");
2534 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2535 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2536 // Try to compose a subregister index that takes us from the incoming
2537 // LMUL>1 register class down to the outgoing one. At each step we halve
2538 // the LMUL:
2539 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2540 // Note that this is not guaranteed to find a subregister index, such as
2541 // when we are extracting from one VR type to another.
2542 unsigned SubRegIdx = RISCV::NoSubRegister;
2543 for (const unsigned RCID :
2544 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2545 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2546 VecVT = VecVT.getHalfNumVectorElementsVT();
2547 bool IsHi =
2548 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2549 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2550 getSubregIndexByMVT(VecVT, IsHi));
2551 if (IsHi)
2552 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2553 }
2554 return {SubRegIdx, InsertExtractIdx};
2555}
2556
2557// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2558// stores for those types.
2559bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2560 return !Subtarget.useRVVForFixedLengthVectors() ||
2561 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2562}
2563
2565 if (!ScalarTy.isSimple())
2566 return false;
2567 switch (ScalarTy.getSimpleVT().SimpleTy) {
2568 case MVT::iPTR:
2569 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2570 case MVT::i8:
2571 case MVT::i16:
2572 case MVT::i32:
2573 return true;
2574 case MVT::i64:
2575 return Subtarget.hasVInstructionsI64();
2576 case MVT::f16:
2577 return Subtarget.hasVInstructionsF16();
2578 case MVT::f32:
2579 return Subtarget.hasVInstructionsF32();
2580 case MVT::f64:
2581 return Subtarget.hasVInstructionsF64();
2582 default:
2583 return false;
2584 }
2585}
2586
2587
2588unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2589 return NumRepeatedDivisors;
2590}
2591
2593 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2594 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2595 "Unexpected opcode");
2596 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2597 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2599 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2600 if (!II)
2601 return SDValue();
2602 return Op.getOperand(II->VLOperand + 1 + HasChain);
2603}
2604
2606 const RISCVSubtarget &Subtarget) {
2607 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2608 if (!Subtarget.useRVVForFixedLengthVectors())
2609 return false;
2610
2611 // We only support a set of vector types with a consistent maximum fixed size
2612 // across all supported vector element types to avoid legalization issues.
2613 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2614 // fixed-length vector type we support is 1024 bytes.
2615 if (VT.getFixedSizeInBits() > 1024 * 8)
2616 return false;
2617
2618 unsigned MinVLen = Subtarget.getRealMinVLen();
2619
2620 MVT EltVT = VT.getVectorElementType();
2621
2622 // Don't use RVV for vectors we cannot scalarize if required.
2623 switch (EltVT.SimpleTy) {
2624 // i1 is supported but has different rules.
2625 default:
2626 return false;
2627 case MVT::i1:
2628 // Masks can only use a single register.
2629 if (VT.getVectorNumElements() > MinVLen)
2630 return false;
2631 MinVLen /= 8;
2632 break;
2633 case MVT::i8:
2634 case MVT::i16:
2635 case MVT::i32:
2636 break;
2637 case MVT::i64:
2638 if (!Subtarget.hasVInstructionsI64())
2639 return false;
2640 break;
2641 case MVT::f16:
2642 if (!Subtarget.hasVInstructionsF16Minimal())
2643 return false;
2644 break;
2645 case MVT::bf16:
2646 if (!Subtarget.hasVInstructionsBF16())
2647 return false;
2648 break;
2649 case MVT::f32:
2650 if (!Subtarget.hasVInstructionsF32())
2651 return false;
2652 break;
2653 case MVT::f64:
2654 if (!Subtarget.hasVInstructionsF64())
2655 return false;
2656 break;
2657 }
2658
2659 // Reject elements larger than ELEN.
2660 if (EltVT.getSizeInBits() > Subtarget.getELen())
2661 return false;
2662
2663 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2664 // Don't use RVV for types that don't fit.
2665 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2666 return false;
2667
2668 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2669 // the base fixed length RVV support in place.
2670 if (!VT.isPow2VectorType())
2671 return false;
2672
2673 return true;
2674}
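// For example, with a minimum VLEN of 128 and the default fixed-length LMUL
// cap of 8, v32i32 (1024 bits) needs LMUL 8 and is accepted, while v64i32
// (2048 bits) would need LMUL 16 and is rejected; v3i32 is rejected because it
// is not a power-of-two vector type.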
2675
2676bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2677 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2678}
2679
2680// Return the largest legal scalable vector type that matches VT's element type.
2682 const RISCVSubtarget &Subtarget) {
2683 // This may be called before legal types are setup.
2684 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2685 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2686 "Expected legal fixed length vector!");
2687
2688 unsigned MinVLen = Subtarget.getRealMinVLen();
2689 unsigned MaxELen = Subtarget.getELen();
2690
2691 MVT EltVT = VT.getVectorElementType();
2692 switch (EltVT.SimpleTy) {
2693 default:
2694 llvm_unreachable("unexpected element type for RVV container");
2695 case MVT::i1:
2696 case MVT::i8:
2697 case MVT::i16:
2698 case MVT::i32:
2699 case MVT::i64:
2700 case MVT::bf16:
2701 case MVT::f16:
2702 case MVT::f32:
2703 case MVT::f64: {
2704 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2705 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2706 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2707 unsigned NumElts =
2709 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2710 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2711 return MVT::getScalableVectorVT(EltVT, NumElts);
2712 }
2713 }
2714}
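// For example, with a minimum VLEN of 128: v4i32 (exactly VLEN-sized) is given
// the LMUL=1 container nxv2i32, v8i32 gets nxv4i32 (LMUL=2), and v2i32 gets the
// fractional container nxv1i32, subject to the RVVBitsPerBlock/MaxELen floor
// computed above.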
2715
2717 const RISCVSubtarget &Subtarget) {
2719 Subtarget);
2720}
2721
2723 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2724}
2725
2726// Grow V to consume an entire RVV register.
2728 const RISCVSubtarget &Subtarget) {
2729 assert(VT.isScalableVector() &&
2730 "Expected to convert into a scalable vector!");
2731 assert(V.getValueType().isFixedLengthVector() &&
2732 "Expected a fixed length vector operand!");
2733 SDLoc DL(V);
2734 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2735 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2736}
2737
2738// Shrink V so it's just big enough to maintain a VT's worth of data.
2740 const RISCVSubtarget &Subtarget) {
2742 "Expected to convert into a fixed length vector!");
2743 assert(V.getValueType().isScalableVector() &&
2744 "Expected a scalable vector operand!");
2745 SDLoc DL(V);
2746 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2747 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2748}
2749
2750 /// Return the mask type suitable for masking the provided vector type.
2751 /// This is simply an i1-element vector type of the same
2752 /// (possibly scalable) length.
2753static MVT getMaskTypeFor(MVT VecVT) {
2754 assert(VecVT.isVector());
2756 return MVT::getVectorVT(MVT::i1, EC);
2757}
2758
2759 /// Create an all-ones mask suitable for masking a vector of type VecVT with
2760 /// vector length VL.
2761static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2762 SelectionDAG &DAG) {
2763 MVT MaskVT = getMaskTypeFor(VecVT);
2764 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2765}
2766
2767static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2768 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2769 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2770 // canonicalize the representation. InsertVSETVLI will pick the immediate
2771 // encoding later if profitable.
2772 const auto [MinVLMAX, MaxVLMAX] =
2773 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2774 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2775 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2776
2777 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2778}
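// For illustration: if the exact VLEN is known to be 128 (min == max), a
// request for VL=4 on an nxv2i32 container equals VLMAX, so the X0 register
// (the VLMAX encoding) is returned instead of the constant 4.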
2779
2780static std::pair<SDValue, SDValue>
2782 const RISCVSubtarget &Subtarget) {
2783 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2784 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2785 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2786 return {Mask, VL};
2787}
2788
2789static std::pair<SDValue, SDValue>
2790getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2791 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2792 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2793 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2794 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2795 return {Mask, VL};
2796}
2797
2798// Gets the two common "VL" operands: an all-ones mask and the vector length.
2799// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2800// the vector type that the fixed-length vector is contained in. Otherwise if
2801// VecVT is scalable, then ContainerVT should be the same as VecVT.
2802static std::pair<SDValue, SDValue>
2803getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2804 const RISCVSubtarget &Subtarget) {
2805 if (VecVT.isFixedLengthVector())
2806 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2807 Subtarget);
2808 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2809 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2810}
2811
2813 SelectionDAG &DAG) const {
2814 assert(VecVT.isScalableVector() && "Expected scalable vector");
2815 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2816 VecVT.getVectorElementCount());
2817}
2818
2819std::pair<unsigned, unsigned>
2821 const RISCVSubtarget &Subtarget) {
2822 assert(VecVT.isScalableVector() && "Expected scalable vector");
2823
2824 unsigned EltSize = VecVT.getScalarSizeInBits();
2825 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2826
2827 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2828 unsigned MaxVLMAX =
2829 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2830
2831 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2832 unsigned MinVLMAX =
2833 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2834
2835 return std::make_pair(MinVLMAX, MaxVLMAX);
2836}
2837
2838 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
2839 // little of either is (currently) supported. This can get us into an infinite loop
2840// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2841// as a ..., etc.
2842// Until either (or both) of these can reliably lower any node, reporting that
2843// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2844// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2845// which is not desirable.
2847 EVT VT, unsigned DefinedValues) const {
2848 return false;
2849}
2850
2852 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2853 // implementation-defined.
2854 if (!VT.isVector())
2856 unsigned DLenFactor = Subtarget.getDLenFactor();
2857 unsigned Cost;
2858 if (VT.isScalableVector()) {
2859 unsigned LMul;
2860 bool Fractional;
2861 std::tie(LMul, Fractional) =
2863 if (Fractional)
2864 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2865 else
2866 Cost = (LMul * DLenFactor);
2867 } else {
2868 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2869 }
2870 return Cost;
2871}
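// For illustration: with DLEN == VLEN (DLenFactor 1), an LMUL_2 type costs 2
// and any fractional LMUL costs 1; with DLEN == VLEN/2 (DLenFactor 2), the same
// LMUL_2 type costs 4. This is only a rough reciprocal-throughput model.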
2872
2873
2874/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2875 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2876 /// the operands (index and possibly mask) are handled separately.
2878 return getLMULCost(VT) * getLMULCost(VT);
2879}
2880
2881/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2882/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2883/// or may track the vrgather.vv cost. It is implementation-dependent.
2885 return getLMULCost(VT);
2886}
2887
2888/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2889/// for the type VT. (This does not cover the vslide1up or vslide1down
2890/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2891/// or may track the vrgather.vv cost. It is implementation-dependent.
2893 return getLMULCost(VT);
2894}
2895
2896/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2897/// for the type VT. (This does not cover the vslide1up or vslide1down
2898/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2899/// or may track the vrgather.vv cost. It is implementation-dependent.
2901 return getLMULCost(VT);
2902}
2903
2905 const RISCVSubtarget &Subtarget) {
2906 // RISC-V FP-to-int conversions saturate to the destination register size, but
2907 // don't produce 0 for NaN. We can use a conversion instruction and fix the
2908 // NaN case with a compare and a select.
2909 SDValue Src = Op.getOperand(0);
2910
2911 MVT DstVT = Op.getSimpleValueType();
2912 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2913
2914 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2915
2916 if (!DstVT.isVector()) {
2917 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2918 // the result.
2919 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2920 Src.getValueType() == MVT::bf16) {
2921 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2922 }
2923
2924 unsigned Opc;
2925 if (SatVT == DstVT)
2926 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2927 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2929 else
2930 return SDValue();
2931 // FIXME: Support other SatVTs by clamping before or after the conversion.
2932
2933 SDLoc DL(Op);
2934 SDValue FpToInt = DAG.getNode(
2935 Opc, DL, DstVT, Src,
2937
2938 if (Opc == RISCVISD::FCVT_WU_RV64)
2939 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2940
2941 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2942 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2944 }
2945
2946 // Vectors.
2947
2948 MVT DstEltVT = DstVT.getVectorElementType();
2949 MVT SrcVT = Src.getSimpleValueType();
2950 MVT SrcEltVT = SrcVT.getVectorElementType();
2951 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2952 unsigned DstEltSize = DstEltVT.getSizeInBits();
2953
2954 // Only handle saturating to the destination type.
2955 if (SatVT != DstEltVT)
2956 return SDValue();
2957
2958 // FIXME: Don't support narrowing by more than 1 step for now.
2959 if (SrcEltSize > (2 * DstEltSize))
2960 return SDValue();
2961
2962 MVT DstContainerVT = DstVT;
2963 MVT SrcContainerVT = SrcVT;
2964 if (DstVT.isFixedLengthVector()) {
2965 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2966 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2967 assert(DstContainerVT.getVectorElementCount() ==
2968 SrcContainerVT.getVectorElementCount() &&
2969 "Expected same element count");
2970 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2971 }
2972
2973 SDLoc DL(Op);
2974
2975 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2976
2977 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2978 {Src, Src, DAG.getCondCode(ISD::SETNE),
2979 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2980
2981 // If we need to widen by more than 1 step, promote the FP type first, then
2982 // do a widening convert.
2983 if (DstEltSize > (2 * SrcEltSize)) {
2984 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2985 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2986 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2987 }
2988
2989 unsigned RVVOpc =
2991 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2992
2993 SDValue SplatZero = DAG.getNode(
2994 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2995 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2996 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2997 Res, DAG.getUNDEF(DstContainerVT), VL);
2998
2999 if (DstVT.isFixedLengthVector())
3000 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3001
3002 return Res;
3003}
3004
3006 switch (Opc) {
3007 case ISD::FROUNDEVEN:
3009 case ISD::VP_FROUNDEVEN:
3010 return RISCVFPRndMode::RNE;
3011 case ISD::FTRUNC:
3012 case ISD::STRICT_FTRUNC:
3013 case ISD::VP_FROUNDTOZERO:
3014 return RISCVFPRndMode::RTZ;
3015 case ISD::FFLOOR:
3016 case ISD::STRICT_FFLOOR:
3017 case ISD::VP_FFLOOR:
3018 return RISCVFPRndMode::RDN;
3019 case ISD::FCEIL:
3020 case ISD::STRICT_FCEIL:
3021 case ISD::VP_FCEIL:
3022 return RISCVFPRndMode::RUP;
3023 case ISD::FROUND:
3024 case ISD::STRICT_FROUND:
3025 case ISD::VP_FROUND:
3026 return RISCVFPRndMode::RMM;
3027 case ISD::FRINT:
3028 return RISCVFPRndMode::DYN;
3029 }
3030
3032}
3033
3034 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3035 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3036 // the integer domain and back, taking care to avoid converting values that are
3037 // NaN or already correct.
3038static SDValue
3040 const RISCVSubtarget &Subtarget) {
3041 MVT VT = Op.getSimpleValueType();
3042 assert(VT.isVector() && "Unexpected type");
3043
3044 SDLoc DL(Op);
3045
3046 SDValue Src = Op.getOperand(0);
3047
3048 MVT ContainerVT = VT;
3049 if (VT.isFixedLengthVector()) {
3050 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3051 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3052 }
3053
3054 SDValue Mask, VL;
3055 if (Op->isVPOpcode()) {
3056 Mask = Op.getOperand(1);
3057 if (VT.isFixedLengthVector())
3058 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3059 Subtarget);
3060 VL = Op.getOperand(2);
3061 } else {
3062 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3063 }
3064
3065 // Freeze the source since we are increasing the number of uses.
3066 Src = DAG.getFreeze(Src);
3067
3068 // We do the conversion on the absolute value and fix the sign at the end.
3069 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3070
3071 // Determine the largest integer that can be represented exactly. This and
3072 // values larger than it don't have any fractional bits so don't need to
3073 // be converted.
3074 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3075 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3076 APFloat MaxVal = APFloat(FltSem);
3077 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3078 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3079 SDValue MaxValNode =
3080 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3081 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3082 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3083
3084 // If abs(Src) was larger than MaxVal or nan, keep it.
3085 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3086 Mask =
3087 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3088 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3089 Mask, Mask, VL});
3090
3091 // Truncate to integer and convert back to FP.
3092 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3093 MVT XLenVT = Subtarget.getXLenVT();
3094 SDValue Truncated;
3095
3096 switch (Op.getOpcode()) {
3097 default:
3098 llvm_unreachable("Unexpected opcode");
3099 case ISD::FCEIL:
3100 case ISD::VP_FCEIL:
3101 case ISD::FFLOOR:
3102 case ISD::VP_FFLOOR:
3103 case ISD::FROUND:
3104 case ISD::FROUNDEVEN:
3105 case ISD::VP_FROUND:
3106 case ISD::VP_FROUNDEVEN:
3107 case ISD::VP_FROUNDTOZERO: {
3110 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3111 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3112 break;
3113 }
3114 case ISD::FTRUNC:
3115 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3116 Mask, VL);
3117 break;
3118 case ISD::FRINT:
3119 case ISD::VP_FRINT:
3120 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3121 break;
3122 case ISD::FNEARBYINT:
3123 case ISD::VP_FNEARBYINT:
3124 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3125 Mask, VL);
3126 break;
3127 }
3128
3129 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3130 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3131 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3132 Mask, VL);
3133
3134 // Restore the original sign so that -0.0 is preserved.
3135 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3136 Src, Src, Mask, VL);
3137
3138 if (!VT.isFixedLengthVector())
3139 return Truncated;
3140
3141 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3142}
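// Roughly, the sequence built above is: take |x| with FABS_VL, compare it
// against the largest exactly-representable integer value to mask off elements
// that are NaN or already integral, convert the remaining lanes to integer
// (using a static rounding mode, RTZ, or the VFROUND_NOEXCEPT form, depending
// on the opcode), convert back to FP, and finally apply FCOPYSIGN_VL so that
// the original sign (including -0.0) is preserved.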
3143
3144 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3145 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs of the source to
3146 // qNaNs and converting the new source to integer and back to FP.
3147static SDValue
3149 const RISCVSubtarget &Subtarget) {
3150 SDLoc DL(Op);
3151 MVT VT = Op.getSimpleValueType();
3152 SDValue Chain = Op.getOperand(0);
3153 SDValue Src = Op.getOperand(1);
3154
3155 MVT ContainerVT = VT;
3156 if (VT.isFixedLengthVector()) {
3157 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3158 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3159 }
3160
3161 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3162
3163 // Freeze the source since we are increasing the number of uses.
3164 Src = DAG.getFreeze(Src);
3165
3166 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3167 MVT MaskVT = Mask.getSimpleValueType();
3169 DAG.getVTList(MaskVT, MVT::Other),
3170 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3171 DAG.getUNDEF(MaskVT), Mask, VL});
3172 Chain = Unorder.getValue(1);
3174 DAG.getVTList(ContainerVT, MVT::Other),
3175 {Chain, Src, Src, Src, Unorder, VL});
3176 Chain = Src.getValue(1);
3177
3178 // We do the conversion on the absolute value and fix the sign at the end.
3179 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3180
3181 // Determine the largest integer that can be represented exactly. This and
3182 // values larger than it don't have any fractional bits so don't need to
3183 // be converted.
3184 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3185 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3186 APFloat MaxVal = APFloat(FltSem);
3187 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3188 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3189 SDValue MaxValNode =
3190 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3191 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3192 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3193
3194 // If abs(Src) was larger than MaxVal or nan, keep it.
3195 Mask = DAG.getNode(
3196 RISCVISD::SETCC_VL, DL, MaskVT,
3197 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3198
3199 // Truncate to integer and convert back to FP.
3200 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3201 MVT XLenVT = Subtarget.getXLenVT();
3202 SDValue Truncated;
3203
3204 switch (Op.getOpcode()) {
3205 default:
3206 llvm_unreachable("Unexpected opcode");
3207 case ISD::STRICT_FCEIL:
3208 case ISD::STRICT_FFLOOR:
3209 case ISD::STRICT_FROUND:
3213 Truncated = DAG.getNode(
3214 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3215 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3216 break;
3217 }
3218 case ISD::STRICT_FTRUNC:
3219 Truncated =
3221 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3222 break;
3225 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3226 Mask, VL);
3227 break;
3228 }
3229 Chain = Truncated.getValue(1);
3230
3231 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3232 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3233 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3234 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3235 Truncated, Mask, VL);
3236 Chain = Truncated.getValue(1);
3237 }
3238
3239 // Restore the original sign so that -0.0 is preserved.
3240 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3241 Src, Src, Mask, VL);
3242
3243 if (VT.isFixedLengthVector())
3244 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3245 return DAG.getMergeValues({Truncated, Chain}, DL);
3246}
3247
3248static SDValue
3250 const RISCVSubtarget &Subtarget) {
3251 MVT VT = Op.getSimpleValueType();
3252 if (VT.isVector())
3253 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3254
3255 if (DAG.shouldOptForSize())
3256 return SDValue();
3257
3258 SDLoc DL(Op);
3259 SDValue Src = Op.getOperand(0);
3260
3261 // Create an integer the size of the mantissa with the MSB set. This and all
3262 // values larger than it don't have any fractional bits so don't need to be
3263 // converted.
3264 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3265 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3266 APFloat MaxVal = APFloat(FltSem);
3267 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3268 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3269 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3270
3272 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3273 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3274}
3275
3276// Expand vector LRINT and LLRINT by converting to the integer domain.
3278 const RISCVSubtarget &Subtarget) {
3279 MVT VT = Op.getSimpleValueType();
3280 assert(VT.isVector() && "Unexpected type");
3281
3282 SDLoc DL(Op);
3283 SDValue Src = Op.getOperand(0);
3284 MVT ContainerVT = VT;
3285
3286 if (VT.isFixedLengthVector()) {
3287 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3288 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3289 }
3290
3291 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3292 SDValue Truncated =
3293 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3294
3295 if (!VT.isFixedLengthVector())
3296 return Truncated;
3297
3298 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3299}
3300
3301static SDValue
3303 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3304 SDValue Offset, SDValue Mask, SDValue VL,
3306 if (Merge.isUndef())
3308 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3309 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3310 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3311}
3312
3313static SDValue
3314getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3316 SDValue VL,
3318 if (Merge.isUndef())
3320 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3321 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3322 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3323}
3324
3325static MVT getLMUL1VT(MVT VT) {
3327 "Unexpected vector MVT");
3331}
3332
3336 int64_t Addend;
3337};
3338
3339static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3341 // We will use a SINT_TO_FP to materialize this constant so we should use a
3342 // signed APSInt here.
3343 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3344 // We use an arbitrary rounding mode here. If a floating-point is an exact
3345 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3346 // the rounding mode changes the output value, then it is not an exact
3347 // integer.
3349 bool IsExact;
3350 // If it is out of signed integer range, it will return an invalid operation.
3351 // If it is not an exact integer, IsExact is false.
3352 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3354 !IsExact)
3355 return std::nullopt;
3356 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3357}
3358
3359// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3360// to the (non-zero) step S and start value X. This can be then lowered as the
3361// RVV sequence (VID * S) + X, for example.
3362// The step S is represented as an integer numerator divided by a positive
3363// denominator. Note that the implementation currently only identifies
3364// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3365// cannot detect 2/3, for example.
3366// Note that this method will also match potentially unappealing index
3367// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3368// determine whether this is worth generating code for.
3369static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3370 unsigned EltSizeInBits) {
3371 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3372 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3373 return std::nullopt;
3374 bool IsInteger = Op.getValueType().isInteger();
3375
3376 std::optional<unsigned> SeqStepDenom;
3377 std::optional<int64_t> SeqStepNum, SeqAddend;
3378 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3379 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3380
3381 // First extract the ops into a list of constant integer values. This may not
3382 // be possible for floats if they're not all representable as integers.
3384 const unsigned OpSize = Op.getScalarValueSizeInBits();
3385 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3386 if (Elt.isUndef()) {
3387 Elts[Idx] = std::nullopt;
3388 continue;
3389 }
3390 if (IsInteger) {
3391 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3392 } else {
3393 auto ExactInteger =
3394 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3395 if (!ExactInteger)
3396 return std::nullopt;
3397 Elts[Idx] = *ExactInteger;
3398 }
3399 }
3400
3401 for (auto [Idx, Elt] : enumerate(Elts)) {
3402 // Assume undef elements match the sequence; we just have to be careful
3403 // when interpolating across them.
3404 if (!Elt)
3405 continue;
3406
3407 if (PrevElt) {
3408 // Calculate the step since the last non-undef element, and ensure
3409 // it's consistent across the entire sequence.
3410 unsigned IdxDiff = Idx - PrevElt->second;
3411 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3412
3413 // A zero value difference means that we're somewhere in the middle
3414 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3415 // step change before evaluating the sequence.
3416 if (ValDiff == 0)
3417 continue;
3418
3419 int64_t Remainder = ValDiff % IdxDiff;
3420 // Normalize the step if it's greater than 1.
3421 if (Remainder != ValDiff) {
3422 // The difference must cleanly divide the element span.
3423 if (Remainder != 0)
3424 return std::nullopt;
3425 ValDiff /= IdxDiff;
3426 IdxDiff = 1;
3427 }
3428
3429 if (!SeqStepNum)
3430 SeqStepNum = ValDiff;
3431 else if (ValDiff != SeqStepNum)
3432 return std::nullopt;
3433
3434 if (!SeqStepDenom)
3435 SeqStepDenom = IdxDiff;
3436 else if (IdxDiff != *SeqStepDenom)
3437 return std::nullopt;
3438 }
3439
3440 // Record this non-undef element for later.
3441 if (!PrevElt || PrevElt->first != *Elt)
3442 PrevElt = std::make_pair(*Elt, Idx);
3443 }
3444
3445 // We need to have logged a step for this to count as a legal index sequence.
3446 if (!SeqStepNum || !SeqStepDenom)
3447 return std::nullopt;
3448
3449 // Loop back through the sequence and validate elements we might have skipped
3450 // while waiting for a valid step. While doing this, log any sequence addend.
3451 for (auto [Idx, Elt] : enumerate(Elts)) {
3452 if (!Elt)
3453 continue;
3454 uint64_t ExpectedVal =
3455 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3456 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3457 if (!SeqAddend)
3458 SeqAddend = Addend;
3459 else if (Addend != SeqAddend)
3460 return std::nullopt;
3461 }
3462
3463 assert(SeqAddend && "Must have an addend if we have a step");
3464
3465 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3466}
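// Examples of sequences recognised above: <0,2,4,6> gives step 2/1 and addend
// 0; <1,1,2,2,3,3> gives step 1/2 and addend 1; <3,2,1,0> gives step -1/1 and
// addend 3; something like <0,1,3> is rejected because the step is not
// consistent across the sequence.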
3467
3468// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3469// and lower it as a VRGATHER_VX_VL from the source vector.
3470static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3471 SelectionDAG &DAG,
3472 const RISCVSubtarget &Subtarget) {
3473 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3474 return SDValue();
3475 SDValue Vec = SplatVal.getOperand(0);
3476 // Only perform this optimization on vectors of the same size for simplicity.
3477 // Don't perform this optimization for i1 vectors.
3478 // FIXME: Support i1 vectors, maybe by promoting to i8?
3479 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3480 return SDValue();
3481 SDValue Idx = SplatVal.getOperand(1);
3482 // The index must be a legal type.
3483 if (Idx.getValueType() != Subtarget.getXLenVT())
3484 return SDValue();
3485
3486 MVT ContainerVT = VT;
3487 if (VT.isFixedLengthVector()) {
3488 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3489 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3490 }
3491
3492 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3493
3494 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3495 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3496
3497 if (!VT.isFixedLengthVector())
3498 return Gather;
3499
3500 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3501}
3502
3503
3504/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3505/// which constitute a large proportion of the elements. In such cases we can
3506/// splat a vector with the dominant element and make up the shortfall with
3507 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3508/// Note that this includes vectors of 2 elements by association. The
3509/// upper-most element is the "dominant" one, allowing us to use a splat to
3510/// "insert" the upper element, and an insert of the lower element at position
3511/// 0, which improves codegen.
3513 const RISCVSubtarget &Subtarget) {
3514 MVT VT = Op.getSimpleValueType();
3515 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3516
3517 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3518
3519 SDLoc DL(Op);
3520 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3521
3522 MVT XLenVT = Subtarget.getXLenVT();
3523 unsigned NumElts = Op.getNumOperands();
3524
3525 SDValue DominantValue;
3526 unsigned MostCommonCount = 0;
3527 DenseMap<SDValue, unsigned> ValueCounts;
3528 unsigned NumUndefElts =
3529 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3530
3531 // Track the number of scalar loads we know we'd be inserting, estimated as
3532 // any non-zero floating-point constant. Other kinds of elements are either
3533 // already in registers or are materialized on demand. The threshold at which
3534 // a vector load is more desirable than several scalar materializations and
3535 // vector-insertion instructions is not known.
3536 unsigned NumScalarLoads = 0;
3537
3538 for (SDValue V : Op->op_values()) {
3539 if (V.isUndef())
3540 continue;
3541
3542 ValueCounts.insert(std::make_pair(V, 0));
3543 unsigned &Count = ValueCounts[V];
3544 if (0 == Count)
3545 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3546 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3547
3548 // Is this value dominant? In case of a tie, prefer the highest element as
3549 // it's cheaper to insert near the beginning of a vector than it is at the
3550 // end.
3551 if (++Count >= MostCommonCount) {
3552 DominantValue = V;
3553 MostCommonCount = Count;
3554 }
3555 }
3556
3557 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3558 unsigned NumDefElts = NumElts - NumUndefElts;
3559 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3560
3561 // Don't perform this optimization when optimizing for size, since
3562 // materializing elements and inserting them tends to cause code bloat.
3563 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3564 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3565 ((MostCommonCount > DominantValueCountThreshold) ||
3566 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3567 // Start by splatting the most common element.
3568 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3569
3570 DenseSet<SDValue> Processed{DominantValue};
3571
3572 // We can handle an insert into the last element (of a splat) via
3573 // v(f)slide1down. This is slightly better than the vslideup insert
3574 // lowering as it avoids the need for a vector group temporary. It
3575 // is also better than using vmerge.vx as it avoids the need to
3576 // materialize the mask in a vector register.
3577 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3578 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3579 LastOp != DominantValue) {
3580 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3581 auto OpCode =
3583 if (!VT.isFloatingPoint())
3584 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3585 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3586 LastOp, Mask, VL);
3587 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3588 Processed.insert(LastOp);
3589 }
3590
3591 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3592 for (const auto &OpIdx : enumerate(Op->ops())) {
3593 const SDValue &V = OpIdx.value();
3594 if (V.isUndef() || !Processed.insert(V).second)
3595 continue;
3596 if (ValueCounts[V] == 1) {
3597 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3598 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3599 } else {
3600 // Blend in all instances of this value using a VSELECT, using a
3601 // mask where each bit signals whether that element is the one
3602 // we're after.
3604 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3605 return DAG.getConstant(V == V1, DL, XLenVT);
3606 });
3607 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3608 DAG.getBuildVector(SelMaskTy, DL, Ops),
3609 DAG.getSplatBuildVector(VT, DL, V), Vec);
3610 }
3611 }
3612
3613 return Vec;
3614 }
3615
3616 return SDValue();
3617}
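// A concrete example of the path above: <4 x i32> <a, a, a, b> splats a
// (3 occurrences beats the threshold of NumDefElts - 2 = 2) and then inserts b;
// since b is the last element and occurs once, it is folded in with a
// v(f)slide1down rather than a separate vector insert.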
3618
3620 const RISCVSubtarget &Subtarget) {
3621 MVT VT = Op.getSimpleValueType();
3622 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3623
3624 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3625
3626 SDLoc DL(Op);
3627 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3628
3629 MVT XLenVT = Subtarget.getXLenVT();
3630 unsigned NumElts = Op.getNumOperands();
3631
3632 if (VT.getVectorElementType() == MVT::i1) {
3633 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3634 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3635 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3636 }
3637
3638 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3639 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3640 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3641 }
3642
3643 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3644 // scalar integer chunks whose bit-width depends on the number of mask
3645 // bits and XLEN.
3646 // First, determine the most appropriate scalar integer type to use. This
3647 // is at most XLenVT, but may be shrunk to a smaller vector element type
3648 // according to the size of the final vector - use i8 chunks rather than
3649 // XLenVT if we're producing a v8i1. This results in more consistent
3650 // codegen across RV32 and RV64.
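// For example, a constant v8i1 mask <1,0,1,1,0,0,1,0> becomes the single i8
// chunk 0b01001101 (0x4D), built as a v1i8 below and bitcast back to v8i1.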
3651 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3652 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3653 // If we have to use more than one INSERT_VECTOR_ELT then this
3654 // optimization is likely to increase code size; avoid performing it in
3655 // such a case. We can use a load from a constant pool instead.
3656 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3657 return SDValue();
3658 // Now we can create our integer vector type. Note that it may be larger
3659 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3660 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3661 MVT IntegerViaVecVT =
3662 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3663 IntegerViaVecElts);
3664
3665 uint64_t Bits = 0;
3666 unsigned BitPos = 0, IntegerEltIdx = 0;
3667 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3668
3669 for (unsigned I = 0; I < NumElts;) {
3670 SDValue V = Op.getOperand(I);
3671 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3672 Bits |= ((uint64_t)BitValue << BitPos);
3673 ++BitPos;
3674 ++I;
3675
3676 // Once we accumulate enough bits to fill our scalar type or process the
3677 // last element, insert into our vector and clear our accumulated data.
3678 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3679 if (NumViaIntegerBits <= 32)
3680 Bits = SignExtend64<32>(Bits);
3681 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3682 Elts[IntegerEltIdx] = Elt;
3683 Bits = 0;
3684 BitPos = 0;
3685 IntegerEltIdx++;
3686 }
3687 }
3688
3689 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3690
3691 if (NumElts < NumViaIntegerBits) {
3692 // If we're producing a smaller vector than our minimum legal integer
3693 // type, bitcast to the equivalent (known-legal) mask type, and extract
3694 // our final mask.
3695 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3696 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3697 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3698 DAG.getConstant(0, DL, XLenVT));
3699 } else {
3700 // Else we must have produced an integer type with the same size as the
3701 // mask type; bitcast for the final result.
3702 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3703 Vec = DAG.getBitcast(VT, Vec);
3704 }
3705
3706 return Vec;
3707 }
3708
3709 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3710 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3712 if (!VT.isFloatingPoint())
3713 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3714 Splat =
3715 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3716 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3717 }
3718
3719 // Try and match index sequences, which we can lower to the vid instruction
3720 // with optional modifications. An all-undef vector is matched by
3721 // getSplatValue, above.
3722 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3723 int64_t StepNumerator = SimpleVID->StepNumerator;
3724 unsigned StepDenominator = SimpleVID->StepDenominator;
3725 int64_t Addend = SimpleVID->Addend;
3726
3727 assert(StepNumerator != 0 && "Invalid step");
3728 bool Negate = false;
3729 int64_t SplatStepVal = StepNumerator;
3730 unsigned StepOpcode = ISD::MUL;
3731 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3732 // anyway as the shift of 63 won't fit in uimm5.
3733 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3734 isPowerOf2_64(std::abs(StepNumerator))) {
3735 Negate = StepNumerator < 0;
3736 StepOpcode = ISD::SHL;
3737 SplatStepVal = Log2_64(std::abs(StepNumerator));
3738 }
3739
3740 // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3741 // threshold since it's the immediate value many RVV instructions accept.
3742 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3743 // in a single addi instruction.
3744 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3745 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3746 isPowerOf2_32(StepDenominator) &&
3747 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3748 MVT VIDVT =
3750 MVT VIDContainerVT =
3751 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3752 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3753 // Convert right out of the scalable type so we can use standard ISD
3754 // nodes for the rest of the computation. If we used scalable types with
3755 // these, we'd lose the fixed-length vector info and generate worse
3756 // vsetvli code.
3757 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3758 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3759 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3760 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3761 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3762 }
3763 if (StepDenominator != 1) {
3764 SDValue SplatStep =
3765 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3766 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3767 }
3768 if (Addend != 0 || Negate) {
3769 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3770 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3771 VID);
3772 }
3773 if (VT.isFloatingPoint()) {
3774 // TODO: Use vfwcvt to reduce register pressure.
3775 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3776 }
3777 return VID;
3778 }
3779 }
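// For example, the constant vector <2,4,6,8> has step 2 and addend 2, and is
// emitted roughly as: vid.v, a shift left by 1 (since the step is a power of
// two), then an add of the splatted addend 2.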
3780
3781 // For very small build_vectors, use a single scalar insert of a constant.
3782 // TODO: Base this on constant rematerialization cost, not size.
3783 const unsigned EltBitSize = VT.getScalarSizeInBits();
3784 if (VT.getSizeInBits() <= 32 &&
3786 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3787 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3788 "Unexpected sequence type");
3789 // If we can use the original VL with the modified element type, this
3790 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3791 // be moved into InsertVSETVLI?
3792 unsigned ViaVecLen =
3793 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3794 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3795
3796 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3797 uint64_t SplatValue = 0;
3798 // Construct the amalgamated value at this larger vector type.
3799 for (const auto &OpIdx : enumerate(Op->op_values())) {
3800 const auto &SeqV = OpIdx.value();
3801 if (!SeqV.isUndef())
3802 SplatValue |=
3803 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3804 }
3805
3806 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3807 // achieve better constant materialization.
3808 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3809 SplatValue = SignExtend64<32>(SplatValue);
3810
3811 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3812 DAG.getUNDEF(ViaVecVT),
3813 DAG.getConstant(SplatValue, DL, XLenVT),
3814 DAG.getVectorIdxConstant(0, DL));
3815 if (ViaVecLen != 1)
3816 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3817 MVT::getVectorVT(ViaIntVT, 1), Vec,
3818 DAG.getConstant(0, DL, XLenVT));
3819 return DAG.getBitcast(VT, Vec);
3820 }
3821
3822
3823 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3824 // when re-interpreted as a vector with a larger element type. For example,
3825 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3826 // could instead be splat as
3827 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3828 // TODO: This optimization could also work on non-constant splats, but it
3829 // would require bit-manipulation instructions to construct the splat value.
3830 SmallVector<SDValue> Sequence;
3831 const auto *BV = cast<BuildVectorSDNode>(Op);
3832 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3833 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3834 BV->getRepeatedSequence(Sequence) &&
3835 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3836 unsigned SeqLen = Sequence.size();
3837 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3838 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3839 ViaIntVT == MVT::i64) &&
3840 "Unexpected sequence type");
3841
3842 // If we can use the original VL with the modified element type, this
3843 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3844 // be moved into InsertVSETVLI?
3845 const unsigned RequiredVL = NumElts / SeqLen;
3846 const unsigned ViaVecLen =
3847 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3848 NumElts : RequiredVL;
3849 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3850
3851 unsigned EltIdx = 0;
3852 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3853 uint64_t SplatValue = 0;
3854 // Construct the amalgamated value which can be splatted as this larger
3855 // vector type.
3856 for (const auto &SeqV : Sequence) {
3857 if (!SeqV.isUndef())
3858 SplatValue |=
3859 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3860 EltIdx++;
3861 }
3862
3863 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3864 // achieve better constant materialization.
3865 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3866 SplatValue = SignExtend64<32>(SplatValue);
3867
3868 // Since we can't introduce illegal i64 types at this stage, we can only
3869 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3870 // way we can use RVV instructions to splat.
3871 assert((ViaIntVT.bitsLE(XLenVT) ||
3872 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3873 "Unexpected bitcast sequence");
3874 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3875 SDValue ViaVL =
3876 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3877 MVT ViaContainerVT =
3878 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3879 SDValue Splat =
3880 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3881 DAG.getUNDEF(ViaContainerVT),
3882 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3883 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3884 if (ViaVecLen != RequiredVL)
3885 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3886 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3887 DAG.getConstant(0, DL, XLenVT));
3888 return DAG.getBitcast(VT, Splat);
3889 }
3890 }
3891
3892 // If the number of signbits allows, see if we can lower as a <N x i8>.
3893 // Our main goal here is to reduce LMUL (and thus work) required to
3894 // build the constant, but we will also narrow if the resulting
3895 // narrow vector is known to materialize cheaply.
3896 // TODO: We really should be costing the smaller vector. There are
3897 // profitable cases this misses.
3898 if (EltBitSize > 8 && VT.isInteger() &&
3899 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3900 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3901 if (EltBitSize - SignBits < 8) {
3902 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3903 DL, Op->ops());
3904 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3905 Source, DAG, Subtarget);
3906 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3907 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3908 }
3909 }
3910
3911 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3912 return Res;
3913
3914 // For constant vectors, use generic constant pool lowering. Otherwise,
3915 // we'd have to materialize constants in GPRs just to move them into the
3916 // vector.
3917 return SDValue();
3918}
3919
3920static unsigned getPACKOpcode(unsigned DestBW,
3921 const RISCVSubtarget &Subtarget) {
3922 switch (DestBW) {
3923 default:
3924 llvm_unreachable("Unsupported pack size");
3925 case 16:
3926 return RISCV::PACKH;
3927 case 32:
3928 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3929 case 64:
3930 assert(Subtarget.is64Bit());
3931 return RISCV::PACK;
3932 }
3933}
3934
3935/// Double the element size of the build vector to reduce the number
3936/// of vslide1down in the build vector chain. In the worst case, this
3937/// trades three scalar operations for 1 vector operation. Scalar
3938/// operations are generally lower latency, and for out-of-order cores
3939/// we also benefit from additional parallelism.
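/// For example (illustrative), a v8i8 build_vector of i8 scalars can instead be
/// emitted as a v4i16 build_vector whose elements pack adjacent byte pairs
/// (via packh with Zbkb, otherwise and/shl/or), halving the vslide1down chain.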
3940 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3941 const RISCVSubtarget &Subtarget) {
3942 SDLoc DL(Op);
3943 MVT VT = Op.getSimpleValueType();
3944 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3945 MVT ElemVT = VT.getVectorElementType();
3946 if (!ElemVT.isInteger())
3947 return SDValue();
3948
3949 // TODO: Relax these architectural restrictions, possibly with costing
3950 // of the actual instructions required.
3951 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3952 return SDValue();
3953
3954 unsigned NumElts = VT.getVectorNumElements();
3955 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
3956 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
3957 NumElts % 2 != 0)
3958 return SDValue();
3959
3960 // Produce [B,A] packed into a type twice as wide. Note that all
3961 // scalars are XLenVT, possibly masked (see below).
3962 MVT XLenVT = Subtarget.getXLenVT();
3963 SDValue Mask = DAG.getConstant(
3964 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
3965 auto pack = [&](SDValue A, SDValue B) {
3966 // Bias the scheduling of the inserted operations to near the
3967 // definition of the element - this tends to reduce register
3968 // pressure overall.
3969 SDLoc ElemDL(B);
3970 if (Subtarget.hasStdExtZbkb())
3971 // Note that we're relying on the high bits of the result being
3972 // don't care. For PACKW, the result is *sign* extended.
3973 return SDValue(
3974 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
3975 ElemDL, XLenVT, A, B),
3976 0);
3977
3978 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
3979 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
3980 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
3981 SDNodeFlags Flags;
3982 Flags.setDisjoint(true);
3983 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
3984 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);
3985 };
3986
3987 SmallVector<SDValue> NewOperands;
3988 NewOperands.reserve(NumElts / 2);
3989 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
3990 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
3991 assert(NumElts == NewOperands.size() * 2);
3992 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
3993 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
3994 return DAG.getNode(ISD::BITCAST, DL, VT,
3995 DAG.getBuildVector(WideVecVT, DL, NewOperands));
3996}
3997
3998 // Convert a vXf16 build_vector to a vXi16 build_vector with bitcasts.
3999 static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
4000 MVT VT = Op.getSimpleValueType();
4001 MVT IVT = VT.changeVectorElementType(MVT::i16);
4002 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4003 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
4004 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4005 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
4006 return DAG.getBitcast(VT, Res);
4007}
4008
4009 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4010 const RISCVSubtarget &Subtarget) {
4011 MVT VT = Op.getSimpleValueType();
4012 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4013
4014 // If we don't have scalar f16, we need to bitcast to an i16 vector.
4015 if (VT.getVectorElementType() == MVT::f16 &&
4016 !Subtarget.hasStdExtZfhmin())
4017 return lowerBUILD_VECTORvXf16(Op, DAG);
4018
4019 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4020 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4021 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4022
4023 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4024
4025 SDLoc DL(Op);
4026 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4027
4028 MVT XLenVT = Subtarget.getXLenVT();
4029
4030 if (VT.getVectorElementType() == MVT::i1) {
4031 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4032 // vector type, we have a legal equivalently-sized i8 type, so we can use
4033 // that.
4034 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4035 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4036
4037 SDValue WideVec;
4038 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4039 // For a splat, perform a scalar truncate before creating the wider
4040 // vector.
4041 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4042 DAG.getConstant(1, DL, Splat.getValueType()));
4043 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4044 } else {
4045 SmallVector<SDValue, 8> Ops(Op->op_values());
4046 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4047 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4048 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4049 }
4050
4051 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4052 }
4053
4054 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4055 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4056 return Gather;
4057 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4058 : RISCVISD::VMV_V_X_VL;
4059 if (!VT.isFloatingPoint())
4060 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4061 Splat =
4062 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4063 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4064 }
4065
4066 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4067 return Res;
4068
4069 // If we're compiling for an exact VLEN value, we can split our work per
4070 // register in the register group.
4071 if (const auto VLen = Subtarget.getRealVLen();
4072 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4073 MVT ElemVT = VT.getVectorElementType();
4074 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4075 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4076 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4077 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4078 assert(M1VT == getLMUL1VT(M1VT));
4079
4080 // The following semantically builds up a fixed length concat_vector
4081 // of the component build_vectors. We eagerly lower to scalable and
4082 // insert_subvector here to avoid DAG combining it back to a large
4083 // build_vector.
4084 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
4085 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4086 SDValue Vec = DAG.getUNDEF(ContainerVT);
4087 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4088 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4089 SDValue SubBV =
4090 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4091 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4092 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4093 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4094 DAG.getVectorIdxConstant(InsertIdx, DL));
4095 }
4096 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4097 }
4098
4099 // If we're about to resort to vslide1down (or stack usage), pack our
4100 // elements into the widest scalar type we can. This will force a VL/VTYPE
4101 // toggle, but reduces the critical path, the number of vslide1down ops
4102 // required, and possibly enables scalar folds of the values.
4103 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4104 return Res;
4105
4106 // For m1 vectors, if we have non-undef values in both halves of our vector,
4107 // split the vector into low and high halves, build them separately, then
4108 // use a vselect to combine them. For long vectors, this cuts the critical
4109 // path of the vslide1down sequence in half, and gives us an opportunity
4110 // to special case each half independently. Note that we don't change the
4111 // length of the sub-vectors here, so if both fallback to the generic
4112 // vslide1down path, we should be able to fold the vselect into the final
4113 // vslidedown (for the undef tail) for the first half w/ masking.
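// For example, a fully-defined v16i8 build_vector can be emitted as
// [e0..e7, undef x 8] and [undef x 8, e8..e15], then merged with a vselect
// whose mask is true for the first eight lanes.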
4114 unsigned NumElts = VT.getVectorNumElements();
4115 unsigned NumUndefElts =
4116 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4117 unsigned NumDefElts = NumElts - NumUndefElts;
4118 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4119 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4120 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4121 SmallVector<SDValue> MaskVals;
4122 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4123 SubVecAOps.reserve(NumElts);
4124 SubVecBOps.reserve(NumElts);
4125 for (unsigned i = 0; i < NumElts; i++) {
4126 SDValue Elem = Op->getOperand(i);
4127 if (i < NumElts / 2) {
4128 SubVecAOps.push_back(Elem);
4129 SubVecBOps.push_back(UndefElem);
4130 } else {
4131 SubVecAOps.push_back(UndefElem);
4132 SubVecBOps.push_back(Elem);
4133 }
4134 bool SelectMaskVal = (i < NumElts / 2);
4135 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4136 }
4137 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4138 MaskVals.size() == NumElts);
4139
4140 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4141 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4142 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4143 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4144 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4145 }
4146
4147 // Cap the cost at a value linear to the number of elements in the vector.
4148 // The default lowering is to use the stack. The vector store + scalar loads
4149 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4150 // being (at least) linear in LMUL. As a result, using the vslidedown
4151 // lowering for every element ends up being VL*LMUL.
4152 // TODO: Should we be directly costing the stack alternative? Doing so might
4153 // give us a more accurate upper bound.
4154 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4155
4156 // TODO: unify with TTI getSlideCost.
4157 InstructionCost PerSlideCost = 1;
4158 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4159 default: break;
4160 case RISCVII::VLMUL::LMUL_2:
4161 PerSlideCost = 2;
4162 break;
4163 case RISCVII::VLMUL::LMUL_4:
4164 PerSlideCost = 4;
4165 break;
4166 case RISCVII::VLMUL::LMUL_8:
4167 PerSlideCost = 8;
4168 break;
4169 }
4170
4171 // TODO: Should we be using the build instseq then cost + evaluate scheme
4172 // we use for integer constants here?
4173 unsigned UndefCount = 0;
4174 for (const SDValue &V : Op->ops()) {
4175 if (V.isUndef()) {
4176 UndefCount++;
4177 continue;
4178 }
4179 if (UndefCount) {
4180 LinearBudget -= PerSlideCost;
4181 UndefCount = 0;
4182 }
4183 LinearBudget -= PerSlideCost;
4184 }
4185 if (UndefCount) {
4186 LinearBudget -= PerSlideCost;
4187 }
4188
4189 if (LinearBudget < 0)
4190 return SDValue();
4191
4192 assert((!VT.isFloatingPoint() ||
4193 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4194 "Illegal type which will result in reserved encoding");
4195
4196 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4197
4198 SDValue Vec;
4199 UndefCount = 0;
4200 for (SDValue V : Op->ops()) {
4201 if (V.isUndef()) {
4202 UndefCount++;
4203 continue;
4204 }
4205
4206 // Start our sequence with a TA splat in the hopes that hardware is able to
4207 // recognize there's no dependency on the prior value of our temporary
4208 // register.
4209 if (!Vec) {
4210 Vec = DAG.getSplatVector(VT, DL, V);
4211 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4212 UndefCount = 0;
4213 continue;
4214 }
4215
4216 if (UndefCount) {
4217 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4218 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4219 Vec, Offset, Mask, VL, Policy);
4220 UndefCount = 0;
4221 }
4222 auto OpCode =
4223 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4224 if (!VT.isFloatingPoint())
4225 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4226 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4227 V, Mask, VL);
4228 }
4229 if (UndefCount) {
4230 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4231 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4232 Vec, Offset, Mask, VL, Policy);
4233 }
4234 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4235}
4236
4237static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4238 SDValue Lo, SDValue Hi, SDValue VL,
4239 SelectionDAG &DAG) {
4240 if (!Passthru)
4241 Passthru = DAG.getUNDEF(VT);
4242 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4243 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4244 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4245 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4246 // node in order to try and match RVV vector/scalar instructions.
4247 if ((LoC >> 31) == HiC)
4248 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4249
4250 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4251 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4252 // vlmax vsetvli or vsetivli to change the VL.
4253 // FIXME: Support larger constants?
4254 // FIXME: Support non-constant VLs by saturating?
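// For example, splatting the i64 value 0x0000000500000005 with VL=2 becomes a
// vmv.v.x of 5 into an i32 vector with VL=4, then a bitcast back to the i64 type.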
4255 if (LoC == HiC) {
4256 SDValue NewVL;
4257 if (isAllOnesConstant(VL) ||
4258 (isa<RegisterSDNode>(VL) &&
4259 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4260 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4261 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4262 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4263
4264 if (NewVL) {
4265 MVT InterVT =
4266 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4267 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4268 DAG.getUNDEF(InterVT), Lo, NewVL);
4269 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4270 }
4271 }
4272 }
4273
4274 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4275 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4276 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4277 Hi.getConstantOperandVal(1) == 31)
4278 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4279
4280 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4281 // even if it might be sign extended.
4282 if (Hi.isUndef())
4283 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4284
4285 // Fall back to a stack store and stride x0 vector load.
4286 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4287 Hi, VL);
4288}
4289
4290// Called by type legalization to handle splat of i64 on RV32.
4291// FIXME: We can optimize this when the type has sign or zero bits in one
4292// of the halves.
4293static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4294 SDValue Scalar, SDValue VL,
4295 SelectionDAG &DAG) {
4296 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4297 SDValue Lo, Hi;
4298 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4299 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4300}
4301
4302 // This function lowers a splat of a scalar operand Scalar with the vector
4303// length VL. It ensures the final sequence is type legal, which is useful when
4304// lowering a splat after type legalization.
4305static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4306 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4307 const RISCVSubtarget &Subtarget) {
4308 bool HasPassthru = Passthru && !Passthru.isUndef();
4309 if (!HasPassthru && !Passthru)
4310 Passthru = DAG.getUNDEF(VT);
4311 if (VT.isFloatingPoint())
4312 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4313
4314 MVT XLenVT = Subtarget.getXLenVT();
4315
4316 // Simplest case is that the operand needs to be promoted to XLenVT.
4317 if (Scalar.getValueType().bitsLE(XLenVT)) {
4318 // If the operand is a constant, sign extend to increase our chances
4319 // of being able to use a .vi instruction. ANY_EXTEND would become a zero
4320 // extend and the simm5 check in isel would fail.
4321 // FIXME: Should we ignore the upper bits in isel instead?
4322 unsigned ExtOpc =
4323 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4324 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4325 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4326 }
4327
4328 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4329 "Unexpected scalar for splat lowering!");
4330
4331 if (isOneConstant(VL) && isNullConstant(Scalar))
4332 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4333 DAG.getConstant(0, DL, XLenVT), VL);
4334
4335 // Otherwise use the more complicated splatting algorithm.
4336 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4337}
4338
4339// This function lowers an insert of a scalar operand Scalar into lane
4340// 0 of the vector regardless of the value of VL. The contents of the
4341// remaining lanes of the result vector are unspecified. VL is assumed
4342// to be non-zero.
4343 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4344 const SDLoc &DL, SelectionDAG &DAG,
4345 const RISCVSubtarget &Subtarget) {
4346 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4347
4348 const MVT XLenVT = Subtarget.getXLenVT();
4349 SDValue Passthru = DAG.getUNDEF(VT);
4350
4351 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4352 isNullConstant(Scalar.getOperand(1))) {
4353 SDValue ExtractedVal = Scalar.getOperand(0);
4354 // The element types must be the same.
4355 if (ExtractedVal.getValueType().getVectorElementType() ==
4356 VT.getVectorElementType()) {
4357 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4358 MVT ExtractedContainerVT = ExtractedVT;
4359 if (ExtractedContainerVT.isFixedLengthVector()) {
4360 ExtractedContainerVT = getContainerForFixedLengthVector(
4361 DAG, ExtractedContainerVT, Subtarget);
4362 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4363 ExtractedVal, DAG, Subtarget);
4364 }
4365 if (ExtractedContainerVT.bitsLE(VT))
4366 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4367 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4368 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4369 DAG.getVectorIdxConstant(0, DL));
4370 }
4371 }
4372
4373
4374 if (VT.isFloatingPoint())
4375 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4376 DAG.getUNDEF(VT), Scalar, VL);
4377
4378 // Avoid the tricky legalization cases by falling back to using the
4379 // splat code which already handles it gracefully.
4380 if (!Scalar.getValueType().bitsLE(XLenVT))
4381 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4382 DAG.getConstant(1, DL, XLenVT),
4383 VT, DL, DAG, Subtarget);
4384
4385 // If the operand is a constant, sign extend to increase our chances
4386 // of being able to use a .vi instruction. ANY_EXTEND would become a zero
4387 // extend and the simm5 check in isel would fail.
4388 // FIXME: Should we ignore the upper bits in isel instead?
4389 unsigned ExtOpc =
4390 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4391 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4392 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4393 DAG.getUNDEF(VT), Scalar, VL);
4394}
4395
4396 // Does this shuffle extract either the even or odd elements of a vector?
4397// That is, specifically, either (a) or (b) below.
4398// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4399// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4400// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4401// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4402 // Returns {Src Vector, Even Elements} on success.
4403static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4404 SDValue V2, ArrayRef<int> Mask,
4405 const RISCVSubtarget &Subtarget) {
4406 // Need to be able to widen the vector.
4407 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4408 return false;
4409
4410 // Both inputs must be extracts.
4411 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4412 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4413 return false;
4414
4415 // Extracting from the same source.
4416 SDValue Src = V1.getOperand(0);
4417 if (Src != V2.getOperand(0))
4418 return false;
4419
4420 // Src needs to have twice the number of elements.
4421 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4422 return false;
4423
4424 // The extracts must extract the two halves of the source.
4425 if (V1.getConstantOperandVal(1) != 0 ||
4426 V2.getConstantOperandVal(1) != Mask.size())
4427 return false;
4428
4429 // First index must be the first even or odd element from V1.
4430 if (Mask[0] != 0 && Mask[0] != 1)
4431 return false;
4432
4433 // The others must increase by 2 each time.
4434 // TODO: Support undef elements?
4435 for (unsigned i = 1; i != Mask.size(); ++i)
4436 if (Mask[i] != Mask[i - 1] + 2)
4437 return false;
4438
4439 return true;
4440}
4441
4442/// Is this shuffle interleaving contiguous elements from one vector into the
4443/// even elements and contiguous elements from another vector into the odd
4444/// elements. \p EvenSrc will contain the element that should be in the first
4445/// even element. \p OddSrc will contain the element that should be in the first
4446/// odd element. These can be the first element in a source or the element half
4447/// way through the source.
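/// For example, with two v8i8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the low halves of both sources, giving EvenSrc = 0 and OddSrc = 8.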
4448static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4449 int &OddSrc, const RISCVSubtarget &Subtarget) {
4450 // We need to be able to widen elements to the next larger integer type.
4451 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4452 return false;
4453
4454 int Size = Mask.size();
4455 int NumElts = VT.getVectorNumElements();
4456 assert(Size == (int)NumElts && "Unexpected mask size");
4457
4458 SmallVector<unsigned, 2> StartIndexes;
4459 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4460 return false;
4461
4462 EvenSrc = StartIndexes[0];
4463 OddSrc = StartIndexes[1];
4464
4465 // One source should be low half of first vector.
4466 if (EvenSrc != 0 && OddSrc != 0)
4467 return false;
4468
4469 // Subvectors will be extracted from either the start of the two input
4470 // vectors, or from the start and middle of the first vector if it's a unary
4471 // interleave.
4472 // In both cases, HalfNumElts will be extracted.
4473 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4474 // we'll create an illegal extract_subvector.
4475 // FIXME: We could support other values using a slidedown first.
4476 int HalfNumElts = NumElts / 2;
4477 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4478}
4479
4480/// Match shuffles that concatenate two vectors, rotate the concatenation,
4481/// and then extract the original number of elements from the rotated result.
4482/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4483/// returned rotation amount is for a rotate right, where elements move from
4484/// higher elements to lower elements. \p LoSrc indicates the first source
4485/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4486/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4487/// 0 or 1 if a rotation is found.
4488///
4489/// NOTE: We talk about rotate to the right which matches how bit shift and
4490/// rotate instructions are described where LSBs are on the right, but LLVM IR
4491/// and the table below write vectors with the lowest elements on the left.
4492static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4493 int Size = Mask.size();
4494
4495 // We need to detect various ways of spelling a rotation:
4496 // [11, 12, 13, 14, 15, 0, 1, 2]
4497 // [-1, 12, 13, 14, -1, -1, 1, -1]
4498 // [-1, -1, -1, -1, -1, -1, 1, 2]
4499 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4500 // [-1, 4, 5, 6, -1, -1, 9, -1]
4501 // [-1, 4, 5, 6, -1, -1, -1, -1]
4502 int Rotation = 0;
4503 LoSrc = -1;
4504 HiSrc = -1;
4505 for (int i = 0; i != Size; ++i) {
4506 int M = Mask[i];
4507 if (M < 0)
4508 continue;
4509
4510 // Determine where a rotate vector would have started.
4511 int StartIdx = i - (M % Size);
4512 // The identity rotation isn't interesting, stop.
4513 if (StartIdx == 0)
4514 return -1;
4515
4516 // If we found the tail of a vector the rotation must be the missing
4517 // front. If we found the head of a vector, it must be how much of the
4518 // head.
4519 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4520
4521 if (Rotation == 0)
4522 Rotation = CandidateRotation;
4523 else if (Rotation != CandidateRotation)
4524 // The rotations don't match, so we can't match this mask.
4525 return -1;
4526
4527 // Compute which value this mask is pointing at.
4528 int MaskSrc = M < Size ? 0 : 1;
4529
4530 // Compute which of the two target values this index should be assigned to.
4531 // This reflects whether the high elements are remaining or the low elements
4532 // are remaining.
4533 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4534
4535 // Either set up this value if we've not encountered it before, or check
4536 // that it remains consistent.
4537 if (TargetSrc < 0)
4538 TargetSrc = MaskSrc;
4539 else if (TargetSrc != MaskSrc)
4540 // This may be a rotation, but it pulls from the inputs in some
4541 // unsupported interleaving.
4542 return -1;
4543 }
4544
4545 // Check that we successfully analyzed the mask, and normalize the results.
4546 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4547 assert((LoSrc >= 0 || HiSrc >= 0) &&
4548 "Failed to find a rotated input vector!");
4549
4550 return Rotation;
4551}
4552
4553// Lower a deinterleave shuffle to vnsrl.
4554// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4555// -> [p, q, r, s] (EvenElts == false)
4556// VT is the type of the vector to return, <[vscale x ]n x ty>
4557// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4558 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4559 bool EvenElts,
4560 const RISCVSubtarget &Subtarget,
4561 SelectionDAG &DAG) {
4562 // The result is a vector of type <m x n x ty>
4563 MVT ContainerVT = VT;
4564 // Convert fixed vectors to scalable if needed
4565 if (ContainerVT.isFixedLengthVector()) {
4566 assert(Src.getSimpleValueType().isFixedLengthVector());
4567 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4568
4569 // The source is a vector of type <m x n*2 x ty>
4570 MVT SrcContainerVT =
4571 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4572 ContainerVT.getVectorElementCount() * 2);
4573 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4574 }
4575
4576 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4577
4578 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4579 // This also converts FP to int.
4580 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4581 MVT WideSrcContainerVT = MVT::getVectorVT(
4582 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4583 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4584
4585 // The integer version of the container type.
4586 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4587
4588 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4589 // the original element size.
4590 unsigned Shift = EvenElts ? 0 : EltBits;
4591 SDValue SplatShift = DAG.getNode(
4592 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4593 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4594 SDValue Res =
4595 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4596 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4597 // Cast back to FP if needed.
4598 Res = DAG.getBitcast(ContainerVT, Res);
4599
4600 if (VT.isFixedLengthVector())
4601 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4602 return Res;
4603}
4604
4605// Lower the following shuffle to vslidedown.
4606// a)
4607// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4608// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4609// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4610// b)
4611// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4612// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4613// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4614// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4615// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4616// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4617 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4618 SDValue V1, SDValue V2,
4619 ArrayRef<int> Mask,
4620 const RISCVSubtarget &Subtarget,
4621 SelectionDAG &DAG) {
4622 auto findNonEXTRACT_SUBVECTORParent =
4623 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4624 uint64_t Offset = 0;
4625 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4626 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4627 // a scalable vector. But we don't want to match the case.
4628 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4629 Offset += Parent.getConstantOperandVal(1);
4630 Parent = Parent.getOperand(0);
4631 }
4632 return std::make_pair(Parent, Offset);
4633 };
4634
4635 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4636 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4637
4638 // Extracting from the same source.
4639 SDValue Src = V1Src;
4640 if (Src != V2Src)
4641 return SDValue();
4642
4643 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4644 SmallVector<int, 16> NewMask(Mask);
4645 for (size_t i = 0; i != NewMask.size(); ++i) {
4646 if (NewMask[i] == -1)
4647 continue;
4648
4649 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4650 NewMask[i] = NewMask[i] + V1IndexOffset;
4651 } else {
4652 // Minus NewMask.size() is needed. Otherwise, the b case would be
4653 // <5,6,7,12> instead of <5,6,7,8>.
4654 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4655 }
4656 }
4657
4658 // First index must be known and non-zero. It will be used as the slidedown
4659 // amount.
4660 if (NewMask[0] <= 0)
4661 return SDValue();
4662
4663 // NewMask is also continuous.
4664 for (unsigned i = 1; i != NewMask.size(); ++i)
4665 if (NewMask[i - 1] + 1 != NewMask[i])
4666 return SDValue();
4667
4668 MVT XLenVT = Subtarget.getXLenVT();
4669 MVT SrcVT = Src.getSimpleValueType();
4670 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4671 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4672 SDValue Slidedown =
4673 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4674 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4675 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4676 return DAG.getNode(
4677 ISD::EXTRACT_SUBVECTOR, DL, VT,
4678 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4679 DAG.getConstant(0, DL, XLenVT));
4680}
4681
4682// Because vslideup leaves the destination elements at the start intact, we can
4683// use it to perform shuffles that insert subvectors:
4684//
4685// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4686// ->
4687// vsetvli zero, 8, e8, mf2, ta, ma
4688// vslideup.vi v8, v9, 4
4689//
4690// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4691// ->
4692// vsetvli zero, 5, e8, mf2, tu, ma
4693 // vslideup.vi v8, v9, 2
4694 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4695 SDValue V1, SDValue V2,
4696 ArrayRef<int> Mask,
4697 const RISCVSubtarget &Subtarget,
4698 SelectionDAG &DAG) {
4699 unsigned NumElts = VT.getVectorNumElements();
4700 int NumSubElts, Index;
4701 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4702 Index))
4703 return SDValue();
4704
4705 bool OpsSwapped = Mask[Index] < (int)NumElts;
4706 SDValue InPlace = OpsSwapped ? V2 : V1;
4707 SDValue ToInsert = OpsSwapped ? V1 : V2;
4708
4709 MVT XLenVT = Subtarget.getXLenVT();
4710 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4711 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4712 // We slide up by the index that the subvector is being inserted at, and set
4713 // VL to the index + the number of elements being inserted.
4714 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4715 // If we're adding a suffix to the in place vector, i.e. inserting right
4716 // up to the very end of it, then we don't actually care about the tail.
4717 if (NumSubElts + Index >= (int)NumElts)
4718 Policy |= RISCVII::TAIL_AGNOSTIC;
4719
4720 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4721 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4722 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4723
4724 SDValue Res;
4725 // If we're inserting into the lowest elements, use a tail undisturbed
4726 // vmv.v.v.
4727 if (Index == 0)
4728 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4729 VL);
4730 else
4731 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4732 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4733 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4734}
4735
4736/// Match v(f)slide1up/down idioms. These operations involve sliding
4737/// N-1 elements to make room for an inserted scalar at one end.
4738 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4739 SDValue V1, SDValue V2,
4740 ArrayRef<int> Mask,
4741 const RISCVSubtarget &Subtarget,
4742 SelectionDAG &DAG) {
4743 bool OpsSwapped = false;
4744 if (!isa<BuildVectorSDNode>(V1)) {
4745 if (!isa<BuildVectorSDNode>(V2))
4746 return SDValue();
4747 std::swap(V1, V2);
4748 OpsSwapped = true;
4749 }
4750 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4751 if (!Splat)
4752 return SDValue();
4753
4754 // Return true if the mask could describe a slide of Mask.size() - 1
4755 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4756 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4757 const unsigned S = (Offset > 0) ? 0 : -Offset;
4758 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4759 for (unsigned i = S; i != E; ++i)
4760 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4761 return false;
4762 return true;
4763 };
4764
4765 const unsigned NumElts = VT.getVectorNumElements();
4766 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4767 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4768 return SDValue();
4769
4770 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4771 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4772 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4773 return SDValue();
4774
4775 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4776 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4777 auto OpCode = IsVSlidedown ?
4778 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4779 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4780 if (!VT.isFloatingPoint())
4781 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4782 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4783 DAG.getUNDEF(ContainerVT),
4784 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4785 Splat, TrueMask, VL);
4786 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4787}
4788
4789// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4790// to create an interleaved vector of <[vscale x] n*2 x ty>.
4791// This requires that the size of ty is less than the subtarget's maximum ELEN.
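// For example, given EvenV = <a, b> and OddV = <c, d>, this produces <a, c, b, d>.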
4792 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4793 const SDLoc &DL, SelectionDAG &DAG,
4794 const RISCVSubtarget &Subtarget) {
4795 MVT VecVT = EvenV.getSimpleValueType();
4796 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4797 // Convert fixed vectors to scalable if needed
4798 if (VecContainerVT.isFixedLengthVector()) {
4799 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4800 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4801 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4802 }
4803
4804 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4805
4806 // We're working with a vector of the same size as the resulting
4807 // interleaved vector, but with half the number of elements and
4808 // twice the SEW (Hence the restriction on not using the maximum
4809 // ELEN)
4810 MVT WideVT =
4811 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4812 VecVT.getVectorElementCount());
4813 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4814 if (WideContainerVT.isFixedLengthVector())
4815 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4816
4817 // Bitcast the input vectors to integers in case they are FP
4818 VecContainerVT = VecContainerVT.changeTypeToInteger();
4819 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4820 OddV = DAG.getBitcast(VecContainerVT, OddV);
4821
4822 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4823 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4824
4825 SDValue Interleaved;
4826 if (OddV.isUndef()) {
4827 // If OddV is undef, this is a zero extend.
4828 // FIXME: Not only does this optimize the code, it fixes some correctness
4829 // issues because MIR does not have freeze.
4830 Interleaved =
4831 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4832 } else if (Subtarget.hasStdExtZvbb()) {
4833 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4834 SDValue OffsetVec =
4835 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4836 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4837 OffsetVec, Passthru, Mask, VL);
4838 if (!EvenV.isUndef())
4839 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4840 Interleaved, EvenV, Passthru, Mask, VL);
4841 } else if (EvenV.isUndef()) {
4842 Interleaved =
4843 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4844
4845 SDValue OffsetVec =
4846 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4847 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4848 Interleaved, OffsetVec, Passthru, Mask, VL);
4849 } else {
4850 // FIXME: We should freeze the odd vector here. We already handled the case
4851 // of provably undef/poison above.
4852
4853 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4854 // vwaddu.vv
4855 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4856 OddV, Passthru, Mask, VL);
4857
4858 // Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, i.e. all ones.
4859 SDValue AllOnesVec = DAG.getSplatVector(
4860 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4861 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4862 OddV, AllOnesVec, Passthru, Mask, VL);
4863
4864 // Add the two together so we get
4865 // (OddV * 0xff...ff) + (OddV + EvenV)
4866 // = (OddV * 0x100...00) + EvenV
4867 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4868 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
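// For example, with 8-bit elements, even-lane value a and odd-lane value b
// combine into the 16-bit value b*256 + a, which reads back as the adjacent
// pair [a, b] once bitcast to the result element type.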
4869 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4870 Interleaved, OddsMul, Passthru, Mask, VL);
4871 }
4872
4873 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4874 MVT ResultContainerVT = MVT::getVectorVT(
4875 VecVT.getVectorElementType(), // Make sure to use original type
4876 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4877 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4878
4879 // Convert back to a fixed vector if needed
4880 MVT ResultVT =
4881 MVT::getVectorVT(VecVT.getVectorElementType(),
4882 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4883 if (ResultVT.isFixedLengthVector())
4884 Interleaved =
4885 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4886
4887 return Interleaved;
4888}
4889
4890// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4891// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4893 SelectionDAG &DAG,
4894 const RISCVSubtarget &Subtarget) {
4895 SDLoc DL(SVN);
4896 MVT VT = SVN->getSimpleValueType(0);
4897 SDValue V = SVN->getOperand(0);
4898 unsigned NumElts = VT.getVectorNumElements();
4899
4900 assert(VT.getVectorElementType() == MVT::i1);
4901
4902 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4903 SVN->getMask().size()) ||
4904 !SVN->getOperand(1).isUndef())
4905 return SDValue();
4906
4907 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4908 EVT ViaVT = EVT::getVectorVT(
4909 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4910 EVT ViaBitVT =
4911 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4912
4913 // If we don't have zvbb or the larger element type > ELEN, the operation will
4914 // be illegal.
4915 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4916 ViaVT) ||
4917 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4918 return SDValue();
4919
4920 // If the bit vector doesn't fit exactly into the larger element type, we need
4921 // to insert it into the larger vector and then shift up the reversed bits
4922 // afterwards to get rid of the gap introduced.
4923 if (ViaEltSize > NumElts)
4924 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4925 V, DAG.getVectorIdxConstant(0, DL));
4926
4927 SDValue Res =
4928 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4929
4930 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4931 // element type.
4932 if (ViaEltSize > NumElts)
4933 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4934 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4935
4936 Res = DAG.getBitcast(ViaBitVT, Res);
4937
4938 if (ViaEltSize > NumElts)
4939 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4940 DAG.getVectorIdxConstant(0, DL));
4941 return Res;
4942}
4943
4944 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4945 SelectionDAG &DAG,
4946 const RISCVSubtarget &Subtarget,
4947 MVT &RotateVT, unsigned &RotateAmt) {
4948 SDLoc DL(SVN);
4949
4950 EVT VT = SVN->getValueType(0);
4951 unsigned NumElts = VT.getVectorNumElements();
4952 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4953 unsigned NumSubElts;
4954 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4955 NumElts, NumSubElts, RotateAmt))
4956 return false;
4957 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4958 NumElts / NumSubElts);
4959
4960 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4961 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4962}
4963
4964// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4965// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4966// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4967 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4968 SelectionDAG &DAG,
4969 const RISCVSubtarget &Subtarget) {
4970 SDLoc DL(SVN);
4971
4972 EVT VT = SVN->getValueType(0);
4973 unsigned RotateAmt;
4974 MVT RotateVT;
4975 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4976 return SDValue();
4977
4978 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4979
4980 SDValue Rotate;
4981 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4982 // so canonicalize to vrev8.
4983 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4984 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4985 else
4986 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4987 DAG.getConstant(RotateAmt, DL, RotateVT));
4988
4989 return DAG.getBitcast(VT, Rotate);
4990}
4991
4992// If compiling with an exactly known VLEN, see if we can split a
4993// shuffle on m2 or larger into a small number of m1 sized shuffles
4994 // which writes each destination register exactly once.
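// For example, with a known VLEN of 128, a v32i8 (m2) shuffle whose mask fills
// each destination register from a single source register can be lowered as
// two v16i8 (m1) shuffles plus insert_subvectors.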
4995 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4996 SelectionDAG &DAG,
4997 const RISCVSubtarget &Subtarget) {
4998 SDLoc DL(SVN);
4999 MVT VT = SVN->getSimpleValueType(0);
5000 SDValue V1 = SVN->getOperand(0);
5001 SDValue V2 = SVN->getOperand(1);
5002 ArrayRef<int> Mask = SVN->getMask();
5003 unsigned NumElts = VT.getVectorNumElements();
5004
5005 // If we don't know exact data layout, not much we can do. If this
5006 // is already m1 or smaller, no point in splitting further.
5007 const auto VLen = Subtarget.getRealVLen();
5008 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5009 return SDValue();
5010
5011 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5012 // expansion for.
5013 unsigned RotateAmt;
5014 MVT RotateVT;
5015 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5016 return SDValue();
5017
5018 MVT ElemVT = VT.getVectorElementType();
5019 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5020 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5021
5022 SmallVector<std::pair<int, SmallVector<int>>>
5023 OutMasks(VRegsPerSrc, {-1, {}});
5024
5025 // Check if our mask can be done as a 1-to-1 mapping from source
5026 // to destination registers in the group without needing to
5027 // write each destination more than once.
5028 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5029 int DstVecIdx = DstIdx / ElemsPerVReg;
5030 int DstSubIdx = DstIdx % ElemsPerVReg;
5031 int SrcIdx = Mask[DstIdx];
5032 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5033 continue;
5034 int SrcVecIdx = SrcIdx / ElemsPerVReg;
5035 int SrcSubIdx = SrcIdx % ElemsPerVReg;
5036 if (OutMasks[DstVecIdx].first == -1)
5037 OutMasks[DstVecIdx].first = SrcVecIdx;
5038 if (OutMasks[DstVecIdx].first != SrcVecIdx)
5039 // Note: This case could easily be handled by keeping track of a chain
5040 // of source values and generating two element shuffles below. This is
5041 // less an implementation question, and more a profitability one.
5042 return SDValue();
5043
5044 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5045 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5046 }
5047
5048 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5049 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5050 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5051 assert(M1VT == getLMUL1VT(M1VT));
5052 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5053 SDValue Vec = DAG.getUNDEF(ContainerVT);
5054 // The following semantically builds up a fixed length concat_vector
5055 // of the component shuffle_vectors. We eagerly lower to scalable here
5056 // to avoid DAG combining it back to a large shuffle_vector again.
5057 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5058 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5059 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5060 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5061 if (SrcVecIdx == -1)
5062 continue;
5063 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5064 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5065 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5066 DAG.getVectorIdxConstant(ExtractIdx, DL));
5067 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5068 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5069 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5070 unsigned InsertIdx = DstVecIdx * NumOpElts;
5071 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5072 DAG.getVectorIdxConstant(InsertIdx, DL));
5073 }
5074 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5075}
5076
5077 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5078 const RISCVSubtarget &Subtarget) {
5079 SDValue V1 = Op.getOperand(0);
5080 SDValue V2 = Op.getOperand(1);
5081 SDLoc DL(Op);
5082 MVT XLenVT = Subtarget.getXLenVT();
5083 MVT VT = Op.getSimpleValueType();
5084 unsigned NumElts = VT.getVectorNumElements();
5085 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5086
5087 if (VT.getVectorElementType() == MVT::i1) {
5088 // Lower to a vror.vi of a larger element type if possible before we promote
5089 // i1s to i8s.
5090 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5091 return V;
5092 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5093 return V;
5094
5095 // Promote i1 shuffle to i8 shuffle.
5096 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5097 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5098 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5099 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5100 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5101 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5102 ISD::SETNE);
5103 }
5104
5105 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5106
5107 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5108
5109 if (SVN->isSplat()) {
5110 const int Lane = SVN->getSplatIndex();
5111 if (Lane >= 0) {
5112 MVT SVT = VT.getVectorElementType();
5113
5114 // Turn splatted vector load into a strided load with an X0 stride.
5115 SDValue V = V1;
5116 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5117 // with undef.
5118 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5119 int Offset = Lane;
5120 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5121 int OpElements =
5122 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5123 V = V.getOperand(Offset / OpElements);
5124 Offset %= OpElements;
5125 }
5126
5127 // We need to ensure the load isn't atomic or volatile.
5128 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5129 auto *Ld = cast<LoadSDNode>(V);
5130 Offset *= SVT.getStoreSize();
5131 SDValue NewAddr = DAG.getMemBasePlusOffset(
5132 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5133
5134 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5135 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5136 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5137 SDValue IntID =
5138 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5139 SDValue Ops[] = {Ld->getChain(),
5140 IntID,
5141 DAG.getUNDEF(ContainerVT),
5142 NewAddr,
5143 DAG.getRegister(RISCV::X0, XLenVT),
5144 VL};
5145 SDValue NewLoad = DAG.getMemIntrinsicNode(
5146 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5147 DAG.getMachineFunction().getMachineMemOperand(
5148 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5149 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5150 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5151 }
5152
5153 MVT SplatVT = ContainerVT;
5154
5155 // If we don't have Zfh, we need to use an integer scalar load.
5156 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5157 SVT = MVT::i16;
5158 SplatVT = ContainerVT.changeVectorElementType(SVT);
5159 }
5160
5161 // Otherwise use a scalar load and splat. This will give the best
5162 // opportunity to fold a splat into the operation. ISel can turn it into
5163 // the x0 strided load if we aren't able to fold away the select.
5164 if (SVT.isFloatingPoint())
5165 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5166 Ld->getPointerInfo().getWithOffset(Offset),
5167 Ld->getOriginalAlign(),
5168 Ld->getMemOperand()->getFlags());
5169 else
5170 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5171 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5172 Ld->getOriginalAlign(),
5173 Ld->getMemOperand()->getFlags());
5174 DAG.makeEquivalentMemoryOrdering(Ld, V);
5175 
5176 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5177 : RISCVISD::VMV_V_X_VL;
5178 SDValue Splat =
5179 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5180 Splat = DAG.getBitcast(ContainerVT, Splat);
5181 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5182 }
5183
5184 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5185 assert(Lane < (int)NumElts && "Unexpected lane!");
5186 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5187 V1, DAG.getConstant(Lane, DL, XLenVT),
5188 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5189 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5190 }
5191 }
5192
5193 // For exact VLEN m2 or greater, try to split to m1 operations if we
5194 // can split cleanly.
5195 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5196 return V;
5197
5198 ArrayRef<int> Mask = SVN->getMask();
5199
5200 if (SDValue V =
5201 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5202 return V;
5203
5204 if (SDValue V =
5205 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5206 return V;
5207
5208 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5209 // available.
5210 if (Subtarget.hasStdExtZvkb())
5211 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5212 return V;
5213
5214  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors
5215  // may be undef, which can be handled with a single SLIDEDOWN/UP.
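  // For example, the single-source mask <2,3,4,5,6,7,0,1> on an 8 element
  // vector is a rotate by 2: a SLIDEDOWN by 2 brings elements 2..7 into
  // positions 0..5, and a SLIDEUP by 6 places elements 0..1 into positions
  // 6..7.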
5216 int LoSrc, HiSrc;
5217 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5218 if (Rotation > 0) {
5219 SDValue LoV, HiV;
5220 if (LoSrc >= 0) {
5221 LoV = LoSrc == 0 ? V1 : V2;
5222 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5223 }
5224 if (HiSrc >= 0) {
5225 HiV = HiSrc == 0 ? V1 : V2;
5226 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5227 }
5228
5229 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5230 // to slide LoV up by (NumElts - Rotation).
5231 unsigned InvRotate = NumElts - Rotation;
5232
5233 SDValue Res = DAG.getUNDEF(ContainerVT);
5234 if (HiV) {
5235      // Even though we could use a smaller VL, we don't, so that we avoid a
5236      // vsetivli toggle.
5237 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5238 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5239 }
5240 if (LoV)
5241 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5242 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5243                        RISCVII::TAIL_AGNOSTIC);
5244
5245 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5246 }
5247
5248 // If this is a deinterleave and we can widen the vector, then we can use
5249 // vnsrl to deinterleave.
5250 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5251 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5252 Subtarget, DAG);
5253 }
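  // The vnsrl trick (implemented in getDeinterleaveViaVNSRL) is, roughly: view
  // the source as a vector with elements twice as wide, then a narrowing shift
  // right by 0 keeps the even (low) halves and a shift by eltbits keeps the
  // odd (high) halves, which is exactly a deinterleave.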
5254
5255 if (SDValue V =
5256 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5257 return V;
5258
5259 // Detect an interleave shuffle and lower to
5260 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
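  // This works because, viewed as a single wide element, an interleaved pair
  // is Even + Odd * 2^eltbits, and
  //   (Even + Odd) + Odd * (2^eltbits - 1) == Even + Odd * 2^eltbits,
  // i.e. a widening add followed by a widening multiply-accumulate with the
  // all-ones scalar reproduces the interleaved value.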
5261 int EvenSrc, OddSrc;
5262 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5263 // Extract the halves of the vectors.
5264 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5265
5266 int Size = Mask.size();
5267 SDValue EvenV, OddV;
5268 assert(EvenSrc >= 0 && "Undef source?");
5269 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5270 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5271 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5272
5273 assert(OddSrc >= 0 && "Undef source?");
5274 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5275 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5276 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5277
5278 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5279 }
5280
5281
5282 // Handle any remaining single source shuffles
5283 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5284 if (V2.isUndef()) {
5285 // We might be able to express the shuffle as a bitrotate. But even if we
5286 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5287 // shifts and a vor will have a higher throughput than a vrgather.
5288 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5289 return V;
5290
5291 if (VT.getScalarSizeInBits() == 8 &&
5292 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5293 // On such a vector we're unable to use i8 as the index type.
5294 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5295 // may involve vector splitting if we're already at LMUL=8, or our
5296 // user-supplied maximum fixed-length LMUL.
5297 return SDValue();
5298 }
5299
5300 // Base case for the two operand recursion below - handle the worst case
5301 // single source shuffle.
5302 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5303 MVT IndexVT = VT.changeTypeToInteger();
5304 // Since we can't introduce illegal index types at this stage, use i16 and
5305 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5306 // than XLenVT.
5307 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5308 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5309 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5310 }
5311
5312 // If the mask allows, we can do all the index computation in 16 bits. This
5313 // requires less work and less register pressure at high LMUL, and creates
5314 // smaller constants which may be cheaper to materialize.
5315 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5316 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5317 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5318 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5319 }
5320
5321 MVT IndexContainerVT =
5322 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5323
5324 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5325 SmallVector<SDValue> GatherIndicesLHS;
5326 for (int MaskIndex : Mask) {
5327 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5328 GatherIndicesLHS.push_back(IsLHSIndex
5329 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5330 : DAG.getUNDEF(XLenVT));
5331 }
5332 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5333 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5334 Subtarget);
5335 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5336 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5337 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5338 }
5339
5340 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5341 // merged with a second vrgather.
5342 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5343
5344 // Now construct the mask that will be used by the blended vrgather operation.
5345 // Construct the appropriate indices into each vector.
5346 for (int MaskIndex : Mask) {
5347 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5348 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5349 ? MaskIndex : -1);
5350 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5351 }
5352
5353 // Try to pick a profitable operand order.
5354 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5355 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5356
5357 // Recursively invoke lowering for each operand if we had two
5358 // independent single source shuffles, and then combine the result via a
5359 // vselect. Note that the vselect will likely be folded back into the
5360 // second permute (vrgather, or other) by the post-isel combine.
5361 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5362 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5363
5364 SmallVector<SDValue> MaskVals;
5365 for (int MaskIndex : Mask) {
5366 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5367 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5368 }
5369
5370 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5371 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5372 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5373
5374 if (SwapOps)
5375 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5376 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5377}
5378
5379 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5380   // Support splats for any type. These should type legalize well.
5381 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5382 return true;
5383
5384 // Only support legal VTs for other shuffles for now.
5385 if (!isTypeLegal(VT))
5386 return false;
5387
5388 MVT SVT = VT.getSimpleVT();
5389
5390 // Not for i1 vectors.
5391 if (SVT.getScalarType() == MVT::i1)
5392 return false;
5393
5394 int Dummy1, Dummy2;
5395 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5396 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5397}
5398
5399// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5400// the exponent.
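// Worked example for an i16 element holding 8: converting to f32 gives 8.0,
// whose biased exponent field is 3 + 127 = 130. Shifting the float bits right
// by 23 extracts 130, so the trailing zero count is 130 - 127 = 3 and the
// leading zero count is (127 + 15) - 130 = 12.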
5401SDValue
5402RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5403 SelectionDAG &DAG) const {
5404 MVT VT = Op.getSimpleValueType();
5405 unsigned EltSize = VT.getScalarSizeInBits();
5406 SDValue Src = Op.getOperand(0);
5407 SDLoc DL(Op);
5408 MVT ContainerVT = VT;
5409
5410 SDValue Mask, VL;
5411 if (Op->isVPOpcode()) {
5412 Mask = Op.getOperand(1);
5413 if (VT.isFixedLengthVector())
5414 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5415 Subtarget);
5416 VL = Op.getOperand(2);
5417 }
5418
5419   // We choose an FP type that can represent the value if possible. Otherwise,
5420   // we use a round-towards-zero conversion so the result's exponent is correct.
5421 // TODO: Use f16 for i8 when possible?
5422 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5423 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5424 FloatEltVT = MVT::f32;
5425 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5426
5427 // Legal types should have been checked in the RISCVTargetLowering
5428 // constructor.
5429 // TODO: Splitting may make sense in some cases.
5430 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5431 "Expected legal float type!");
5432
5433 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5434 // The trailing zero count is equal to log2 of this single bit value.
5435 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5436 SDValue Neg = DAG.getNegative(Src, DL, VT);
5437 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5438 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5439 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5440 Src, Mask, VL);
5441 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5442 }
5443
5444 // We have a legal FP type, convert to it.
5445 SDValue FloatVal;
5446 if (FloatVT.bitsGT(VT)) {
5447 if (Op->isVPOpcode())
5448 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5449 else
5450 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5451 } else {
5452 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5453 if (VT.isFixedLengthVector()) {
5454 ContainerVT = getContainerForFixedLengthVector(VT);
5455 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5456 }
5457 if (!Op->isVPOpcode())
5458 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5459     SDValue RTZRM =
5460         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5461 MVT ContainerFloatVT =
5462 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5463 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5464 Src, Mask, RTZRM, VL);
5465 if (VT.isFixedLengthVector())
5466 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5467 }
5468 // Bitcast to integer and shift the exponent to the LSB.
5469 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5470 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5471 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5472
5473 SDValue Exp;
5474 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5475 if (Op->isVPOpcode()) {
5476 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5477 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5478 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5479 } else {
5480 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5481 DAG.getConstant(ShiftAmt, DL, IntVT));
5482 if (IntVT.bitsLT(VT))
5483 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5484 else if (IntVT.bitsGT(VT))
5485 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5486 }
5487
5488 // The exponent contains log2 of the value in biased form.
5489 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5490 // For trailing zeros, we just need to subtract the bias.
5491 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5492 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5493 DAG.getConstant(ExponentBias, DL, VT));
5494 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5495 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5496 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5497
5498 // For leading zeros, we need to remove the bias and convert from log2 to
5499 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5500 unsigned Adjust = ExponentBias + (EltSize - 1);
5501 SDValue Res;
5502 if (Op->isVPOpcode())
5503 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5504 Mask, VL);
5505 else
5506 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5507
5508   // With a zero input, the result above equals Adjust, which is greater than
5509   // EltSize. Hence, we can clamp the result with min(Res, EltSize) for CTLZ.
5510 if (Op.getOpcode() == ISD::CTLZ)
5511 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5512 else if (Op.getOpcode() == ISD::VP_CTLZ)
5513 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5514 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5515 return Res;
5516}
5517
5518SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5519 SelectionDAG &DAG) const {
5520 SDLoc DL(Op);
5521 MVT XLenVT = Subtarget.getXLenVT();
5522 SDValue Source = Op->getOperand(0);
5523 MVT SrcVT = Source.getSimpleValueType();
5524 SDValue Mask = Op->getOperand(1);
5525 SDValue EVL = Op->getOperand(2);
5526
5527 if (SrcVT.isFixedLengthVector()) {
5528 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5529 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5530 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5531 Subtarget);
5532 SrcVT = ContainerVT;
5533 }
5534
5535 // Convert to boolean vector.
5536 if (SrcVT.getScalarType() != MVT::i1) {
5537 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5538 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5539 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5540 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5541 DAG.getUNDEF(SrcVT), Mask, EVL});
5542 }
5543
5544 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5545 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5546     // In this case, we can interpret poison as -1, so there is nothing further to do.
5547 return Res;
5548
5549 // Convert -1 to VL.
5550 SDValue SetCC =
5551 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5552 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5553 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5554}
5555
5556// While RVV has alignment restrictions, we should always be able to load as a
5557// legal equivalently-sized byte-typed vector instead. This method is
5558 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5559// the load is already correctly-aligned, it returns SDValue().
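// For example, a v4i32 load with only byte alignment is re-expressed below as
// a v16i8 load of the same overall width followed by a bitcast back to v4i32.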
5560SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5561 SelectionDAG &DAG) const {
5562 auto *Load = cast<LoadSDNode>(Op);
5563 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5564
5565   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5566                                      Load->getMemoryVT(),
5567 *Load->getMemOperand()))
5568 return SDValue();
5569
5570 SDLoc DL(Op);
5571 MVT VT = Op.getSimpleValueType();
5572 unsigned EltSizeBits = VT.getScalarSizeInBits();
5573 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5574 "Unexpected unaligned RVV load type");
5575 MVT NewVT =
5576 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5577 assert(NewVT.isValid() &&
5578 "Expecting equally-sized RVV vector types to be legal");
5579 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5580 Load->getPointerInfo(), Load->getOriginalAlign(),
5581 Load->getMemOperand()->getFlags());
5582 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5583}
5584
5585// While RVV has alignment restrictions, we should always be able to store as a
5586// legal equivalently-sized byte-typed vector instead. This method is
5587 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5588// returns SDValue() if the store is already correctly aligned.
5589SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5590 SelectionDAG &DAG) const {
5591 auto *Store = cast<StoreSDNode>(Op);
5592 assert(Store && Store->getValue().getValueType().isVector() &&
5593 "Expected vector store");
5594
5595   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5596                                      Store->getMemoryVT(),
5597 *Store->getMemOperand()))
5598 return SDValue();
5599
5600 SDLoc DL(Op);
5601 SDValue StoredVal = Store->getValue();
5602 MVT VT = StoredVal.getSimpleValueType();
5603 unsigned EltSizeBits = VT.getScalarSizeInBits();
5604 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5605 "Unexpected unaligned RVV store type");
5606 MVT NewVT =
5607 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5608 assert(NewVT.isValid() &&
5609 "Expecting equally-sized RVV vector types to be legal");
5610 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5611 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5612 Store->getPointerInfo(), Store->getOriginalAlign(),
5613 Store->getMemOperand()->getFlags());
5614}
5615
5616 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5617                              const RISCVSubtarget &Subtarget) {
5618 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5619
5620 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5621
5622 // All simm32 constants should be handled by isel.
5623 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5624 // this check redundant, but small immediates are common so this check
5625 // should have better compile time.
5626 if (isInt<32>(Imm))
5627 return Op;
5628
5629   // We only need to cost the immediate if constant pool lowering is enabled.
5630 if (!Subtarget.useConstantPoolForLargeInts())
5631 return Op;
5632
5634 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5635 return Op;
5636
5637 // Optimizations below are disabled for opt size. If we're optimizing for
5638 // size, use a constant pool.
5639 if (DAG.shouldOptForSize())
5640 return SDValue();
5641
5642   // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5643   // do that if it will avoid a constant pool.
5644 // It will require an extra temporary register though.
5645 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5646 // low and high 32 bits are the same and bit 31 and 63 are set.
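  // For example, 0x1234567812345678 is (ADD (SLLI X, 32), X) with
  // X = 0x12345678, and with Zba 0x8765432187654321 is
  // (ADD_UW X, (SLLI X, 32)) with X = 0x87654321.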
5647 unsigned ShiftAmt, AddOpc;
5648 RISCVMatInt::InstSeq SeqLo =
5649 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5650 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5651 return Op;
5652
5653 return SDValue();
5654}
5655
5656 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5657                                  const RISCVSubtarget &Subtarget) {
5658 SDLoc dl(Op);
5659 AtomicOrdering FenceOrdering =
5660 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5661 SyncScope::ID FenceSSID =
5662 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5663
5664 if (Subtarget.hasStdExtZtso()) {
5665 // The only fence that needs an instruction is a sequentially-consistent
5666 // cross-thread fence.
5667 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5668 FenceSSID == SyncScope::System)
5669 return Op;
5670
5671 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5672 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5673 }
5674
5675 // singlethread fences only synchronize with signal handlers on the same
5676 // thread and thus only need to preserve instruction order, not actually
5677 // enforce memory ordering.
5678 if (FenceSSID == SyncScope::SingleThread)
5679 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5680 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5681
5682 return Op;
5683}
5684
5686 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5687 "Unexpected custom legalisation");
5688
5689 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
5690 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5691 SDLoc DL(Op);
5692 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5693 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5694 SDValue Result =
5695 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5696
5697 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5698 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5699 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5700 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5701 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5702 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5703 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5704}
5705
5707 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5708 "Unexpected custom legalisation");
5709
5710 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5711 // sign extend allows overflow of the lower 32 bits to be detected on
5712 // the promoted size.
5713 SDLoc DL(Op);
5714 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5715 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5716 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5717 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5718}
5719
5720// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5721 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5722   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5723 "Unexpected custom legalisation");
5724 if (isa<ConstantSDNode>(Op.getOperand(1)))
5725 return SDValue();
5726
5727 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5728 SDLoc DL(Op);
5729 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5730 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5731 SDValue WideOp =
5732 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5733 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5734 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5735 DAG.getValueType(MVT::i32));
5736 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5737 ISD::SETNE);
5738 return DAG.getMergeValues({Res, Ovf}, DL);
5739}
5740
5741// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5742 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5743   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5744 "Unexpected custom legalisation");
5745 SDLoc DL(Op);
5746 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5747 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5748 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5749 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5750 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5751 DAG.getValueType(MVT::i32));
5752 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5753 ISD::SETNE);
5754 return DAG.getMergeValues({Res, Ovf}, DL);
5755}
5756
5757SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5758 SelectionDAG &DAG) const {
5759 SDLoc DL(Op);
5760 MVT VT = Op.getSimpleValueType();
5761 MVT XLenVT = Subtarget.getXLenVT();
5762 unsigned Check = Op.getConstantOperandVal(1);
5763 unsigned TDCMask = 0;
5764 if (Check & fcSNan)
5765 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5766 if (Check & fcQNan)
5767 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5768   if (Check & fcPosInf)
5769     TDCMask |= RISCV::FPMASK_Positive_Infinity;
5770   if (Check & fcNegInf)
5771     TDCMask |= RISCV::FPMASK_Negative_Infinity;
5772   if (Check & fcPosNormal)
5773     TDCMask |= RISCV::FPMASK_Positive_Normal;
5774   if (Check & fcNegNormal)
5775     TDCMask |= RISCV::FPMASK_Negative_Normal;
5776   if (Check & fcPosSubnormal)
5777     TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5778   if (Check & fcNegSubnormal)
5779     TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5780 if (Check & fcPosZero)
5781 TDCMask |= RISCV::FPMASK_Positive_Zero;
5782 if (Check & fcNegZero)
5783 TDCMask |= RISCV::FPMASK_Negative_Zero;
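  // TDCMask mirrors the bit layout of the scalar fclass / vector vfclass.v
  // result: bit 0 is -inf, bits 1-3 are negative normal/subnormal/zero, bits
  // 4-7 are +0/positive subnormal/positive normal/+inf, and bits 8 and 9 are
  // signaling and quiet NaN respectively.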
5784
5785 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5786
5787 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5788
5789 if (VT.isVector()) {
5790 SDValue Op0 = Op.getOperand(0);
5791 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5792
5793 if (VT.isScalableVector()) {
5794     MVT DstVT = VT0.changeVectorElementTypeToInteger();
5795     auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5796 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5797 Mask = Op.getOperand(2);
5798 VL = Op.getOperand(3);
5799 }
5800 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5801 VL, Op->getFlags());
5802 if (IsOneBitMask)
5803 return DAG.getSetCC(DL, VT, FPCLASS,
5804                             DAG.getConstant(TDCMask, DL, DstVT),
5805                             ISD::SETEQ);
5806 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5807 DAG.getConstant(TDCMask, DL, DstVT));
5808 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5809 ISD::SETNE);
5810 }
5811
5812 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5813 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5814 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5815 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5816 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5817 Mask = Op.getOperand(2);
5818 MVT MaskContainerVT =
5819 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5820 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5821 VL = Op.getOperand(3);
5822 }
5823 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5824
5825 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5826 Mask, VL, Op->getFlags());
5827
5828 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5829 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5830 if (IsOneBitMask) {
5831 SDValue VMSEQ =
5832 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5833 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5834 DAG.getUNDEF(ContainerVT), Mask, VL});
5835 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5836 }
5837 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5838 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5839
5840 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5841 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5842 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5843
5844 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5845 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5846 DAG.getUNDEF(ContainerVT), Mask, VL});
5847 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5848 }
5849
5850 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5851 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5852   SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5853                              ISD::SETNE);
5854 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5855}
5856
5857// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5858// operations propagate nans.
5859 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5860                                       const RISCVSubtarget &Subtarget) {
5861 SDLoc DL(Op);
5862 MVT VT = Op.getSimpleValueType();
5863
5864 SDValue X = Op.getOperand(0);
5865 SDValue Y = Op.getOperand(1);
5866
5867 if (!VT.isVector()) {
5868 MVT XLenVT = Subtarget.getXLenVT();
5869
5870   // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5871   // ensures that when one input is a nan, the other will also be a nan,
5872   // allowing the nan to propagate. If both inputs are nan, this will swap the
5873   // inputs, which is harmless.
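    // For example, fmaximum(NaN, 3.0) must return NaN, whereas the fmax
    // instruction alone would return 3.0; selecting the NaN operand into the
    // other side first preserves the required propagation.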
5874
5875 SDValue NewY = Y;
5876 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5877 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5878 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5879 }
5880
5881 SDValue NewX = X;
5882 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5883 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5884 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5885 }
5886
5887 unsigned Opc =
5888 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5889 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5890 }
5891
5892   // Check for NaNs before converting fixed-length vectors to scalable vectors.
5893 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5894 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5895
5896 MVT ContainerVT = VT;
5897 if (VT.isFixedLengthVector()) {
5898 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5899 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5900 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5901 }
5902
5903 SDValue Mask, VL;
5904 if (Op->isVPOpcode()) {
5905 Mask = Op.getOperand(2);
5906 if (VT.isFixedLengthVector())
5907 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5908 Subtarget);
5909 VL = Op.getOperand(3);
5910 } else {
5911 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5912 }
5913
5914 SDValue NewY = Y;
5915 if (!XIsNeverNan) {
5916 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5917 {X, X, DAG.getCondCode(ISD::SETOEQ),
5918 DAG.getUNDEF(ContainerVT), Mask, VL});
5919 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5920 DAG.getUNDEF(ContainerVT), VL);
5921 }
5922
5923 SDValue NewX = X;
5924 if (!YIsNeverNan) {
5925 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5926 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5927 DAG.getUNDEF(ContainerVT), Mask, VL});
5928 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5929 DAG.getUNDEF(ContainerVT), VL);
5930 }
5931
5932 unsigned Opc =
5933       Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5934           ? RISCVISD::VFMAX_VL
5935           : RISCVISD::VFMIN_VL;
5936 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5937 DAG.getUNDEF(ContainerVT), Mask, VL);
5938 if (VT.isFixedLengthVector())
5939 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5940 return Res;
5941}
5942
5943/// Get a RISC-V target specified VL op for a given SDNode.
5944static unsigned getRISCVVLOp(SDValue Op) {
5945#define OP_CASE(NODE) \
5946 case ISD::NODE: \
5947 return RISCVISD::NODE##_VL;
5948#define VP_CASE(NODE) \
5949 case ISD::VP_##NODE: \
5950 return RISCVISD::NODE##_VL;
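// For example, OP_CASE(ADD) expands to "case ISD::ADD: return RISCVISD::ADD_VL;"
// and VP_CASE(ADD) expands to "case ISD::VP_ADD: return RISCVISD::ADD_VL;".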
5951 // clang-format off
5952 switch (Op.getOpcode()) {
5953 default:
5954 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5955 OP_CASE(ADD)
5956 OP_CASE(SUB)
5957 OP_CASE(MUL)
5958 OP_CASE(MULHS)
5959 OP_CASE(MULHU)
5960 OP_CASE(SDIV)
5961 OP_CASE(SREM)
5962 OP_CASE(UDIV)
5963 OP_CASE(UREM)
5964 OP_CASE(SHL)
5965 OP_CASE(SRA)
5966 OP_CASE(SRL)
5967 OP_CASE(ROTL)
5968 OP_CASE(ROTR)
5969 OP_CASE(BSWAP)
5970 OP_CASE(CTTZ)
5971 OP_CASE(CTLZ)
5972 OP_CASE(CTPOP)
5973 OP_CASE(BITREVERSE)
5974 OP_CASE(SADDSAT)
5975 OP_CASE(UADDSAT)
5976 OP_CASE(SSUBSAT)
5977 OP_CASE(USUBSAT)
5978 OP_CASE(AVGFLOORS)
5979 OP_CASE(AVGFLOORU)
5980 OP_CASE(AVGCEILS)
5981 OP_CASE(AVGCEILU)
5982 OP_CASE(FADD)
5983 OP_CASE(FSUB)
5984 OP_CASE(FMUL)
5985 OP_CASE(FDIV)
5986 OP_CASE(FNEG)
5987 OP_CASE(FABS)
5988 OP_CASE(FSQRT)
5989 OP_CASE(SMIN)
5990 OP_CASE(SMAX)
5991 OP_CASE(UMIN)
5992 OP_CASE(UMAX)
5993 OP_CASE(STRICT_FADD)
5994 OP_CASE(STRICT_FSUB)
5995 OP_CASE(STRICT_FMUL)
5996 OP_CASE(STRICT_FDIV)
5997 OP_CASE(STRICT_FSQRT)
5998 VP_CASE(ADD) // VP_ADD
5999 VP_CASE(SUB) // VP_SUB
6000 VP_CASE(MUL) // VP_MUL
6001 VP_CASE(SDIV) // VP_SDIV
6002 VP_CASE(SREM) // VP_SREM
6003 VP_CASE(UDIV) // VP_UDIV
6004 VP_CASE(UREM) // VP_UREM
6005 VP_CASE(SHL) // VP_SHL
6006 VP_CASE(FADD) // VP_FADD
6007 VP_CASE(FSUB) // VP_FSUB
6008 VP_CASE(FMUL) // VP_FMUL
6009 VP_CASE(FDIV) // VP_FDIV
6010 VP_CASE(FNEG) // VP_FNEG
6011 VP_CASE(FABS) // VP_FABS
6012 VP_CASE(SMIN) // VP_SMIN
6013 VP_CASE(SMAX) // VP_SMAX
6014 VP_CASE(UMIN) // VP_UMIN
6015 VP_CASE(UMAX) // VP_UMAX
6016 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6017 VP_CASE(SETCC) // VP_SETCC
6018 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6019 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6020 VP_CASE(BITREVERSE) // VP_BITREVERSE
6021 VP_CASE(SADDSAT) // VP_SADDSAT
6022 VP_CASE(UADDSAT) // VP_UADDSAT
6023 VP_CASE(SSUBSAT) // VP_SSUBSAT
6024 VP_CASE(USUBSAT) // VP_USUBSAT
6025 VP_CASE(BSWAP) // VP_BSWAP
6026 VP_CASE(CTLZ) // VP_CTLZ
6027 VP_CASE(CTTZ) // VP_CTTZ
6028 VP_CASE(CTPOP) // VP_CTPOP
6029   case ISD::CTLZ_ZERO_UNDEF:
6030   case ISD::VP_CTLZ_ZERO_UNDEF:
6031 return RISCVISD::CTLZ_VL;
6032   case ISD::CTTZ_ZERO_UNDEF:
6033   case ISD::VP_CTTZ_ZERO_UNDEF:
6034 return RISCVISD::CTTZ_VL;
6035 case ISD::FMA:
6036 case ISD::VP_FMA:
6037 return RISCVISD::VFMADD_VL;
6038   case ISD::STRICT_FMA:
6039     return RISCVISD::STRICT_VFMADD_VL;
6040 case ISD::AND:
6041 case ISD::VP_AND:
6042 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6043 return RISCVISD::VMAND_VL;
6044 return RISCVISD::AND_VL;
6045 case ISD::OR:
6046 case ISD::VP_OR:
6047 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6048 return RISCVISD::VMOR_VL;
6049 return RISCVISD::OR_VL;
6050 case ISD::XOR:
6051 case ISD::VP_XOR:
6052 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6053 return RISCVISD::VMXOR_VL;
6054 return RISCVISD::XOR_VL;
6055 case ISD::VP_SELECT:
6056 case ISD::VP_MERGE:
6057 return RISCVISD::VMERGE_VL;
6058 case ISD::VP_SRA:
6059 return RISCVISD::SRA_VL;
6060 case ISD::VP_SRL:
6061 return RISCVISD::SRL_VL;
6062 case ISD::VP_SQRT:
6063 return RISCVISD::FSQRT_VL;
6064 case ISD::VP_SIGN_EXTEND:
6065 return RISCVISD::VSEXT_VL;
6066 case ISD::VP_ZERO_EXTEND:
6067 return RISCVISD::VZEXT_VL;
6068   case ISD::VP_FP_TO_SINT:
6069     return RISCVISD::VFCVT_RTZ_X_F_VL;
6070   case ISD::VP_FP_TO_UINT:
6071     return RISCVISD::VFCVT_RTZ_XU_F_VL;
6072 case ISD::FMINNUM:
6073 case ISD::VP_FMINNUM:
6074 return RISCVISD::VFMIN_VL;
6075 case ISD::FMAXNUM:
6076 case ISD::VP_FMAXNUM:
6077 return RISCVISD::VFMAX_VL;
6078 case ISD::LRINT:
6079 case ISD::VP_LRINT:
6080 case ISD::LLRINT:
6081 case ISD::VP_LLRINT:
6082     return RISCVISD::VFCVT_X_F_VL;
6083   }
6084 // clang-format on
6085#undef OP_CASE
6086#undef VP_CASE
6087}
6088
6089/// Return true if a RISC-V target specified op has a merge operand.
6090static bool hasMergeOp(unsigned Opcode) {
6091   assert(Opcode > RISCVISD::FIRST_NUMBER &&
6092          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
6093          "not a RISC-V target specific op");
6094   assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6095              130 &&
6096          RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
6097                  ISD::FIRST_TARGET_STRICTFP_OPCODE ==
6098              21 &&
6099          "adding target specific op should update this function");
6100 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6101 return true;
6102 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6103 return true;
6104 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6105 return true;
6106 if (Opcode == RISCVISD::SETCC_VL)
6107 return true;
6108 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6109 return true;
6110 if (Opcode == RISCVISD::VMERGE_VL)
6111 return true;
6112 return false;
6113}
6114
6115/// Return true if a RISC-V target specified op has a mask operand.
6116static bool hasMaskOp(unsigned Opcode) {
6117   assert(Opcode > RISCVISD::FIRST_NUMBER &&
6118          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
6119          "not a RISC-V target specific op");
6120   assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6121              130 &&
6122          RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
6123                  ISD::FIRST_TARGET_STRICTFP_OPCODE ==
6124              21 &&
6125          "adding target specific op should update this function");
6126 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6127 return true;
6128 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6129 return true;
6130   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6131       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
6132 return true;
6133 return false;
6134}
6135
6137 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6138 SDLoc DL(Op);
6139
6140   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6141   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6142
6143 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6144 if (!Op.getOperand(j).getValueType().isVector()) {
6145 LoOperands[j] = Op.getOperand(j);
6146 HiOperands[j] = Op.getOperand(j);
6147 continue;
6148 }
6149 std::tie(LoOperands[j], HiOperands[j]) =
6150 DAG.SplitVector(Op.getOperand(j), DL);
6151 }
6152
6153 SDValue LoRes =
6154 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6155 SDValue HiRes =
6156 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6157
6158 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6159}
6160
6162 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6163 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6164 SDLoc DL(Op);
6165
6166   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6167   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6168
6169 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6170 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6171 std::tie(LoOperands[j], HiOperands[j]) =
6172 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6173 continue;
6174 }
6175 if (!Op.getOperand(j).getValueType().isVector()) {
6176 LoOperands[j] = Op.getOperand(j);
6177 HiOperands[j] = Op.getOperand(j);
6178 continue;
6179 }
6180 std::tie(LoOperands[j], HiOperands[j]) =
6181 DAG.SplitVector(Op.getOperand(j), DL);
6182 }
6183
6184 SDValue LoRes =
6185 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6186 SDValue HiRes =
6187 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6188
6189 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6190}
6191
6193 SDLoc DL(Op);
6194
6195 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6196 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6197 auto [EVLLo, EVLHi] =
6198 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6199
6200 SDValue ResLo =
6201 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6202 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6203 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6204 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6205}
6206
6208
6209 assert(Op->isStrictFPOpcode());
6210
6211 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6212
6213 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6214 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6215
6216 SDLoc DL(Op);
6217
6218   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6219   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6220
6221 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6222 if (!Op.getOperand(j).getValueType().isVector()) {
6223 LoOperands[j] = Op.getOperand(j);
6224 HiOperands[j] = Op.getOperand(j);
6225 continue;
6226 }
6227 std::tie(LoOperands[j], HiOperands[j]) =
6228 DAG.SplitVector(Op.getOperand(j), DL);
6229 }
6230
6231 SDValue LoRes =
6232 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6233 HiOperands[0] = LoRes.getValue(1);
6234 SDValue HiRes =
6235 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6236
6237 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6238 LoRes.getValue(0), HiRes.getValue(0));
6239 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6240}
6241
6242 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6243                                             SelectionDAG &DAG) const {
6244 switch (Op.getOpcode()) {
6245 default:
6246 report_fatal_error("unimplemented operand");
6247 case ISD::ATOMIC_FENCE:
6248 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6249 case ISD::GlobalAddress:
6250 return lowerGlobalAddress(Op, DAG);
6251 case ISD::BlockAddress:
6252 return lowerBlockAddress(Op, DAG);
6253 case ISD::ConstantPool:
6254 return lowerConstantPool(Op, DAG);
6255 case ISD::JumpTable:
6256 return lowerJumpTable(Op, DAG);
6257   case ISD::GlobalTLSAddress:
6258     return lowerGlobalTLSAddress(Op, DAG);
6259 case ISD::Constant:
6260 return lowerConstant(Op, DAG, Subtarget);
6261 case ISD::SELECT:
6262 return lowerSELECT(Op, DAG);
6263 case ISD::BRCOND:
6264 return lowerBRCOND(Op, DAG);
6265 case ISD::VASTART:
6266 return lowerVASTART(Op, DAG);
6267 case ISD::FRAMEADDR:
6268 return lowerFRAMEADDR(Op, DAG);
6269 case ISD::RETURNADDR:
6270 return lowerRETURNADDR(Op, DAG);
6271 case ISD::SADDO:
6272 case ISD::SSUBO:
6273 return lowerSADDO_SSUBO(Op, DAG);
6274 case ISD::SMULO:
6275 return lowerSMULO(Op, DAG);
6276 case ISD::SHL_PARTS:
6277 return lowerShiftLeftParts(Op, DAG);
6278 case ISD::SRA_PARTS:
6279 return lowerShiftRightParts(Op, DAG, true);
6280 case ISD::SRL_PARTS:
6281 return lowerShiftRightParts(Op, DAG, false);
6282 case ISD::ROTL:
6283 case ISD::ROTR:
6284 if (Op.getValueType().isFixedLengthVector()) {
6285 assert(Subtarget.hasStdExtZvkb());
6286 return lowerToScalableOp(Op, DAG);
6287 }
6288 assert(Subtarget.hasVendorXTHeadBb() &&
6289 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6290 "Unexpected custom legalization");
6291 // XTHeadBb only supports rotate by constant.
6292 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6293 return SDValue();
6294 return Op;
6295 case ISD::BITCAST: {
6296 SDLoc DL(Op);
6297 EVT VT = Op.getValueType();
6298 SDValue Op0 = Op.getOperand(0);
6299 EVT Op0VT = Op0.getValueType();
6300 MVT XLenVT = Subtarget.getXLenVT();
6301 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6302 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6303 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6304 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6305 return FPConv;
6306 }
6307 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6308 Subtarget.hasStdExtZfbfmin()) {
6309 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6310 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6311 return FPConv;
6312 }
6313 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6314 Subtarget.hasStdExtFOrZfinx()) {
6315 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6316 SDValue FPConv =
6317 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6318 return FPConv;
6319 }
6320 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6321 SDValue Lo, Hi;
6322 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6323 SDValue RetReg =
6324 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6325 return RetReg;
6326 }
6327
6328 // Consider other scalar<->scalar casts as legal if the types are legal.
6329 // Otherwise expand them.
6330 if (!VT.isVector() && !Op0VT.isVector()) {
6331 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6332 return Op;
6333 return SDValue();
6334 }
6335
6336 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6337 "Unexpected types");
6338
6339 if (VT.isFixedLengthVector()) {
6340 // We can handle fixed length vector bitcasts with a simple replacement
6341 // in isel.
6342 if (Op0VT.isFixedLengthVector())
6343 return Op;
6344 // When bitcasting from scalar to fixed-length vector, insert the scalar
6345 // into a one-element vector of the result type, and perform a vector
6346 // bitcast.
6347 if (!Op0VT.isVector()) {
6348 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6349 if (!isTypeLegal(BVT))
6350 return SDValue();
6351 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6352 DAG.getUNDEF(BVT), Op0,
6353 DAG.getVectorIdxConstant(0, DL)));
6354 }
6355 return SDValue();
6356 }
6357 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6358 // thus: bitcast the vector to a one-element vector type whose element type
6359 // is the same as the result type, and extract the first element.
6360 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6361 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6362 if (!isTypeLegal(BVT))
6363 return SDValue();
6364 SDValue BVec = DAG.getBitcast(BVT, Op0);
6365 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6366 DAG.getVectorIdxConstant(0, DL));
6367 }
6368 return SDValue();
6369 }
6370   case ISD::INTRINSIC_WO_CHAIN:
6371     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6372   case ISD::INTRINSIC_W_CHAIN:
6373     return LowerINTRINSIC_W_CHAIN(Op, DAG);
6374   case ISD::INTRINSIC_VOID:
6375     return LowerINTRINSIC_VOID(Op, DAG);
6376 case ISD::IS_FPCLASS:
6377 return LowerIS_FPCLASS(Op, DAG);
6378 case ISD::BITREVERSE: {
6379 MVT VT = Op.getSimpleValueType();
6380 if (VT.isFixedLengthVector()) {
6381 assert(Subtarget.hasStdExtZvbb());
6382 return lowerToScalableOp(Op, DAG);
6383 }
6384 SDLoc DL(Op);
6385 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6386 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6387 // Expand bitreverse to a bswap(rev8) followed by brev8.
6388 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6389 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6390 }
6391 case ISD::TRUNCATE:
6392 // Only custom-lower vector truncates
6393 if (!Op.getSimpleValueType().isVector())
6394 return Op;
6395 return lowerVectorTruncLike(Op, DAG);
6396 case ISD::ANY_EXTEND:
6397 case ISD::ZERO_EXTEND:
6398 if (Op.getOperand(0).getValueType().isVector() &&
6399 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6400 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6401 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6402 case ISD::SIGN_EXTEND:
6403 if (Op.getOperand(0).getValueType().isVector() &&
6404 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6405 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6406 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6407   case ISD::SPLAT_VECTOR_PARTS:
6408     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6409   case ISD::INSERT_VECTOR_ELT:
6410     return lowerINSERT_VECTOR_ELT(Op, DAG);
6411   case ISD::EXTRACT_VECTOR_ELT:
6412     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6413 case ISD::SCALAR_TO_VECTOR: {
6414 MVT VT = Op.getSimpleValueType();
6415 SDLoc DL(Op);
6416 SDValue Scalar = Op.getOperand(0);
6417 if (VT.getVectorElementType() == MVT::i1) {
6418 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6419 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6420 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6421 }
6422 MVT ContainerVT = VT;
6423 if (VT.isFixedLengthVector())
6424 ContainerVT = getContainerForFixedLengthVector(VT);
6425 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6426 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6427 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6428 DAG.getUNDEF(ContainerVT), Scalar, VL);
6429 if (VT.isFixedLengthVector())
6430 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6431 return V;
6432 }
6433 case ISD::VSCALE: {
6434 MVT XLenVT = Subtarget.getXLenVT();
6435 MVT VT = Op.getSimpleValueType();
6436 SDLoc DL(Op);
6437 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6438 // We define our scalable vector types for lmul=1 to use a 64 bit known
6439 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6440 // vscale as VLENB / 8.
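    // For example, with VLEN == 128, VLENB is 16, so vscale is 16 / 8 == 2 and
    // a <vscale x 2 x i32> value holds 2 * 2 == 4 i32 elements.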
6441 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6442 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6443 report_fatal_error("Support for VLEN==32 is incomplete.");
6444 // We assume VLENB is a multiple of 8. We manually choose the best shift
6445 // here because SimplifyDemandedBits isn't always able to simplify it.
6446 uint64_t Val = Op.getConstantOperandVal(0);
6447 if (isPowerOf2_64(Val)) {
6448 uint64_t Log2 = Log2_64(Val);
6449 if (Log2 < 3)
6450 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6451 DAG.getConstant(3 - Log2, DL, VT));
6452 else if (Log2 > 3)
6453 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6454 DAG.getConstant(Log2 - 3, DL, XLenVT));
6455 } else if ((Val % 8) == 0) {
6456 // If the multiplier is a multiple of 8, scale it down to avoid needing
6457 // to shift the VLENB value.
6458 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6459 DAG.getConstant(Val / 8, DL, XLenVT));
6460 } else {
6461 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6462 DAG.getConstant(3, DL, XLenVT));
6463 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6464 DAG.getConstant(Val, DL, XLenVT));
6465 }
6466 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6467 }
6468 case ISD::FPOWI: {
6469 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6470 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6471 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6472 Op.getOperand(1).getValueType() == MVT::i32) {
6473 SDLoc DL(Op);
6474 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6475 SDValue Powi =
6476 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6477 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6478 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6479 }
6480 return SDValue();
6481 }
6482 case ISD::FMAXIMUM:
6483 case ISD::FMINIMUM:
6484 if (Op.getValueType() == MVT::nxv32f16 &&
6485 (Subtarget.hasVInstructionsF16Minimal() &&
6486 !Subtarget.hasVInstructionsF16()))
6487 return SplitVectorOp(Op, DAG);
6488 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6489 case ISD::FP_EXTEND: {
6490 SDLoc DL(Op);
6491 EVT VT = Op.getValueType();
6492 SDValue Op0 = Op.getOperand(0);
6493 EVT Op0VT = Op0.getValueType();
6494 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6495 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6496 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6497 SDValue FloatVal =
6498 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6499 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6500 }
6501
6502 if (!Op.getValueType().isVector())
6503 return Op;
6504 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6505 }
6506 case ISD::FP_ROUND: {
6507 SDLoc DL(Op);
6508 EVT VT = Op.getValueType();
6509 SDValue Op0 = Op.getOperand(0);
6510 EVT Op0VT = Op0.getValueType();
6511 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6512 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6513 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6514 Subtarget.hasStdExtDOrZdinx()) {
6515 SDValue FloatVal =
6516 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6517 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6518 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6519 }
6520
6521 if (!Op.getValueType().isVector())
6522 return Op;
6523 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6524 }
6525   case ISD::STRICT_FP_ROUND:
6526   case ISD::STRICT_FP_EXTEND:
6527     return lowerStrictFPExtendOrRoundLike(Op, DAG);
6528 case ISD::SINT_TO_FP:
6529 case ISD::UINT_TO_FP:
6530 if (Op.getValueType().isVector() &&
6531 Op.getValueType().getScalarType() == MVT::f16 &&
6532 (Subtarget.hasVInstructionsF16Minimal() &&
6533 !Subtarget.hasVInstructionsF16())) {
6534 if (Op.getValueType() == MVT::nxv32f16)
6535 return SplitVectorOp(Op, DAG);
6536 // int -> f32
6537 SDLoc DL(Op);
6538 MVT NVT =
6539 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6540 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6541 // f32 -> f16
6542 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6543 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6544 }
6545 [[fallthrough]];
6546 case ISD::FP_TO_SINT:
6547 case ISD::FP_TO_UINT:
6548 if (SDValue Op1 = Op.getOperand(0);
6549 Op1.getValueType().isVector() &&
6550 Op1.getValueType().getScalarType() == MVT::f16 &&
6551 (Subtarget.hasVInstructionsF16Minimal() &&
6552 !Subtarget.hasVInstructionsF16())) {
6553 if (Op1.getValueType() == MVT::nxv32f16)
6554 return SplitVectorOp(Op, DAG);
6555 // f16 -> f32
6556 SDLoc DL(Op);
6557 MVT NVT = MVT::getVectorVT(MVT::f32,
6558 Op1.getValueType().getVectorElementCount());
6559 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6560 // f32 -> int
6561 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6562 }
6563 [[fallthrough]];
6564   case ISD::STRICT_FP_TO_SINT:
6565   case ISD::STRICT_FP_TO_UINT:
6566   case ISD::STRICT_SINT_TO_FP:
6567   case ISD::STRICT_UINT_TO_FP: {
6568     // RVV can only do fp<->int conversions to types half/double the size as
6569 // the source. We custom-lower any conversions that do two hops into
6570 // sequences.
6571 MVT VT = Op.getSimpleValueType();
6572 if (!VT.isVector())
6573 return Op;
6574 SDLoc DL(Op);
6575 bool IsStrict = Op->isStrictFPOpcode();
6576 SDValue Src = Op.getOperand(0 + IsStrict);
6577 MVT EltVT = VT.getVectorElementType();
6578 MVT SrcVT = Src.getSimpleValueType();
6579 MVT SrcEltVT = SrcVT.getVectorElementType();
6580 unsigned EltSize = EltVT.getSizeInBits();
6581 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6582 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6583 "Unexpected vector element types");
6584
6585 bool IsInt2FP = SrcEltVT.isInteger();
6586 // Widening conversions
6587 if (EltSize > (2 * SrcEltSize)) {
6588 if (IsInt2FP) {
6589 // Do a regular integer sign/zero extension then convert to float.
6590         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6591                                       VT.getVectorElementCount());
6592 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6593                                   Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6594                                      ? ISD::ZERO_EXTEND
6595                                      : ISD::SIGN_EXTEND;
6596 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6597 if (IsStrict)
6598 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6599 Op.getOperand(0), Ext);
6600 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6601 }
6602 // FP2Int
6603 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6604 // Do one doubling fp_extend then complete the operation by converting
6605 // to int.
6606 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6607 if (IsStrict) {
6608 auto [FExt, Chain] =
6609 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6610 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6611 }
6612 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6613 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6614 }
6615
6616 // Narrowing conversions
6617 if (SrcEltSize > (2 * EltSize)) {
6618 if (IsInt2FP) {
6619 // One narrowing int_to_fp, then an fp_round.
6620 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6621 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6622 if (IsStrict) {
6623 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6624 DAG.getVTList(InterimFVT, MVT::Other),
6625 Op.getOperand(0), Src);
6626 SDValue Chain = Int2FP.getValue(1);
6627 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6628 }
6629 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6630 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6631 }
6632 // FP2Int
6633 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6634 // representable by the integer, the result is poison.
6635       MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6636                                     VT.getVectorElementCount());
6637 if (IsStrict) {
6638 SDValue FP2Int =
6639 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6640 Op.getOperand(0), Src);
6641 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6642 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6643 }
6644 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6645 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6646 }
6647
6648 // Scalable vectors can exit here. Patterns will handle equally-sized
6649 // conversions halving/doubling ones.
6650 if (!VT.isFixedLengthVector())
6651 return Op;
6652
6653 // For fixed-length vectors we lower to a custom "VL" node.
6654 unsigned RVVOpc = 0;
6655 switch (Op.getOpcode()) {
6656 default:
6657 llvm_unreachable("Impossible opcode");
6658     case ISD::FP_TO_SINT:
6659       RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6660       break;
6661     case ISD::FP_TO_UINT:
6662       RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6663       break;
6664     case ISD::SINT_TO_FP:
6665       RVVOpc = RISCVISD::SINT_TO_FP_VL;
6666       break;
6667     case ISD::UINT_TO_FP:
6668       RVVOpc = RISCVISD::UINT_TO_FP_VL;
6669       break;
6670     case ISD::STRICT_FP_TO_SINT:
6671       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6672       break;
6673     case ISD::STRICT_FP_TO_UINT:
6674       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6675       break;
6676     case ISD::STRICT_SINT_TO_FP:
6677       RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6678       break;
6679     case ISD::STRICT_UINT_TO_FP:
6680       RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6681       break;
6682 }
6683
6684 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6685 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6686 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6687 "Expected same element count");
6688
6689 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6690
6691 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6692 if (IsStrict) {
6693 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6694 Op.getOperand(0), Src, Mask, VL);
6695 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6696 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6697 }
6698 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6699 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6700 }
6701   case ISD::FP_TO_SINT_SAT:
6702   case ISD::FP_TO_UINT_SAT:
6703     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6704 case ISD::FP_TO_BF16: {
6705 // Custom lower to ensure the libcall return is passed in an FPR on hard
6706 // float ABIs.
6707 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6708 SDLoc DL(Op);
6709 MakeLibCallOptions CallOptions;
6710 RTLIB::Libcall LC =
6711 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6712 SDValue Res =
6713 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6714 if (Subtarget.is64Bit() && !RV64LegalI32)
6715 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6716 return DAG.getBitcast(MVT::i32, Res);
6717 }
6718 case ISD::BF16_TO_FP: {
6719 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6720 MVT VT = Op.getSimpleValueType();
6721 SDLoc DL(Op);
6722 Op = DAG.getNode(
6723 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6724 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6725 SDValue Res = Subtarget.is64Bit()
6726 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6727 : DAG.getBitcast(MVT::f32, Op);
6728 // fp_extend if the target VT is bigger than f32.
6729 if (VT != MVT::f32)
6730 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6731 return Res;
6732 }
6733 case ISD::FP_TO_FP16: {
6734 // Custom lower to ensure the libcall return is passed in an FPR on hard
6735 // float ABIs.
6736 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6737 SDLoc DL(Op);
6738 MakeLibCallOptions CallOptions;
6739 RTLIB::Libcall LC =
6740 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6741 SDValue Res =
6742 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6743 if (Subtarget.is64Bit() && !RV64LegalI32)
6744 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6745 return DAG.getBitcast(MVT::i32, Res);
6746 }
6747 case ISD::FP16_TO_FP: {
6748 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6749 // float ABIs.
6750 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6751 SDLoc DL(Op);
6752 MakeLibCallOptions CallOptions;
6753 SDValue Arg = Subtarget.is64Bit()
6754 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6755 Op.getOperand(0))
6756 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6757 SDValue Res =
6758 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6759 .first;
6760 return Res;
6761 }
6762 case ISD::FTRUNC:
6763 case ISD::FCEIL:
6764 case ISD::FFLOOR:
6765 case ISD::FNEARBYINT:
6766 case ISD::FRINT:
6767 case ISD::FROUND:
6768 case ISD::FROUNDEVEN:
6769 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6770 case ISD::LRINT:
6771 case ISD::LLRINT:
6772 return lowerVectorXRINT(Op, DAG, Subtarget);
6773 case ISD::VECREDUCE_ADD:
6774 case ISD::VECREDUCE_UMAX:
6775 case ISD::VECREDUCE_SMAX:
6776 case ISD::VECREDUCE_UMIN:
6777 case ISD::VECREDUCE_SMIN:
6778 return lowerVECREDUCE(Op, DAG);
6779 case ISD::VECREDUCE_AND:
6780 case ISD::VECREDUCE_OR:
6781 case ISD::VECREDUCE_XOR:
6782 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6783 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6784 return lowerVECREDUCE(Op, DAG);
6785 case ISD::VECREDUCE_FADD:
6786 case ISD::VECREDUCE_SEQ_FADD:
6787 case ISD::VECREDUCE_FMIN:
6788 case ISD::VECREDUCE_FMAX:
6789 case ISD::VECREDUCE_FMAXIMUM:
6790 case ISD::VECREDUCE_FMINIMUM:
6791 return lowerFPVECREDUCE(Op, DAG);
6792 case ISD::VP_REDUCE_ADD:
6793 case ISD::VP_REDUCE_UMAX:
6794 case ISD::VP_REDUCE_SMAX:
6795 case ISD::VP_REDUCE_UMIN:
6796 case ISD::VP_REDUCE_SMIN:
6797 case ISD::VP_REDUCE_FADD:
6798 case ISD::VP_REDUCE_SEQ_FADD:
6799 case ISD::VP_REDUCE_FMIN:
6800 case ISD::VP_REDUCE_FMAX:
6801 case ISD::VP_REDUCE_FMINIMUM:
6802 case ISD::VP_REDUCE_FMAXIMUM:
6803 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6804 (Subtarget.hasVInstructionsF16Minimal() &&
6805 !Subtarget.hasVInstructionsF16()))
6806 return SplitVectorReductionOp(Op, DAG);
6807 return lowerVPREDUCE(Op, DAG);
6808 case ISD::VP_REDUCE_AND:
6809 case ISD::VP_REDUCE_OR:
6810 case ISD::VP_REDUCE_XOR:
6811 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6812 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6813 return lowerVPREDUCE(Op, DAG);
6814 case ISD::VP_CTTZ_ELTS:
6815 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6816 return lowerVPCttzElements(Op, DAG);
6817 case ISD::UNDEF: {
6818 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6819 return convertFromScalableVector(Op.getSimpleValueType(),
6820 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6821 }
6822 case ISD::INSERT_SUBVECTOR:
6823 return lowerINSERT_SUBVECTOR(Op, DAG);
6824 case ISD::EXTRACT_SUBVECTOR:
6825 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6826 case ISD::VECTOR_DEINTERLEAVE:
6827 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6828 case ISD::VECTOR_INTERLEAVE:
6829 return lowerVECTOR_INTERLEAVE(Op, DAG);
6830 case ISD::STEP_VECTOR:
6831 return lowerSTEP_VECTOR(Op, DAG);
6832 case ISD::VECTOR_REVERSE:
6833 return lowerVECTOR_REVERSE(Op, DAG);
6834 case ISD::VECTOR_SPLICE:
6835 return lowerVECTOR_SPLICE(Op, DAG);
6836 case ISD::BUILD_VECTOR:
6837 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6838 case ISD::SPLAT_VECTOR:
6839 if ((Op.getValueType().getScalarType() == MVT::f16 &&
6840 (Subtarget.hasVInstructionsF16Minimal() &&
6841 Subtarget.hasStdExtZfhminOrZhinxmin() &&
6842 !Subtarget.hasVInstructionsF16())) ||
6843 (Op.getValueType().getScalarType() == MVT::bf16 &&
6844 (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) {
6845 if (Op.getValueType() == MVT::nxv32f16 ||
6846 Op.getValueType() == MVT::nxv32bf16)
6847 return SplitVectorOp(Op, DAG);
6848 SDLoc DL(Op);
6849 SDValue NewScalar =
6850 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6851 SDValue NewSplat = DAG.getNode(
6852 ISD::SPLAT_VECTOR, DL,
6853 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6854 NewScalar);
6855 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6856 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6857 }
6858 if (Op.getValueType().getVectorElementType() == MVT::i1)
6859 return lowerVectorMaskSplat(Op, DAG);
6860 return SDValue();
6861 case ISD::VECTOR_SHUFFLE:
6862 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6863 case ISD::CONCAT_VECTORS: {
6864 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6865 // better than going through the stack, as the default expansion does.
6866 SDLoc DL(Op);
6867 MVT VT = Op.getSimpleValueType();
6868 MVT ContainerVT = VT;
6869 if (VT.isFixedLengthVector())
6870 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6871
6872 // Recursively split concat_vectors with more than 2 operands:
6873 //
6874 // concat_vector op1, op2, op3, op4
6875 // ->
6876 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6877 //
6878 // This reduces the length of the chain of vslideups and allows us to
6879 // perform the vslideups at a smaller LMUL, limited to MF2.
6880 if (Op.getNumOperands() > 2 &&
6881 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6882 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6883 assert(isPowerOf2_32(Op.getNumOperands()));
6884 size_t HalfNumOps = Op.getNumOperands() / 2;
6885 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6886 Op->ops().take_front(HalfNumOps));
6887 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6888 Op->ops().drop_front(HalfNumOps));
6889 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6890 }
6891
6892 unsigned NumOpElts =
6893 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6894 SDValue Vec = DAG.getUNDEF(VT);
6895 for (const auto &OpIdx : enumerate(Op->ops())) {
6896 SDValue SubVec = OpIdx.value();
6897 // Don't insert undef subvectors.
6898 if (SubVec.isUndef())
6899 continue;
6900 Vec =
6901 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6902 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6903 }
6904 return Vec;
6905 }
6906 case ISD::LOAD:
6907 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6908 return V;
6909 if (Op.getValueType().isFixedLengthVector())
6910 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6911 return Op;
6912 case ISD::STORE:
6913 if (auto V = expandUnalignedRVVStore(Op, DAG))
6914 return V;
6915 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6916 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6917 return Op;
6918 case ISD::MLOAD:
6919 case ISD::VP_LOAD:
6920 return lowerMaskedLoad(Op, DAG);
6921 case ISD::MSTORE:
6922 case ISD::VP_STORE:
6923 return lowerMaskedStore(Op, DAG);
6924 case ISD::SELECT_CC: {
6925 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6926 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6927 // into separate SETCC+SELECT just like LegalizeDAG.
6928 SDValue Tmp1 = Op.getOperand(0);
6929 SDValue Tmp2 = Op.getOperand(1);
6930 SDValue True = Op.getOperand(2);
6931 SDValue False = Op.getOperand(3);
6932 EVT VT = Op.getValueType();
6933 SDValue CC = Op.getOperand(4);
6934 EVT CmpVT = Tmp1.getValueType();
6935 EVT CCVT =
6936 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6937 SDLoc DL(Op);
6938 SDValue Cond =
6939 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6940 return DAG.getSelect(DL, VT, Cond, True, False);
6941 }
6942 case ISD::SETCC: {
6943 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6944 if (OpVT.isScalarInteger()) {
6945 MVT VT = Op.getSimpleValueType();
6946 SDValue LHS = Op.getOperand(0);
6947 SDValue RHS = Op.getOperand(1);
6948 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6949 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6950 "Unexpected CondCode");
6951
6952 SDLoc DL(Op);
6953
6954 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6955 // convert this to the equivalent of (set(u)ge X, C+1) by using
6956 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6957 // in a register.
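      // For example, (setgt X, 5) becomes (xori (slti X, 6), 1).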
6958 if (isa<ConstantSDNode>(RHS)) {
6959 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6960 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6961 // If this is an unsigned compare and the constant is -1, incrementing
6962 // the constant would change behavior. The result should be false.
6963 if (CCVal == ISD::SETUGT && Imm == -1)
6964 return DAG.getConstant(0, DL, VT);
6965 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6966 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6967 SDValue SetCC = DAG.getSetCC(
6968 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6969 return DAG.getLogicalNOT(DL, SetCC, VT);
6970 }
6971 }
6972
6973 // Not a constant we could handle, swap the operands and condition code to
6974 // SETLT/SETULT.
6975 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6976 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6977 }
6978
6979 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6980 (Subtarget.hasVInstructionsF16Minimal() &&
6981 !Subtarget.hasVInstructionsF16()))
6982 return SplitVectorOp(Op, DAG);
6983
6984 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6985 }
6986 case ISD::ADD:
6987 case ISD::SUB:
6988 case ISD::MUL:
6989 case ISD::MULHS:
6990 case ISD::MULHU:
6991 case ISD::AND:
6992 case ISD::OR:
6993 case ISD::XOR:
6994 case ISD::SDIV:
6995 case ISD::SREM:
6996 case ISD::UDIV:
6997 case ISD::UREM:
6998 case ISD::BSWAP:
6999 case ISD::CTPOP:
7000 return lowerToScalableOp(Op, DAG);
7001 case ISD::SHL:
7002 case ISD::SRA:
7003 case ISD::SRL:
7004 if (Op.getSimpleValueType().isFixedLengthVector())
7005 return lowerToScalableOp(Op, DAG);
7006 // This can be called for an i32 shift amount that needs to be promoted.
7007 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7008 "Unexpected custom legalisation");
7009 return SDValue();
7010 case ISD::FADD:
7011 case ISD::FSUB:
7012 case ISD::FMUL:
7013 case ISD::FDIV:
7014 case ISD::FNEG:
7015 case ISD::FABS:
7016 case ISD::FSQRT:
7017 case ISD::FMA:
7018 case ISD::FMINNUM:
7019 case ISD::FMAXNUM:
7020 if (Op.getValueType() == MVT::nxv32f16 &&
7021 (Subtarget.hasVInstructionsF16Minimal() &&
7022 !Subtarget.hasVInstructionsF16()))
7023 return SplitVectorOp(Op, DAG);
7024 [[fallthrough]];
7025 case ISD::AVGFLOORS:
7026 case ISD::AVGFLOORU:
7027 case ISD::AVGCEILS:
7028 case ISD::AVGCEILU:
7029 case ISD::SMIN:
7030 case ISD::SMAX:
7031 case ISD::UMIN:
7032 case ISD::UMAX:
7033 return lowerToScalableOp(Op, DAG);
7034 case ISD::UADDSAT:
7035 case ISD::USUBSAT:
7036 if (!Op.getValueType().isVector())
7037 return lowerUADDSAT_USUBSAT(Op, DAG);
7038 return lowerToScalableOp(Op, DAG);
7039 case ISD::SADDSAT:
7040 case ISD::SSUBSAT:
7041 if (!Op.getValueType().isVector())
7042 return lowerSADDSAT_SSUBSAT(Op, DAG);
7043 return lowerToScalableOp(Op, DAG);
7044 case ISD::ABDS:
7045 case ISD::ABDU: {
7046 SDLoc dl(Op);
7047 EVT VT = Op->getValueType(0);
7048 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7049 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7050 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7051
7052 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7053 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7054 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7055 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7056 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7057 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7058 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7059 }
7060 case ISD::ABS:
7061 case ISD::VP_ABS:
7062 return lowerABS(Op, DAG);
7063 case ISD::CTLZ:
7064 case ISD::CTLZ_ZERO_UNDEF:
7065 case ISD::CTTZ:
7066 case ISD::CTTZ_ZERO_UNDEF:
7067 if (Subtarget.hasStdExtZvbb())
7068 return lowerToScalableOp(Op, DAG);
7069 assert(Op.getOpcode() != ISD::CTTZ);
7070 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7071 case ISD::VSELECT:
7072 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7073 case ISD::FCOPYSIGN:
7074 if (Op.getValueType() == MVT::nxv32f16 &&
7075 (Subtarget.hasVInstructionsF16Minimal() &&
7076 !Subtarget.hasVInstructionsF16()))
7077 return SplitVectorOp(Op, DAG);
7078 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7079 case ISD::STRICT_FADD:
7080 case ISD::STRICT_FSUB:
7081 case ISD::STRICT_FMUL:
7082 case ISD::STRICT_FDIV:
7083 case ISD::STRICT_FSQRT:
7084 case ISD::STRICT_FMA:
7085 if (Op.getValueType() == MVT::nxv32f16 &&
7086 (Subtarget.hasVInstructionsF16Minimal() &&
7087 !Subtarget.hasVInstructionsF16()))
7088 return SplitStrictFPVectorOp(Op, DAG);
7089 return lowerToScalableOp(Op, DAG);
7090 case ISD::STRICT_FSETCC:
7091 case ISD::STRICT_FSETCCS:
7092 return lowerVectorStrictFSetcc(Op, DAG);
7093 case ISD::STRICT_FCEIL:
7094 case ISD::STRICT_FRINT:
7095 case ISD::STRICT_FFLOOR:
7096 case ISD::STRICT_FTRUNC:
7097 case ISD::STRICT_FNEARBYINT:
7098 case ISD::STRICT_FROUND:
7099 case ISD::STRICT_FROUNDEVEN:
7100 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7101 case ISD::MGATHER:
7102 case ISD::VP_GATHER:
7103 return lowerMaskedGather(Op, DAG);
7104 case ISD::MSCATTER:
7105 case ISD::VP_SCATTER:
7106 return lowerMaskedScatter(Op, DAG);
7107 case ISD::GET_ROUNDING:
7108 return lowerGET_ROUNDING(Op, DAG);
7109 case ISD::SET_ROUNDING:
7110 return lowerSET_ROUNDING(Op, DAG);
7111 case ISD::EH_DWARF_CFA:
7112 return lowerEH_DWARF_CFA(Op, DAG);
7113 case ISD::VP_SELECT:
7114 case ISD::VP_MERGE:
7115 case ISD::VP_ADD:
7116 case ISD::VP_SUB:
7117 case ISD::VP_MUL:
7118 case ISD::VP_SDIV:
7119 case ISD::VP_UDIV:
7120 case ISD::VP_SREM:
7121 case ISD::VP_UREM:
7122 case ISD::VP_UADDSAT:
7123 case ISD::VP_USUBSAT:
7124 case ISD::VP_SADDSAT:
7125 case ISD::VP_SSUBSAT:
7126 case ISD::VP_LRINT:
7127 case ISD::VP_LLRINT:
7128 return lowerVPOp(Op, DAG);
7129 case ISD::VP_AND:
7130 case ISD::VP_OR:
7131 case ISD::VP_XOR:
7132 return lowerLogicVPOp(Op, DAG);
7133 case ISD::VP_FADD:
7134 case ISD::VP_FSUB:
7135 case ISD::VP_FMUL:
7136 case ISD::VP_FDIV:
7137 case ISD::VP_FNEG:
7138 case ISD::VP_FABS:
7139 case ISD::VP_SQRT:
7140 case ISD::VP_FMA:
7141 case ISD::VP_FMINNUM:
7142 case ISD::VP_FMAXNUM:
7143 case ISD::VP_FCOPYSIGN:
7144 if (Op.getValueType() == MVT::nxv32f16 &&
7145 (Subtarget.hasVInstructionsF16Minimal() &&
7146 !Subtarget.hasVInstructionsF16()))
7147 return SplitVPOp(Op, DAG);
7148 [[fallthrough]];
7149 case ISD::VP_SRA:
7150 case ISD::VP_SRL:
7151 case ISD::VP_SHL:
7152 return lowerVPOp(Op, DAG);
7153 case ISD::VP_IS_FPCLASS:
7154 return LowerIS_FPCLASS(Op, DAG);
7155 case ISD::VP_SIGN_EXTEND:
7156 case ISD::VP_ZERO_EXTEND:
7157 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7158 return lowerVPExtMaskOp(Op, DAG);
7159 return lowerVPOp(Op, DAG);
7160 case ISD::VP_TRUNCATE:
7161 return lowerVectorTruncLike(Op, DAG);
7162 case ISD::VP_FP_EXTEND:
7163 case ISD::VP_FP_ROUND:
7164 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7165 case ISD::VP_SINT_TO_FP:
7166 case ISD::VP_UINT_TO_FP:
7167 if (Op.getValueType().isVector() &&
7168 Op.getValueType().getScalarType() == MVT::f16 &&
7169 (Subtarget.hasVInstructionsF16Minimal() &&
7170 !Subtarget.hasVInstructionsF16())) {
7171 if (Op.getValueType() == MVT::nxv32f16)
7172 return SplitVPOp(Op, DAG);
7173 // int -> f32
7174 SDLoc DL(Op);
7175 MVT NVT =
7176 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7177 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7178 // f32 -> f16
7179 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7180 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7181 }
7182 [[fallthrough]];
7183 case ISD::VP_FP_TO_SINT:
7184 case ISD::VP_FP_TO_UINT:
7185 if (SDValue Op1 = Op.getOperand(0);
7186 Op1.getValueType().isVector() &&
7187 Op1.getValueType().getScalarType() == MVT::f16 &&
7188 (Subtarget.hasVInstructionsF16Minimal() &&
7189 !Subtarget.hasVInstructionsF16())) {
7190 if (Op1.getValueType() == MVT::nxv32f16)
7191 return SplitVPOp(Op, DAG);
7192 // f16 -> f32
7193 SDLoc DL(Op);
7194 MVT NVT = MVT::getVectorVT(MVT::f32,
7195 Op1.getValueType().getVectorElementCount());
7196 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7197 // f32 -> int
7198 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7199 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7200 }
7201 return lowerVPFPIntConvOp(Op, DAG);
7202 case ISD::VP_SETCC:
7203 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7204 (Subtarget.hasVInstructionsF16Minimal() &&
7205 !Subtarget.hasVInstructionsF16()))
7206 return SplitVPOp(Op, DAG);
7207 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7208 return lowerVPSetCCMaskOp(Op, DAG);
7209 [[fallthrough]];
7210 case ISD::VP_SMIN:
7211 case ISD::VP_SMAX:
7212 case ISD::VP_UMIN:
7213 case ISD::VP_UMAX:
7214 case ISD::VP_BITREVERSE:
7215 case ISD::VP_BSWAP:
7216 return lowerVPOp(Op, DAG);
7217 case ISD::VP_CTLZ:
7218 case ISD::VP_CTLZ_ZERO_UNDEF:
7219 if (Subtarget.hasStdExtZvbb())
7220 return lowerVPOp(Op, DAG);
7221 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7222 case ISD::VP_CTTZ:
7223 case ISD::VP_CTTZ_ZERO_UNDEF:
7224 if (Subtarget.hasStdExtZvbb())
7225 return lowerVPOp(Op, DAG);
7226 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7227 case ISD::VP_CTPOP:
7228 return lowerVPOp(Op, DAG);
7229 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7230 return lowerVPStridedLoad(Op, DAG);
7231 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7232 return lowerVPStridedStore(Op, DAG);
7233 case ISD::VP_FCEIL:
7234 case ISD::VP_FFLOOR:
7235 case ISD::VP_FRINT:
7236 case ISD::VP_FNEARBYINT:
7237 case ISD::VP_FROUND:
7238 case ISD::VP_FROUNDEVEN:
7239 case ISD::VP_FROUNDTOZERO:
7240 if (Op.getValueType() == MVT::nxv32f16 &&
7241 (Subtarget.hasVInstructionsF16Minimal() &&
7242 !Subtarget.hasVInstructionsF16()))
7243 return SplitVPOp(Op, DAG);
7244 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7245 case ISD::VP_FMAXIMUM:
7246 case ISD::VP_FMINIMUM:
7247 if (Op.getValueType() == MVT::nxv32f16 &&
7248 (Subtarget.hasVInstructionsF16Minimal() &&
7249 !Subtarget.hasVInstructionsF16()))
7250 return SplitVPOp(Op, DAG);
7251 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7252 case ISD::EXPERIMENTAL_VP_SPLICE:
7253 return lowerVPSpliceExperimental(Op, DAG);
7254 case ISD::EXPERIMENTAL_VP_REVERSE:
7255 return lowerVPReverseExperimental(Op, DAG);
7256 case ISD::EXPERIMENTAL_VP_SPLAT:
7257 return lowerVPSplatExperimental(Op, DAG);
7258 case ISD::CLEAR_CACHE: {
7259 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7260 "llvm.clear_cache only needs custom lower on Linux targets");
7261 SDLoc DL(Op);
7262 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7263 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7264 Op.getOperand(2), Flags, DL);
7265 }
7266 }
7267}
7268
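// Emit a libcall that flushes the instruction cache for the range
// [Start, End) with the given Flags. The call returns void, so only the
// output chain is handed back to the caller.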
7269SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7270 SDValue Start, SDValue End,
7271 SDValue Flags, SDLoc DL) const {
7272 MakeLibCallOptions CallOptions;
7273 std::pair<SDValue, SDValue> CallResult =
7274 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7275 {Start, End, Flags}, CallOptions, DL, InChain);
7276
7277 // This function returns void so only the out chain matters.
7278 return CallResult.second;
7279}
7280
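// The getTargetNode overloads below build the target-specific address node
// for each supported symbol kind (global address, block address, constant
// pool and jump table), so the getAddr template further down can be shared
// between them.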
7281 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7282 SelectionDAG &DAG, unsigned Flags) {
7283 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7284}
7285
7286 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7287 SelectionDAG &DAG, unsigned Flags) {
7288 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7289 Flags);
7290}
7291
7292 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7293 SelectionDAG &DAG, unsigned Flags) {
7294 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7295 N->getOffset(), Flags);
7296}
7297
7298 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7299 SelectionDAG &DAG, unsigned Flags) {
7300 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7301}
7302
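// Select an addressing sequence for the symbol N based on position
// independence, the code model, and whether the symbol is local, extern-weak,
// or a tagged global.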
7303template <class NodeTy>
7304SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7305 bool IsLocal, bool IsExternWeak) const {
7306 SDLoc DL(N);
7307 EVT Ty = getPointerTy(DAG.getDataLayout());
7308
7309 // When HWASAN is used and tagging of global variables is enabled
7310 // they should be accessed via the GOT, since the tagged address of a global
7311 // is incompatible with existing code models. This also applies to non-pic
7312 // mode.
7313 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7314 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7315 if (IsLocal && !Subtarget.allowTaggedGlobals())
7316 // Use PC-relative addressing to access the symbol. This generates the
7317 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7318 // %pcrel_lo(auipc)).
7319 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7320
7321 // Use PC-relative addressing to access the GOT for this symbol, then load
7322 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7323 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7324 SDValue Load =
7325 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7326 MachineFunction &MF = DAG.getMachineFunction();
7327 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7328 MachinePointerInfo::getGOT(MF),
7329 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7330 MachineMemOperand::MOInvariant,
7331 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7332 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7333 return Load;
7334 }
7335
7336 switch (getTargetMachine().getCodeModel()) {
7337 default:
7338 report_fatal_error("Unsupported code model for lowering");
7339 case CodeModel::Small: {
7340 // Generate a sequence for accessing addresses within the first 2 GiB of
7341 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7342 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7343 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7344 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7345 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7346 }
7347 case CodeModel::Medium: {
7348 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7349 if (IsExternWeak) {
7350 // An extern weak symbol may be undefined, i.e. have value 0, which may
7351 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7352 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7353 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7354 SDValue Load =
7355 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7356 MachineFunction &MF = DAG.getMachineFunction();
7357 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7358 MachinePointerInfo::getGOT(MF),
7359 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7360 MachineMemOperand::MOInvariant,
7361 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7362 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7363 return Load;
7364 }
7365
7366 // Generate a sequence for accessing addresses within any 2GiB range within
7367 // the address space. This generates the pattern (PseudoLLA sym), which
7368 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7369 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7370 }
7371 }
7372}
7373
7374SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7375 SelectionDAG &DAG) const {
7376 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7377 assert(N->getOffset() == 0 && "unexpected offset in global node");
7378 const GlobalValue *GV = N->getGlobal();
7379 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7380}
7381
7382SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7383 SelectionDAG &DAG) const {
7384 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7385
7386 return getAddr(N, DAG);
7387}
7388
7389SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7390 SelectionDAG &DAG) const {
7391 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7392
7393 return getAddr(N, DAG);
7394}
7395
7396SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7397 SelectionDAG &DAG) const {
7398 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7399
7400 return getAddr(N, DAG);
7401}
7402
7403SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7404 SelectionDAG &DAG,
7405 bool UseGOT) const {
7406 SDLoc DL(N);
7407 EVT Ty = getPointerTy(DAG.getDataLayout());
7408 const GlobalValue *GV = N->getGlobal();
7409 MVT XLenVT = Subtarget.getXLenVT();
7410
7411 if (UseGOT) {
7412 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7413 // load the address from the GOT and add the thread pointer. This generates
7414 // the pattern (PseudoLA_TLS_IE sym), which expands to
7415 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7416 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7417 SDValue Load =
7418 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7419 MachineFunction &MF = DAG.getMachineFunction();
7420 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7421 MachinePointerInfo::getGOT(MF),
7422 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7423 MachineMemOperand::MOInvariant,
7424 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7425 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7426
7427 // Add the thread pointer.
7428 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7429 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7430 }
7431
7432 // Generate a sequence for accessing the address relative to the thread
7433 // pointer, with the appropriate adjustment for the thread pointer offset.
7434 // This generates the pattern
7435 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7436 SDValue AddrHi =
7437 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7438 SDValue AddrAdd =
7439 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7440 SDValue AddrLo =
7441 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7442
7443 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7444 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7445 SDValue MNAdd =
7446 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7447 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7448}
7449
7450SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7451 SelectionDAG &DAG) const {
7452 SDLoc DL(N);
7453 EVT Ty = getPointerTy(DAG.getDataLayout());
7454 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7455 const GlobalValue *GV = N->getGlobal();
7456
7457 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7458 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7459 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7460 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7461 SDValue Load =
7462 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7463
7464 // Prepare argument list to generate call.
7465 ArgListTy Args;
7466 ArgListEntry Entry;
7467 Entry.Node = Load;
7468 Entry.Ty = CallTy;
7469 Args.push_back(Entry);
7470
7471 // Setup call to __tls_get_addr.
7472 TargetLowering::CallLoweringInfo CLI(DAG);
7473 CLI.setDebugLoc(DL)
7474 .setChain(DAG.getEntryNode())
7475 .setLibCallee(CallingConv::C, CallTy,
7476 DAG.getExternalSymbol("__tls_get_addr", Ty),
7477 std::move(Args));
7478
7479 return LowerCallTo(CLI).first;
7480}
7481
7482SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7483 SelectionDAG &DAG) const {
7484 SDLoc DL(N);
7485 EVT Ty = getPointerTy(DAG.getDataLayout());
7486 const GlobalValue *GV = N->getGlobal();
7487
7488 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7489 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7490 //
7491 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7492 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7493 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7494 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7495 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7496 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7497}
7498
7499SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7500 SelectionDAG &DAG) const {
7501 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7502 assert(N->getOffset() == 0 && "unexpected offset in global node");
7503
7504 if (DAG.getTarget().useEmulatedTLS())
7505 return LowerToTLSEmulatedModel(N, DAG);
7506
7507 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7508
7509 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7510 CallingConv::GHC)
7511 report_fatal_error("In GHC calling convention TLS is not supported");
7512
7513 SDValue Addr;
7514 switch (Model) {
7515 case TLSModel::LocalExec:
7516 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7517 break;
7518 case TLSModel::InitialExec:
7519 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7520 break;
7521 case TLSModel::LocalDynamic:
7522 case TLSModel::GeneralDynamic:
7523 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7524 : getDynamicTLSAddr(N, DAG);
7525 break;
7526 }
7527
7528 return Addr;
7529}
7530
7531// Return true if Val is equal to (setcc LHS, RHS, CC).
7532// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7533// Otherwise, return std::nullopt.
7534static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7535 ISD::CondCode CC, SDValue Val) {
7536 assert(Val->getOpcode() == ISD::SETCC);
7537 SDValue LHS2 = Val.getOperand(0);
7538 SDValue RHS2 = Val.getOperand(1);
7539 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7540
7541 if (LHS == LHS2 && RHS == RHS2) {
7542 if (CC == CC2)
7543 return true;
7544 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7545 return false;
7546 } else if (LHS == RHS2 && RHS == LHS2) {
7547 CC2 = ISD::getSetCCSwappedOperands(CC2);
7548 if (CC == CC2)
7549 return true;
7550 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7551 return false;
7552 }
7553
7554 return std::nullopt;
7555}
7556
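// Fold an integer select whose operands are constants or setccs into plain
// bitwise arithmetic on the condition where possible. For example,
// (select c, -1, y) becomes (-c | y), since -c is all-ones when c is 1 and
// zero when c is 0.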
7557 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7558 const RISCVSubtarget &Subtarget) {
7559 SDValue CondV = N->getOperand(0);
7560 SDValue TrueV = N->getOperand(1);
7561 SDValue FalseV = N->getOperand(2);
7562 MVT VT = N->getSimpleValueType(0);
7563 SDLoc DL(N);
7564
7565 if (!Subtarget.hasConditionalMoveFusion()) {
7566 // (select c, -1, y) -> -c | y
7567 if (isAllOnesConstant(TrueV)) {
7568 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7569 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7570 }
7571 // (select c, y, -1) -> (c-1) | y
7572 if (isAllOnesConstant(FalseV)) {
7573 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7574 DAG.getAllOnesConstant(DL, VT));
7575 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7576 }
7577
7578 // (select c, 0, y) -> (c-1) & y
7579 if (isNullConstant(TrueV)) {
7580 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7581 DAG.getAllOnesConstant(DL, VT));
7582 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7583 }
7584 // (select c, y, 0) -> -c & y
7585 if (isNullConstant(FalseV)) {
7586 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7587 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7588 }
7589 }
7590
7591 // select c, ~x, x --> xor -c, x
7592 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7593 const APInt &TrueVal = TrueV->getAsAPIntVal();
7594 const APInt &FalseVal = FalseV->getAsAPIntVal();
7595 if (~TrueVal == FalseVal) {
7596 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7597 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7598 }
7599 }
7600
7601 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7602 // when both truev and falsev are also setcc.
7603 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7604 FalseV.getOpcode() == ISD::SETCC) {
7605 SDValue LHS = CondV.getOperand(0);
7606 SDValue RHS = CondV.getOperand(1);
7607 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7608
7609 // (select x, x, y) -> x | y
7610 // (select !x, x, y) -> x & y
7611 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7612 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7613 DAG.getFreeze(FalseV));
7614 }
7615 // (select x, y, x) -> x & y
7616 // (select !x, y, x) -> x | y
7617 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7618 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7619 DAG.getFreeze(TrueV), FalseV);
7620 }
7621 }
7622
7623 return SDValue();
7624}
7625
7626// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7627// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7628// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7629// being `0` or `-1`. In such cases we can replace `select` with `and`.
7630// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7631// than `c0`?
7632static SDValue
7633 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7634 const RISCVSubtarget &Subtarget) {
7635 if (Subtarget.hasShortForwardBranchOpt())
7636 return SDValue();
7637
7638 unsigned SelOpNo = 0;
7639 SDValue Sel = BO->getOperand(0);
7640 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7641 SelOpNo = 1;
7642 Sel = BO->getOperand(1);
7643 }
7644
7645 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7646 return SDValue();
7647
7648 unsigned ConstSelOpNo = 1;
7649 unsigned OtherSelOpNo = 2;
7650 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7651 ConstSelOpNo = 2;
7652 OtherSelOpNo = 1;
7653 }
7654 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7655 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7656 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7657 return SDValue();
7658
7659 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7660 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7661 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7662 return SDValue();
7663
7664 SDLoc DL(Sel);
7665 EVT VT = BO->getValueType(0);
7666
7667 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7668 if (SelOpNo == 1)
7669 std::swap(NewConstOps[0], NewConstOps[1]);
7670
7671 SDValue NewConstOp =
7672 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7673 if (!NewConstOp)
7674 return SDValue();
7675
7676 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7677 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7678 return SDValue();
7679
7680 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7681 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7682 if (SelOpNo == 1)
7683 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7684 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7685
7686 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7687 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7688 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7689}
7690
7691SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7692 SDValue CondV = Op.getOperand(0);
7693 SDValue TrueV = Op.getOperand(1);
7694 SDValue FalseV = Op.getOperand(2);
7695 SDLoc DL(Op);
7696 MVT VT = Op.getSimpleValueType();
7697 MVT XLenVT = Subtarget.getXLenVT();
7698
7699 // Lower vector SELECTs to VSELECTs by splatting the condition.
7700 if (VT.isVector()) {
7701 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7702 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7703 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7704 }
7705
7706 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7707 // nodes to implement the SELECT. Performing the lowering here allows for
7708 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7709 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7710 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7711 VT.isScalarInteger()) {
7712 // (select c, t, 0) -> (czero_eqz t, c)
7713 if (isNullConstant(FalseV))
7714 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7715 // (select c, 0, f) -> (czero_nez f, c)
7716 if (isNullConstant(TrueV))
7717 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7718
7719 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7720 if (TrueV.getOpcode() == ISD::AND &&
7721 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7722 return DAG.getNode(
7723 ISD::OR, DL, VT, TrueV,
7724 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7725 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7726 if (FalseV.getOpcode() == ISD::AND &&
7727 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7728 return DAG.getNode(
7729 ISD::OR, DL, VT, FalseV,
7730 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7731
7732 // Try some other optimizations before falling back to generic lowering.
7733 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7734 return V;
7735
7736 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7737 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7738 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7739 const APInt &TrueVal = TrueV->getAsAPIntVal();
7740 const APInt &FalseVal = FalseV->getAsAPIntVal();
7741 const int TrueValCost = RISCVMatInt::getIntMatCost(
7742 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7743 const int FalseValCost = RISCVMatInt::getIntMatCost(
7744 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7745 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7746 SDValue LHSVal = DAG.getConstant(
7747 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7748 SDValue RHSVal =
7749 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7750 SDValue CMOV =
7751 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7752 DL, VT, LHSVal, CondV);
7753 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7754 }
7755
7756 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7757 // Unless we have the short forward branch optimization.
7758 if (!Subtarget.hasConditionalMoveFusion())
7759 return DAG.getNode(
7760 ISD::OR, DL, VT,
7761 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7762 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7763 }
7764
7765 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7766 return V;
7767
7768 if (Op.hasOneUse()) {
7769 unsigned UseOpc = Op->use_begin()->getOpcode();
7770 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7771 SDNode *BinOp = *Op->use_begin();
7772 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7773 DAG, Subtarget)) {
7774 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7775 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
7776 // may return a constant node and cause crash in lowerSELECT.
7777 if (NewSel.getOpcode() == ISD::SELECT)
7778 return lowerSELECT(NewSel, DAG);
7779 return NewSel;
7780 }
7781 }
7782 }
7783
7784 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7785 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7786 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7787 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7788 if (FPTV && FPFV) {
7789 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7790 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7791 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7792 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7793 DAG.getConstant(1, DL, XLenVT));
7794 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7795 }
7796 }
7797
7798 // If the condition is not an integer SETCC which operates on XLenVT, we need
7799 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7800 // (select condv, truev, falsev)
7801 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7802 if (CondV.getOpcode() != ISD::SETCC ||
7803 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7804 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7805 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7806
7807 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7808
7809 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7810 }
7811
7812 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7813 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7814 // advantage of the integer compare+branch instructions. i.e.:
7815 // (select (setcc lhs, rhs, cc), truev, falsev)
7816 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7817 SDValue LHS = CondV.getOperand(0);
7818 SDValue RHS = CondV.getOperand(1);
7819 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7820
7821 // Special case for a select of 2 constants that have a difference of 1.
7822 // Normally this is done by DAGCombine, but if the select is introduced by
7823 // type legalization or op legalization, we miss it. Restricting to SETLT
7824 // case for now because that is what signed saturating add/sub need.
7825 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7826 // but we would probably want to swap the true/false values if the condition
7827 // is SETGE/SETLE to avoid an XORI.
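  // For example, (select (setlt x, y), 4, 3) becomes (add (setlt x, y), 3).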
7828 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7829 CCVal == ISD::SETLT) {
7830 const APInt &TrueVal = TrueV->getAsAPIntVal();
7831 const APInt &FalseVal = FalseV->getAsAPIntVal();
7832 if (TrueVal - 1 == FalseVal)
7833 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7834 if (TrueVal + 1 == FalseVal)
7835 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7836 }
7837
7838 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7839 // 1 < x ? x : 1 -> 0 < x ? x : 1
7840 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7841 RHS == TrueV && LHS == FalseV) {
7842 LHS = DAG.getConstant(0, DL, VT);
7843 // 0 <u x is the same as x != 0.
7844 if (CCVal == ISD::SETULT) {
7845 std::swap(LHS, RHS);
7846 CCVal = ISD::SETNE;
7847 }
7848 }
7849
7850 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7851 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7852 RHS == FalseV) {
7853 RHS = DAG.getConstant(0, DL, VT);
7854 }
7855
7856 SDValue TargetCC = DAG.getCondCode(CCVal);
7857
7858 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7859 // (select (setcc lhs, rhs, CC), constant, falsev)
7860 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7861 std::swap(TrueV, FalseV);
7862 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7863 }
7864
7865 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7866 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7867}
7868
7869SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7870 SDValue CondV = Op.getOperand(1);
7871 SDLoc DL(Op);
7872 MVT XLenVT = Subtarget.getXLenVT();
7873
7874 if (CondV.getOpcode() == ISD::SETCC &&
7875 CondV.getOperand(0).getValueType() == XLenVT) {
7876 SDValue LHS = CondV.getOperand(0);
7877 SDValue RHS = CondV.getOperand(1);
7878 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7879
7880 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7881
7882 SDValue TargetCC = DAG.getCondCode(CCVal);
7883 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7884 LHS, RHS, TargetCC, Op.getOperand(2));
7885 }
7886
7887 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7888 CondV, DAG.getConstant(0, DL, XLenVT),
7889 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7890}
7891
7892SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7893 MachineFunction &MF = DAG.getMachineFunction();
7894 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7895
7896 SDLoc DL(Op);
7897 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7898 getPointerTy(MF.getDataLayout()));
7899
7900 // vastart just stores the address of the VarArgsFrameIndex slot into the
7901 // memory location argument.
7902 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7903 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7904 MachinePointerInfo(SV));
7905}
7906
7907SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7908 SelectionDAG &DAG) const {
7909 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7910 MachineFunction &MF = DAG.getMachineFunction();
7911 MachineFrameInfo &MFI = MF.getFrameInfo();
7912 MFI.setFrameAddressIsTaken(true);
7913 Register FrameReg = RI.getFrameRegister(MF);
7914 int XLenInBytes = Subtarget.getXLen() / 8;
7915
7916 EVT VT = Op.getValueType();
7917 SDLoc DL(Op);
7918 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7919 unsigned Depth = Op.getConstantOperandVal(0);
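  // Each caller's frame pointer is spilled at offset -2*XLEN bytes from the
  // current frame pointer, so walk up the chain by reloading from that slot
  // Depth times.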
7920 while (Depth--) {
7921 int Offset = -(XLenInBytes * 2);
7922 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7923 DAG.getIntPtrConstant(Offset, DL));
7924 FrameAddr =
7925 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7926 }
7927 return FrameAddr;
7928}
7929
7930SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7931 SelectionDAG &DAG) const {
7932 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7933 MachineFunction &MF = DAG.getMachineFunction();
7934 MachineFrameInfo &MFI = MF.getFrameInfo();
7935 MFI.setReturnAddressIsTaken(true);
7936 MVT XLenVT = Subtarget.getXLenVT();
7937 int XLenInBytes = Subtarget.getXLen() / 8;
7938
7939 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7940 return SDValue();
7941
7942 EVT VT = Op.getValueType();
7943 SDLoc DL(Op);
7944 unsigned Depth = Op.getConstantOperandVal(0);
7945 if (Depth) {
7946 int Off = -XLenInBytes;
7947 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7948 SDValue Offset = DAG.getConstant(Off, DL, VT);
7949 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7950 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7951 MachinePointerInfo());
7952 }
7953
7954 // Return the value of the return address register, marking it an implicit
7955 // live-in.
7956 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7957 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7958}
7959
7960SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7961 SelectionDAG &DAG) const {
7962 SDLoc DL(Op);
7963 SDValue Lo = Op.getOperand(0);
7964 SDValue Hi = Op.getOperand(1);
7965 SDValue Shamt = Op.getOperand(2);
7966 EVT VT = Lo.getValueType();
7967
7968 // if Shamt-XLEN < 0: // Shamt < XLEN
7969 // Lo = Lo << Shamt
7970 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7971 // else:
7972 // Lo = 0
7973 // Hi = Lo << (Shamt-XLEN)
7974
7975 SDValue Zero = DAG.getConstant(0, DL, VT);
7976 SDValue One = DAG.getConstant(1, DL, VT);
7977 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7978 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7979 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7980 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7981
7982 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7983 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7984 SDValue ShiftRightLo =
7985 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7986 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7987 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7988 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7989
7990 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7991
7992 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7993 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7994
7995 SDValue Parts[2] = {Lo, Hi};
7996 return DAG.getMergeValues(Parts, DL);
7997}
7998
7999SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8000 bool IsSRA) const {
8001 SDLoc DL(Op);
8002 SDValue Lo = Op.getOperand(0);
8003 SDValue Hi = Op.getOperand(1);
8004 SDValue Shamt = Op.getOperand(2);
8005 EVT VT = Lo.getValueType();
8006
8007 // SRA expansion:
8008 // if Shamt-XLEN < 0: // Shamt < XLEN
8009 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8010 // Hi = Hi >>s Shamt
8011 // else:
8012 // Lo = Hi >>s (Shamt-XLEN);
8013 // Hi = Hi >>s (XLEN-1)
8014 //
8015 // SRL expansion:
8016 // if Shamt-XLEN < 0: // Shamt < XLEN
8017 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8018 // Hi = Hi >>u Shamt
8019 // else:
8020 // Lo = Hi >>u (Shamt-XLEN);
8021 // Hi = 0;
8022
8023 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8024
8025 SDValue Zero = DAG.getConstant(0, DL, VT);
8026 SDValue One = DAG.getConstant(1, DL, VT);
8027 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
8028 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8029 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8030 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8031
8032 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8033 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8034 SDValue ShiftLeftHi =
8035 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8036 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8037 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8038 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8039 SDValue HiFalse =
8040 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8041
8042 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8043
8044 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8045 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8046
8047 SDValue Parts[2] = {Lo, Hi};
8048 return DAG.getMergeValues(Parts, DL);
8049}
8050
8051// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8052// legal equivalently-sized i8 type, so we can use that as a go-between.
8053SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8054 SelectionDAG &DAG) const {
8055 SDLoc DL(Op);
8056 MVT VT = Op.getSimpleValueType();
8057 SDValue SplatVal = Op.getOperand(0);
8058 // All-zeros or all-ones splats are handled specially.
8059 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8060 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8061 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8062 }
8063 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8064 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8065 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8066 }
8067 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8068 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8069 DAG.getConstant(1, DL, SplatVal.getValueType()));
8070 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8071 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8072 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8073}
8074
8075// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8076// illegal (currently only vXi64 RV32).
8077// FIXME: We could also catch non-constant sign-extended i32 values and lower
8078// them to VMV_V_X_VL.
8079SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8080 SelectionDAG &DAG) const {
8081 SDLoc DL(Op);
8082 MVT VecVT = Op.getSimpleValueType();
8083 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8084 "Unexpected SPLAT_VECTOR_PARTS lowering");
8085
8086 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8087 SDValue Lo = Op.getOperand(0);
8088 SDValue Hi = Op.getOperand(1);
8089
8090 MVT ContainerVT = VecVT;
8091 if (VecVT.isFixedLengthVector())
8092 ContainerVT = getContainerForFixedLengthVector(VecVT);
8093
8094 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8095
8096 SDValue Res =
8097 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8098
8099 if (VecVT.isFixedLengthVector())
8100 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8101
8102 return Res;
8103}
8104
8105// Custom-lower extensions from mask vectors by using a vselect either with 1
8106// for zero/any-extension or -1 for sign-extension:
8107// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8108// Note that any-extension is lowered identically to zero-extension.
8109SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8110 int64_t ExtTrueVal) const {
8111 SDLoc DL(Op);
8112 MVT VecVT = Op.getSimpleValueType();
8113 SDValue Src = Op.getOperand(0);
8114 // Only custom-lower extensions from mask types
8115 assert(Src.getValueType().isVector() &&
8116 Src.getValueType().getVectorElementType() == MVT::i1);
8117
8118 if (VecVT.isScalableVector()) {
8119 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8120 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
8121 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8122 }
8123
8124 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8125 MVT I1ContainerVT =
8126 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8127
8128 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8129
8130 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8131
8132 MVT XLenVT = Subtarget.getXLenVT();
8133 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8134 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
8135
8136 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8137 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8138 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8139 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8140 SDValue Select =
8141 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8142 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8143
8144 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8145}
8146
8147SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8148 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8149 MVT ExtVT = Op.getSimpleValueType();
8150 // Only custom-lower extensions from fixed-length vector types.
8151 if (!ExtVT.isFixedLengthVector())
8152 return Op;
8153 MVT VT = Op.getOperand(0).getSimpleValueType();
8154 // Grab the canonical container type for the extended type. Infer the smaller
8155 // type from that to ensure the same number of vector elements, as we know
8156 // the LMUL will be sufficient to hold the smaller type.
8157 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8158 // Get the extended container type manually to ensure the same number of
8159 // vector elements between source and dest.
8160 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8161 ContainerExtVT.getVectorElementCount());
8162
8163 SDValue Op1 =
8164 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8165
8166 SDLoc DL(Op);
8167 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8168
8169 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8170
8171 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8172}
8173
8174// Custom-lower truncations from vectors to mask vectors by using a mask and a
8175// setcc operation:
8176// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8177SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8178 SelectionDAG &DAG) const {
8179 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8180 SDLoc DL(Op);
8181 EVT MaskVT = Op.getValueType();
8182 // Only expect to custom-lower truncations to mask types
8183 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8184 "Unexpected type for vector mask lowering");
8185 SDValue Src = Op.getOperand(0);
8186 MVT VecVT = Src.getSimpleValueType();
8187 SDValue Mask, VL;
8188 if (IsVPTrunc) {
8189 Mask = Op.getOperand(1);
8190 VL = Op.getOperand(2);
8191 }
8192 // If this is a fixed vector, we need to convert it to a scalable vector.
8193 MVT ContainerVT = VecVT;
8194
8195 if (VecVT.isFixedLengthVector()) {
8196 ContainerVT = getContainerForFixedLengthVector(VecVT);
8197 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8198 if (IsVPTrunc) {
8199 MVT MaskContainerVT =
8200 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8201 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8202 }
8203 }
8204
8205 if (!IsVPTrunc) {
8206 std::tie(Mask, VL) =
8207 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8208 }
8209
8210 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8211 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8212
8213 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8214 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8215 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8216 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8217
8218 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8219 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8220 DAG.getUNDEF(ContainerVT), Mask, VL);
8221 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8222 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8223 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8224 if (MaskVT.isFixedLengthVector())
8225 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8226 return Trunc;
8227}
8228
8229SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8230 SelectionDAG &DAG) const {
8231 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8232 SDLoc DL(Op);
8233
8234 MVT VT = Op.getSimpleValueType();
8235 // Only custom-lower vector truncates
8236 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8237
8238 // Truncates to mask types are handled differently
8239 if (VT.getVectorElementType() == MVT::i1)
8240 return lowerVectorMaskTruncLike(Op, DAG);
8241
8242 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8243 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8244 // truncate by one power of two at a time.
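// For example, an i64->i8 element truncate is emitted as three halving steps,
// each of which typically selects to a narrowing shift (illustrative):
//   vnsrl.wi v?, v?, 0   # i64 -> i32
//   vnsrl.wi v?, v?, 0   # i32 -> i16
//   vnsrl.wi v?, v?, 0   # i16 -> i8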
8245 MVT DstEltVT = VT.getVectorElementType();
8246
8247 SDValue Src = Op.getOperand(0);
8248 MVT SrcVT = Src.getSimpleValueType();
8249 MVT SrcEltVT = SrcVT.getVectorElementType();
8250
8251 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8252 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8253 "Unexpected vector truncate lowering");
8254
8255 MVT ContainerVT = SrcVT;
8256 SDValue Mask, VL;
8257 if (IsVPTrunc) {
8258 Mask = Op.getOperand(1);
8259 VL = Op.getOperand(2);
8260 }
8261 if (SrcVT.isFixedLengthVector()) {
8262 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8263 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8264 if (IsVPTrunc) {
8265 MVT MaskVT = getMaskTypeFor(ContainerVT);
8266 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8267 }
8268 }
8269
8270 SDValue Result = Src;
8271 if (!IsVPTrunc) {
8272 std::tie(Mask, VL) =
8273 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8274 }
8275
8276 LLVMContext &Context = *DAG.getContext();
8277 const ElementCount Count = ContainerVT.getVectorElementCount();
8278 do {
8279 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8280 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8281 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8282 Mask, VL);
8283 } while (SrcEltVT != DstEltVT);
8284
8285 if (SrcVT.isFixedLengthVector())
8286 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8287
8288 return Result;
8289}
8290
8291SDValue
8292RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8293 SelectionDAG &DAG) const {
8294 SDLoc DL(Op);
8295 SDValue Chain = Op.getOperand(0);
8296 SDValue Src = Op.getOperand(1);
8297 MVT VT = Op.getSimpleValueType();
8298 MVT SrcVT = Src.getSimpleValueType();
8299 MVT ContainerVT = VT;
8300 if (VT.isFixedLengthVector()) {
8301 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8302 ContainerVT =
8303 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8304 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8305 }
8306
8307 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8308
8309 // RVV can only widen/truncate fp to types double/half the size of the source.
8310 if ((VT.getVectorElementType() == MVT::f64 &&
8311 (SrcVT.getVectorElementType() == MVT::f16 ||
8312 SrcVT.getVectorElementType() == MVT::bf16)) ||
8313 ((VT.getVectorElementType() == MVT::f16 ||
8314 VT.getVectorElementType() == MVT::bf16) &&
8315 SrcVT.getVectorElementType() == MVT::f64)) {
8316 // For double rounding, the intermediate rounding should be round-to-odd.
8317 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8318 ? RISCVISD::STRICT_FP_EXTEND_VL
8319 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8320 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8321 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8322 Chain, Src, Mask, VL);
8323 Chain = Src.getValue(1);
8324 }
8325
8326 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8327 ? RISCVISD::STRICT_FP_EXTEND_VL
8328 : RISCVISD::STRICT_FP_ROUND_VL;
8329 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8330 Chain, Src, Mask, VL);
8331 if (VT.isFixedLengthVector()) {
8332 // StrictFP operations have two result values. Their lowered result should
8333 // have the same number of result values.
8334 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8335 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8336 }
8337 return Res;
8338}
8339
8340SDValue
8341RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8342 SelectionDAG &DAG) const {
8343 bool IsVP =
8344 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8345 bool IsExtend =
8346 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8347 // RVV can only truncate fp to types half the size of the source. We
8348 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8349 // conversion instruction.
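// For example, an f64->f16 round is split into two steps (illustrative):
//   vfncvt.rod.f.f.w v?, v?   # f64 -> f32, round-to-odd
//   vfncvt.f.f.w     v?, v?   # f32 -> f16, dynamic rounding mode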
8350 SDLoc DL(Op);
8351 MVT VT = Op.getSimpleValueType();
8352
8353 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8354
8355 SDValue Src = Op.getOperand(0);
8356 MVT SrcVT = Src.getSimpleValueType();
8357
8358 bool IsDirectExtend =
8359 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8360 (SrcVT.getVectorElementType() != MVT::f16 &&
8361 SrcVT.getVectorElementType() != MVT::bf16));
8362 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8363 VT.getVectorElementType() != MVT::bf16) ||
8364 SrcVT.getVectorElementType() != MVT::f64);
8365
8366 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8367
8368 // Prepare any fixed-length vector operands.
8369 MVT ContainerVT = VT;
8370 SDValue Mask, VL;
8371 if (IsVP) {
8372 Mask = Op.getOperand(1);
8373 VL = Op.getOperand(2);
8374 }
8375 if (VT.isFixedLengthVector()) {
8376 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8377 ContainerVT =
8378 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8379 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8380 if (IsVP) {
8381 MVT MaskVT = getMaskTypeFor(ContainerVT);
8382 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8383 }
8384 }
8385
8386 if (!IsVP)
8387 std::tie(Mask, VL) =
8388 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8389
8390 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8391
8392 if (IsDirectConv) {
8393 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8394 if (VT.isFixedLengthVector())
8395 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8396 return Src;
8397 }
8398
8399 unsigned InterConvOpc =
8400 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8401
8402 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8403 SDValue IntermediateConv =
8404 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8405 SDValue Result =
8406 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8407 if (VT.isFixedLengthVector())
8408 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8409 return Result;
8410}
8411
8412// Given a scalable vector type and an index into it, returns the type for the
8413// smallest subvector that the index fits in. This can be used to reduce LMUL
8414// for operations like vslidedown.
8415//
8416// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8417static std::optional<MVT>
8418getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8419 const RISCVSubtarget &Subtarget) {
8420 assert(VecVT.isScalableVector());
8421 const unsigned EltSize = VecVT.getScalarSizeInBits();
8422 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8423 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8424 MVT SmallerVT;
8425 if (MaxIdx < MinVLMAX)
8426 SmallerVT = getLMUL1VT(VecVT);
8427 else if (MaxIdx < MinVLMAX * 2)
8428 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8429 else if (MaxIdx < MinVLMAX * 4)
8430 SmallerVT = getLMUL1VT(VecVT)
8431 .getDoubleNumVectorElementsVT()
8432 .getDoubleNumVectorElementsVT();
8433 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8434 return std::nullopt;
8435 return SmallerVT;
8436}
8437
8438// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8439// first position of a vector, and that vector is slid up to the insert index.
8440// By limiting the active vector length to index+1 and merging with the
8441// original vector (with an undisturbed tail policy for elements >= VL), we
8442// achieve the desired result of leaving all elements untouched except the one
8443// at VL-1, which is replaced with the desired value.
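// For example, inserting a scalar at index 5 of a v8i32 typically yields a
// sequence along these lines (illustrative; register choices and the extra
// vsetvli for the scalar move are omitted):
//   vsetivli    zero, 6, e32, m2, tu, ma   # VL = index + 1, tail undisturbed
//   vmv.s.x     v12, a0                    # scalar into element 0 of a temp
//   vslideup.vi v8, v12, 5                 # slide it up to index 5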
8444SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8445 SelectionDAG &DAG) const {
8446 SDLoc DL(Op);
8447 MVT VecVT = Op.getSimpleValueType();
8448 SDValue Vec = Op.getOperand(0);
8449 SDValue Val = Op.getOperand(1);
8450 SDValue Idx = Op.getOperand(2);
8451
8452 if (VecVT.getVectorElementType() == MVT::i1) {
8453 // FIXME: For now we just promote to an i8 vector and insert into that,
8454 // but this is probably not optimal.
8455 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8456 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8457 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8458 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8459 }
8460
8461 MVT ContainerVT = VecVT;
8462 // If the operand is a fixed-length vector, convert to a scalable one.
8463 if (VecVT.isFixedLengthVector()) {
8464 ContainerVT = getContainerForFixedLengthVector(VecVT);
8465 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8466 }
8467
8468 // If we know the index we're going to insert at, we can shrink Vec so that
8469 // we're performing the scalar inserts and slideup on a smaller LMUL.
8470 MVT OrigContainerVT = ContainerVT;
8471 SDValue OrigVec = Vec;
8472 SDValue AlignedIdx;
8473 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8474 const unsigned OrigIdx = IdxC->getZExtValue();
8475 // Do we know an upper bound on LMUL?
8476 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8477 DL, DAG, Subtarget)) {
8478 ContainerVT = *ShrunkVT;
8479 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8480 }
8481
8482 // If we're compiling for an exact VLEN value, we can always perform
8483 // the insert in m1 as we can determine the register corresponding to
8484 // the index in the register group.
8485 const MVT M1VT = getLMUL1VT(ContainerVT);
8486 if (auto VLEN = Subtarget.getRealVLen();
8487 VLEN && ContainerVT.bitsGT(M1VT)) {
8488 EVT ElemVT = VecVT.getVectorElementType();
8489 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8490 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8491 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8492 unsigned ExtractIdx =
8493 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8494 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8495 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8496 ContainerVT = M1VT;
8497 }
8498
8499 if (AlignedIdx)
8500 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8501 AlignedIdx);
8502 }
8503
8504 MVT XLenVT = Subtarget.getXLenVT();
8505
8506 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8507 // Even i64-element vectors on RV32 can be lowered without scalar
8508 // legalization if the most-significant 32 bits of the value are not affected
8509 // by the sign-extension of the lower 32 bits.
8510 // TODO: We could also catch sign extensions of a 32-bit value.
8511 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8512 const auto *CVal = cast<ConstantSDNode>(Val);
8513 if (isInt<32>(CVal->getSExtValue())) {
8514 IsLegalInsert = true;
8515 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8516 }
8517 }
8518
8519 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8520
8521 SDValue ValInVec;
8522
8523 if (IsLegalInsert) {
8524 unsigned Opc =
8525 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8526 if (isNullConstant(Idx)) {
8527 if (!VecVT.isFloatingPoint())
8528 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8529 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8530
8531 if (AlignedIdx)
8532 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8533 Vec, AlignedIdx);
8534 if (!VecVT.isFixedLengthVector())
8535 return Vec;
8536 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8537 }
8538 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8539 } else {
8540 // On RV32, i64-element vectors must be specially handled to place the
8541 // value at element 0, by using two vslide1down instructions in sequence on
8542 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8543 // this.
8544 SDValue ValLo, ValHi;
8545 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8546 MVT I32ContainerVT =
8547 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8548 SDValue I32Mask =
8549 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8550 // Limit the active VL to two.
8551 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8552 // If the Idx is 0 we can insert directly into the vector.
8553 if (isNullConstant(Idx)) {
8554 // First slide in the lo value, then the hi above it. We use slide1down
8555 // to avoid the register group overlap constraint of vslide1up.
8556 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8557 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8558 // If the source vector is undef don't pass along the tail elements from
8559 // the previous slide1down.
8560 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8561 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8562 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8563 // Bitcast back to the right container type.
8564 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8565
8566 if (AlignedIdx)
8567 ValInVec =
8568 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8569 ValInVec, AlignedIdx);
8570 if (!VecVT.isFixedLengthVector())
8571 return ValInVec;
8572 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8573 }
8574
8575 // First slide in the lo value, then the hi above it. We use slide1down
8576 // to avoid the register group overlap constraint of vslide1up.
8577 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8578 DAG.getUNDEF(I32ContainerVT),
8579 DAG.getUNDEF(I32ContainerVT), ValLo,
8580 I32Mask, InsertI64VL);
8581 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8582 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8583 I32Mask, InsertI64VL);
8584 // Bitcast back to the right container type.
8585 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8586 }
8587
8588 // Now that the value is in a vector, slide it into position.
8589 SDValue InsertVL =
8590 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8591
8592 // Use tail agnostic policy if Idx is the last index of Vec.
8593 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8594 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8595 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8596 Policy = RISCVII::TAIL_AGNOSTIC;
8597 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8598 Idx, Mask, InsertVL, Policy);
8599
8600 if (AlignedIdx)
8601 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8602 Slideup, AlignedIdx);
8603 if (!VecVT.isFixedLengthVector())
8604 return Slideup;
8605 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8606}
8607
8608// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8609// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8610// types this is done using VMV_X_S to allow us to glean information about the
8611// sign bits of the result.
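// For example, extracting element 3 of a v8i32 typically becomes
// (illustrative):
//   vsetivli      zero, 1, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 3
//   vmv.x.s       a0, v8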
8612SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8613 SelectionDAG &DAG) const {
8614 SDLoc DL(Op);
8615 SDValue Idx = Op.getOperand(1);
8616 SDValue Vec = Op.getOperand(0);
8617 EVT EltVT = Op.getValueType();
8618 MVT VecVT = Vec.getSimpleValueType();
8619 MVT XLenVT = Subtarget.getXLenVT();
8620
8621 if (VecVT.getVectorElementType() == MVT::i1) {
8622 // Use vfirst.m to extract the first bit.
8623 if (isNullConstant(Idx)) {
8624 MVT ContainerVT = VecVT;
8625 if (VecVT.isFixedLengthVector()) {
8626 ContainerVT = getContainerForFixedLengthVector(VecVT);
8627 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8628 }
8629 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8630 SDValue Vfirst =
8631 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8632 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8633 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8634 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8635 }
8636 if (VecVT.isFixedLengthVector()) {
8637 unsigned NumElts = VecVT.getVectorNumElements();
8638 if (NumElts >= 8) {
8639 MVT WideEltVT;
8640 unsigned WidenVecLen;
8641 SDValue ExtractElementIdx;
8642 SDValue ExtractBitIdx;
8643 unsigned MaxEEW = Subtarget.getELen();
8644 MVT LargestEltVT = MVT::getIntegerVT(
8645 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8646 if (NumElts <= LargestEltVT.getSizeInBits()) {
8647 assert(isPowerOf2_32(NumElts) &&
8648 "the number of elements should be power of 2");
8649 WideEltVT = MVT::getIntegerVT(NumElts);
8650 WidenVecLen = 1;
8651 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8652 ExtractBitIdx = Idx;
8653 } else {
8654 WideEltVT = LargestEltVT;
8655 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8656 // extract element index = index / element width
8657 ExtractElementIdx = DAG.getNode(
8658 ISD::SRL, DL, XLenVT, Idx,
8659 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8660 // mask bit index = index % element width
8661 ExtractBitIdx = DAG.getNode(
8662 ISD::AND, DL, XLenVT, Idx,
8663 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8664 }
8665 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8666 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8667 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8668 Vec, ExtractElementIdx);
8669 // Extract the bit from GPR.
8670 SDValue ShiftRight =
8671 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8672 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8673 DAG.getConstant(1, DL, XLenVT));
8674 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8675 }
8676 }
8677 // Otherwise, promote to an i8 vector and extract from that.
8678 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8679 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8680 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8681 }
8682
8683 // If this is a fixed vector, we need to convert it to a scalable vector.
8684 MVT ContainerVT = VecVT;
8685 if (VecVT.isFixedLengthVector()) {
8686 ContainerVT = getContainerForFixedLengthVector(VecVT);
8687 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8688 }
8689
8690 // If we're compiling for an exact VLEN value and we have a known
8691 // constant index, we can always perform the extract in m1 (or
8692 // smaller) as we can determine the register corresponding to
8693 // the index in the register group.
8694 const auto VLen = Subtarget.getRealVLen();
8695 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8696 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8697 MVT M1VT = getLMUL1VT(ContainerVT);
8698 unsigned OrigIdx = IdxC->getZExtValue();
8699 EVT ElemVT = VecVT.getVectorElementType();
8700 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8701 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8702 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8703 unsigned ExtractIdx =
8704 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8705 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8706 DAG.getVectorIdxConstant(ExtractIdx, DL));
8707 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8708 ContainerVT = M1VT;
8709 }
8710
8711 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8712 // contains our index.
8713 std::optional<uint64_t> MaxIdx;
8714 if (VecVT.isFixedLengthVector())
8715 MaxIdx = VecVT.getVectorNumElements() - 1;
8716 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8717 MaxIdx = IdxC->getZExtValue();
8718 if (MaxIdx) {
8719 if (auto SmallerVT =
8720 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8721 ContainerVT = *SmallerVT;
8722 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8723 DAG.getConstant(0, DL, XLenVT));
8724 }
8725 }
8726
8727 // If after narrowing, the required slide is still greater than LMUL2,
8728 // fall back to generic expansion and go through the stack. This is done
8729 // for a subtle reason: extracting *all* elements out of a vector is
8730 // widely expected to be linear in vector size, but because vslidedown
8731 // is linear in LMUL, performing N extracts using vslidedown becomes
8732 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8733 // seems to have the same problem (the store is linear in LMUL), but the
8734 // generic expansion *memoizes* the store, and thus for many extracts of
8735 // the same vector we end up with one store and a bunch of loads.
8736 // TODO: We don't have the same code for insert_vector_elt because we
8737 // have BUILD_VECTOR and handle the degenerate case there. Should we
8738 // consider adding an inverse BUILD_VECTOR node?
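// For example, extracting all N elements of an LMUL=8 vector this way would
// issue N vslidedowns, each costing work roughly proportional to eight vector
// registers, whereas the memoized stack expansion issues one LMUL=8 store
// followed by N scalar loads.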
8739 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8740 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8741 return SDValue();
8742
8743 // If the index is 0, the vector is already in the right position.
8744 if (!isNullConstant(Idx)) {
8745 // Use a VL of 1 to avoid processing more elements than we need.
8746 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8747 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8748 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8749 }
8750
8751 if (!EltVT.isInteger()) {
8752 // Floating-point extracts are handled in TableGen.
8753 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8754 DAG.getVectorIdxConstant(0, DL));
8755 }
8756
8757 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8758 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8759}
8760
8761// Some RVV intrinsics may claim that they want an integer operand to be
8762// promoted or expanded.
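// For example, the i8 scalar operand of a .vx intrinsic is extended to XLenVT
// here (sign-extended if it is a constant so the simm5 .vi form can still
// match), while an i64 scalar on RV32 is either truncated when its sign bits
// allow it or split and re-assembled with SEW=32 slides or a splat.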
8763 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8764 const RISCVSubtarget &Subtarget) {
8765 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8766 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8767 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8768 "Unexpected opcode");
8769
8770 if (!Subtarget.hasVInstructions())
8771 return SDValue();
8772
8773 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8774 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8775 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8776
8777 SDLoc DL(Op);
8778
8779 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8780 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8781 if (!II || !II->hasScalarOperand())
8782 return SDValue();
8783
8784 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8785 assert(SplatOp < Op.getNumOperands());
8786
8787 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8788 SDValue &ScalarOp = Operands[SplatOp];
8789 MVT OpVT = ScalarOp.getSimpleValueType();
8790 MVT XLenVT = Subtarget.getXLenVT();
8791
8792 // If this isn't a scalar, or its type is XLenVT we're done.
8793 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8794 return SDValue();
8795
8796 // Simplest case is that the operand needs to be promoted to XLenVT.
8797 if (OpVT.bitsLT(XLenVT)) {
8798 // If the operand is a constant, sign extend to increase our chances
8799 // of being able to use a .vi instruction. ANY_EXTEND would become
8800 // a zero extend and the simm5 check in isel would fail.
8801 // FIXME: Should we ignore the upper bits in isel instead?
8802 unsigned ExtOpc =
8803 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8804 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8805 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8806 }
8807
8808 // Use the previous operand to get the vXi64 VT. The result might be a mask
8809 // VT for compares. Using the previous operand assumes that the previous
8810 // operand will never have a smaller element size than a scalar operand and
8811 // that a widening operation never uses SEW=64.
8812 // NOTE: If this fails the below assert, we can probably just find the
8813 // element count from any operand or result and use it to construct the VT.
8814 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8815 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8816
8817 // The more complex case is when the scalar is larger than XLenVT.
8818 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8819 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8820
8821 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8822 // instruction to sign-extend since SEW>XLEN.
8823 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8824 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8825 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8826 }
8827
8828 switch (IntNo) {
8829 case Intrinsic::riscv_vslide1up:
8830 case Intrinsic::riscv_vslide1down:
8831 case Intrinsic::riscv_vslide1up_mask:
8832 case Intrinsic::riscv_vslide1down_mask: {
8833 // We need to special case these when the scalar is larger than XLen.
8834 unsigned NumOps = Op.getNumOperands();
8835 bool IsMasked = NumOps == 7;
8836
8837 // Convert the vector source to the equivalent nxvXi32 vector.
8838 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8839 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8840 SDValue ScalarLo, ScalarHi;
8841 std::tie(ScalarLo, ScalarHi) =
8842 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8843
8844 // Double the VL since we halved SEW.
8845 SDValue AVL = getVLOperand(Op);
8846 SDValue I32VL;
8847
8848 // Optimize for constant AVL
8849 if (isa<ConstantSDNode>(AVL)) {
8850 const auto [MinVLMAX, MaxVLMAX] =
8851 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8852
8853 uint64_t AVLInt = AVL->getAsZExtVal();
8854 if (AVLInt <= MinVLMAX) {
8855 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8856 } else if (AVLInt >= 2 * MaxVLMAX) {
8857 // Just set vl to VLMAX in this situation
8858 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8859 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8860 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8861 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8862 SDValue SETVLMAX = DAG.getTargetConstant(
8863 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8864 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8865 LMUL);
8866 } else {
8867 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working VL
8868 // depends on the hardware implementation, so let the code below
8869 // compute it with vsetvli.
8870 }
8871 }
8872 if (!I32VL) {
8873 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8874 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8875 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8876 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8877 SDValue SETVL =
8878 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8879 // Use the vsetvli instruction to get the actual VL in use, which depends
8880 // on the hardware implementation.
8881 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8882 SEW, LMUL);
8883 I32VL =
8884 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8885 }
8886
8887 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8888
8889 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8890 // instructions.
8891 SDValue Passthru;
8892 if (IsMasked)
8893 Passthru = DAG.getUNDEF(I32VT);
8894 else
8895 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8896
8897 if (IntNo == Intrinsic::riscv_vslide1up ||
8898 IntNo == Intrinsic::riscv_vslide1up_mask) {
8899 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8900 ScalarHi, I32Mask, I32VL);
8901 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8902 ScalarLo, I32Mask, I32VL);
8903 } else {
8904 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8905 ScalarLo, I32Mask, I32VL);
8906 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8907 ScalarHi, I32Mask, I32VL);
8908 }
8909
8910 // Convert back to nxvXi64.
8911 Vec = DAG.getBitcast(VT, Vec);
8912
8913 if (!IsMasked)
8914 return Vec;
8915 // Apply mask after the operation.
8916 SDValue Mask = Operands[NumOps - 3];
8917 SDValue MaskedOff = Operands[1];
8918 // Assume Policy operand is the last operand.
8919 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8920 // We don't need to select maskedoff if it's undef.
8921 if (MaskedOff.isUndef())
8922 return Vec;
8923 // TAMU
8924 if (Policy == RISCVII::TAIL_AGNOSTIC)
8925 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8926 DAG.getUNDEF(VT), AVL);
8927 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8928 // It's fine because vmerge does not care about the mask policy.
8929 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8930 MaskedOff, AVL);
8931 }
8932 }
8933
8934 // We need to convert the scalar to a splat vector.
8935 SDValue VL = getVLOperand(Op);
8936 assert(VL.getValueType() == XLenVT);
8937 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8938 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8939}
8940
8941// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8942// scalable vector llvm.get.vector.length for now.
8943//
8944// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8945// (vscale * VF). The vscale and VF are independent of element width. We use
8946// SEW=8 for the vsetvli because it is the only element width that supports all
8947 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8948 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8949// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8950// SEW and LMUL are better for the surrounding vector instructions.
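// For example (illustrative, assuming RVVBitsPerBlock=64): for a scalable VF
// of 2, the LMUL-1 VF at SEW=8 is 64/8 = 8, so we need the fractional LMUL
// 8/2 -> mf4, and the node lowers roughly to:
//   vsetvli a0, a1, e8, mf4, ta, ma   # returns min(AVL, vscale * 2)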
8951 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8952 const RISCVSubtarget &Subtarget) {
8953 MVT XLenVT = Subtarget.getXLenVT();
8954
8955 // The smallest LMUL is only valid for the smallest element width.
8956 const unsigned ElementWidth = 8;
8957
8958 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8959 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8960 // We don't support VF==1 with ELEN==32.
8961 [[maybe_unused]] unsigned MinVF =
8962 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8963
8964 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8965 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8966 "Unexpected VF");
8967
8968 bool Fractional = VF < LMul1VF;
8969 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8970 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8971 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8972
8973 SDLoc DL(N);
8974
8975 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8976 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8977
8978 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8979
8980 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8981 SDValue Res =
8982 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8983 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8984}
8985
8986 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8987 const RISCVSubtarget &Subtarget) {
8988 SDValue Op0 = N->getOperand(1);
8989 MVT OpVT = Op0.getSimpleValueType();
8990 MVT ContainerVT = OpVT;
8991 if (OpVT.isFixedLengthVector()) {
8992 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8993 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8994 }
8995 MVT XLenVT = Subtarget.getXLenVT();
8996 SDLoc DL(N);
8997 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8998 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8999 if (isOneConstant(N->getOperand(2)))
9000 return Res;
9001
9002 // Convert -1 to VL.
9003 SDValue Setcc =
9004 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9005 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9006 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9007}
9008
9009static inline void promoteVCIXScalar(const SDValue &Op,
9010 SmallVectorImpl<SDValue> &Operands,
9011 SelectionDAG &DAG) {
9012 const RISCVSubtarget &Subtarget =
9013 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9014
9015 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9016 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9017 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9018 SDLoc DL(Op);
9019
9020 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9021 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9022 if (!II || !II->hasScalarOperand())
9023 return;
9024
9025 unsigned SplatOp = II->ScalarOperand + 1;
9026 assert(SplatOp < Op.getNumOperands());
9027
9028 SDValue &ScalarOp = Operands[SplatOp];
9029 MVT OpVT = ScalarOp.getSimpleValueType();
9030 MVT XLenVT = Subtarget.getXLenVT();
9031
9032 // The code below is partially copied from lowerVectorIntrinsicScalars.
9033 // If this isn't a scalar, or its type is XLenVT we're done.
9034 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9035 return;
9036
9037 // Manually emit promote operation for scalar operation.
9038 if (OpVT.bitsLT(XLenVT)) {
9039 unsigned ExtOpc =
9040 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9041 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9042 }
9043
9044 return;
9045}
9046
9047static void processVCIXOperands(SDValue &OrigOp,
9048 SmallVectorImpl<SDValue> &Operands,
9049 SelectionDAG &DAG) {
9050 promoteVCIXScalar(OrigOp, Operands, DAG);
9051 const RISCVSubtarget &Subtarget =
9052 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9053 for (SDValue &V : Operands) {
9054 EVT ValType = V.getValueType();
9055 if (ValType.isVector() && ValType.isFloatingPoint()) {
9056 MVT InterimIVT =
9057 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9058 ValType.getVectorElementCount());
9059 V = DAG.getBitcast(InterimIVT, V);
9060 }
9061 if (ValType.isFixedLengthVector()) {
9062 MVT OpContainerVT = getContainerForFixedLengthVector(
9063 DAG, V.getSimpleValueType(), Subtarget);
9064 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9065 }
9066 }
9067}
9068
9069// LMUL * VLEN should be greater than or equal to EGS * SEW
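// For example (illustrative, with VLEN=128): a vaes* operation with EGS=4 and
// SEW=32 needs LMUL * 128 >= 4 * 32 = 128, so nxv4i32 (LMUL=2) is accepted
// while nxv1i32 (LMUL=1/2) is rejected.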
9070static inline bool isValidEGW(int EGS, EVT VT,
9071 const RISCVSubtarget &Subtarget) {
9072 return (Subtarget.getRealMinVLen() *
9073 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9074 EGS * VT.getScalarSizeInBits();
9075}
9076
9077SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9078 SelectionDAG &DAG) const {
9079 unsigned IntNo = Op.getConstantOperandVal(0);
9080 SDLoc DL(Op);
9081 MVT XLenVT = Subtarget.getXLenVT();
9082
9083 switch (IntNo) {
9084 default:
9085 break; // Don't custom lower most intrinsics.
9086 case Intrinsic::thread_pointer: {
9087 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9088 return DAG.getRegister(RISCV::X4, PtrVT);
9089 }
9090 case Intrinsic::riscv_orc_b:
9091 case Intrinsic::riscv_brev8:
9092 case Intrinsic::riscv_sha256sig0:
9093 case Intrinsic::riscv_sha256sig1:
9094 case Intrinsic::riscv_sha256sum0:
9095 case Intrinsic::riscv_sha256sum1:
9096 case Intrinsic::riscv_sm3p0:
9097 case Intrinsic::riscv_sm3p1: {
9098 unsigned Opc;
9099 switch (IntNo) {
9100 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9101 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9102 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9103 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9104 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9105 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9106 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9107 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9108 }
9109
9110 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9111 SDValue NewOp =
9112 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9113 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
9114 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9115 }
9116
9117 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9118 }
9119 case Intrinsic::riscv_sm4ks:
9120 case Intrinsic::riscv_sm4ed: {
9121 unsigned Opc =
9122 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9123
9124 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9125 SDValue NewOp0 =
9126 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9127 SDValue NewOp1 =
9128 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9129 SDValue Res =
9130 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
9131 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9132 }
9133
9134 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9135 Op.getOperand(3));
9136 }
9137 case Intrinsic::riscv_zip:
9138 case Intrinsic::riscv_unzip: {
9139 unsigned Opc =
9140 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9141 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9142 }
9143 case Intrinsic::riscv_mopr: {
9144 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9145 SDValue NewOp =
9146 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9147 SDValue Res = DAG.getNode(
9148 RISCVISD::MOPR, DL, MVT::i64, NewOp,
9149 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
9150 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9151 }
9152 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9153 Op.getOperand(2));
9154 }
9155
9156 case Intrinsic::riscv_moprr: {
9157 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9158 SDValue NewOp0 =
9159 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9160 SDValue NewOp1 =
9161 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9162 SDValue Res = DAG.getNode(
9163 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
9164 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
9165 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9166 }
9167 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9168 Op.getOperand(2), Op.getOperand(3));
9169 }
9170 case Intrinsic::riscv_clmul:
9171 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9172 SDValue NewOp0 =
9173 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9174 SDValue NewOp1 =
9175 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9176 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
9177 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9178 }
9179 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9180 Op.getOperand(2));
9181 case Intrinsic::riscv_clmulh:
9182 case Intrinsic::riscv_clmulr: {
9183 unsigned Opc =
9184 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9185 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9186 SDValue NewOp0 =
9187 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9188 SDValue NewOp1 =
9189 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9190 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
9191 DAG.getConstant(32, DL, MVT::i64));
9192 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
9193 DAG.getConstant(32, DL, MVT::i64));
9194 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
9195 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
9196 DAG.getConstant(32, DL, MVT::i64));
9197 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9198 }
9199
9200 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9201 }
9202 case Intrinsic::experimental_get_vector_length:
9203 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9204 case Intrinsic::experimental_cttz_elts:
9205 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9206 case Intrinsic::riscv_vmv_x_s: {
9207 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9208 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9209 }
9210 case Intrinsic::riscv_vfmv_f_s:
9211 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9212 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9213 case Intrinsic::riscv_vmv_v_x:
9214 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9215 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9216 Subtarget);
9217 case Intrinsic::riscv_vfmv_v_f:
9218 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9219 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9220 case Intrinsic::riscv_vmv_s_x: {
9221 SDValue Scalar = Op.getOperand(2);
9222
9223 if (Scalar.getValueType().bitsLE(XLenVT)) {
9224 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9225 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9226 Op.getOperand(1), Scalar, Op.getOperand(3));
9227 }
9228
9229 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9230
9231 // This is an i64 value that lives in two scalar registers. We have to
9232 // insert this in a convoluted way. First we build vXi64 splat containing
9233 // the two values that we assemble using some bit math. Next we'll use
9234 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9235 // to merge element 0 from our splat into the source vector.
9236 // FIXME: This is probably not the best way to do this, but it is
9237 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9238 // point.
9239 // sw lo, (a0)
9240 // sw hi, 4(a0)
9241 // vlse vX, (a0)
9242 //
9243 // vid.v vVid
9244 // vmseq.vx mMask, vVid, 0
9245 // vmerge.vvm vDest, vSrc, vVal, mMask
9246 MVT VT = Op.getSimpleValueType();
9247 SDValue Vec = Op.getOperand(1);
9248 SDValue VL = getVLOperand(Op);
9249
9250 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9251 if (Op.getOperand(1).isUndef())
9252 return SplattedVal;
9253 SDValue SplattedIdx =
9254 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9255 DAG.getConstant(0, DL, MVT::i32), VL);
9256
9257 MVT MaskVT = getMaskTypeFor(VT);
9258 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9259 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9260 SDValue SelectCond =
9261 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9262 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9263 DAG.getUNDEF(MaskVT), Mask, VL});
9264 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9265 Vec, DAG.getUNDEF(VT), VL);
9266 }
9267 case Intrinsic::riscv_vfmv_s_f:
9268 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9269 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9270 // EGS * EEW >= 128 bits
9271 case Intrinsic::riscv_vaesdf_vv:
9272 case Intrinsic::riscv_vaesdf_vs:
9273 case Intrinsic::riscv_vaesdm_vv:
9274 case Intrinsic::riscv_vaesdm_vs:
9275 case Intrinsic::riscv_vaesef_vv:
9276 case Intrinsic::riscv_vaesef_vs:
9277 case Intrinsic::riscv_vaesem_vv:
9278 case Intrinsic::riscv_vaesem_vs:
9279 case Intrinsic::riscv_vaeskf1:
9280 case Intrinsic::riscv_vaeskf2:
9281 case Intrinsic::riscv_vaesz_vs:
9282 case Intrinsic::riscv_vsm4k:
9283 case Intrinsic::riscv_vsm4r_vv:
9284 case Intrinsic::riscv_vsm4r_vs: {
9285 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9286 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9287 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9288 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9289 return Op;
9290 }
9291 // EGS * EEW >= 256 bits
9292 case Intrinsic::riscv_vsm3c:
9293 case Intrinsic::riscv_vsm3me: {
9294 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9295 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9296 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9297 return Op;
9298 }
9299 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9300 case Intrinsic::riscv_vsha2ch:
9301 case Intrinsic::riscv_vsha2cl:
9302 case Intrinsic::riscv_vsha2ms: {
9303 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9304 !Subtarget.hasStdExtZvknhb())
9305 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9306 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9307 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9308 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9309 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9310 return Op;
9311 }
9312 case Intrinsic::riscv_sf_vc_v_x:
9313 case Intrinsic::riscv_sf_vc_v_i:
9314 case Intrinsic::riscv_sf_vc_v_xv:
9315 case Intrinsic::riscv_sf_vc_v_iv:
9316 case Intrinsic::riscv_sf_vc_v_vv:
9317 case Intrinsic::riscv_sf_vc_v_fv:
9318 case Intrinsic::riscv_sf_vc_v_xvv:
9319 case Intrinsic::riscv_sf_vc_v_ivv:
9320 case Intrinsic::riscv_sf_vc_v_vvv:
9321 case Intrinsic::riscv_sf_vc_v_fvv:
9322 case Intrinsic::riscv_sf_vc_v_xvw:
9323 case Intrinsic::riscv_sf_vc_v_ivw:
9324 case Intrinsic::riscv_sf_vc_v_vvw:
9325 case Intrinsic::riscv_sf_vc_v_fvw: {
9326 MVT VT = Op.getSimpleValueType();
9327
9328 SmallVector<SDValue> Operands{Op->op_values()};
9329 processVCIXOperands(Op, Operands, DAG);
9330
9331 MVT RetVT = VT;
9332 if (VT.isFixedLengthVector())
9333 RetVT = getContainerForFixedLengthVector(VT);
9334 else if (VT.isFloatingPoint())
9335 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9336 VT.getVectorElementCount());
9337
9338 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9339
9340 if (VT.isFixedLengthVector())
9341 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9342 else if (VT.isFloatingPoint())
9343 NewNode = DAG.getBitcast(VT, NewNode);
9344
9345 if (Op == NewNode)
9346 break;
9347
9348 return NewNode;
9349 }
9350 }
9351
9352 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9353}
9354
9355 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9356 unsigned Type) {
9357 SDLoc DL(Op);
9358 SmallVector<SDValue> Operands{Op->op_values()};
9359 Operands.erase(Operands.begin() + 1);
9360
9361 const RISCVSubtarget &Subtarget =
9362 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9363 MVT VT = Op.getSimpleValueType();
9364 MVT RetVT = VT;
9365 MVT FloatVT = VT;
9366
9367 if (VT.isFloatingPoint()) {
9368 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9369 VT.getVectorElementCount());
9370 FloatVT = RetVT;
9371 }
9372 if (VT.isFixedLengthVector())
9373 RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
9374 Subtarget);
9375
9376 processVCIXOperands(Op, Operands, DAG);
9377
9378 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9379 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9380 SDValue Chain = NewNode.getValue(1);
9381
9382 if (VT.isFixedLengthVector())
9383 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9384 if (VT.isFloatingPoint())
9385 NewNode = DAG.getBitcast(VT, NewNode);
9386
9387 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9388
9389 return NewNode;
9390}
9391
9392 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9393 unsigned Type) {
9394 SmallVector<SDValue> Operands{Op->op_values()};
9395 Operands.erase(Operands.begin() + 1);
9396 processVCIXOperands(Op, Operands, DAG);
9397
9398 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9399}
9400
9401SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9402 SelectionDAG &DAG) const {
9403 unsigned IntNo = Op.getConstantOperandVal(1);
9404 switch (IntNo) {
9405 default:
9406 break;
9407 case Intrinsic::riscv_masked_strided_load: {
9408 SDLoc DL(Op);
9409 MVT XLenVT = Subtarget.getXLenVT();
9410
9411 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9412 // the selection of the masked intrinsics doesn't do this for us.
9413 SDValue Mask = Op.getOperand(5);
9414 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9415
9416 MVT VT = Op->getSimpleValueType(0);
9417 MVT ContainerVT = VT;
9418 if (VT.isFixedLengthVector())
9419 ContainerVT = getContainerForFixedLengthVector(VT);
9420
9421 SDValue PassThru = Op.getOperand(2);
9422 if (!IsUnmasked) {
9423 MVT MaskVT = getMaskTypeFor(ContainerVT);
9424 if (VT.isFixedLengthVector()) {
9425 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9426 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9427 }
9428 }
9429
9430 auto *Load = cast<MemIntrinsicSDNode>(Op);
9431 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9432 SDValue Ptr = Op.getOperand(3);
9433 SDValue Stride = Op.getOperand(4);
9434 SDValue Result, Chain;
9435
9436 // TODO: We restrict this to unmasked loads currently in consideration of
9437 // the complexity of handling all-false masks.
9438 MVT ScalarVT = ContainerVT.getVectorElementType();
9439 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9440 SDValue ScalarLoad =
9441 DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9442 ScalarVT, Load->getMemOperand());
9443 Chain = ScalarLoad.getValue(1);
9444 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9445 Subtarget);
9446 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9447 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9448 Load->getMemOperand());
9449 Chain = ScalarLoad.getValue(1);
9450 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9451 } else {
9452 SDValue IntID = DAG.getTargetConstant(
9453 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9454 XLenVT);
9455
9456 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9457 if (IsUnmasked)
9458 Ops.push_back(DAG.getUNDEF(ContainerVT));
9459 else
9460 Ops.push_back(PassThru);
9461 Ops.push_back(Ptr);
9462 Ops.push_back(Stride);
9463 if (!IsUnmasked)
9464 Ops.push_back(Mask);
9465 Ops.push_back(VL);
9466 if (!IsUnmasked) {
9467 SDValue Policy =
9468 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9469 Ops.push_back(Policy);
9470 }
9471
9472 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9473 Result =
9474 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9475 Load->getMemoryVT(), Load->getMemOperand());
9476 Chain = Result.getValue(1);
9477 }
9478 if (VT.isFixedLengthVector())
9479 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9480 return DAG.getMergeValues({Result, Chain}, DL);
9481 }
9482 case Intrinsic::riscv_seg2_load:
9483 case Intrinsic::riscv_seg3_load:
9484 case Intrinsic::riscv_seg4_load:
9485 case Intrinsic::riscv_seg5_load:
9486 case Intrinsic::riscv_seg6_load:
9487 case Intrinsic::riscv_seg7_load:
9488 case Intrinsic::riscv_seg8_load: {
9489 SDLoc DL(Op);
9490 static const Intrinsic::ID VlsegInts[7] = {
9491 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9492 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9493 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9494 Intrinsic::riscv_vlseg8};
9495 unsigned NF = Op->getNumValues() - 1;
9496 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9497 MVT XLenVT = Subtarget.getXLenVT();
9498 MVT VT = Op->getSimpleValueType(0);
9499 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9500
9501 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9502 Subtarget);
9503 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9504 auto *Load = cast<MemIntrinsicSDNode>(Op);
9505 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9506 ContainerVTs.push_back(MVT::Other);
9507 SDVTList VTs = DAG.getVTList(ContainerVTs);
9508 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9509 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9510 Ops.push_back(Op.getOperand(2));
9511 Ops.push_back(VL);
9512 SDValue Result =
9513 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9514 Load->getMemoryVT(), Load->getMemOperand());
9515 SmallVector<SDValue, 9> Results;
9516 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9517 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9518 DAG, Subtarget));
9519 Results.push_back(Result.getValue(NF));
9520 return DAG.getMergeValues(Results, DL);
9521 }
9522 case Intrinsic::riscv_sf_vc_v_x_se:
9523 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9524 case Intrinsic::riscv_sf_vc_v_i_se:
9525 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9526 case Intrinsic::riscv_sf_vc_v_xv_se:
9527 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9528 case Intrinsic::riscv_sf_vc_v_iv_se:
9529 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9530 case Intrinsic::riscv_sf_vc_v_vv_se:
9531 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9532 case Intrinsic::riscv_sf_vc_v_fv_se:
9533 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9534 case Intrinsic::riscv_sf_vc_v_xvv_se:
9535 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9536 case Intrinsic::riscv_sf_vc_v_ivv_se:
9537 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9538 case Intrinsic::riscv_sf_vc_v_vvv_se:
9539 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9540 case Intrinsic::riscv_sf_vc_v_fvv_se:
9541 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9542 case Intrinsic::riscv_sf_vc_v_xvw_se:
9543 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9544 case Intrinsic::riscv_sf_vc_v_ivw_se:
9545 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9546 case Intrinsic::riscv_sf_vc_v_vvw_se:
9547 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9548 case Intrinsic::riscv_sf_vc_v_fvw_se:
9549 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9550 }
9551
9552 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9553}
9554
9555SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9556 SelectionDAG &DAG) const {
9557 unsigned IntNo = Op.getConstantOperandVal(1);
9558 switch (IntNo) {
9559 default:
9560 break;
9561 case Intrinsic::riscv_masked_strided_store: {
9562 SDLoc DL(Op);
9563 MVT XLenVT = Subtarget.getXLenVT();
9564
9565 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9566 // the selection of the masked intrinsics doesn't do this for us.
9567 SDValue Mask = Op.getOperand(5);
9568 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9569
9570 SDValue Val = Op.getOperand(2);
9571 MVT VT = Val.getSimpleValueType();
9572 MVT ContainerVT = VT;
9573 if (VT.isFixedLengthVector()) {
9574 ContainerVT = getContainerForFixedLengthVector(VT);
9575 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9576 }
9577 if (!IsUnmasked) {
9578 MVT MaskVT = getMaskTypeFor(ContainerVT);
9579 if (VT.isFixedLengthVector())
9580 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9581 }
9582
9583 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9584
9585 SDValue IntID = DAG.getTargetConstant(
9586 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9587 XLenVT);
9588
9589 auto *Store = cast<MemIntrinsicSDNode>(Op);
9590 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9591 Ops.push_back(Val);
9592 Ops.push_back(Op.getOperand(3)); // Ptr
9593 Ops.push_back(Op.getOperand(4)); // Stride
9594 if (!IsUnmasked)
9595 Ops.push_back(Mask);
9596 Ops.push_back(VL);
9597
9598 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9599 Ops, Store->getMemoryVT(),
9600 Store->getMemOperand());
9601 }
9602 case Intrinsic::riscv_seg2_store:
9603 case Intrinsic::riscv_seg3_store:
9604 case Intrinsic::riscv_seg4_store:
9605 case Intrinsic::riscv_seg5_store:
9606 case Intrinsic::riscv_seg6_store:
9607 case Intrinsic::riscv_seg7_store:
9608 case Intrinsic::riscv_seg8_store: {
9609 SDLoc DL(Op);
9610 static const Intrinsic::ID VssegInts[] = {
9611 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9612 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9613 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9614 Intrinsic::riscv_vsseg8};
9615 // Operands are (chain, int_id, vec*, ptr, vl)
9616 unsigned NF = Op->getNumOperands() - 4;
9617 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9618 MVT XLenVT = Subtarget.getXLenVT();
9619 MVT VT = Op->getOperand(2).getSimpleValueType();
9620 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9621
9622 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9623 Subtarget);
9624 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9625 SDValue Ptr = Op->getOperand(NF + 2);
9626
9627 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9628 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9629 for (unsigned i = 0; i < NF; i++)
9630 Ops.push_back(convertToScalableVector(
9631 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9632 Ops.append({Ptr, VL});
9633
9634 return DAG.getMemIntrinsicNode(
9635 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9636 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9637 }
9638 case Intrinsic::riscv_sf_vc_xv_se:
9639 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9640 case Intrinsic::riscv_sf_vc_iv_se:
9641 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9642 case Intrinsic::riscv_sf_vc_vv_se:
9643 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9644 case Intrinsic::riscv_sf_vc_fv_se:
9645 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9646 case Intrinsic::riscv_sf_vc_xvv_se:
9647 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9648 case Intrinsic::riscv_sf_vc_ivv_se:
9649 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9650 case Intrinsic::riscv_sf_vc_vvv_se:
9651 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9652 case Intrinsic::riscv_sf_vc_fvv_se:
9653 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9654 case Intrinsic::riscv_sf_vc_xvw_se:
9655 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9656 case Intrinsic::riscv_sf_vc_ivw_se:
9657 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9658 case Intrinsic::riscv_sf_vc_vvw_se:
9659 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9660 case Intrinsic::riscv_sf_vc_fvw_se:
9661 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9662 }
9663
9664 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9665}
9666
9667static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9668 switch (ISDOpcode) {
9669 default:
9670 llvm_unreachable("Unhandled reduction");
9671 case ISD::VP_REDUCE_ADD:
9672 case ISD::VECREDUCE_ADD:
9673 return RISCVISD::VECREDUCE_ADD_VL;
9674 case ISD::VP_REDUCE_UMAX:
9675 case ISD::VECREDUCE_UMAX:
9676 return RISCVISD::VECREDUCE_UMAX_VL;
9677 case ISD::VP_REDUCE_SMAX:
9678 case ISD::VECREDUCE_SMAX:
9679 return RISCVISD::VECREDUCE_SMAX_VL;
9680 case ISD::VP_REDUCE_UMIN:
9681 case ISD::VECREDUCE_UMIN:
9682 return RISCVISD::VECREDUCE_UMIN_VL;
9683 case ISD::VP_REDUCE_SMIN:
9684 case ISD::VECREDUCE_SMIN:
9685 return RISCVISD::VECREDUCE_SMIN_VL;
9686 case ISD::VP_REDUCE_AND:
9687 case ISD::VECREDUCE_AND:
9688 return RISCVISD::VECREDUCE_AND_VL;
9689 case ISD::VP_REDUCE_OR:
9690 case ISD::VECREDUCE_OR:
9691 return RISCVISD::VECREDUCE_OR_VL;
9692 case ISD::VP_REDUCE_XOR:
9693 case ISD::VECREDUCE_XOR:
9694 return RISCVISD::VECREDUCE_XOR_VL;
9695 case ISD::VP_REDUCE_FADD:
9696 return RISCVISD::VECREDUCE_FADD_VL;
9697 case ISD::VP_REDUCE_SEQ_FADD:
9698 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9699 case ISD::VP_REDUCE_FMAX:
9700 case ISD::VP_REDUCE_FMAXIMUM:
9701 return RISCVISD::VECREDUCE_FMAX_VL;
9702 case ISD::VP_REDUCE_FMIN:
9703 case ISD::VP_REDUCE_FMINIMUM:
9704 return RISCVISD::VECREDUCE_FMIN_VL;
9705 }
9706
9707}
9708
9709SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9710 SelectionDAG &DAG,
9711 bool IsVP) const {
9712 SDLoc DL(Op);
9713 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9714 MVT VecVT = Vec.getSimpleValueType();
9715 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9716 Op.getOpcode() == ISD::VECREDUCE_OR ||
9717 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9718 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9719 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9720 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9721 "Unexpected reduction lowering");
9722
9723 MVT XLenVT = Subtarget.getXLenVT();
9724
9725 MVT ContainerVT = VecVT;
9726 if (VecVT.isFixedLengthVector()) {
9727 ContainerVT = getContainerForFixedLengthVector(VecVT);
9728 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9729 }
9730
9731 SDValue Mask, VL;
9732 if (IsVP) {
9733 Mask = Op.getOperand(2);
9734 VL = Op.getOperand(3);
9735 } else {
9736 std::tie(Mask, VL) =
9737 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9738 }
9739
9740 unsigned BaseOpc;
9741 ISD::CondCode CC;
9742 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9743
9744 switch (Op.getOpcode()) {
9745 default:
9746 llvm_unreachable("Unhandled reduction");
9747 case ISD::VECREDUCE_AND:
9748 case ISD::VP_REDUCE_AND: {
9749 // vcpop ~x == 0
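// Illustrative example: for x = 1111, ~x = 0000 and vcpop(~x) == 0, so the
// AND-reduction is 1; any clear bit in x sets a bit in ~x and vcpop != 0.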
9750 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9751 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9752 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9753 CC = ISD::SETEQ;
9754 BaseOpc = ISD::AND;
9755 break;
9756 }
9757 case ISD::VECREDUCE_OR:
9758 case ISD::VP_REDUCE_OR:
9759 // vcpop x != 0
9760 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9761 CC = ISD::SETNE;
9762 BaseOpc = ISD::OR;
9763 break;
9764 case ISD::VECREDUCE_XOR:
9765 case ISD::VP_REDUCE_XOR: {
9766 // ((vcpop x) & 1) != 0
9767 SDValue One = DAG.getConstant(1, DL, XLenVT);
9768 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9769 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9770 CC = ISD::SETNE;
9771 BaseOpc = ISD::XOR;
9772 break;
9773 }
9774 }
9775
9776 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9777 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9778
9779 if (!IsVP)
9780 return SetCC;
9781
9782 // Now include the start value in the operation.
9783 // Note that we must return the start value when no elements are operated
9784 // upon. The vcpop instructions we've emitted in each case above will return
9785 // 0 for an inactive vector, and so we've already received the neutral value:
9786 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9787 // can simply include the start value.
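// E.g. for VP_REDUCE_AND with an all-false mask or EVL=0, vcpop returns 0,
// the (0 == 0) setcc yields 1 (the AND identity), and ANDing with the start
// value returns the start value as required.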
9788 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9789}
9790
9791static bool isNonZeroAVL(SDValue AVL) {
9792 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9793 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9794 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9795 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9796}
9797
9798/// Helper to lower a reduction sequence of the form:
9799/// scalar = reduce_op vec, scalar_start
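/// A rough sketch of the sequence built below: the start value is inserted
/// into element 0 of an LMUL<=1 vector, a VL-predicated reduction node folds
/// the source vector into that element, and the scalar result is read back
/// with an extract of element 0.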
9800static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9801 SDValue StartValue, SDValue Vec, SDValue Mask,
9802 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9803 const RISCVSubtarget &Subtarget) {
9804 const MVT VecVT = Vec.getSimpleValueType();
9805 const MVT M1VT = getLMUL1VT(VecVT);
9806 const MVT XLenVT = Subtarget.getXLenVT();
9807 const bool NonZeroAVL = isNonZeroAVL(VL);
9808
9809 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9810 // or the original VT if fractional.
9811 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9812 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9813 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9814 // be the result of the reduction operation.
9815 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9816 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9817 DAG, Subtarget);
9818 if (M1VT != InnerVT)
9819 InitialValue =
9820 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9821 InitialValue, DAG.getVectorIdxConstant(0, DL));
9822 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9823 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9824 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9825 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9826 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9827 DAG.getVectorIdxConstant(0, DL));
9828}
9829
9830SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9831 SelectionDAG &DAG) const {
9832 SDLoc DL(Op);
9833 SDValue Vec = Op.getOperand(0);
9834 EVT VecEVT = Vec.getValueType();
9835
9836 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9837
9838 // Due to ordering in legalize types we may have a vector type that needs to
9839 // be split. Do that manually so we can get down to a legal type.
9840 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9841 TargetLowering::TypeSplitVector) {
9842 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9843 VecEVT = Lo.getValueType();
9844 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9845 }
9846
9847 // TODO: The type may need to be widened rather than split. Or widened before
9848 // it can be split.
9849 if (!isTypeLegal(VecEVT))
9850 return SDValue();
9851
9852 MVT VecVT = VecEVT.getSimpleVT();
9853 MVT VecEltVT = VecVT.getVectorElementType();
9854 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9855
9856 MVT ContainerVT = VecVT;
9857 if (VecVT.isFixedLengthVector()) {
9858 ContainerVT = getContainerForFixedLengthVector(VecVT);
9859 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9860 }
9861
9862 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9863
9864 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9865 switch (BaseOpc) {
9866 case ISD::AND:
9867 case ISD::OR:
9868 case ISD::UMAX:
9869 case ISD::UMIN:
9870 case ISD::SMAX:
9871 case ISD::SMIN:
9872 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9873 DAG.getVectorIdxConstant(0, DL));
9874 }
9875 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9876 Mask, VL, DL, DAG, Subtarget);
9877}
9878
9879// Given a reduction op, this function returns the matching reduction opcode,
9880// the vector SDValue and the scalar SDValue required to lower this to a
9881// RISCVISD node.
9882 static std::tuple<unsigned, SDValue, SDValue>
9883 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9884 const RISCVSubtarget &Subtarget) {
9885 SDLoc DL(Op);
9886 auto Flags = Op->getFlags();
9887 unsigned Opcode = Op.getOpcode();
9888 switch (Opcode) {
9889 default:
9890 llvm_unreachable("Unhandled reduction");
9891 case ISD::VECREDUCE_FADD: {
9892 // Use positive zero if we can. It is cheaper to materialize.
9893 SDValue Zero =
9894 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9895 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9896 }
9897 case ISD::VECREDUCE_SEQ_FADD:
9898 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9899 Op.getOperand(0));
9900 case ISD::VECREDUCE_FMINIMUM:
9901 case ISD::VECREDUCE_FMAXIMUM:
9902 case ISD::VECREDUCE_FMIN:
9903 case ISD::VECREDUCE_FMAX: {
9904 SDValue Front =
9905 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9906 DAG.getVectorIdxConstant(0, DL));
9907 unsigned RVVOpc =
9908 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9909 ? RISCVISD::VECREDUCE_FMIN_VL
9910 : RISCVISD::VECREDUCE_FMAX_VL;
9911 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9912 }
9913 }
9914}
9915
9916SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9917 SelectionDAG &DAG) const {
9918 SDLoc DL(Op);
9919 MVT VecEltVT = Op.getSimpleValueType();
9920
9921 unsigned RVVOpcode;
9922 SDValue VectorVal, ScalarVal;
9923 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9924 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9925 MVT VecVT = VectorVal.getSimpleValueType();
9926
9927 MVT ContainerVT = VecVT;
9928 if (VecVT.isFixedLengthVector()) {
9929 ContainerVT = getContainerForFixedLengthVector(VecVT);
9930 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9931 }
9932
9933 MVT ResVT = Op.getSimpleValueType();
9934 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9935 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9936 VL, DL, DAG, Subtarget);
9937 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9938 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9939 return Res;
9940
9941 if (Op->getFlags().hasNoNaNs())
9942 return Res;
9943
9944 // Force the output to NaN if any element is NaN.
9945 SDValue IsNan =
9946 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9947 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9948 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9949 MVT XLenVT = Subtarget.getXLenVT();
9950 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9951 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9952 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9953 return DAG.getSelect(
9954 DL, ResVT, NoNaNs, Res,
9955 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9956 ResVT));
9957}
9958
9959SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9960 SelectionDAG &DAG) const {
9961 SDLoc DL(Op);
9962 unsigned Opc = Op.getOpcode();
9963 SDValue Start = Op.getOperand(0);
9964 SDValue Vec = Op.getOperand(1);
9965 EVT VecEVT = Vec.getValueType();
9966 MVT XLenVT = Subtarget.getXLenVT();
9967
9968 // TODO: The type may need to be widened rather than split. Or widened before
9969 // it can be split.
9970 if (!isTypeLegal(VecEVT))
9971 return SDValue();
9972
9973 MVT VecVT = VecEVT.getSimpleVT();
9974 unsigned RVVOpcode = getRVVReductionOp(Opc);
9975
9976 if (VecVT.isFixedLengthVector()) {
9977 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9978 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9979 }
9980
9981 SDValue VL = Op.getOperand(3);
9982 SDValue Mask = Op.getOperand(2);
9983 SDValue Res =
9984 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9985 Vec, Mask, VL, DL, DAG, Subtarget);
9986 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9987 Op->getFlags().hasNoNaNs())
9988 return Res;
9989
9990 // Propagate NaNs.
9991 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9992 // Check if any of the elements in Vec is NaN.
9993 SDValue IsNaN = DAG.getNode(
9994 RISCVISD::SETCC_VL, DL, PredVT,
9995 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9996 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9997 // Check if the start value is NaN.
9998 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9999 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10000 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10001 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10002 MVT ResVT = Res.getSimpleValueType();
10003 return DAG.getSelect(
10004 DL, ResVT, NoNaNs, Res,
10005 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
10006 ResVT));
10007}
10008
10009SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10010 SelectionDAG &DAG) const {
10011 SDValue Vec = Op.getOperand(0);
10012 SDValue SubVec = Op.getOperand(1);
10013 MVT VecVT = Vec.getSimpleValueType();
10014 MVT SubVecVT = SubVec.getSimpleValueType();
10015
10016 SDLoc DL(Op);
10017 MVT XLenVT = Subtarget.getXLenVT();
10018 unsigned OrigIdx = Op.getConstantOperandVal(2);
10019 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10020
10021 // We don't have the ability to slide mask vectors up indexed by their i1
10022 // elements; the smallest we can do is i8. Often we are able to bitcast to
10023 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10024 // into a scalable one, we might not necessarily have enough scalable
10025 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
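// Illustrative example: inserting a v16i1 subvector at index 8 can instead be
// done as inserting a v2i8 subvector at index 1 of the bitcast i8 vector.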
10026 if (SubVecVT.getVectorElementType() == MVT::i1 &&
10027 (OrigIdx != 0 || !Vec.isUndef())) {
10028 if (VecVT.getVectorMinNumElements() >= 8 &&
10029 SubVecVT.getVectorMinNumElements() >= 8) {
10030 assert(OrigIdx % 8 == 0 && "Invalid index");
10031 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10032 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10033 "Unexpected mask vector lowering");
10034 OrigIdx /= 8;
10035 SubVecVT =
10036 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10037 SubVecVT.isScalableVector());
10038 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10039 VecVT.isScalableVector());
10040 Vec = DAG.getBitcast(VecVT, Vec);
10041 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10042 } else {
10043 // We can't slide this mask vector up indexed by its i1 elements.
10044 // This poses a problem when we wish to insert a scalable vector which
10045 // can't be re-expressed as a larger type. Just choose the slow path and
10046 // extend to a larger type, then truncate back down.
10047 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10048 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10049 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10050 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10051 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10052 Op.getOperand(2));
10053 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10054 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10055 }
10056 }
10057
10058 // If the subvector is a fixed-length type and we don't know VLEN
10059 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10060 // don't know which register of a LMUL group contains the specific subvector
10061 // as we only know the minimum register size. Therefore we must slide the
10062 // vector group up the full amount.
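// Illustrative example: inserting a v2i32 subvector at index 2 of a v8i32
// with unknown VLEN becomes a vslideup by 2 with VL=4 on the container, so
// elements 0 and 1 of the destination are left undisturbed.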
10063 const auto VLen = Subtarget.getRealVLen();
10064 if (SubVecVT.isFixedLengthVector() && !VLen) {
10065 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
10066 return Op;
10067 MVT ContainerVT = VecVT;
10068 if (VecVT.isFixedLengthVector()) {
10069 ContainerVT = getContainerForFixedLengthVector(VecVT);
10070 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10071 }
10072
10073 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
10074 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10075 DAG.getUNDEF(ContainerVT), SubVec,
10076 DAG.getVectorIdxConstant(0, DL));
10077 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10078 return DAG.getBitcast(Op.getValueType(), SubVec);
10079 }
10080
10081 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10082 DAG.getUNDEF(ContainerVT), SubVec,
10083 DAG.getVectorIdxConstant(0, DL));
10084 SDValue Mask =
10085 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10086 // Set the vector length to only the number of elements we care about. Note
10087 // that for slideup this includes the offset.
10088 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10089 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
10090
10091 // Use tail agnostic policy if we're inserting over Vec's tail.
10092 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10093 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10094 Policy = RISCVII::TAIL_AGNOSTIC;
10095
10096 // If we're inserting into the lowest elements, use a tail undisturbed
10097 // vmv.v.v.
10098 if (OrigIdx == 0) {
10099 SubVec =
10100 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10101 } else {
10102 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10103 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10104 SlideupAmt, Mask, VL, Policy);
10105 }
10106
10107 if (VecVT.isFixedLengthVector())
10108 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10109 return DAG.getBitcast(Op.getValueType(), SubVec);
10110 }
10111
10112 MVT ContainerVecVT = VecVT;
10113 if (VecVT.isFixedLengthVector()) {
10114 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10115 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10116 }
10117
10118 MVT ContainerSubVecVT = SubVecVT;
10119 if (SubVecVT.isFixedLengthVector()) {
10120 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10121 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10122 }
10123
10124 unsigned SubRegIdx;
10125 ElementCount RemIdx;
10126 // insert_subvector scales the index by vscale if the subvector is scalable,
10127 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10128 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10129 if (SubVecVT.isFixedLengthVector()) {
10130 assert(VLen);
10131 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10132 auto Decompose =
10133 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10134 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10135 SubRegIdx = Decompose.first;
10136 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10137 (OrigIdx % Vscale));
10138 } else {
10139 auto Decompose =
10140 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10141 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10142 SubRegIdx = Decompose.first;
10143 RemIdx = ElementCount::getScalable(Decompose.second);
10144 }
10145
10146 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10147 assert(isPowerOf2_64(
10148 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10149 bool ExactlyVecRegSized =
10150 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10151 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10152
10153 // 1. If the Idx has been completely eliminated and this subvector's size is
10154 // a vector register or a multiple thereof, or the surrounding elements are
10155 // undef, then this is a subvector insert which naturally aligns to a vector
10156 // register. These can easily be handled using subregister manipulation.
10157 // 2. If the subvector isn't an exact multiple of a valid register group size,
10158 // then the insertion must preserve the undisturbed elements of the register.
10159 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10160 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10161 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10162 // of that LMUL=1 type back into the larger vector (resolving to another
10163 // subregister operation). See below for how our VSLIDEUP works. We go via a
10164 // LMUL=1 type to avoid allocating a large register group to hold our
10165 // subvector.
10166 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10167 if (SubVecVT.isFixedLengthVector()) {
10168 // We may get NoSubRegister if inserting at index 0 and the subvec
10169 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10170 if (SubRegIdx == RISCV::NoSubRegister) {
10171 assert(OrigIdx == 0);
10172 return Op;
10173 }
10174
10175 SDValue Insert =
10176 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
10177 if (VecVT.isFixedLengthVector())
10178 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10179 return Insert;
10180 }
10181 return Op;
10182 }
10183
10184 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10185 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10186 // (in our case undisturbed). This means we can set up a subvector insertion
10187 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10188 // size of the subvector.
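// Illustrative example: inserting a 4-element subvector at OFFSET=2 becomes a
// vslideup with offset 2 and VL=6; destination elements 0..1 are preserved,
// elements 2..5 receive the subvector, and the tail follows the chosen policy.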
10189 MVT InterSubVT = ContainerVecVT;
10190 SDValue AlignedExtract = Vec;
10191 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10192 if (SubVecVT.isFixedLengthVector())
10193 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10194 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10195 InterSubVT = getLMUL1VT(ContainerVecVT);
10196 // Extract a subvector equal to the nearest full vector register type. This
10197 // should resolve to a EXTRACT_SUBREG instruction.
10198 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10199 DAG.getVectorIdxConstant(AlignedIdx, DL));
10200 }
10201
10202 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10203 DAG.getUNDEF(InterSubVT), SubVec,
10204 DAG.getVectorIdxConstant(0, DL));
10205
10206 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10207
10208 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10209 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10210
10211 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10212 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10213 if (Subtarget.expandVScale(EndIndex) ==
10214 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10215 Policy = RISCVII::TAIL_AGNOSTIC;
10216
10217 // If we're inserting into the lowest elements, use a tail undisturbed
10218 // vmv.v.v.
10219 if (RemIdx.isZero()) {
10220 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10221 SubVec, VL);
10222 } else {
10223 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10224
10225 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10226 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10227
10228 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10229 SlideupAmt, Mask, VL, Policy);
10230 }
10231
10232 // If required, insert this subvector back into the correct vector register.
10233 // This should resolve to an INSERT_SUBREG instruction.
10234 if (ContainerVecVT.bitsGT(InterSubVT))
10235 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10236 DAG.getVectorIdxConstant(AlignedIdx, DL));
10237
10238 if (VecVT.isFixedLengthVector())
10239 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10240
10241 // We might have bitcast from a mask type: cast back to the original type if
10242 // required.
10243 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10244}
10245
10246SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10247 SelectionDAG &DAG) const {
10248 SDValue Vec = Op.getOperand(0);
10249 MVT SubVecVT = Op.getSimpleValueType();
10250 MVT VecVT = Vec.getSimpleValueType();
10251
10252 SDLoc DL(Op);
10253 MVT XLenVT = Subtarget.getXLenVT();
10254 unsigned OrigIdx = Op.getConstantOperandVal(1);
10255 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10256
10257 // We don't have the ability to slide mask vectors down indexed by their i1
10258 // elements; the smallest we can do is i8. Often we are able to bitcast to
10259 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10260 // from a scalable one, we might not necessarily have enough scalable
10261 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10262 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
10263 if (VecVT.getVectorMinNumElements() >= 8 &&
10264 SubVecVT.getVectorMinNumElements() >= 8) {
10265 assert(OrigIdx % 8 == 0 && "Invalid index");
10266 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10267 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10268 "Unexpected mask vector lowering");
10269 OrigIdx /= 8;
10270 SubVecVT =
10271 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10272 SubVecVT.isScalableVector());
10273 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10274 VecVT.isScalableVector());
10275 Vec = DAG.getBitcast(VecVT, Vec);
10276 } else {
10277 // We can't slide this mask vector down, indexed by its i1 elements.
10278 // This poses a problem when we wish to extract a scalable vector which
10279 // can't be re-expressed as a larger type. Just choose the slow path and
10280 // extend to a larger type, then truncate back down.
10281 // TODO: We could probably improve this when extracting certain fixed
10282 // from fixed, where we can extract as i8 and shift the correct element
10283 // right to reach the desired subvector?
10284 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10285 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10286 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10287 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10288 Op.getOperand(1));
10289 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10290 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10291 }
10292 }
10293
10294 // With an index of 0 this is a cast-like subvector, which can be performed
10295 // with subregister operations.
10296 if (OrigIdx == 0)
10297 return Op;
10298
10299 const auto VLen = Subtarget.getRealVLen();
10300
10301 // If the subvector is a fixed-length type and we don't know VLEN
10302 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10303 // don't know which register of a LMUL group contains the specific subvector
10304 // as we only know the minimum register size. Therefore we must slide the
10305 // vector group down the full amount.
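// Illustrative example: extracting a v2i32 subvector at index 4 of a v8i32
// with unknown VLEN becomes a vslidedown by 4 with VL=2, followed by a
// cast-like extract of the low elements.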
10306 if (SubVecVT.isFixedLengthVector() && !VLen) {
10307 MVT ContainerVT = VecVT;
10308 if (VecVT.isFixedLengthVector()) {
10309 ContainerVT = getContainerForFixedLengthVector(VecVT);
10310 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10311 }
10312
10313 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10314 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10315 if (auto ShrunkVT =
10316 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10317 ContainerVT = *ShrunkVT;
10318 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10319 DAG.getVectorIdxConstant(0, DL));
10320 }
10321
10322 SDValue Mask =
10323 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10324 // Set the vector length to only the number of elements we care about. This
10325 // avoids sliding down elements we're going to discard straight away.
10326 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
10327 Subtarget);
10328 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10329 SDValue Slidedown =
10330 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10331 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10332 // Now we can use a cast-like subvector extract to get the result.
10333 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10334 DAG.getVectorIdxConstant(0, DL));
10335 return DAG.getBitcast(Op.getValueType(), Slidedown);
10336 }
10337
10338 if (VecVT.isFixedLengthVector()) {
10339 VecVT = getContainerForFixedLengthVector(VecVT);
10340 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10341 }
10342
10343 MVT ContainerSubVecVT = SubVecVT;
10344 if (SubVecVT.isFixedLengthVector())
10345 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10346
10347 unsigned SubRegIdx;
10348 ElementCount RemIdx;
10349 // extract_subvector scales the index by vscale if the subvector is scalable,
10350 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10351 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10352 if (SubVecVT.isFixedLengthVector()) {
10353 assert(VLen);
10354 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10355 auto Decompose =
10356 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10357 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10358 SubRegIdx = Decompose.first;
10359 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10360 (OrigIdx % Vscale));
10361 } else {
10362 auto Decompose =
10363 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10364 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10365 SubRegIdx = Decompose.first;
10366 RemIdx = ElementCount::getScalable(Decompose.second);
10367 }
10368
10369 // If the Idx has been completely eliminated then this is a subvector extract
10370 // which naturally aligns to a vector register. These can easily be handled
10371 // using subregister manipulation.
10372 if (RemIdx.isZero()) {
10373 if (SubVecVT.isFixedLengthVector()) {
10374 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10375 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10376 }
10377 return Op;
10378 }
10379
10380 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10381 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10382 // divide exactly.
10383 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10384 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10385
10386 // If the vector type is an LMUL-group type, extract a subvector equal to the
10387 // nearest full vector register type.
10388 MVT InterSubVT = VecVT;
10389 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10390 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10391 // we should have successfully decomposed the extract into a subregister.
10392 assert(SubRegIdx != RISCV::NoSubRegister);
10393 InterSubVT = getLMUL1VT(VecVT);
10394 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10395 }
10396
10397 // Slide this vector register down by the desired number of elements in order
10398 // to place the desired subvector starting at element 0.
10399 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10400 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10401 if (SubVecVT.isFixedLengthVector())
10402 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10403 Subtarget);
10404 SDValue Slidedown =
10405 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10406 Vec, SlidedownAmt, Mask, VL);
10407
10408 // Now the vector is in the right position, extract our final subvector. This
10409 // should resolve to a COPY.
10410 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10411 DAG.getVectorIdxConstant(0, DL));
10412
10413 // We might have bitcast from a mask type: cast back to the original type if
10414 // required.
10415 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10416}
10417
10418// Widen a vector's operands to i8, then truncate its results back to the
10419 // original type, typically i1. All operand and result types must be the same.
10420 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10421 SelectionDAG &DAG) {
10422 MVT VT = N.getSimpleValueType();
10423 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10424 SmallVector<SDValue, 4> WideOps;
10425 for (SDValue Op : N->ops()) {
10426 assert(Op.getSimpleValueType() == VT &&
10427 "Operands and result must be same type");
10428 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10429 }
10430
10431 unsigned NumVals = N->getNumValues();
10432
10433 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10434 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10435 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10436 SmallVector<SDValue, 4> TruncVals;
10437 for (unsigned I = 0; I < NumVals; I++) {
10438 TruncVals.push_back(
10439 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10440 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10441 }
10442
10443 if (TruncVals.size() > 1)
10444 return DAG.getMergeValues(TruncVals, DL);
10445 return TruncVals.front();
10446}
10447
10448SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10449 SelectionDAG &DAG) const {
10450 SDLoc DL(Op);
10451 MVT VecVT = Op.getSimpleValueType();
10452
10453 assert(VecVT.isScalableVector() &&
10454 "vector_interleave on non-scalable vector!");
10455
10456 // 1 bit element vectors need to be widened to e8
10457 if (VecVT.getVectorElementType() == MVT::i1)
10458 return widenVectorOpsToi8(Op, DL, DAG);
10459
10460 // If the VT is LMUL=8, we need to split and reassemble.
10461 if (VecVT.getSizeInBits().getKnownMinValue() ==
10462 (8 * RISCV::RVVBitsPerBlock)) {
10463 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10464 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10465 EVT SplitVT = Op0Lo.getValueType();
10466
10467 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10468 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10469 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10470 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10471
10472 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10473 ResLo.getValue(0), ResHi.getValue(0));
10474 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10475 ResHi.getValue(1));
10476 return DAG.getMergeValues({Even, Odd}, DL);
10477 }
10478
10479 // Concatenate the two vectors as one vector to deinterleave
10480 MVT ConcatVT =
10481 MVT::getVectorVT(VecVT.getVectorElementType(),
10482 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10483 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10484 Op.getOperand(0), Op.getOperand(1));
10485
10486 // We want to operate on all lanes, so get the mask and VL and mask for it
10487 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10488 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10489
10490 // We can deinterleave through vnsrl.wi if the element type is smaller than
10491 // ELEN
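// Rough sketch of the vnsrl trick: reinterpret each adjacent pair of SEW-bit
// elements as a single 2*SEW-bit element; a narrowing shift right by 0 keeps
// the even (low) halves and a shift by SEW keeps the odd (high) halves.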
10492 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10493 SDValue Even =
10494 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10495 SDValue Odd =
10496 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10497 return DAG.getMergeValues({Even, Odd}, DL);
10498 }
10499
10500 // For the indices, use the same SEW to avoid an extra vsetvli
10501 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10502 // Create a vector of even indices {0, 2, 4, ...}
10503 SDValue EvenIdx =
10504 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10505 // Create a vector of odd indices {1, 3, 5, ... }
10506 SDValue OddIdx =
10507 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10508
10509 // Gather the even and odd elements into two separate vectors
10510 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10511 Concat, EvenIdx, Passthru, Mask, VL);
10512 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10513 Concat, OddIdx, Passthru, Mask, VL);
10514
10515 // Extract the result half of the gather for even and odd
10516 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10517 DAG.getVectorIdxConstant(0, DL));
10518 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10519 DAG.getVectorIdxConstant(0, DL));
10520
10521 return DAG.getMergeValues({Even, Odd}, DL);
10522}
10523
10524SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10525 SelectionDAG &DAG) const {
10526 SDLoc DL(Op);
10527 MVT VecVT = Op.getSimpleValueType();
10528
10529 assert(VecVT.isScalableVector() &&
10530 "vector_interleave on non-scalable vector!");
10531
10532 // i1 vectors need to be widened to i8
10533 if (VecVT.getVectorElementType() == MVT::i1)
10534 return widenVectorOpsToi8(Op, DL, DAG);
10535
10536 MVT XLenVT = Subtarget.getXLenVT();
10537 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10538
10539 // If the VT is LMUL=8, we need to split and reassemble.
10540 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10541 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10542 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10543 EVT SplitVT = Op0Lo.getValueType();
10544
10545 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10546 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10547 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10548 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10549
10550 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10551 ResLo.getValue(0), ResLo.getValue(1));
10552 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10553 ResHi.getValue(0), ResHi.getValue(1));
10554 return DAG.getMergeValues({Lo, Hi}, DL);
10555 }
10556
10557 SDValue Interleaved;
10558
10559 // If the element type is smaller than ELEN, then we can interleave with
10560 // vwaddu.vv and vwmaccu.vx
10561 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10562 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10563 DAG, Subtarget);
10564 } else {
10565 // Otherwise, fallback to using vrgathere16.vv
10566 MVT ConcatVT =
10567 MVT::getVectorVT(VecVT.getVectorElementType(),
10568 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10569 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10570 Op.getOperand(0), Op.getOperand(1));
10571
10572 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10573
10574 // 0 1 2 3 4 5 6 7 ...
10575 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10576
10577 // 1 1 1 1 1 1 1 1 ...
10578 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10579
10580 // 1 0 1 0 1 0 1 0 ...
10581 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10582 OddMask = DAG.getSetCC(
10583 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10584 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10585 ISD::CondCode::SETNE);
10586
10587 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10588
10589 // Build up the index vector for interleaving the concatenated vector
10590 // 0 0 1 1 2 2 3 3 ...
10591 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10592 // 0 n 1 n+1 2 n+2 3 n+3 ...
10593 Idx =
10594 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10595
10596 // Then perform the interleave
10597 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10598 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10599 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10600 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10601 }
10602
10603 // Extract the two halves from the interleaved result
10604 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10605 DAG.getVectorIdxConstant(0, DL));
10606 SDValue Hi = DAG.getNode(
10607 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10608 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10609
10610 return DAG.getMergeValues({Lo, Hi}, DL);
10611}
10612
10613 // Lower step_vector to the vid instruction. Any non-identity step value must
10614 // be accounted for by manual expansion.
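// For example, a step of 4 lowers to vid.v followed by a shift left by 2,
// while a non-power-of-two step such as 3 lowers to vid.v followed by a
// multiply with a splat of 3.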
10615SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10616 SelectionDAG &DAG) const {
10617 SDLoc DL(Op);
10618 MVT VT = Op.getSimpleValueType();
10619 assert(VT.isScalableVector() && "Expected scalable vector");
10620 MVT XLenVT = Subtarget.getXLenVT();
10621 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10622 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10623 uint64_t StepValImm = Op.getConstantOperandVal(0);
10624 if (StepValImm != 1) {
10625 if (isPowerOf2_64(StepValImm)) {
10626 SDValue StepVal =
10627 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10628 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10629 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10630 } else {
10631 SDValue StepVal = lowerScalarSplat(
10632 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10633 VL, VT, DL, DAG, Subtarget);
10634 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10635 }
10636 }
10637 return StepVec;
10638}
10639
10640// Implement vector_reverse using vrgather.vv with indices determined by
10641// subtracting the id of each element from (VLMAX-1). This will convert
10642// the indices like so:
10643// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10644// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
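// Sketch of the lowering below: indices = splat(VLMAX-1) - vid.v, then a
// single vrgather.vv (or vrgatherei16.vv when SEW=8 and VLMAX may exceed 256)
// performs the reversal in one gather.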
10645SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10646 SelectionDAG &DAG) const {
10647 SDLoc DL(Op);
10648 MVT VecVT = Op.getSimpleValueType();
10649 if (VecVT.getVectorElementType() == MVT::i1) {
10650 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10651 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10652 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10653 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10654 }
10655 unsigned EltSize = VecVT.getScalarSizeInBits();
10656 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10657 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10658 unsigned MaxVLMAX =
10659 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10660
10661 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10662 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10663
10664 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10665 // to use vrgatherei16.vv.
10666 // TODO: It's also possible to use vrgatherei16.vv for other types to
10667 // decrease register width for the index calculation.
10668 if (MaxVLMAX > 256 && EltSize == 8) {
10669 // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
10670 // Reverse each half, then reassemble them in reverse order.
10671 // NOTE: It's also possible that after splitting that VLMAX no longer
10672 // requires vrgatherei16.vv.
10673 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10674 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10675 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10676 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10677 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10678 // Reassemble the low and high pieces reversed.
10679 // FIXME: This is a CONCAT_VECTORS.
10680 SDValue Res =
10681 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10682 DAG.getVectorIdxConstant(0, DL));
10683 return DAG.getNode(
10684 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10685 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10686 }
10687
10688 // Just promote the int type to i16 which will double the LMUL.
10689 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10690 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10691 }
10692
10693 MVT XLenVT = Subtarget.getXLenVT();
10694 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10695
10696 // Calculate VLMAX-1 for the desired SEW.
10697 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10698 computeVLMax(VecVT, DL, DAG),
10699 DAG.getConstant(1, DL, XLenVT));
10700
10701 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10702 bool IsRV32E64 =
10703 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10704 SDValue SplatVL;
10705 if (!IsRV32E64)
10706 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10707 else
10708 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10709 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10710
10711 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10712 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10713 DAG.getUNDEF(IntVT), Mask, VL);
10714
10715 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10716 DAG.getUNDEF(VecVT), Mask, VL);
10717}
10718
10719SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10720 SelectionDAG &DAG) const {
10721 SDLoc DL(Op);
10722 SDValue V1 = Op.getOperand(0);
10723 SDValue V2 = Op.getOperand(1);
10724 MVT XLenVT = Subtarget.getXLenVT();
10725 MVT VecVT = Op.getSimpleValueType();
10726
10727 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10728
10729 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10730 SDValue DownOffset, UpOffset;
10731 if (ImmValue >= 0) {
10732 // The operand is a TargetConstant, we need to rebuild it as a regular
10733 // constant.
10734 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10735 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10736 } else {
10737 // The operand is a TargetConstant, we need to rebuild it as a regular
10738 // constant rather than negating the original operand.
10739 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10740 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10741 }
10742
10743 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10744
10745 SDValue SlideDown =
10746 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10747 DownOffset, TrueMask, UpOffset);
10748 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10749 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10750 RISCVII::TAIL_AGNOSTIC);
10751}
10752
10753SDValue
10754RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10755 SelectionDAG &DAG) const {
10756 SDLoc DL(Op);
10757 auto *Load = cast<LoadSDNode>(Op);
10758
10759 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10760 Load->getMemoryVT(),
10761 *Load->getMemOperand()) &&
10762 "Expecting a correctly-aligned load");
10763
10764 MVT VT = Op.getSimpleValueType();
10765 MVT XLenVT = Subtarget.getXLenVT();
10766 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10767
10768 // If we know the exact VLEN and our fixed length vector completely fills
10769 // the container, use a whole register load instead.
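// Illustrative example: with VLEN known to be exactly 128, a v4i32 load fills
// an LMUL=1 register, so a plain whole-register vector load suffices and no
// VL-constrained vle intrinsic is needed.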
10770 const auto [MinVLMAX, MaxVLMAX] =
10771 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10772 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10773 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10774 MachineMemOperand *MMO = Load->getMemOperand();
10775 SDValue NewLoad =
10776 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10777 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10778 MMO->getAAInfo(), MMO->getRanges());
10779 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10780 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10781 }
10782
10783 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10784
10785 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10786 SDValue IntID = DAG.getTargetConstant(
10787 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10788 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10789 if (!IsMaskOp)
10790 Ops.push_back(DAG.getUNDEF(ContainerVT));
10791 Ops.push_back(Load->getBasePtr());
10792 Ops.push_back(VL);
10793 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10794 SDValue NewLoad =
10795 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10796 Load->getMemoryVT(), Load->getMemOperand());
10797
10798 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10799 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10800}
10801
10802SDValue
10803RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10804 SelectionDAG &DAG) const {
10805 SDLoc DL(Op);
10806 auto *Store = cast<StoreSDNode>(Op);
10807
10808 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10809 Store->getMemoryVT(),
10810 *Store->getMemOperand()) &&
10811 "Expecting a correctly-aligned store");
10812
10813 SDValue StoreVal = Store->getValue();
10814 MVT VT = StoreVal.getSimpleValueType();
10815 MVT XLenVT = Subtarget.getXLenVT();
10816
10817 // If the size is less than a byte, we need to pad with zeros to make a byte.
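// For example, a v4i1 store value is widened to v8i1 with the upper lanes
// zeroed so a full byte can be written.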
10818 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10819 VT = MVT::v8i1;
10820 StoreVal =
10821 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10822 StoreVal, DAG.getVectorIdxConstant(0, DL));
10823 }
10824
10825 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10826
10827 SDValue NewValue =
10828 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10829
10830
10831 // If we know the exact VLEN and our fixed length vector completely fills
10832 // the container, use a whole register store instead.
10833 const auto [MinVLMAX, MaxVLMAX] =
10834 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10835 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10836 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10837 MachineMemOperand *MMO = Store->getMemOperand();
10838 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10839 MMO->getPointerInfo(), MMO->getBaseAlign(),
10840 MMO->getFlags(), MMO->getAAInfo());
10841 }
10842
10843 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10844 Subtarget);
10845
10846 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10847 SDValue IntID = DAG.getTargetConstant(
10848 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10849 return DAG.getMemIntrinsicNode(
10850 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10851 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10852 Store->getMemoryVT(), Store->getMemOperand());
10853}
10854
10855SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10856 SelectionDAG &DAG) const {
10857 SDLoc DL(Op);
10858 MVT VT = Op.getSimpleValueType();
10859
10860 const auto *MemSD = cast<MemSDNode>(Op);
10861 EVT MemVT = MemSD->getMemoryVT();
10862 MachineMemOperand *MMO = MemSD->getMemOperand();
10863 SDValue Chain = MemSD->getChain();
10864 SDValue BasePtr = MemSD->getBasePtr();
10865
10866 SDValue Mask, PassThru, VL;
10867 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10868 Mask = VPLoad->getMask();
10869 PassThru = DAG.getUNDEF(VT);
10870 VL = VPLoad->getVectorLength();
10871 } else {
10872 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10873 Mask = MLoad->getMask();
10874 PassThru = MLoad->getPassThru();
10875 }
10876
10877 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10878
10879 MVT XLenVT = Subtarget.getXLenVT();
10880
10881 MVT ContainerVT = VT;
10882 if (VT.isFixedLengthVector()) {
10883 ContainerVT = getContainerForFixedLengthVector(VT);
10884 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10885 if (!IsUnmasked) {
10886 MVT MaskVT = getMaskTypeFor(ContainerVT);
10887 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10888 }
10889 }
10890
10891 if (!VL)
10892 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10893
10894 unsigned IntID =
10895 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10896 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10897 if (IsUnmasked)
10898 Ops.push_back(DAG.getUNDEF(ContainerVT));
10899 else
10900 Ops.push_back(PassThru);
10901 Ops.push_back(BasePtr);
10902 if (!IsUnmasked)
10903 Ops.push_back(Mask);
10904 Ops.push_back(VL);
10905 if (!IsUnmasked)
10906 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10907
10908 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10909
10910 SDValue Result =
10911 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10912 Chain = Result.getValue(1);
10913
10914 if (VT.isFixedLengthVector())
10915 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10916
10917 return DAG.getMergeValues({Result, Chain}, DL);
10918}
10919
10920SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10921 SelectionDAG &DAG) const {
10922 SDLoc DL(Op);
10923
10924 const auto *MemSD = cast<MemSDNode>(Op);
10925 EVT MemVT = MemSD->getMemoryVT();
10926 MachineMemOperand *MMO = MemSD->getMemOperand();
10927 SDValue Chain = MemSD->getChain();
10928 SDValue BasePtr = MemSD->getBasePtr();
10929 SDValue Val, Mask, VL;
10930
10931 bool IsCompressingStore = false;
10932 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10933 Val = VPStore->getValue();
10934 Mask = VPStore->getMask();
10935 VL = VPStore->getVectorLength();
10936 } else {
10937 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10938 Val = MStore->getValue();
10939 Mask = MStore->getMask();
10940 IsCompressingStore = MStore->isCompressingStore();
10941 }
10942
10943 bool IsUnmasked =
10944 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10945
10946 MVT VT = Val.getSimpleValueType();
10947 MVT XLenVT = Subtarget.getXLenVT();
10948
10949 MVT ContainerVT = VT;
10950 if (VT.isFixedLengthVector()) {
10951 ContainerVT = getContainerForFixedLengthVector(VT);
10952
10953 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10954 if (!IsUnmasked || IsCompressingStore) {
10955 MVT MaskVT = getMaskTypeFor(ContainerVT);
10956 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10957 }
10958 }
10959
10960 if (!VL)
10961 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10962
10963 if (IsCompressingStore) {
10964 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10965 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10966 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10967 VL =
10968 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10969 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10970 }
10971
10972 unsigned IntID =
10973 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10974 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10975 Ops.push_back(Val);
10976 Ops.push_back(BasePtr);
10977 if (!IsUnmasked)
10978 Ops.push_back(Mask);
10979 Ops.push_back(VL);
10980
10981 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10982 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10983}
10984
10985SDValue
10986RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10987 SelectionDAG &DAG) const {
10988 MVT InVT = Op.getOperand(0).getSimpleValueType();
10989 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10990
10991 MVT VT = Op.getSimpleValueType();
10992
10993 SDValue Op1 =
10994 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10995 SDValue Op2 =
10996 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10997
10998 SDLoc DL(Op);
10999 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11000 DAG, Subtarget);
11001 MVT MaskVT = getMaskTypeFor(ContainerVT);
11002
11003 SDValue Cmp =
11004 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11005 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11006
11007 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11008}
11009
11010SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11011 SelectionDAG &DAG) const {
11012 unsigned Opc = Op.getOpcode();
11013 SDLoc DL(Op);
11014 SDValue Chain = Op.getOperand(0);
11015 SDValue Op1 = Op.getOperand(1);
11016 SDValue Op2 = Op.getOperand(2);
11017 SDValue CC = Op.getOperand(3);
11018 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11019 MVT VT = Op.getSimpleValueType();
11020 MVT InVT = Op1.getSimpleValueType();
11021
11022 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11023 // condition code.
11024 if (Opc == ISD::STRICT_FSETCCS) {
11025 // Expand strict_fsetccs(x, oeq) to
11026 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11027 SDVTList VTList = Op->getVTList();
11028 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11029 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11030 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11031 Op2, OLECCVal);
11032 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11033 Op1, OLECCVal);
11034 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11035 Tmp1.getValue(1), Tmp2.getValue(1));
11036 // Tmp1 and Tmp2 might be the same node.
11037 if (Tmp1 != Tmp2)
11038 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11039 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11040 }
11041
11042 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11043 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11044 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11045 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11046 Op2, OEQCCVal);
11047 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11048 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11049 }
11050 }
11051
11052 MVT ContainerInVT = InVT;
11053 if (InVT.isFixedLengthVector()) {
11054 ContainerInVT = getContainerForFixedLengthVector(InVT);
11055 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11056 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11057 }
11058 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11059
11060 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11061
11062 SDValue Res;
11063 if (Opc == ISD::STRICT_FSETCC &&
11064 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11065 CCVal == ISD::SETOLE)) {
11066 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11067 // is only active when both input elements are ordered.
11068 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11069 SDValue OrderMask1 = DAG.getNode(
11070 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11071 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11072 True, VL});
11073 SDValue OrderMask2 = DAG.getNode(
11074 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11075 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11076 True, VL});
11077 Mask =
11078 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11079 // Use Mask as the merge operand to let the result be 0 if either of the
11080 // inputs is unordered.
11081 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11082 DAG.getVTList(MaskVT, MVT::Other),
11083 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11084 } else {
11085 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11086 : RISCVISD::STRICT_FSETCCS_VL;
11087 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11088 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11089 }
11090
11091 if (VT.isFixedLengthVector()) {
11092 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11093 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11094 }
11095 return Res;
11096}
11097
11098// Lower vector ABS to smax(X, sub(0, X)).
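// E.g. abs(-5) = smax(-5, 0 - (-5)) = smax(-5, 5) = 5; abs(7) = smax(7, -7) = 7.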
11099SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11100 SDLoc DL(Op);
11101 MVT VT = Op.getSimpleValueType();
11102 SDValue X = Op.getOperand(0);
11103
11104 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11105 "Unexpected type for ISD::ABS");
11106
11107 MVT ContainerVT = VT;
11108 if (VT.isFixedLengthVector()) {
11109 ContainerVT = getContainerForFixedLengthVector(VT);
11110 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11111 }
11112
11113 SDValue Mask, VL;
11114 if (Op->getOpcode() == ISD::VP_ABS) {
11115 Mask = Op->getOperand(1);
11116 if (VT.isFixedLengthVector())
11117 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11118 Subtarget);
11119 VL = Op->getOperand(2);
11120 } else
11121 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11122
11123 SDValue SplatZero = DAG.getNode(
11124 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11125 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11126 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11127 DAG.getUNDEF(ContainerVT), Mask, VL);
11128 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11129 DAG.getUNDEF(ContainerVT), Mask, VL);
11130
11131 if (VT.isFixedLengthVector())
11132 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11133 return Max;
11134}
11135
11136SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11137 SDValue Op, SelectionDAG &DAG) const {
11138 SDLoc DL(Op);
11139 MVT VT = Op.getSimpleValueType();
11140 SDValue Mag = Op.getOperand(0);
11141 SDValue Sign = Op.getOperand(1);
11142 assert(Mag.getValueType() == Sign.getValueType() &&
11143 "Can only handle COPYSIGN with matching types.");
11144
11145 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11146 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11147 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11148
11149 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11150
11151 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11152 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11153
11154 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11155}
11156
11157SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11158 SDValue Op, SelectionDAG &DAG) const {
11159 MVT VT = Op.getSimpleValueType();
11160 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11161
11162 MVT I1ContainerVT =
11163 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11164
11165 SDValue CC =
11166 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11167 SDValue Op1 =
11168 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11169 SDValue Op2 =
11170 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11171
11172 SDLoc DL(Op);
11173 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11174
11175 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11176 Op2, DAG.getUNDEF(ContainerVT), VL);
11177
11178 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11179}
11180
11181SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11182 SelectionDAG &DAG) const {
11183 unsigned NewOpc = getRISCVVLOp(Op);
11184 bool HasMergeOp = hasMergeOp(NewOpc);
11185 bool HasMask = hasMaskOp(NewOpc);
11186
11187 MVT VT = Op.getSimpleValueType();
11188 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11189
11190 // Create list of operands by converting existing ones to scalable types.
11191   SmallVector<SDValue, 6> Ops;
11192   for (const SDValue &V : Op->op_values()) {
11193 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11194
11195 // Pass through non-vector operands.
11196 if (!V.getValueType().isVector()) {
11197 Ops.push_back(V);
11198 continue;
11199 }
11200
11201 // "cast" fixed length vector to a scalable vector.
11202 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11203 "Only fixed length vectors are supported!");
11204 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11205 }
11206
11207 SDLoc DL(Op);
11208 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11209 if (HasMergeOp)
11210 Ops.push_back(DAG.getUNDEF(ContainerVT));
11211 if (HasMask)
11212 Ops.push_back(Mask);
11213 Ops.push_back(VL);
11214
11215 // StrictFP operations have two result values. Their lowered result should
11216   // have the same result count.
11217 if (Op->isStrictFPOpcode()) {
11218 SDValue ScalableRes =
11219 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11220 Op->getFlags());
11221 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11222 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11223 }
11224
11225 SDValue ScalableRes =
11226 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11227 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11228}
11229
11230// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11231// * Operands of each node are assumed to be in the same order.
11232// * The EVL operand is promoted from i32 to i64 on RV64.
11233// * Fixed-length vectors are converted to their scalable-vector container
11234// types.
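// For instance, an ISD::VP_ADD on a fixed-length vector becomes the
// corresponding VL node (RISCVISD::ADD_VL) on the scalable container type and
// is then converted back to the original fixed-length type.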
11235SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11236 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11237 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
11238
11239 SDLoc DL(Op);
11240 MVT VT = Op.getSimpleValueType();
11241   SmallVector<SDValue, 16> Ops;
11242
11243 MVT ContainerVT = VT;
11244 if (VT.isFixedLengthVector())
11245 ContainerVT = getContainerForFixedLengthVector(VT);
11246
11247 for (const auto &OpIdx : enumerate(Op->ops())) {
11248 SDValue V = OpIdx.value();
11249 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11250     // Add a dummy merge value before the mask or, if there isn't a mask,
11251     // before the EVL.
11252 if (HasMergeOp) {
11253 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11254 if (MaskIdx) {
11255 if (*MaskIdx == OpIdx.index())
11256 Ops.push_back(DAG.getUNDEF(ContainerVT));
11257 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11258 OpIdx.index()) {
11259 if (Op.getOpcode() == ISD::VP_MERGE) {
11260 // For VP_MERGE, copy the false operand instead of an undef value.
11261 Ops.push_back(Ops.back());
11262 } else {
11263 assert(Op.getOpcode() == ISD::VP_SELECT);
11264 // For VP_SELECT, add an undef value.
11265 Ops.push_back(DAG.getUNDEF(ContainerVT));
11266 }
11267 }
11268 }
11269 // Pass through operands which aren't fixed-length vectors.
11270 if (!V.getValueType().isFixedLengthVector()) {
11271 Ops.push_back(V);
11272 continue;
11273 }
11274 // "cast" fixed length vector to a scalable vector.
11275 MVT OpVT = V.getSimpleValueType();
11276 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11277 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11278 "Only fixed length vectors are supported!");
11279 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11280 }
11281
11282 if (!VT.isFixedLengthVector())
11283 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11284
11285 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11286
11287 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11288}
11289
11290SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11291 SelectionDAG &DAG) const {
11292 SDLoc DL(Op);
11293 MVT VT = Op.getSimpleValueType();
11294
11295 SDValue Src = Op.getOperand(0);
11296 // NOTE: Mask is dropped.
11297 SDValue VL = Op.getOperand(2);
11298
11299 MVT ContainerVT = VT;
11300 if (VT.isFixedLengthVector()) {
11301 ContainerVT = getContainerForFixedLengthVector(VT);
11302 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11303 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11304 }
11305
11306 MVT XLenVT = Subtarget.getXLenVT();
11307 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11308 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11309 DAG.getUNDEF(ContainerVT), Zero, VL);
11310
11311 SDValue SplatValue = DAG.getConstant(
11312 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11313 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11314 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11315
11316 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11317 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11318 if (!VT.isFixedLengthVector())
11319 return Result;
11320 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11321}
11322
11323SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11324 SelectionDAG &DAG) const {
11325 SDLoc DL(Op);
11326 MVT VT = Op.getSimpleValueType();
11327
11328 SDValue Op1 = Op.getOperand(0);
11329 SDValue Op2 = Op.getOperand(1);
11330 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11331 // NOTE: Mask is dropped.
11332 SDValue VL = Op.getOperand(4);
11333
11334 MVT ContainerVT = VT;
11335 if (VT.isFixedLengthVector()) {
11336 ContainerVT = getContainerForFixedLengthVector(VT);
11337 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11338 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11339 }
11340
11341   SDValue Result;
11342   SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11343
11344 switch (Condition) {
11345 default:
11346 break;
11347 // X != Y --> (X^Y)
11348 case ISD::SETNE:
11349 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11350 break;
11351 // X == Y --> ~(X^Y)
11352 case ISD::SETEQ: {
11353 SDValue Temp =
11354 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11355 Result =
11356 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11357 break;
11358 }
11359 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11360 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11361 case ISD::SETGT:
11362 case ISD::SETULT: {
11363 SDValue Temp =
11364 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11365 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11366 break;
11367 }
11368 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11369 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11370 case ISD::SETLT:
11371 case ISD::SETUGT: {
11372 SDValue Temp =
11373 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11374 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11375 break;
11376 }
11377 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11378 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11379 case ISD::SETGE:
11380 case ISD::SETULE: {
11381 SDValue Temp =
11382 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11383 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11384 break;
11385 }
11386 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11387 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11388 case ISD::SETLE:
11389 case ISD::SETUGE: {
11390 SDValue Temp =
11391 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11392 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11393 break;
11394 }
11395 }
11396
11397 if (!VT.isFixedLengthVector())
11398 return Result;
11399 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11400}
11401
11402// Lower Floating-Point/Integer Type-Convert VP SDNodes
11403SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 SDLoc DL(Op);
11406
11407 SDValue Src = Op.getOperand(0);
11408 SDValue Mask = Op.getOperand(1);
11409 SDValue VL = Op.getOperand(2);
11410 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11411
11412 MVT DstVT = Op.getSimpleValueType();
11413 MVT SrcVT = Src.getSimpleValueType();
11414 if (DstVT.isFixedLengthVector()) {
11415 DstVT = getContainerForFixedLengthVector(DstVT);
11416 SrcVT = getContainerForFixedLengthVector(SrcVT);
11417 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11418 MVT MaskVT = getMaskTypeFor(DstVT);
11419 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11420 }
11421
11422 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11423 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11424
11425   SDValue Result;
11426   if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11427 if (SrcVT.isInteger()) {
11428 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11429
11430 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11431                                       ? RISCVISD::VSEXT_VL
11432                                       : RISCVISD::VZEXT_VL;
11433
11434 // Do we need to do any pre-widening before converting?
11435 if (SrcEltSize == 1) {
11436 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11437 MVT XLenVT = Subtarget.getXLenVT();
11438 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11439 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11440 DAG.getUNDEF(IntVT), Zero, VL);
11441 SDValue One = DAG.getConstant(
11442 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11443 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11444 DAG.getUNDEF(IntVT), One, VL);
11445 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11446 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11447 } else if (DstEltSize > (2 * SrcEltSize)) {
11448 // Widen before converting.
11449 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11450 DstVT.getVectorElementCount());
11451 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11452 }
11453
11454 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11455 } else {
11456 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11457 "Wrong input/output vector types");
11458
11459 // Convert f16 to f32 then convert f32 to i64.
11460 if (DstEltSize > (2 * SrcEltSize)) {
11461 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11462 MVT InterimFVT =
11463 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11464 Src =
11465 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11466 }
11467
11468 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11469 }
11470 } else { // Narrowing + Conversion
11471 if (SrcVT.isInteger()) {
11472 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11473 // First do a narrowing convert to an FP type half the size, then round
11474 // the FP type to a small FP type if needed.
11475
11476 MVT InterimFVT = DstVT;
11477 if (SrcEltSize > (2 * DstEltSize)) {
11478 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11479 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11480 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11481 }
11482
11483 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11484
11485 if (InterimFVT != DstVT) {
11486 Src = Result;
11487 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11488 }
11489 } else {
11490 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11491 "Wrong input/output vector types");
11492 // First do a narrowing conversion to an integer half the size, then
11493 // truncate if needed.
11494
11495 if (DstEltSize == 1) {
11496 // First convert to the same size integer, then convert to mask using
11497 // setcc.
11498 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11499 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11500 DstVT.getVectorElementCount());
11501 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11502
11503 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11504 // otherwise the conversion was undefined.
11505 MVT XLenVT = Subtarget.getXLenVT();
11506 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11507 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11508 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11509 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11510 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11511 DAG.getUNDEF(DstVT), Mask, VL});
11512 } else {
11513 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11514 DstVT.getVectorElementCount());
11515
11516 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11517
11518 while (InterimIVT != DstVT) {
11519 SrcEltSize /= 2;
11520 Src = Result;
11521 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11522 DstVT.getVectorElementCount());
11523 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11524 Src, Mask, VL);
11525 }
11526 }
11527 }
11528 }
11529
11530 MVT VT = Op.getSimpleValueType();
11531 if (!VT.isFixedLengthVector())
11532 return Result;
11533 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11534}
11535
11536SDValue
11537RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11538 SelectionDAG &DAG) const {
11539 SDLoc DL(Op);
11540
11541 SDValue Op1 = Op.getOperand(0);
11542 SDValue Op2 = Op.getOperand(1);
11543 SDValue Offset = Op.getOperand(2);
11544 SDValue Mask = Op.getOperand(3);
11545 SDValue EVL1 = Op.getOperand(4);
11546 SDValue EVL2 = Op.getOperand(5);
11547
11548 const MVT XLenVT = Subtarget.getXLenVT();
11549 MVT VT = Op.getSimpleValueType();
11550 MVT ContainerVT = VT;
11551 if (VT.isFixedLengthVector()) {
11552 ContainerVT = getContainerForFixedLengthVector(VT);
11553 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11554 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11555 MVT MaskVT = getMaskTypeFor(ContainerVT);
11556 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11557 }
11558
11559 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11560 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11561
11562 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11563 if (IsMaskVector) {
11564 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11565
11566 // Expand input operands
11567 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11568 DAG.getUNDEF(ContainerVT),
11569 DAG.getConstant(1, DL, XLenVT), EVL1);
11570 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11571 DAG.getUNDEF(ContainerVT),
11572 DAG.getConstant(0, DL, XLenVT), EVL1);
11573 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11574 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11575
11576 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11577 DAG.getUNDEF(ContainerVT),
11578 DAG.getConstant(1, DL, XLenVT), EVL2);
11579 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11580 DAG.getUNDEF(ContainerVT),
11581 DAG.getConstant(0, DL, XLenVT), EVL2);
11582 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11583 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11584 }
11585
11586 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11587 SDValue DownOffset, UpOffset;
11588 if (ImmValue >= 0) {
11589 // The operand is a TargetConstant, we need to rebuild it as a regular
11590 // constant.
11591 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11592 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11593 } else {
11594 // The operand is a TargetConstant, we need to rebuild it as a regular
11595 // constant rather than negating the original operand.
11596 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11597 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11598 }
11599
11600 SDValue SlideDown =
11601 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11602 Op1, DownOffset, Mask, UpOffset);
11603 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11604 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11605
11606 if (IsMaskVector) {
11607 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11608 Result = DAG.getNode(
11609 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11610 {Result, DAG.getConstant(0, DL, ContainerVT),
11611 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11612 Mask, EVL2});
11613 }
11614
11615 if (!VT.isFixedLengthVector())
11616 return Result;
11617 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11618}
11619
11620SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
11621 SelectionDAG &DAG) const {
11622 SDLoc DL(Op);
11623 SDValue Val = Op.getOperand(0);
11624 SDValue Mask = Op.getOperand(1);
11625 SDValue VL = Op.getOperand(2);
11626 MVT VT = Op.getSimpleValueType();
11627
11628 MVT ContainerVT = VT;
11629 if (VT.isFixedLengthVector()) {
11630 ContainerVT = getContainerForFixedLengthVector(VT);
11631 MVT MaskVT = getMaskTypeFor(ContainerVT);
11632 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11633 }
11634
11635 SDValue Result =
11636 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
11637
11638 if (!VT.isFixedLengthVector())
11639 return Result;
11640 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11641}
11642
11643SDValue
11644RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11645 SelectionDAG &DAG) const {
11646 SDLoc DL(Op);
11647 MVT VT = Op.getSimpleValueType();
11648 MVT XLenVT = Subtarget.getXLenVT();
11649
11650 SDValue Op1 = Op.getOperand(0);
11651 SDValue Mask = Op.getOperand(1);
11652 SDValue EVL = Op.getOperand(2);
11653
11654 MVT ContainerVT = VT;
11655 if (VT.isFixedLengthVector()) {
11656 ContainerVT = getContainerForFixedLengthVector(VT);
11657 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11658 MVT MaskVT = getMaskTypeFor(ContainerVT);
11659 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11660 }
11661
11662 MVT GatherVT = ContainerVT;
11663 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11664 // Check if we are working with mask vectors
11665 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11666 if (IsMaskVector) {
11667 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11668
11669 // Expand input operand
11670 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11671 DAG.getUNDEF(IndicesVT),
11672 DAG.getConstant(1, DL, XLenVT), EVL);
11673 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11674 DAG.getUNDEF(IndicesVT),
11675 DAG.getConstant(0, DL, XLenVT), EVL);
11676 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11677 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11678 }
11679
11680 unsigned EltSize = GatherVT.getScalarSizeInBits();
11681 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11682 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11683 unsigned MaxVLMAX =
11684 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11685
11686 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11687 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11688 // to use vrgatherei16.vv.
11689 // TODO: It's also possible to use vrgatherei16.vv for other types to
11690 // decrease register width for the index calculation.
11691 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
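  // For example, with SEW=8 and LMUL=8 at VLEN=1024, VLMAX is 1024, so an i8
  // index (at most 255) cannot address every element and 16-bit indices are
  // needed.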
11692 if (MaxVLMAX > 256 && EltSize == 8) {
11693 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11694 // Split the vector in half and reverse each half using a full register
11695 // reverse.
11696 // Swap the halves and concatenate them.
11697 // Slide the concatenated result by (VLMax - VL).
11698 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11699 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11700 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11701
11702 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11703 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11704
11705 // Reassemble the low and high pieces reversed.
11706 // NOTE: this Result is unmasked (because we do not need masks for
11707 // shuffles). If in the future this has to change, we can use a SELECT_VL
11708 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11709 SDValue Result =
11710 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11711
11712 // Slide off any elements from past EVL that were reversed into the low
11713 // elements.
11714 unsigned MinElts = GatherVT.getVectorMinNumElements();
11715 SDValue VLMax =
11716 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11717 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11718
11719 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11720 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11721
11722 if (IsMaskVector) {
11723 // Truncate Result back to a mask vector
11724 Result =
11725 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11726 {Result, DAG.getConstant(0, DL, GatherVT),
11727                          DAG.getCondCode(ISD::SETNE),
11728                          DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11729 }
11730
11731 if (!VT.isFixedLengthVector())
11732 return Result;
11733 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11734 }
11735
11736 // Just promote the int type to i16 which will double the LMUL.
11737 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11738 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11739 }
11740
11741 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11742 SDValue VecLen =
11743 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11744 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11745 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11746 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11747 DAG.getUNDEF(IndicesVT), Mask, EVL);
11748 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11749 DAG.getUNDEF(GatherVT), Mask, EVL);
11750
11751 if (IsMaskVector) {
11752 // Truncate Result back to a mask vector
11753 Result = DAG.getNode(
11754 RISCVISD::SETCC_VL, DL, ContainerVT,
11755 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11756 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11757 }
11758
11759 if (!VT.isFixedLengthVector())
11760 return Result;
11761 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11762}
11763
11764SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11765 SelectionDAG &DAG) const {
11766 MVT VT = Op.getSimpleValueType();
11767 if (VT.getVectorElementType() != MVT::i1)
11768 return lowerVPOp(Op, DAG);
11769
11770 // It is safe to drop mask parameter as masked-off elements are undef.
11771 SDValue Op1 = Op->getOperand(0);
11772 SDValue Op2 = Op->getOperand(1);
11773 SDValue VL = Op->getOperand(3);
11774
11775 MVT ContainerVT = VT;
11776 const bool IsFixed = VT.isFixedLengthVector();
11777 if (IsFixed) {
11778 ContainerVT = getContainerForFixedLengthVector(VT);
11779 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11780 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11781 }
11782
11783 SDLoc DL(Op);
11784 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11785 if (!IsFixed)
11786 return Val;
11787 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11788}
11789
11790SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11791 SelectionDAG &DAG) const {
11792 SDLoc DL(Op);
11793 MVT XLenVT = Subtarget.getXLenVT();
11794 MVT VT = Op.getSimpleValueType();
11795 MVT ContainerVT = VT;
11796 if (VT.isFixedLengthVector())
11797 ContainerVT = getContainerForFixedLengthVector(VT);
11798
11799 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11800
11801 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11802 // Check if the mask is known to be all ones
11803 SDValue Mask = VPNode->getMask();
11804 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11805
11806 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11807 : Intrinsic::riscv_vlse_mask,
11808 DL, XLenVT);
11809 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11810 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11811 VPNode->getStride()};
11812 if (!IsUnmasked) {
11813 if (VT.isFixedLengthVector()) {
11814 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11815 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11816 }
11817 Ops.push_back(Mask);
11818 }
11819 Ops.push_back(VPNode->getVectorLength());
11820 if (!IsUnmasked) {
11821 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11822 Ops.push_back(Policy);
11823 }
11824
11825 SDValue Result =
11826       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11827                               VPNode->getMemoryVT(), VPNode->getMemOperand());
11828 SDValue Chain = Result.getValue(1);
11829
11830 if (VT.isFixedLengthVector())
11831 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11832
11833 return DAG.getMergeValues({Result, Chain}, DL);
11834}
11835
11836SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11837 SelectionDAG &DAG) const {
11838 SDLoc DL(Op);
11839 MVT XLenVT = Subtarget.getXLenVT();
11840
11841 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11842 SDValue StoreVal = VPNode->getValue();
11843 MVT VT = StoreVal.getSimpleValueType();
11844 MVT ContainerVT = VT;
11845 if (VT.isFixedLengthVector()) {
11846 ContainerVT = getContainerForFixedLengthVector(VT);
11847 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11848 }
11849
11850 // Check if the mask is known to be all ones
11851 SDValue Mask = VPNode->getMask();
11852 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11853
11854 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11855 : Intrinsic::riscv_vsse_mask,
11856 DL, XLenVT);
11857 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11858 VPNode->getBasePtr(), VPNode->getStride()};
11859 if (!IsUnmasked) {
11860 if (VT.isFixedLengthVector()) {
11861 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11862 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11863 }
11864 Ops.push_back(Mask);
11865 }
11866 Ops.push_back(VPNode->getVectorLength());
11867
11868 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11869 Ops, VPNode->getMemoryVT(),
11870 VPNode->getMemOperand());
11871}
11872
11873// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11874 // matched to an RVV indexed load. The RVV indexed load instructions only
11875// support the "unsigned unscaled" addressing mode; indices are implicitly
11876// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11877// signed or scaled indexing is extended to the XLEN value type and scaled
11878// accordingly.
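// In other words, the effective address of element i is simply
// BasePtr + zext(Index[i]) bytes, with no implicit scaling by the element size.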
11879SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11880 SelectionDAG &DAG) const {
11881 SDLoc DL(Op);
11882 MVT VT = Op.getSimpleValueType();
11883
11884 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11885 EVT MemVT = MemSD->getMemoryVT();
11886 MachineMemOperand *MMO = MemSD->getMemOperand();
11887 SDValue Chain = MemSD->getChain();
11888 SDValue BasePtr = MemSD->getBasePtr();
11889
11890 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11891 SDValue Index, Mask, PassThru, VL;
11892
11893 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11894 Index = VPGN->getIndex();
11895 Mask = VPGN->getMask();
11896 PassThru = DAG.getUNDEF(VT);
11897 VL = VPGN->getVectorLength();
11898 // VP doesn't support extending loads.
11899     LoadExtType = ISD::NON_EXTLOAD;
11900   } else {
11901 // Else it must be a MGATHER.
11902 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11903 Index = MGN->getIndex();
11904 Mask = MGN->getMask();
11905 PassThru = MGN->getPassThru();
11906 LoadExtType = MGN->getExtensionType();
11907 }
11908
11909 MVT IndexVT = Index.getSimpleValueType();
11910 MVT XLenVT = Subtarget.getXLenVT();
11911
11912   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11913          "Unexpected VTs!");
11914 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11915 // Targets have to explicitly opt-in for extending vector loads.
11916 assert(LoadExtType == ISD::NON_EXTLOAD &&
11917 "Unexpected extending MGATHER/VP_GATHER");
11918
11919 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11920 // the selection of the masked intrinsics doesn't do this for us.
11921 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11922
11923 MVT ContainerVT = VT;
11924 if (VT.isFixedLengthVector()) {
11925 ContainerVT = getContainerForFixedLengthVector(VT);
11926 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11927 ContainerVT.getVectorElementCount());
11928
11929 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11930
11931 if (!IsUnmasked) {
11932 MVT MaskVT = getMaskTypeFor(ContainerVT);
11933 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11934 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11935 }
11936 }
11937
11938 if (!VL)
11939 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11940
11941 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11942 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11943 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11944 }
11945
11946 unsigned IntID =
11947 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11948 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11949 if (IsUnmasked)
11950 Ops.push_back(DAG.getUNDEF(ContainerVT));
11951 else
11952 Ops.push_back(PassThru);
11953 Ops.push_back(BasePtr);
11954 Ops.push_back(Index);
11955 if (!IsUnmasked)
11956 Ops.push_back(Mask);
11957 Ops.push_back(VL);
11958 if (!IsUnmasked)
11959     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11960
11961 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11962 SDValue Result =
11963 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11964 Chain = Result.getValue(1);
11965
11966 if (VT.isFixedLengthVector())
11967 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11968
11969 return DAG.getMergeValues({Result, Chain}, DL);
11970}
11971
11972// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11973 // matched to an RVV indexed store. The RVV indexed store instructions only
11974// support the "unsigned unscaled" addressing mode; indices are implicitly
11975// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11976// signed or scaled indexing is extended to the XLEN value type and scaled
11977// accordingly.
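// As with the gather lowering above, the effective address of element i is
// BasePtr + zext(Index[i]) bytes, with no implicit scaling by the element size.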
11978SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11979 SelectionDAG &DAG) const {
11980 SDLoc DL(Op);
11981 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11982 EVT MemVT = MemSD->getMemoryVT();
11983 MachineMemOperand *MMO = MemSD->getMemOperand();
11984 SDValue Chain = MemSD->getChain();
11985 SDValue BasePtr = MemSD->getBasePtr();
11986
11987 [[maybe_unused]] bool IsTruncatingStore = false;
11988 SDValue Index, Mask, Val, VL;
11989
11990 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11991 Index = VPSN->getIndex();
11992 Mask = VPSN->getMask();
11993 Val = VPSN->getValue();
11994 VL = VPSN->getVectorLength();
11995 // VP doesn't support truncating stores.
11996 IsTruncatingStore = false;
11997 } else {
11998 // Else it must be a MSCATTER.
11999 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12000 Index = MSN->getIndex();
12001 Mask = MSN->getMask();
12002 Val = MSN->getValue();
12003 IsTruncatingStore = MSN->isTruncatingStore();
12004 }
12005
12006 MVT VT = Val.getSimpleValueType();
12007 MVT IndexVT = Index.getSimpleValueType();
12008 MVT XLenVT = Subtarget.getXLenVT();
12009
12010   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12011          "Unexpected VTs!");
12012 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12013 // Targets have to explicitly opt-in for extending vector loads and
12014 // truncating vector stores.
12015 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12016
12017 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12018 // the selection of the masked intrinsics doesn't do this for us.
12019 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12020
12021 MVT ContainerVT = VT;
12022 if (VT.isFixedLengthVector()) {
12023 ContainerVT = getContainerForFixedLengthVector(VT);
12024 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12025 ContainerVT.getVectorElementCount());
12026
12027 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12028 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12029
12030 if (!IsUnmasked) {
12031 MVT MaskVT = getMaskTypeFor(ContainerVT);
12032 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12033 }
12034 }
12035
12036 if (!VL)
12037 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12038
12039 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12040 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12041 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12042 }
12043
12044 unsigned IntID =
12045 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12046 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12047 Ops.push_back(Val);
12048 Ops.push_back(BasePtr);
12049 Ops.push_back(Index);
12050 if (!IsUnmasked)
12051 Ops.push_back(Mask);
12052 Ops.push_back(VL);
12053
12054   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12055                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12056}
12057
12058SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12059 SelectionDAG &DAG) const {
12060 const MVT XLenVT = Subtarget.getXLenVT();
12061 SDLoc DL(Op);
12062 SDValue Chain = Op->getOperand(0);
12063 SDValue SysRegNo = DAG.getTargetConstant(
12064 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12065 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12066 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12067
12068   // The encoding used for the rounding mode in RISC-V differs from that used
12069   // by FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
12070   // into a table consisting of a sequence of 4-bit fields, each holding the
12071   // corresponding FLT_ROUNDS mode.
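  // For example, reading FRM == 1 (RTZ) shifts the table right by 4 bits and
  // extracts a 4-bit field holding 0, the FLT_ROUNDS encoding of
  // round-toward-zero.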
12072 static const int Table =
12073       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12074       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12075       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12076       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12077       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12078
12079 SDValue Shift =
12080 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12081 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12082 DAG.getConstant(Table, DL, XLenVT), Shift);
12083 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12084 DAG.getConstant(7, DL, XLenVT));
12085
12086 return DAG.getMergeValues({Masked, Chain}, DL);
12087}
12088
12089SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12090 SelectionDAG &DAG) const {
12091 const MVT XLenVT = Subtarget.getXLenVT();
12092 SDLoc DL(Op);
12093 SDValue Chain = Op->getOperand(0);
12094 SDValue RMValue = Op->getOperand(1);
12095 SDValue SysRegNo = DAG.getTargetConstant(
12096 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12097
12098   // The encoding used for the rounding mode in RISC-V differs from that used
12099   // by FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12100   // a table consisting of a sequence of 4-bit fields, each holding the
12101   // corresponding RISC-V mode.
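  // For example, an FLT_ROUNDS value of 1 (round-to-nearest) selects the field
  // holding RNE (0), which is then written to the FRM register.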
12102 static const unsigned Table =
12103       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12104       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12105       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12106       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12107       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12108
12109 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12110
12111 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12112 DAG.getConstant(2, DL, XLenVT));
12113 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12114 DAG.getConstant(Table, DL, XLenVT), Shift);
12115 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12116 DAG.getConstant(0x7, DL, XLenVT));
12117 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12118 RMValue);
12119}
12120
12121SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12122 SelectionDAG &DAG) const {
12123   MachineFunction &MF = DAG.getMachineFunction();
12124
12125 bool isRISCV64 = Subtarget.is64Bit();
12126 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12127
12128 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12129 return DAG.getFrameIndex(FI, PtrVT);
12130}
12131
12132// Returns the opcode of the target-specific SDNode that implements the 32-bit
12133// form of the given Opcode.
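// For example, ISD::SHL maps to RISCVISD::SLLW and ISD::UDIV to
// RISCVISD::DIVUW, both of which operate on the low 32 bits and sign-extend
// their result to 64 bits.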
12134static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12135 switch (Opcode) {
12136 default:
12137 llvm_unreachable("Unexpected opcode");
12138 case ISD::SHL:
12139 return RISCVISD::SLLW;
12140 case ISD::SRA:
12141 return RISCVISD::SRAW;
12142 case ISD::SRL:
12143 return RISCVISD::SRLW;
12144 case ISD::SDIV:
12145 return RISCVISD::DIVW;
12146 case ISD::UDIV:
12147 return RISCVISD::DIVUW;
12148 case ISD::UREM:
12149 return RISCVISD::REMUW;
12150 case ISD::ROTL:
12151 return RISCVISD::ROLW;
12152 case ISD::ROTR:
12153 return RISCVISD::RORW;
12154 }
12155}
12156
12157// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12158// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12159// otherwise be promoted to i64, making it difficult to select the
12160 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
12161 // originally of type i8/i16/i32 is lost.
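// Roughly: (i32 (srl x, y)) becomes (trunc (RISCVISD::SRLW (anyext x), (anyext y))).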
12162 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12163                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
12164 SDLoc DL(N);
12165 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12166 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12167 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12168 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12169 // ReplaceNodeResults requires we maintain the same type for the return value.
12170 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12171}
12172
12173 // Converts the given 32-bit operation to an i64 operation with sign-extension
12174 // semantics so that fewer sign-extension instructions are needed.
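// Roughly: (i32 (add x, y)) becomes
// (trunc (sext_inreg (add (anyext x), (anyext y)), i32)),
// matching the sign-extended result an ADDW would produce.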
12175 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12176   SDLoc DL(N);
12177 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12178 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12179 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12180 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12181 DAG.getValueType(MVT::i32));
12182 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12183}
12184
12185 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12186                                              SmallVectorImpl<SDValue> &Results,
12187                                              SelectionDAG &DAG) const {
12188 SDLoc DL(N);
12189 switch (N->getOpcode()) {
12190 default:
12191 llvm_unreachable("Don't know how to custom type legalize this operation!");
12192   case ISD::STRICT_FP_TO_SINT:
12193   case ISD::STRICT_FP_TO_UINT:
12194   case ISD::FP_TO_SINT:
12195 case ISD::FP_TO_UINT: {
12196 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12197 "Unexpected custom legalisation");
12198 bool IsStrict = N->isStrictFPOpcode();
12199 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12200 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12201 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12202 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12203             TargetLowering::TypeSoftenFloat) {
12204       if (!isTypeLegal(Op0.getValueType()))
12205 return;
12206 if (IsStrict) {
12207 SDValue Chain = N->getOperand(0);
12208         // In the absence of Zfh, promote f16 to f32, then convert.
12209 if (Op0.getValueType() == MVT::f16 &&
12210 !Subtarget.hasStdExtZfhOrZhinx()) {
12211 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12212 {Chain, Op0});
12213 Chain = Op0.getValue(1);
12214 }
12215 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12216                                 : RISCVISD::STRICT_FCVT_WU_RV64;
12217       SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12218 SDValue Res = DAG.getNode(
12219 Opc, DL, VTs, Chain, Op0,
12220 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12221 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12222 Results.push_back(Res.getValue(1));
12223 return;
12224 }
12225     // For bf16, or for f16 in the absence of Zfh, promote [b]f16 to f32 and then
12226 // convert.
12227 if ((Op0.getValueType() == MVT::f16 &&
12228 !Subtarget.hasStdExtZfhOrZhinx()) ||
12229 Op0.getValueType() == MVT::bf16)
12230 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12231
12232 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12233 SDValue Res =
12234 DAG.getNode(Opc, DL, MVT::i64, Op0,
12235 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12237 return;
12238 }
12239 // If the FP type needs to be softened, emit a library call using the 'si'
12240 // version. If we left it to default legalization we'd end up with 'di'. If
12241 // the FP type doesn't need to be softened just let generic type
12242 // legalization promote the result type.
12243 RTLIB::Libcall LC;
12244 if (IsSigned)
12245 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12246 else
12247 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12248 MakeLibCallOptions CallOptions;
12249 EVT OpVT = Op0.getValueType();
12250 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12251 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12252 SDValue Result;
12253 std::tie(Result, Chain) =
12254 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12255 Results.push_back(Result);
12256 if (IsStrict)
12257 Results.push_back(Chain);
12258 break;
12259 }
12260 case ISD::LROUND: {
12261 SDValue Op0 = N->getOperand(0);
12262 EVT Op0VT = Op0.getValueType();
12263 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12264             TargetLowering::TypeSoftenFloat) {
12265       if (!isTypeLegal(Op0VT))
12266 return;
12267
12268       // In the absence of Zfh, promote f16 to f32, then convert.
12269 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12270 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12271
12272 SDValue Res =
12273 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12274 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12275 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12276 return;
12277 }
12278 // If the FP type needs to be softened, emit a library call to lround. We'll
12279 // need to truncate the result. We assume any value that doesn't fit in i32
12280 // is allowed to return an unspecified value.
12281 RTLIB::Libcall LC =
12282 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12283 MakeLibCallOptions CallOptions;
12284 EVT OpVT = Op0.getValueType();
12285 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12286 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12287 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12288 Results.push_back(Result);
12289 break;
12290 }
12291   case ISD::READCYCLECOUNTER:
12292   case ISD::READSTEADYCOUNTER: {
12293     assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12294 "has custom type legalization on riscv32");
12295
12296 SDValue LoCounter, HiCounter;
12297 MVT XLenVT = Subtarget.getXLenVT();
12298 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12299 LoCounter = DAG.getTargetConstant(
12300 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12301 HiCounter = DAG.getTargetConstant(
12302 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12303 } else {
12304 LoCounter = DAG.getTargetConstant(
12305 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12306 HiCounter = DAG.getTargetConstant(
12307 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12308 }
12309 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12310     SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12311                               N->getOperand(0), LoCounter, HiCounter);
12312
12313 Results.push_back(
12314 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12315 Results.push_back(RCW.getValue(2));
12316 break;
12317 }
12318 case ISD::LOAD: {
12319 if (!ISD::isNON_EXTLoad(N))
12320 return;
12321
12322 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12323 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12324 LoadSDNode *Ld = cast<LoadSDNode>(N);
12325
12326 SDLoc dl(N);
12327 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12328 Ld->getBasePtr(), Ld->getMemoryVT(),
12329 Ld->getMemOperand());
12330 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12331 Results.push_back(Res.getValue(1));
12332 return;
12333 }
12334 case ISD::MUL: {
12335 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12336 unsigned XLen = Subtarget.getXLen();
12337 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12338 if (Size > XLen) {
12339 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12340 SDValue LHS = N->getOperand(0);
12341 SDValue RHS = N->getOperand(1);
12342 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12343
12344 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12345 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12346 // We need exactly one side to be unsigned.
12347 if (LHSIsU == RHSIsU)
12348 return;
12349
12350 auto MakeMULPair = [&](SDValue S, SDValue U) {
12351 MVT XLenVT = Subtarget.getXLenVT();
12352 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12353 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12354 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12355 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12356 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12357 };
12358
12359 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12360 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12361
12362 // The other operand should be signed, but still prefer MULH when
12363 // possible.
12364 if (RHSIsU && LHSIsS && !RHSIsS)
12365 Results.push_back(MakeMULPair(LHS, RHS));
12366 else if (LHSIsU && RHSIsS && !LHSIsS)
12367 Results.push_back(MakeMULPair(RHS, LHS));
12368
12369 return;
12370 }
12371 [[fallthrough]];
12372 }
12373 case ISD::ADD:
12374 case ISD::SUB:
12375 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12376 "Unexpected custom legalisation");
12377 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12378 break;
12379 case ISD::SHL:
12380 case ISD::SRA:
12381 case ISD::SRL:
12382 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12383 "Unexpected custom legalisation");
12384 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12385 // If we can use a BSET instruction, allow default promotion to apply.
12386 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12387 isOneConstant(N->getOperand(0)))
12388 break;
12389 Results.push_back(customLegalizeToWOp(N, DAG));
12390 break;
12391 }
12392
12393 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12394 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12395 // shift amount.
12396 if (N->getOpcode() == ISD::SHL) {
12397 SDLoc DL(N);
12398 SDValue NewOp0 =
12399 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12400 SDValue NewOp1 =
12401 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12402 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12403 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12404 DAG.getValueType(MVT::i32));
12405 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12406 }
12407
12408 break;
12409 case ISD::ROTL:
12410 case ISD::ROTR:
12411 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12412 "Unexpected custom legalisation");
12413 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12414 Subtarget.hasVendorXTHeadBb()) &&
12415 "Unexpected custom legalization");
12416 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12417 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12418 return;
12419 Results.push_back(customLegalizeToWOp(N, DAG));
12420 break;
12421 case ISD::CTTZ:
12422   case ISD::CTTZ_ZERO_UNDEF:
12423   case ISD::CTLZ:
12424 case ISD::CTLZ_ZERO_UNDEF: {
12425 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12426 "Unexpected custom legalisation");
12427
12428 SDValue NewOp0 =
12429 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12430 bool IsCTZ =
12431 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12432 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12433 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12434 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12435 return;
12436 }
12437 case ISD::SDIV:
12438 case ISD::UDIV:
12439 case ISD::UREM: {
12440 MVT VT = N->getSimpleValueType(0);
12441 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12442 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12443 "Unexpected custom legalisation");
12444     // Don't promote division/remainder by a constant since we should expand
12445     // those to a multiply by a magic constant.
12446     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12447     if (N->getOperand(1).getOpcode() == ISD::Constant &&
12448 !isIntDivCheap(N->getValueType(0), Attr))
12449 return;
12450
12451 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12452 // the upper 32 bits. For other types we need to sign or zero extend
12453 // based on the opcode.
12454 unsigned ExtOpc = ISD::ANY_EXTEND;
12455 if (VT != MVT::i32)
12456 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12457                                            : ISD::ZERO_EXTEND;
12458
12459 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12460 break;
12461 }
12462 case ISD::SADDO: {
12463 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12464 "Unexpected custom legalisation");
12465
12466 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12467 // use the default legalization.
12468 if (!isa<ConstantSDNode>(N->getOperand(1)))
12469 return;
12470
12471 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12472 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12473 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12474 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12475 DAG.getValueType(MVT::i32));
12476
12477 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12478
12479 // For an addition, the result should be less than one of the operands (LHS)
12480 // if and only if the other operand (RHS) is negative, otherwise there will
12481 // be overflow.
12482 // For a subtraction, the result should be less than one of the operands
12483 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12484 // otherwise there will be overflow.
12485 EVT OType = N->getValueType(1);
12486 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12487 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12488
12489 SDValue Overflow =
12490 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12491 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12492 Results.push_back(Overflow);
12493 return;
12494 }
12495 case ISD::UADDO:
12496 case ISD::USUBO: {
12497 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12498 "Unexpected custom legalisation");
12499 bool IsAdd = N->getOpcode() == ISD::UADDO;
12500 // Create an ADDW or SUBW.
12501 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12502 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12503 SDValue Res =
12504 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12505 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12506 DAG.getValueType(MVT::i32));
12507
12508 SDValue Overflow;
12509 if (IsAdd && isOneConstant(RHS)) {
12510 // Special case uaddo X, 1 overflowed if the addition result is 0.
12511 // The general case (X + C) < C is not necessarily beneficial. Although we
12512 // reduce the live range of X, we may introduce the materialization of
12513       // constant C, especially when the setcc result is used by a branch. We
12514       // have no compare-with-constant-and-branch instructions.
12515 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12516 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12517 } else if (IsAdd && isAllOnesConstant(RHS)) {
12518 // Special case uaddo X, -1 overflowed if X != 0.
12519 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12520 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12521 } else {
12522 // Sign extend the LHS and perform an unsigned compare with the ADDW
12523 // result. Since the inputs are sign extended from i32, this is equivalent
12524 // to comparing the lower 32 bits.
12525 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12526 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12527 IsAdd ? ISD::SETULT : ISD::SETUGT);
12528 }
12529
12530 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12531 Results.push_back(Overflow);
12532 return;
12533 }
12534 case ISD::UADDSAT:
12535 case ISD::USUBSAT: {
12536 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12537 "Unexpected custom legalisation");
12538 if (Subtarget.hasStdExtZbb()) {
12539 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12540 // sign extend allows overflow of the lower 32 bits to be detected on
12541 // the promoted size.
12542 SDValue LHS =
12543 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12544 SDValue RHS =
12545 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12546 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12547 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12548 return;
12549 }
12550
12551 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12552 // promotion for UADDO/USUBO.
12553 Results.push_back(expandAddSubSat(N, DAG));
12554 return;
12555 }
12556 case ISD::SADDSAT:
12557 case ISD::SSUBSAT: {
12558 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12559 "Unexpected custom legalisation");
12560 Results.push_back(expandAddSubSat(N, DAG));
12561 return;
12562 }
12563 case ISD::ABS: {
12564 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12565 "Unexpected custom legalisation");
12566
12567 if (Subtarget.hasStdExtZbb()) {
12568 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12569 // This allows us to remember that the result is sign extended. Expanding
12570 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12571 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12572 N->getOperand(0));
12573 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12574 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12575 return;
12576 }
12577
12578 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
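// For example, X = -5: Y = -1, xor(X, Y) = 4, sub(4, -1) = 5. For X = 5: Y = 0 and
// the value passes through unchanged.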
12579 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12580
12581 // Freeze the source so we can increase its use count.
12582 Src = DAG.getFreeze(Src);
12583
12584 // Copy sign bit to all bits using the sraiw pattern.
12585 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12586 DAG.getValueType(MVT::i32));
12587 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12588 DAG.getConstant(31, DL, MVT::i64));
12589
12590 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12591 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12592
12593 // NOTE: The result is only required to be anyextended, but sext is
12594 // consistent with type legalization of sub.
12595 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12596 DAG.getValueType(MVT::i32));
12597 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12598 return;
12599 }
12600 case ISD::BITCAST: {
12601 EVT VT = N->getValueType(0);
12602 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12603 SDValue Op0 = N->getOperand(0);
12604 EVT Op0VT = Op0.getValueType();
12605 MVT XLenVT = Subtarget.getXLenVT();
12606 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12607 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12608 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12609 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12610 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12611 Subtarget.hasStdExtZfbfmin()) {
12612 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12613 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12614 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12615 Subtarget.hasStdExtFOrZfinx()) {
12616 SDValue FPConv =
12617 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12618 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12619 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12620 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12621 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12622 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12623 NewReg.getValue(0), NewReg.getValue(1));
12624 Results.push_back(RetReg);
12625 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12626 isTypeLegal(Op0VT)) {
12627 // Custom-legalize bitcasts from fixed-length vector types to illegal
12628 // scalar types in order to improve codegen. Bitcast the vector to a
12629 // one-element vector type whose element type is the same as the result
12630 // type, and extract the first element.
12631 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12632 if (isTypeLegal(BVT)) {
12633 SDValue BVec = DAG.getBitcast(BVT, Op0);
12634 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12635 DAG.getVectorIdxConstant(0, DL)));
12636 }
12637 }
12638 break;
12639 }
12640 case RISCVISD::BREV8:
12641 case RISCVISD::ORC_B: {
12642 MVT VT = N->getSimpleValueType(0);
12643 MVT XLenVT = Subtarget.getXLenVT();
12644 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12645 "Unexpected custom legalisation");
12646 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12647 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12648 "Unexpected extension");
12649 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12650 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12651 // ReplaceNodeResults requires we maintain the same type for the return
12652 // value.
12653 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12654 break;
12655 }
12656 case ISD::EXTRACT_VECTOR_ELT: {
12657 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12658 // type is illegal (currently only vXi64 RV32).
12659 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12660 // transferred to the destination register. We issue two of these from the
12661 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12662 // first element.
12663 SDValue Vec = N->getOperand(0);
12664 SDValue Idx = N->getOperand(1);
12665
12666 // The vector type hasn't been legalized yet so we can't issue target
12667 // specific nodes if it needs legalization.
12668 // FIXME: We would manually legalize if it's important.
12669 if (!isTypeLegal(Vec.getValueType()))
12670 return;
12671
12672 MVT VecVT = Vec.getSimpleValueType();
12673
12674 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12675 VecVT.getVectorElementType() == MVT::i64 &&
12676 "Unexpected EXTRACT_VECTOR_ELT legalization");
12677
12678 // If this is a fixed vector, we need to convert it to a scalable vector.
12679 MVT ContainerVT = VecVT;
12680 if (VecVT.isFixedLengthVector()) {
12681 ContainerVT = getContainerForFixedLengthVector(VecVT);
12682 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12683 }
12684
12685 MVT XLenVT = Subtarget.getXLenVT();
12686
12687 // Use a VL of 1 to avoid processing more elements than we need.
12688 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12689
12690 // Unless the index is known to be 0, we must slide the vector down to get
12691 // the desired element into index 0.
12692 if (!isNullConstant(Idx)) {
12693 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12694 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12695 }
12696
12697 // Extract the lower XLEN bits of the correct vector element.
12698 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12699
12700 // To extract the upper XLEN bits of the vector element, shift the first
12701 // element right by 32 bits and re-extract the lower XLEN bits.
12702 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12703 DAG.getUNDEF(ContainerVT),
12704 DAG.getConstant(32, DL, XLenVT), VL);
12705 SDValue LShr32 =
12706 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12707 DAG.getUNDEF(ContainerVT), Mask, VL);
12708
12709 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12710
12711 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12712 break;
12713 }
12714 case ISD::INTRINSIC_WO_CHAIN: {
12715 unsigned IntNo = N->getConstantOperandVal(0);
12716 switch (IntNo) {
12717 default:
12719 "Don't know how to custom type legalize this intrinsic!");
12720 case Intrinsic::experimental_get_vector_length: {
12721 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12722 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12723 return;
12724 }
12725 case Intrinsic::experimental_cttz_elts: {
12726 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12727 Results.push_back(
12728 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12729 return;
12730 }
12731 case Intrinsic::riscv_orc_b:
12732 case Intrinsic::riscv_brev8:
12733 case Intrinsic::riscv_sha256sig0:
12734 case Intrinsic::riscv_sha256sig1:
12735 case Intrinsic::riscv_sha256sum0:
12736 case Intrinsic::riscv_sha256sum1:
12737 case Intrinsic::riscv_sm3p0:
12738 case Intrinsic::riscv_sm3p1: {
12739 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12740 return;
12741 unsigned Opc;
12742 switch (IntNo) {
12743 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12744 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12745 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12746 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12747 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12748 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12749 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12750 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12751 }
12752
12753 SDValue NewOp =
12754 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12755 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12756 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12757 return;
12758 }
12759 case Intrinsic::riscv_sm4ks:
12760 case Intrinsic::riscv_sm4ed: {
12761 unsigned Opc =
12762 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12763 SDValue NewOp0 =
12764 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12765 SDValue NewOp1 =
12766 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12767 SDValue Res =
12768 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12769 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12770 return;
12771 }
12772 case Intrinsic::riscv_mopr: {
12773 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12774 return;
12775 SDValue NewOp =
12776 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12777 SDValue Res = DAG.getNode(
12778 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12779 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12780 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12781 return;
12782 }
12783 case Intrinsic::riscv_moprr: {
12784 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12785 return;
12786 SDValue NewOp0 =
12787 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12788 SDValue NewOp1 =
12789 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12790 SDValue Res = DAG.getNode(
12791 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12792 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12793 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12794 return;
12795 }
12796 case Intrinsic::riscv_clmul: {
12797 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12798 return;
12799
12800 SDValue NewOp0 =
12801 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12802 SDValue NewOp1 =
12803 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12804 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12805 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12806 return;
12807 }
12808 case Intrinsic::riscv_clmulh:
12809 case Intrinsic::riscv_clmulr: {
12810 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12811 return;
12812
12813 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12814 // to the full 128-bit clmul result of multiplying two xlen values.
12815 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12816 // upper 32 bits.
12817 //
12818 // The alternative is to mask the inputs to 32 bits and use clmul, but
12819 // that requires two shifts to mask each input without zext.w.
12820 // FIXME: If the inputs are known zero extended or could be freely
12821 // zero extended, the mask form would be better.
12822 SDValue NewOp0 =
12823 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12824 SDValue NewOp1 =
12825 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12826 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12827 DAG.getConstant(32, DL, MVT::i64));
12828 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12829 DAG.getConstant(32, DL, MVT::i64));
12830 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12831 : RISCVISD::CLMULR;
12832 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12833 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12834 DAG.getConstant(32, DL, MVT::i64));
12835 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12836 return;
12837 }
12838 case Intrinsic::riscv_vmv_x_s: {
12839 EVT VT = N->getValueType(0);
12840 MVT XLenVT = Subtarget.getXLenVT();
12841 if (VT.bitsLT(XLenVT)) {
12842 // Simple case just extract using vmv.x.s and truncate.
12843 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12844 Subtarget.getXLenVT(), N->getOperand(1));
12845 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12846 return;
12847 }
12848
12849 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12850 "Unexpected custom legalization");
12851
12852 // We need to do the move in two steps.
12853 SDValue Vec = N->getOperand(1);
12854 MVT VecVT = Vec.getSimpleValueType();
12855
12856 // First extract the lower XLEN bits of the element.
12857 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12858
12859 // To extract the upper XLEN bits of the vector element, shift the first
12860 // element right by 32 bits and re-extract the lower XLEN bits.
12861 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12862
12863 SDValue ThirtyTwoV =
12864 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12865 DAG.getConstant(32, DL, XLenVT), VL);
12866 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12867 DAG.getUNDEF(VecVT), Mask, VL);
12868 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12869
12870 Results.push_back(
12871 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12872 break;
12873 }
12874 }
12875 break;
12876 }
12877 case ISD::VECREDUCE_ADD:
12878 case ISD::VECREDUCE_AND:
12879 case ISD::VECREDUCE_OR:
12880 case ISD::VECREDUCE_XOR:
12881 case ISD::VECREDUCE_SMAX:
12882 case ISD::VECREDUCE_UMAX:
12883 case ISD::VECREDUCE_SMIN:
12884 case ISD::VECREDUCE_UMIN:
12885 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12886 Results.push_back(V);
12887 break;
12888 case ISD::VP_REDUCE_ADD:
12889 case ISD::VP_REDUCE_AND:
12890 case ISD::VP_REDUCE_OR:
12891 case ISD::VP_REDUCE_XOR:
12892 case ISD::VP_REDUCE_SMAX:
12893 case ISD::VP_REDUCE_UMAX:
12894 case ISD::VP_REDUCE_SMIN:
12895 case ISD::VP_REDUCE_UMIN:
12896 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12897 Results.push_back(V);
12898 break;
12899 case ISD::GET_ROUNDING: {
12900 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12901 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12902 Results.push_back(Res.getValue(0));
12903 Results.push_back(Res.getValue(1));
12904 break;
12905 }
12906 }
12907}
12908
12909/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12910/// which corresponds to it.
12911static unsigned getVecReduceOpcode(unsigned Opc) {
12912 switch (Opc) {
12913 default:
12914 llvm_unreachable("Unhandled binary to transform reduction");
12915 case ISD::ADD:
12916 return ISD::VECREDUCE_ADD;
12917 case ISD::UMAX:
12918 return ISD::VECREDUCE_UMAX;
12919 case ISD::SMAX:
12920 return ISD::VECREDUCE_SMAX;
12921 case ISD::UMIN:
12922 return ISD::VECREDUCE_UMIN;
12923 case ISD::SMIN:
12924 return ISD::VECREDUCE_SMIN;
12925 case ISD::AND:
12926 return ISD::VECREDUCE_AND;
12927 case ISD::OR:
12928 return ISD::VECREDUCE_OR;
12929 case ISD::XOR:
12930 return ISD::VECREDUCE_XOR;
12931 case ISD::FADD:
12932 // Note: This is the associative form of the generic reduction opcode.
12933 return ISD::VECREDUCE_FADD;
12934 }
12935}
12936
12937/// Perform two related transforms whose purpose is to incrementally recognize
12938/// an explode_vector followed by scalar reduction as a vector reduction node.
12939/// This exists to recover from a deficiency in SLP which can't handle
12940/// forests with multiple roots sharing common nodes. In some cases, one
12941/// of the trees will be vectorized, and the other will remain (unprofitably)
12942/// scalarized.
12943static SDValue
12944 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12945 const RISCVSubtarget &Subtarget) {
12946
12947 // This transform needs to run before all integer types have been legalized
12948 // to i64 (so that the vector element type matches the add type), and while
12949 // it's safe to introduce odd sized vector types.
12950 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64))
12951 return SDValue();
12952
12953 // Without V, this transform isn't useful. We could form the (illegal)
12954 // operations and let them be scalarized again, but there's really no point.
12955 if (!Subtarget.hasVInstructions())
12956 return SDValue();
12957
12958 const SDLoc DL(N);
12959 const EVT VT = N->getValueType(0);
12960 const unsigned Opc = N->getOpcode();
12961
12962 // For FADD, we only handle the case with reassociation allowed. We
12963 // could handle strict reduction order, but at the moment, there's no
12964 // known reason to, and the complexity isn't worth it.
12965 // TODO: Handle fminnum and fmaxnum here
12966 if (!VT.isInteger() &&
12967 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12968 return SDValue();
12969
12970 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12971 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12972 "Inconsistent mappings");
12973 SDValue LHS = N->getOperand(0);
12974 SDValue RHS = N->getOperand(1);
12975
12976 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12977 return SDValue();
12978
12979 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12980 std::swap(LHS, RHS);
12981
12982 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12983 !isa<ConstantSDNode>(RHS.getOperand(1)))
12984 return SDValue();
12985
12986 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12987 SDValue SrcVec = RHS.getOperand(0);
12988 EVT SrcVecVT = SrcVec.getValueType();
12989 assert(SrcVecVT.getVectorElementType() == VT);
12990 if (SrcVecVT.isScalableVector())
12991 return SDValue();
12992
12993 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12994 return SDValue();
12995
12996 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12997 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12998 // root of our reduction tree. TODO: We could extend this to any two
12999 // adjacent aligned constant indices if desired.
13000 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13001 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13002 uint64_t LHSIdx =
13003 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13004 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13005 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13006 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13007 DAG.getVectorIdxConstant(0, DL));
13008 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13009 }
13010 }
13011
13012 // Match (binop (reduce (extract_subvector V, 0),
13013 // (extract_vector_elt V, sizeof(SubVec))))
13014 // into a reduction of one more element from the original vector V.
13015 if (LHS.getOpcode() != ReduceOpc)
13016 return SDValue();
13017
13018 SDValue ReduceVec = LHS.getOperand(0);
13019 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13020 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13021 isNullConstant(ReduceVec.getOperand(1)) &&
13022 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13023 // For illegal types (e.g. 3xi32), most will be combined again into a
13024 // wider (hopefully legal) type. If this is a terminal state, we are
13025 // relying on type legalization here to produce something reasonable
13026 // and this lowering quality could probably be improved. (TODO)
13027 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13028 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13029 DAG.getVectorIdxConstant(0, DL));
13030 auto Flags = ReduceVec->getFlags();
13031 Flags.intersectWith(N->getFlags());
13032 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
13033 }
13034
13035 return SDValue();
13036}
13037
13038
13039// Try to fold (<bop> x, (reduction.<bop> vec, start))
13040 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13041 const RISCVSubtarget &Subtarget) {
13042 auto BinOpToRVVReduce = [](unsigned Opc) {
13043 switch (Opc) {
13044 default:
13045 llvm_unreachable("Unhandled binary to transform reduction");
13046 case ISD::ADD:
13047 return RISCVISD::VECREDUCE_ADD_VL;
13048 case ISD::UMAX:
13049 return RISCVISD::VECREDUCE_UMAX_VL;
13050 case ISD::SMAX:
13051 return RISCVISD::VECREDUCE_SMAX_VL;
13052 case ISD::UMIN:
13053 return RISCVISD::VECREDUCE_UMIN_VL;
13054 case ISD::SMIN:
13055 return RISCVISD::VECREDUCE_SMIN_VL;
13056 case ISD::AND:
13057 return RISCVISD::VECREDUCE_AND_VL;
13058 case ISD::OR:
13059 return RISCVISD::VECREDUCE_OR_VL;
13060 case ISD::XOR:
13061 return RISCVISD::VECREDUCE_XOR_VL;
13062 case ISD::FADD:
13063 return RISCVISD::VECREDUCE_FADD_VL;
13064 case ISD::FMAXNUM:
13065 return RISCVISD::VECREDUCE_FMAX_VL;
13066 case ISD::FMINNUM:
13067 return RISCVISD::VECREDUCE_FMIN_VL;
13068 }
13069 };
13070
13071 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13072 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13073 isNullConstant(V.getOperand(1)) &&
13074 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13075 };
13076
13077 unsigned Opc = N->getOpcode();
13078 unsigned ReduceIdx;
13079 if (IsReduction(N->getOperand(0), Opc))
13080 ReduceIdx = 0;
13081 else if (IsReduction(N->getOperand(1), Opc))
13082 ReduceIdx = 1;
13083 else
13084 return SDValue();
13085
13086 // Skip if FADD disallows reassociation but the combiner needs it.
13087 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13088 return SDValue();
13089
13090 SDValue Extract = N->getOperand(ReduceIdx);
13091 SDValue Reduce = Extract.getOperand(0);
13092 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13093 return SDValue();
13094
13095 SDValue ScalarV = Reduce.getOperand(2);
13096 EVT ScalarVT = ScalarV.getValueType();
13097 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13098 ScalarV.getOperand(0)->isUndef() &&
13099 isNullConstant(ScalarV.getOperand(2)))
13100 ScalarV = ScalarV.getOperand(1);
13101
13102 // Make sure that ScalarV is a splat with VL=1.
13103 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13104 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13105 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13106 return SDValue();
13107
13108 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13109 return SDValue();
13110
13111 // Check that the scalar of ScalarV is the neutral element.
13112 // TODO: Deal with value other than neutral element.
13113 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13114 0))
13115 return SDValue();
13116
13117 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13118 // FIXME: We might be able to improve this if operand 0 is undef.
13119 if (!isNonZeroAVL(Reduce.getOperand(5)))
13120 return SDValue();
13121
13122 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13123
13124 SDLoc DL(N);
13125 SDValue NewScalarV =
13126 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13127 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13128
13129 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13130 if (ScalarVT != ScalarV.getValueType())
13131 NewScalarV =
13132 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13133 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13134
13135 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13136 NewScalarV, Reduce.getOperand(3),
13137 Reduce.getOperand(4), Reduce.getOperand(5)};
13138 SDValue NewReduce =
13139 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13140 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13141 Extract.getOperand(1));
13142}
13143
13144// Optimize (add (shl x, c0), (shl y, c1)) ->
13145// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
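// For example, (add (shl x, 5), (shl y, 7)) becomes (shl (sh2add y, x), 5): the sh2add
// computes (y << 2) + x and the final shift restores the common factor of 32.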
13146 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13147 const RISCVSubtarget &Subtarget) {
13148 // Perform this optimization only in the zba extension.
13149 if (!Subtarget.hasStdExtZba())
13150 return SDValue();
13151
13152 // Skip for vector types and larger types.
13153 EVT VT = N->getValueType(0);
13154 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13155 return SDValue();
13156
13157 // The two operand nodes must be SHL and have no other use.
13158 SDValue N0 = N->getOperand(0);
13159 SDValue N1 = N->getOperand(1);
13160 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13161 !N0->hasOneUse() || !N1->hasOneUse())
13162 return SDValue();
13163
13164 // Check c0 and c1.
13165 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13166 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13167 if (!N0C || !N1C)
13168 return SDValue();
13169 int64_t C0 = N0C->getSExtValue();
13170 int64_t C1 = N1C->getSExtValue();
13171 if (C0 <= 0 || C1 <= 0)
13172 return SDValue();
13173
13174 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13175 int64_t Bits = std::min(C0, C1);
13176 int64_t Diff = std::abs(C0 - C1);
13177 if (Diff != 1 && Diff != 2 && Diff != 3)
13178 return SDValue();
13179
13180 // Build nodes.
13181 SDLoc DL(N);
13182 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13183 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13184 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13185 DAG.getConstant(Diff, DL, VT), NS);
13186 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13187}
13188
13189// Combine a constant select operand into its use:
13190//
13191// (and (select cond, -1, c), x)
13192// -> (select cond, x, (and x, c)) [AllOnes=1]
13193// (or (select cond, 0, c), x)
13194// -> (select cond, x, (or x, c)) [AllOnes=0]
13195// (xor (select cond, 0, c), x)
13196// -> (select cond, x, (xor x, c)) [AllOnes=0]
13197// (add (select cond, 0, c), x)
13198// -> (select cond, x, (add x, c)) [AllOnes=0]
13199// (sub x, (select cond, 0, c))
13200// -> (select cond, x, (sub x, c)) [AllOnes=0]
13201 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13202 SelectionDAG &DAG, bool AllOnes,
13203 const RISCVSubtarget &Subtarget) {
13204 EVT VT = N->getValueType(0);
13205
13206 // Skip vectors.
13207 if (VT.isVector())
13208 return SDValue();
13209
13210 if (!Subtarget.hasConditionalMoveFusion()) {
13211 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13212 if ((!Subtarget.hasStdExtZicond() &&
13213 !Subtarget.hasVendorXVentanaCondOps()) ||
13214 N->getOpcode() != ISD::AND)
13215 return SDValue();
13216
13217 // Maybe harmful when condition code has multiple use.
13218 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13219 return SDValue();
13220
13221 // Maybe harmful when VT is wider than XLen.
13222 if (VT.getSizeInBits() > Subtarget.getXLen())
13223 return SDValue();
13224 }
13225
13226 if ((Slct.getOpcode() != ISD::SELECT &&
13227 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13228 !Slct.hasOneUse())
13229 return SDValue();
13230
13231 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13232 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13233 };
13234
13235 bool SwapSelectOps;
13236 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13237 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13238 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13239 SDValue NonConstantVal;
13240 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13241 SwapSelectOps = false;
13242 NonConstantVal = FalseVal;
13243 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13244 SwapSelectOps = true;
13245 NonConstantVal = TrueVal;
13246 } else
13247 return SDValue();
13248
13249 // Slct is now known to be the desired identity constant when CC is true.
13250 TrueVal = OtherOp;
13251 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13252 // Unless SwapSelectOps says the condition should be false.
13253 if (SwapSelectOps)
13254 std::swap(TrueVal, FalseVal);
13255
13256 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13257 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13258 {Slct.getOperand(0), Slct.getOperand(1),
13259 Slct.getOperand(2), TrueVal, FalseVal});
13260
13261 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13262 {Slct.getOperand(0), TrueVal, FalseVal});
13263}
13264
13265// Attempt combineSelectAndUse on each operand of a commutative operator N.
13266 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13267 bool AllOnes,
13268 const RISCVSubtarget &Subtarget) {
13269 SDValue N0 = N->getOperand(0);
13270 SDValue N1 = N->getOperand(1);
13271 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13272 return Result;
13273 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13274 return Result;
13275 return SDValue();
13276}
13277
13278// Transform (add (mul x, c0), c1) ->
13279// (add (mul (add x, c1/c0), c0), c1%c0).
13280// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13281// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13282// to an infinite loop in DAGCombine if transformed.
13283// Or transform (add (mul x, c0), c1) ->
13284// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13285// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13286// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13287// lead to an infinite loop in DAGCombine if transformed.
13288// Or transform (add (mul x, c0), c1) ->
13289// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13290// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13291// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13292// lead to an infinite loop in DAGCombine if transformed.
13293// Or transform (add (mul x, c0), c1) ->
13294// (mul (add x, c1/c0), c0).
13295// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
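// For example, c0 = 100, c1 = 4099: 4099 is not simm12, but c1/c0 = 40 and c1%c0 = 99
// are, and c0*(c1/c0) = 4000 is not, so this becomes (add (mul (add x, 40), 100), 99).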
13296 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13297 const RISCVSubtarget &Subtarget) {
13298 // Skip for vector types and larger types.
13299 EVT VT = N->getValueType(0);
13300 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13301 return SDValue();
13302 // The first operand node must be a MUL and has no other use.
13303 SDValue N0 = N->getOperand(0);
13304 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13305 return SDValue();
13306 // Check if c0 and c1 match above conditions.
13307 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13308 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13309 if (!N0C || !N1C)
13310 return SDValue();
13311 // If N0C has multiple uses it's possible one of the cases in
13312 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13313 // in an infinite loop.
13314 if (!N0C->hasOneUse())
13315 return SDValue();
13316 int64_t C0 = N0C->getSExtValue();
13317 int64_t C1 = N1C->getSExtValue();
13318 int64_t CA, CB;
13319 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13320 return SDValue();
13321 // Search for proper CA (non-zero) and CB that both are simm12.
13322 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13323 !isInt<12>(C0 * (C1 / C0))) {
13324 CA = C1 / C0;
13325 CB = C1 % C0;
13326 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13327 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13328 CA = C1 / C0 + 1;
13329 CB = C1 % C0 - C0;
13330 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13331 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13332 CA = C1 / C0 - 1;
13333 CB = C1 % C0 + C0;
13334 } else
13335 return SDValue();
13336 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13337 SDLoc DL(N);
13338 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13339 DAG.getConstant(CA, DL, VT));
13340 SDValue New1 =
13341 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
13342 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
13343}
13344
13345// add (zext, zext) -> zext (add (zext, zext))
13346// sub (zext, zext) -> sext (sub (zext, zext))
13347// mul (zext, zext) -> zext (mul (zext, zext))
13348// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13349// udiv (zext, zext) -> zext (udiv (zext, zext))
13350// srem (zext, zext) -> zext (srem (zext, zext))
13351// urem (zext, zext) -> zext (urem (zext, zext))
13352//
13353 // where the sum of the extend widths match, and the range of the bin op
13354// fits inside the width of the narrower bin op. (For profitability on rvv, we
13355// use a power of two for both inner and outer extend.)
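// For example, (add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32)) becomes
// (zext (add (zext a to v4i16), (zext b to v4i16)) to v4i32), since an i8 + i8 sum
// always fits in i16.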
13356 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13357
13358 EVT VT = N->getValueType(0);
13359 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13360 return SDValue();
13361
13362 SDValue N0 = N->getOperand(0);
13363 SDValue N1 = N->getOperand(1);
13364 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13365 return SDValue();
13366 if (!N0.hasOneUse() || !N1.hasOneUse())
13367 return SDValue();
13368
13369 SDValue Src0 = N0.getOperand(0);
13370 SDValue Src1 = N1.getOperand(0);
13371 EVT SrcVT = Src0.getValueType();
13372 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13373 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13374 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13375 return SDValue();
13376
13377 LLVMContext &C = *DAG.getContext();
13378 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13379 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13380
13381 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13382 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13383
13384 // Src0 and Src1 are zero extended, so they're always positive if signed.
13385 //
13386 // sub can produce a negative from two positive operands, so it needs sign
13387 // extended. Other nodes produce a positive from two positive operands, so
13388 // zero extend instead.
13389 unsigned OuterExtend =
13390 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13391
13392 return DAG.getNode(
13393 OuterExtend, SDLoc(N), VT,
13394 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13395}
13396
13397// Try to turn (add (xor bool, 1) -1) into (neg bool).
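// When bool is 0 or 1, (bool ^ 1) - 1 == -bool, so the xor/add pair becomes a negate.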
13398 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13399 SDValue N0 = N->getOperand(0);
13400 SDValue N1 = N->getOperand(1);
13401 EVT VT = N->getValueType(0);
13402 SDLoc DL(N);
13403
13404 // RHS should be -1.
13405 if (!isAllOnesConstant(N1))
13406 return SDValue();
13407
13408 // Look for (xor X, 1).
13409 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13410 return SDValue();
13411
13412 // First xor input should be 0 or 1.
13413 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13414 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13415 return SDValue();
13416
13417 // Emit a negate of the setcc.
13418 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13419 N0.getOperand(0));
13420}
13421
13422 static SDValue performADDCombine(SDNode *N,
13423 TargetLowering::DAGCombinerInfo &DCI,
13424 const RISCVSubtarget &Subtarget) {
13425 SelectionDAG &DAG = DCI.DAG;
13426 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13427 return V;
13428 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13429 return V;
13430 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13431 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13432 return V;
13433 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13434 return V;
13435 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13436 return V;
13437 if (SDValue V = combineBinOpOfZExt(N, DAG))
13438 return V;
13439
13440 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13441 // (select lhs, rhs, cc, x, (add x, y))
13442 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13443}
13444
13445// Try to turn a sub boolean RHS and constant LHS into an addi.
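// For example, (sub 5, (setcc x, y, eq)) becomes (add (setcc x, y, ne), 4); both give
// 4 when x == y and 5 otherwise, and 4 fits in an ADDI immediate.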
13446 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13447 SDValue N0 = N->getOperand(0);
13448 SDValue N1 = N->getOperand(1);
13449 EVT VT = N->getValueType(0);
13450 SDLoc DL(N);
13451
13452 // Require a constant LHS.
13453 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13454 if (!N0C)
13455 return SDValue();
13456
13457 // All our optimizations involve subtracting 1 from the immediate and forming
13458 // an ADDI. Make sure the new immediate is valid for an ADDI.
13459 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13460 if (!ImmValMinus1.isSignedIntN(12))
13461 return SDValue();
13462
13463 SDValue NewLHS;
13464 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13465 // (sub constant, (setcc x, y, eq/neq)) ->
13466 // (add (setcc x, y, neq/eq), constant - 1)
13467 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13468 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13469 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13470 return SDValue();
13471 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13472 NewLHS =
13473 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13474 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13475 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13476 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13477 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13478 NewLHS = N1.getOperand(0);
13479 } else
13480 return SDValue();
13481
13482 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13483 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13484}
13485
13486// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13487// non-zero. Replace with orc.b.
13488 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13489 const RISCVSubtarget &Subtarget) {
13490 if (!Subtarget.hasStdExtZbb())
13491 return SDValue();
13492
13493 EVT VT = N->getValueType(0);
13494
13495 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13496 return SDValue();
13497
13498 SDValue N0 = N->getOperand(0);
13499 SDValue N1 = N->getOperand(1);
13500
13501 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13502 return SDValue();
13503
13504 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13505 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13506 return SDValue();
13507
13508 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13509 if (!DAG.MaskedValueIsZero(N1, Mask))
13510 return SDValue();
13511
13512 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13513}
13514
13515 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13516 const RISCVSubtarget &Subtarget) {
13517 if (SDValue V = combineSubOfBoolean(N, DAG))
13518 return V;
13519
13520 EVT VT = N->getValueType(0);
13521 SDValue N0 = N->getOperand(0);
13522 SDValue N1 = N->getOperand(1);
13523 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13524 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13525 isNullConstant(N1.getOperand(1))) {
13526 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13527 if (CCVal == ISD::SETLT) {
13528 SDLoc DL(N);
13529 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13530 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13531 DAG.getConstant(ShAmt, DL, VT));
13532 }
13533 }
13534
13535 if (SDValue V = combineBinOpOfZExt(N, DAG))
13536 return V;
13537 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13538 return V;
13539
13540 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13541 // (select lhs, rhs, cc, x, (sub x, y))
13542 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13543}
13544
13545// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13546// Legalizing setcc can introduce xors like this. Doing this transform reduces
13547// the number of xors and may allow the xor to fold into a branch condition.
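// For example, with X and Y in {0, 1}, (and (xor X, 1), (xor Y, 1)) becomes
// (xor (or X, Y), 1), replacing two xors with one.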
13548 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13549 SDValue N0 = N->getOperand(0);
13550 SDValue N1 = N->getOperand(1);
13551 bool IsAnd = N->getOpcode() == ISD::AND;
13552
13553 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13554 return SDValue();
13555
13556 if (!N0.hasOneUse() || !N1.hasOneUse())
13557 return SDValue();
13558
13559 SDValue N01 = N0.getOperand(1);
13560 SDValue N11 = N1.getOperand(1);
13561
13562 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13563 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13564 // operation is And, allow one of the Xors to use -1.
13565 if (isOneConstant(N01)) {
13566 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13567 return SDValue();
13568 } else if (isOneConstant(N11)) {
13569 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13570 if (!(IsAnd && isAllOnesConstant(N01)))
13571 return SDValue();
13572 } else
13573 return SDValue();
13574
13575 EVT VT = N->getValueType(0);
13576
13577 SDValue N00 = N0.getOperand(0);
13578 SDValue N10 = N1.getOperand(0);
13579
13580 // The LHS of the xors needs to be 0/1.
13581 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13582 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13583 return SDValue();
13584
13585 // Invert the opcode and insert a new xor.
13586 SDLoc DL(N);
13587 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13588 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13589 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13590}
13591
13592// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13593// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13594// value to an unsigned value. This will be lowered to vmax and series of
13595// vnclipu instructions later. This can be extended to other truncated types
13596// other than i8 by replacing 256 and 255 with the equivalent constants for the
13597// type.
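// For example, for vXi16 -> vXi8: lanes in [0, 255] pass through, negative lanes become
// 0, and lanes >= 256 become 255, which is exactly trunc(smin(smax(X, 0), 255)).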
13598 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
13599 EVT VT = N->getValueType(0);
13600 SDValue N0 = N->getOperand(0);
13601 EVT SrcVT = N0.getValueType();
13602
13603 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13604 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13605 return SDValue();
13606
13607 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13608 return SDValue();
13609
13610 SDValue Cond = N0.getOperand(0);
13611 SDValue True = N0.getOperand(1);
13612 SDValue False = N0.getOperand(2);
13613
13614 if (Cond.getOpcode() != ISD::SETCC)
13615 return SDValue();
13616
13617 // FIXME: Support the version of this pattern with the select operands
13618 // swapped.
13619 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13620 if (CCVal != ISD::SETULT)
13621 return SDValue();
13622
13623 SDValue CondLHS = Cond.getOperand(0);
13624 SDValue CondRHS = Cond.getOperand(1);
13625
13626 if (CondLHS != True)
13627 return SDValue();
13628
13629 unsigned ScalarBits = VT.getScalarSizeInBits();
13630
13631 // FIXME: Support other constants.
13632 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13633 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13634 return SDValue();
13635
13636 if (False.getOpcode() != ISD::SIGN_EXTEND)
13637 return SDValue();
13638
13639 False = False.getOperand(0);
13640
13641 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13642 return SDValue();
13643
13644 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13645 if (!FalseRHSC || !FalseRHSC->isZero())
13646 return SDValue();
13647
13648 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13649 if (CCVal2 != ISD::SETGT)
13650 return SDValue();
13651
13652 // Emit the signed to unsigned saturation pattern.
13653 SDLoc DL(N);
13654 SDValue Max =
13655 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13656 SDValue Min =
13657 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13658 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13659 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13660}
13661
13662 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13663 const RISCVSubtarget &Subtarget) {
13664 SDValue N0 = N->getOperand(0);
13665 EVT VT = N->getValueType(0);
13666
13667 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13668 // extending X. This is safe since we only need the LSB after the shift and
13669 // shift amounts larger than 31 would produce poison. If we wait until
13670 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13671 // to use a BEXT instruction.
13672 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13673 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13674 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13675 SDLoc DL(N0);
13676 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13677 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13678 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13679 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13680 }
13681
13682 return combineTruncSelectToSMaxUSat(N, DAG);
13683}
13684
13685// Combines two comparison operation and logic operation to one selection
13686// operation(min, max) and logic operation. Returns new constructed Node if
13687// conditions for optimization are satisfied.
13688 static SDValue performANDCombine(SDNode *N,
13689 TargetLowering::DAGCombinerInfo &DCI,
13690 const RISCVSubtarget &Subtarget) {
13691 SelectionDAG &DAG = DCI.DAG;
13692
13693 SDValue N0 = N->getOperand(0);
13694 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13695 // extending X. This is safe since we only need the LSB after the shift and
13696 // shift amounts larger than 31 would produce poison. If we wait until
13697 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13698 // to use a BEXT instruction.
13699 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13700 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13701 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13702 N0.hasOneUse()) {
13703 SDLoc DL(N);
13704 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13705 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13706 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13707 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13708 DAG.getConstant(1, DL, MVT::i64));
13709 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13710 }
13711
13712 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13713 return V;
13714 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13715 return V;
13716
13717 if (DCI.isAfterLegalizeDAG())
13718 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13719 return V;
13720
13721 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13722 // (select lhs, rhs, cc, x, (and x, y))
13723 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13724}
13725
13726// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13727// FIXME: Generalize to other binary operators with same operand.
13728 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13729 SelectionDAG &DAG) {
13730 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13731
13732 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13733 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13734 !N0.hasOneUse() || !N1.hasOneUse())
13735 return SDValue();
13736
13737 // Should have the same condition.
13738 SDValue Cond = N0.getOperand(1);
13739 if (Cond != N1.getOperand(1))
13740 return SDValue();
13741
13742 SDValue TrueV = N0.getOperand(0);
13743 SDValue FalseV = N1.getOperand(0);
13744
13745 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13746 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13747 !isOneConstant(TrueV.getOperand(1)) ||
13748 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13749 return SDValue();
13750
13751 EVT VT = N->getValueType(0);
13752 SDLoc DL(N);
13753
13754 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13755 Cond);
13756 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13757 Cond);
13758 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13759 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13760}
13761
13762 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13763 const RISCVSubtarget &Subtarget) {
13764 SelectionDAG &DAG = DCI.DAG;
13765
13766 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13767 return V;
13768 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13769 return V;
13770
13771 if (DCI.isAfterLegalizeDAG())
13772 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13773 return V;
13774
13775 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13776 // We may be able to pull a common operation out of the true and false value.
13777 SDValue N0 = N->getOperand(0);
13778 SDValue N1 = N->getOperand(1);
13779 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13780 return V;
13781 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13782 return V;
13783
13784 // fold (or (select cond, 0, y), x) ->
13785 // (select cond, x, (or x, y))
13786 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13787}
13788
13789 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13790 const RISCVSubtarget &Subtarget) {
13791 SDValue N0 = N->getOperand(0);
13792 SDValue N1 = N->getOperand(1);
13793
13794 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13795 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13796 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13797 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13798 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13799 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13800 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13801 SDLoc DL(N);
13802 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13803 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13804 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13805 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13806 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13807 }
13808
13809 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13810 // NOTE: Assumes ROL being legal means ROLW is legal.
13811 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13812 if (N0.getOpcode() == RISCVISD::SLLW &&
13813 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13814 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13815 SDLoc DL(N);
13816 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13817 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13818 }
13819
13820 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13821 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13822 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13823 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13824 if (ConstN00 && CC == ISD::SETLT) {
13825 EVT VT = N0.getValueType();
13826 SDLoc DL(N0);
13827 const APInt &Imm = ConstN00->getAPIntValue();
13828 if ((Imm + 1).isSignedIntN(12))
13829 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13830 DAG.getConstant(Imm + 1, DL, VT), CC);
13831 }
13832 }
13833
13834 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13835 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13836 // would have been promoted to i32, but the setcc would have i64 result.
13837 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13838 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13839 SDValue N00 = N0.getOperand(0);
13840 SDLoc DL(N);
13841 SDValue LHS = N00.getOperand(0);
13842 SDValue RHS = N00.getOperand(1);
13843 SDValue CC = N00.getOperand(2);
13844 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13845 LHS.getValueType());
13846 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13847 LHS, RHS, NotCC);
13848 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13849 }
13850
13851 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13852 return V;
13853 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13854 return V;
13855
13856 // fold (xor (select cond, 0, y), x) ->
13857 // (select cond, x, (xor x, y))
13858 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13859}
13860
13861// Try to expand a scalar multiply to a faster sequence.
13862 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13863 TargetLowering::DAGCombinerInfo &DCI,
13864 const RISCVSubtarget &Subtarget) {
13865
13866 EVT VT = N->getValueType(0);
13867
13868 // LI + MUL is usually smaller than the alternative sequence.
13869 if (DAG.getMachineFunction().getFunction().hasMinSize())
13870 return SDValue();
13871
13872 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13873 return SDValue();
13874
13875 if (VT != Subtarget.getXLenVT())
13876 return SDValue();
13877
13878 const bool HasShlAdd =
13879 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
13880
13881 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13882 if (!CNode)
13883 return SDValue();
13884 uint64_t MulAmt = CNode->getZExtValue();
13885
13886 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13887 // We're adding additional uses of X here, and in principle, we should be freezing
13888 // X before doing so. However, adding freeze here causes real regressions, and no
13889 // other target properly freezes X in these cases either.
13890 SDValue X = N->getOperand(0);
13891
13892 if (HasShlAdd) {
13893 for (uint64_t Divisor : {3, 5, 9}) {
13894 if (MulAmt % Divisor != 0)
13895 continue;
13896 uint64_t MulAmt2 = MulAmt / Divisor;
13897 // 3/5/9 * 2^N -> shl (shXadd X, X), N
13898 if (isPowerOf2_64(MulAmt2)) {
13899 SDLoc DL(N);
13900 SDValue X = N->getOperand(0);
13901 // Put the shift first if we can fold a zext into the
13902 // shift forming a slli.uw.
13903 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13904 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13905 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13906 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13907 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13908 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
13909 Shl);
13910 }
13911 // Otherwise, put the shl second so that it can fold with following
13912 // instructions (e.g. sext or add).
13913 SDValue Mul359 =
13914 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13915 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13916 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13917 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13918 }
13919
13920 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13921 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13922 SDLoc DL(N);
13923 SDValue Mul359 =
13924 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13925 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13926 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13927 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13928 Mul359);
13929 }
13930 }
13931
13932 // If this is a power of 2 plus 2/4/8, we can use a shift followed by a single
13933 // shXadd. First check that this is a sum of two powers of 2 because that's
13934 // easy. Then count the trailing zeros to find the smaller power of 2.
13935 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13936 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13937 if (ScaleShift >= 1 && ScaleShift < 4) {
13938 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13939 SDLoc DL(N);
13940 SDValue Shift1 =
13941 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13942 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13943 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13944 }
13945 }
13946
13947 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13948 // This is the two instruction form, there are also three instruction
13949 // variants we could implement. e.g.
13950 // (2^(1,2,3) * 3,5,9 + 1) << C2
13951 // 2^(C1>3) * 3,5,9 +/- 1
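// For example, MulAmt = 19: C = 18, TZ = 1 and C >> TZ = 9, so this emits
// (sh1add (sh3add x, x), x) = ((x * 9) << 1) + x.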
13952 for (uint64_t Divisor : {3, 5, 9}) {
13953 uint64_t C = MulAmt - 1;
13954 if (C <= Divisor)
13955 continue;
13956 unsigned TZ = llvm::countr_zero(C);
13957 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13958 SDLoc DL(N);
13959 SDValue Mul359 =
13960 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13961 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13962 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13963 DAG.getConstant(TZ, DL, VT), X);
13964 }
13965 }
13966
13967 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13968 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13969 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13970 if (ScaleShift >= 1 && ScaleShift < 4) {
13971 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13972 SDLoc DL(N);
13973 SDValue Shift1 =
13974 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13975 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13976 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13977 DAG.getConstant(ScaleShift, DL, VT), X));
13978 }
13979 }
13980
13981 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13982 for (uint64_t Offset : {3, 5, 9}) {
13983 if (isPowerOf2_64(MulAmt + Offset)) {
13984 SDLoc DL(N);
13985 SDValue Shift1 =
13986 DAG.getNode(ISD::SHL, DL, VT, X,
13987 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13988 SDValue Mul359 =
13989 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13990 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
13991 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13992 }
13993 }
13994 }
13995
13996 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
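// For example, MulAmt = 24: the low set bit is 8 and 24 + 8 = 32 is a power of two,
// so this emits (sub (shl X, 5), (shl X, 3)) = 32*X - 8*X.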
13997 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
13998 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
13999 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14000 SDLoc DL(N);
14001 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14002 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14003 SDValue Shift2 =
14004 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14005 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14006 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14007 }
14008
14009 return SDValue();
14010}
14011
14012// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14013// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14014// Same for other equivalent types with other equivalent constants.
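// The idea: in each i32 lane, (lshr X, 15) & 0x10001 isolates the sign bit of
// each i16 half, and multiplying by 0xffff replicates that bit across its
// half, which is the same as arithmetically shifting each i16 lane right by 15.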
14015 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14016 EVT VT = N->getValueType(0);
14017 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14018
14019 // Do this for legal vectors unless they are i1 or i8 vectors.
14020 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14021 return SDValue();
14022
14023 if (N->getOperand(0).getOpcode() != ISD::AND ||
14024 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14025 return SDValue();
14026
14027 SDValue And = N->getOperand(0);
14028 SDValue Srl = And.getOperand(0);
14029
14030 APInt V1, V2, V3;
14031 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14032 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14033 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14034 return SDValue();
14035
14036 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14037 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14038 V3 != (HalfSize - 1))
14039 return SDValue();
14040
14041 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14042 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14043 VT.getVectorElementCount() * 2);
14044 SDLoc DL(N);
14045 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14046 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14047 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14048 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14049}
14050
14051 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14052 TargetLowering::DAGCombinerInfo &DCI,
14053 const RISCVSubtarget &Subtarget) {
14054 EVT VT = N->getValueType(0);
14055 if (!VT.isVector())
14056 return expandMul(N, DAG, DCI, Subtarget);
14057
14058 SDLoc DL(N);
14059 SDValue N0 = N->getOperand(0);
14060 SDValue N1 = N->getOperand(1);
14061 SDValue MulOper;
14062 unsigned AddSubOpc;
14063
14064 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14065 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14066 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14067 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
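// For example, assuming v4i32 operands, (mul (add x, 1), y) == x*y + y; the
// rewritten (add (mul x, y), y) exposes the multiply-accumulate shape that can
// later be matched to vmadd.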
14068 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14069 AddSubOpc = V->getOpcode();
14070 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14071 SDValue Opnd = V->getOperand(1);
14072 MulOper = V->getOperand(0);
14073 if (AddSubOpc == ISD::SUB)
14074 std::swap(Opnd, MulOper);
14075 if (isOneOrOneSplat(Opnd))
14076 return true;
14077 }
14078 return false;
14079 };
14080
14081 if (IsAddSubWith1(N0)) {
14082 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14083 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14084 }
14085
14086 if (IsAddSubWith1(N1)) {
14087 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14088 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14089 }
14090
14091 if (SDValue V = combineBinOpOfZExt(N, DAG))
14092 return V;
14093
14094 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14095 return V;
14096
14097 return SDValue();
14098}
14099
14100 /// Indexed load/store instructions zero-extend their indices, so try to
14101 /// narrow the type of the index operand.
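/// For example, assuming the index is (shl (zext i8 x to i64), 2), the shifted
/// value needs at most 10 bits, so the index vector can be narrowed to i16
/// elements.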
14102static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14103 if (isIndexTypeSigned(IndexType))
14104 return false;
14105
14106 if (!N->hasOneUse())
14107 return false;
14108
14109 EVT VT = N.getValueType();
14110 SDLoc DL(N);
14111
14112 // In general, what we're doing here is seeing if we can sink a truncate to
14113 // a smaller element type into the expression tree building our index.
14114 // TODO: We can generalize this and handle a bunch more cases if useful.
14115
14116 // Narrow a buildvector to the narrowest element type. This requires less
14117 // work and less register pressure at high LMUL, and creates smaller constants
14118 // which may be cheaper to materialize.
14119 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14120 KnownBits Known = DAG.computeKnownBits(N);
14121 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14122 LLVMContext &C = *DAG.getContext();
14123 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14124 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14125 N = DAG.getNode(ISD::TRUNCATE, DL,
14126 VT.changeVectorElementType(ResultVT), N);
14127 return true;
14128 }
14129 }
14130
14131 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14132 if (N.getOpcode() != ISD::SHL)
14133 return false;
14134
14135 SDValue N0 = N.getOperand(0);
14136 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14137 N0.getOpcode() != RISCVISD::VZEXT_VL)
14138 return false;
14139 if (!N0->hasOneUse())
14140 return false;
14141
14142 APInt ShAmt;
14143 SDValue N1 = N.getOperand(1);
14144 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14145 return false;
14146
14147 SDValue Src = N0.getOperand(0);
14148 EVT SrcVT = Src.getValueType();
14149 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14150 unsigned ShAmtV = ShAmt.getZExtValue();
14151 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14152 NewElen = std::max(NewElen, 8U);
14153
14154 // Skip if NewElen is not narrower than the original extended type.
14155 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14156 return false;
14157
14158 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14159 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14160
14161 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14162 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14163 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14164 return true;
14165}
14166
14167// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14168// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14169// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14170// can become a sext.w instead of a shift pair.
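// For example, assuming C1 == 0xffffffff, the compare becomes
// (seteq (sext_inreg X, i32), -1), and -1 is cheaper to materialize than
// 0xffffffff.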
14171 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14172 const RISCVSubtarget &Subtarget) {
14173 SDValue N0 = N->getOperand(0);
14174 SDValue N1 = N->getOperand(1);
14175 EVT VT = N->getValueType(0);
14176 EVT OpVT = N0.getValueType();
14177
14178 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14179 return SDValue();
14180
14181 // RHS needs to be a constant.
14182 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14183 if (!N1C)
14184 return SDValue();
14185
14186 // LHS needs to be (and X, 0xffffffff).
14187 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14188 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14189 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14190 return SDValue();
14191
14192 // Looking for an equality compare.
14193 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14194 if (!isIntEqualitySetCC(Cond))
14195 return SDValue();
14196
14197 // Don't do this if the sign bit is provably zero, as it will be turned back into
14198 // an AND.
14199 APInt SignMask = APInt::getOneBitSet(64, 31);
14200 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14201 return SDValue();
14202
14203 const APInt &C1 = N1C->getAPIntValue();
14204
14205 SDLoc dl(N);
14206 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14207 // to be equal.
14208 if (C1.getActiveBits() > 32)
14209 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14210
14211 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14212 N0.getOperand(0), DAG.getValueType(MVT::i32));
14213 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14214 dl, OpVT), Cond);
14215}
14216
14217static SDValue
14218 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14219 const RISCVSubtarget &Subtarget) {
14220 SDValue Src = N->getOperand(0);
14221 EVT VT = N->getValueType(0);
14222
14223 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14224 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14225 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
14226 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14227 Src.getOperand(0));
14228
14229 return SDValue();
14230}
14231
14232namespace {
14233// Forward declaration of the structure holding the necessary information to
14234// apply a combine.
14235struct CombineResult;
14236
14237enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14238/// Helper class for folding sign/zero extensions.
14239/// In particular, this class is used for the following combines:
14240/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14241/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14242/// mul | mul_vl -> vwmul(u) | vwmul_su
14243/// shl | shl_vl -> vwsll
14244/// fadd -> vfwadd | vfwadd_w
14245/// fsub -> vfwsub | vfwsub_w
14246/// fmul -> vfwmul
14247/// An object of this class represents an operand of the operation we want to
14248/// combine.
14249/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14250/// NodeExtensionHelper for `a` and one for `b`.
14251///
14252/// This class abstracts away how the extension is materialized and
14253/// how its number of users affect the combines.
14254///
14255/// In particular:
14256/// - VWADD_W is conceptually == add(op0, sext(op1))
14257/// - VWADDU_W == add(op0, zext(op1))
14258/// - VWSUB_W == sub(op0, sext(op1))
14259/// - VWSUBU_W == sub(op0, zext(op1))
14260/// - VFWADD_W == fadd(op0, fpext(op1))
14261/// - VFWSUB_W == fsub(op0, fpext(op1))
14262/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14263/// zext|sext(smaller_value).
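/// For example, assuming Root is (add_vl (vsext_vl a), (vsext_vl b)), both
/// operands report SupportsSExt and the whole expression can be rewritten as a
/// single VWADD_VL of a and b.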
14264struct NodeExtensionHelper {
14265 /// Records if this operand is like being zero extended.
14266 bool SupportsZExt;
14267 /// Records if this operand is like being sign extended.
14268 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14269 /// instance, a splat constant (e.g., 3), would support being both sign and
14270 /// zero extended.
14271 bool SupportsSExt;
14272 /// Records if this operand is like being floating-Point extended.
14273 bool SupportsFPExt;
14274 /// This boolean captures whether we care if this operand would still be
14275 /// around after the folding happens.
14276 bool EnforceOneUse;
14277 /// Original value that this NodeExtensionHelper represents.
14278 SDValue OrigOperand;
14279
14280 /// Get the value feeding the extension or the value itself.
14281 /// E.g., for zext(a), this would return a.
14282 SDValue getSource() const {
14283 switch (OrigOperand.getOpcode()) {
14284 case ISD::ZERO_EXTEND:
14285 case ISD::SIGN_EXTEND:
14286 case RISCVISD::VSEXT_VL:
14287 case RISCVISD::VZEXT_VL:
14288 case RISCVISD::FP_EXTEND_VL:
14289 return OrigOperand.getOperand(0);
14290 default:
14291 return OrigOperand;
14292 }
14293 }
14294
14295 /// Check if this instance represents a splat.
14296 bool isSplat() const {
14297 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14298 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14299 }
14300
14301 /// Get the extended opcode.
14302 unsigned getExtOpc(ExtKind SupportsExt) const {
14303 switch (SupportsExt) {
14304 case ExtKind::SExt:
14305 return RISCVISD::VSEXT_VL;
14306 case ExtKind::ZExt:
14307 return RISCVISD::VZEXT_VL;
14308 case ExtKind::FPExt:
14309 return RISCVISD::FP_EXTEND_VL;
14310 }
14311 llvm_unreachable("Unknown ExtKind enum");
14312 }
14313
14314 /// Get or create a value that can feed \p Root with the given extension \p
14315 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
14316 /// operand. \see ::getSource().
14317 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14318 const RISCVSubtarget &Subtarget,
14319 std::optional<ExtKind> SupportsExt) const {
14320 if (!SupportsExt.has_value())
14321 return OrigOperand;
14322
14323 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14324
14325 SDValue Source = getSource();
14326 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
14327 if (Source.getValueType() == NarrowVT)
14328 return Source;
14329
14330 unsigned ExtOpc = getExtOpc(*SupportsExt);
14331
14332 // If we need an extension, we should be changing the type.
14333 SDLoc DL(OrigOperand);
14334 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
14335 switch (OrigOperand.getOpcode()) {
14336 case ISD::ZERO_EXTEND:
14337 case ISD::SIGN_EXTEND:
14338 case RISCVISD::VSEXT_VL:
14339 case RISCVISD::VZEXT_VL:
14340 case RISCVISD::FP_EXTEND_VL:
14341 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
14342 case ISD::SPLAT_VECTOR:
14343 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14344 case RISCVISD::VMV_V_X_VL:
14345 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14346 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14347 case RISCVISD::VFMV_V_F_VL:
14348 Source = Source.getOperand(1);
14349 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
14350 Source = Source.getOperand(0);
14351 assert(Source.getValueType() == NarrowVT.getVectorElementType());
14352 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
14353 DAG.getUNDEF(NarrowVT), Source, VL);
14354 default:
14355 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
14356 // and that operand should already have the right NarrowVT so no
14357 // extension should be required at this point.
14358 llvm_unreachable("Unsupported opcode");
14359 }
14360 }
14361
14362 /// Helper function to get the narrow type for \p Root.
14363 /// The narrow type is the type of \p Root where we divided the size of each
14364 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14365 /// \pre Both the narrow type and the original type should be legal.
14366 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14367 MVT VT = Root->getSimpleValueType(0);
14368
14369 // Determine the narrow size.
14370 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14371
14372 MVT EltVT = SupportsExt == ExtKind::FPExt
14373 ? MVT::getFloatingPointVT(NarrowSize)
14374 : MVT::getIntegerVT(NarrowSize);
14375
14376 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14377 "Trying to extend something we can't represent");
14378 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14379 return NarrowVT;
14380 }
14381
14382 /// Get the opcode to materialize:
14383 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14384 static unsigned getSExtOpcode(unsigned Opcode) {
14385 switch (Opcode) {
14386 case ISD::ADD:
14387 case RISCVISD::ADD_VL:
14388 case RISCVISD::VWADD_W_VL:
14389 case RISCVISD::VWADDU_W_VL:
14390 case ISD::OR:
14391 return RISCVISD::VWADD_VL;
14392 case ISD::SUB:
14393 case RISCVISD::SUB_VL:
14394 case RISCVISD::VWSUB_W_VL:
14395 case RISCVISD::VWSUBU_W_VL:
14396 return RISCVISD::VWSUB_VL;
14397 case ISD::MUL:
14398 case RISCVISD::MUL_VL:
14399 return RISCVISD::VWMUL_VL;
14400 default:
14401 llvm_unreachable("Unexpected opcode");
14402 }
14403 }
14404
14405 /// Get the opcode to materialize:
14406 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14407 static unsigned getZExtOpcode(unsigned Opcode) {
14408 switch (Opcode) {
14409 case ISD::ADD:
14410 case RISCVISD::ADD_VL:
14411 case RISCVISD::VWADD_W_VL:
14412 case RISCVISD::VWADDU_W_VL:
14413 case ISD::OR:
14414 return RISCVISD::VWADDU_VL;
14415 case ISD::SUB:
14416 case RISCVISD::SUB_VL:
14417 case RISCVISD::VWSUB_W_VL:
14418 case RISCVISD::VWSUBU_W_VL:
14419 return RISCVISD::VWSUBU_VL;
14420 case ISD::MUL:
14421 case RISCVISD::MUL_VL:
14422 return RISCVISD::VWMULU_VL;
14423 case ISD::SHL:
14424 case RISCVISD::SHL_VL:
14425 return RISCVISD::VWSLL_VL;
14426 default:
14427 llvm_unreachable("Unexpected opcode");
14428 }
14429 }
14430
14431 /// Get the opcode to materialize:
14432 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14433 static unsigned getFPExtOpcode(unsigned Opcode) {
14434 switch (Opcode) {
14435 case RISCVISD::FADD_VL:
14436 case RISCVISD::VFWADD_W_VL:
14437 return RISCVISD::VFWADD_VL;
14438 case RISCVISD::FSUB_VL:
14439 case RISCVISD::VFWSUB_W_VL:
14440 return RISCVISD::VFWSUB_VL;
14441 case RISCVISD::FMUL_VL:
14442 return RISCVISD::VFWMUL_VL;
14443 default:
14444 llvm_unreachable("Unexpected opcode");
14445 }
14446 }
14447
14448 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14449 /// newOpcode(a, b).
14450 static unsigned getSUOpcode(unsigned Opcode) {
14451 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14452 "SU is only supported for MUL");
14453 return RISCVISD::VWMULSU_VL;
14454 }
14455
14456 /// Get the opcode to materialize
14457 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14458 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14459 switch (Opcode) {
14460 case ISD::ADD:
14461 case RISCVISD::ADD_VL:
14462 case ISD::OR:
14463 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14464 : RISCVISD::VWADDU_W_VL;
14465 case ISD::SUB:
14466 case RISCVISD::SUB_VL:
14467 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14468 : RISCVISD::VWSUBU_W_VL;
14469 case RISCVISD::FADD_VL:
14470 return RISCVISD::VFWADD_W_VL;
14471 case RISCVISD::FSUB_VL:
14472 return RISCVISD::VFWSUB_W_VL;
14473 default:
14474 llvm_unreachable("Unexpected opcode");
14475 }
14476 }
14477
14478 using CombineToTry = std::function<std::optional<CombineResult>(
14479 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14480 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14481 const RISCVSubtarget &)>;
14482
14483 /// Check if this node needs to be fully folded or extended for all users.
14484 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14485
14486 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14487 const RISCVSubtarget &Subtarget) {
14488 unsigned Opc = OrigOperand.getOpcode();
14489 MVT VT = OrigOperand.getSimpleValueType();
14490
14491 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14492 "Unexpected Opcode");
14493
14494 // The passthru must be undef for tail agnostic.
14495 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14496 return;
14497
14498 // Get the scalar value.
14499 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14500 : OrigOperand.getOperand(1);
14501
14502 // See if we have enough sign bits or zero bits in the scalar to use a
14503 // widening opcode by splatting to smaller element size.
14504 unsigned EltBits = VT.getScalarSizeInBits();
14505 unsigned ScalarBits = Op.getValueSizeInBits();
14506 // If we're not getting all bits from the element, we need special handling.
14507 if (ScalarBits < EltBits) {
14508 // This should only occur on RV32.
14509 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
14510 !Subtarget.is64Bit() && "Unexpected splat");
14511 // vmv.v.x sign extends narrow inputs.
14512 SupportsSExt = true;
14513
14514 // If the input is positive, then sign extend is also zero extend.
14515 if (DAG.SignBitIsZero(Op))
14516 SupportsZExt = true;
14517
14518 EnforceOneUse = false;
14519 return;
14520 }
14521
14522 unsigned NarrowSize = EltBits / 2;
14523 // If the narrow type cannot be expressed with a legal VMV,
14524 // this is not a valid candidate.
14525 if (NarrowSize < 8)
14526 return;
14527
14528 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14529 SupportsSExt = true;
14530
14531 if (DAG.MaskedValueIsZero(Op,
14532 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14533 SupportsZExt = true;
14534
14535 EnforceOneUse = false;
14536 }
14537
14538 /// Helper method to set the various fields of this struct based on the
14539 /// type of \p Root.
14540 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14541 const RISCVSubtarget &Subtarget) {
14542 SupportsZExt = false;
14543 SupportsSExt = false;
14544 SupportsFPExt = false;
14545 EnforceOneUse = true;
14546 unsigned Opc = OrigOperand.getOpcode();
14547 // For the nodes we handle below, we end up using their inputs directly: see
14548 // getSource(). However since they either don't have a passthru or we check
14549 // that their passthru is undef, we can safely ignore their mask and VL.
14550 switch (Opc) {
14551 case ISD::ZERO_EXTEND:
14552 case ISD::SIGN_EXTEND: {
14553 MVT VT = OrigOperand.getSimpleValueType();
14554 if (!VT.isVector())
14555 break;
14556
14557 SDValue NarrowElt = OrigOperand.getOperand(0);
14558 MVT NarrowVT = NarrowElt.getSimpleValueType();
14559 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14560 if (NarrowVT.getVectorElementType() == MVT::i1)
14561 break;
14562
14563 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14564 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14565 break;
14566 }
14567 case RISCVISD::VZEXT_VL:
14568 SupportsZExt = true;
14569 break;
14570 case RISCVISD::VSEXT_VL:
14571 SupportsSExt = true;
14572 break;
14573 case RISCVISD::FP_EXTEND_VL:
14574 SupportsFPExt = true;
14575 break;
14576 case ISD::SPLAT_VECTOR:
14577 case RISCVISD::VMV_V_X_VL:
14578 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14579 break;
14580 case RISCVISD::VFMV_V_F_VL: {
14581 MVT VT = OrigOperand.getSimpleValueType();
14582
14583 if (!OrigOperand.getOperand(0).isUndef())
14584 break;
14585
14586 SDValue Op = OrigOperand.getOperand(1);
14587 if (Op.getOpcode() != ISD::FP_EXTEND)
14588 break;
14589
14590 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14591 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
14592 if (NarrowSize != ScalarBits)
14593 break;
14594
14595 SupportsFPExt = true;
14596 break;
14597 }
14598 default:
14599 break;
14600 }
14601 }
14602
14603 /// Check if \p Root supports any extension folding combines.
14604 static bool isSupportedRoot(const SDNode *Root,
14605 const RISCVSubtarget &Subtarget) {
14606 switch (Root->getOpcode()) {
14607 case ISD::ADD:
14608 case ISD::SUB:
14609 case ISD::MUL: {
14610 return Root->getValueType(0).isScalableVector();
14611 }
14612 case ISD::OR: {
14613 return Root->getValueType(0).isScalableVector() &&
14614 Root->getFlags().hasDisjoint();
14615 }
14616 // Vector Widening Integer Add/Sub/Mul Instructions
14617 case RISCVISD::ADD_VL:
14618 case RISCVISD::MUL_VL:
14619 case RISCVISD::VWADD_W_VL:
14620 case RISCVISD::VWADDU_W_VL:
14621 case RISCVISD::SUB_VL:
14622 case RISCVISD::VWSUB_W_VL:
14623 case RISCVISD::VWSUBU_W_VL:
14624 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14625 case RISCVISD::FADD_VL:
14626 case RISCVISD::FSUB_VL:
14627 case RISCVISD::FMUL_VL:
14628 case RISCVISD::VFWADD_W_VL:
14629 case RISCVISD::VFWSUB_W_VL:
14630 return true;
14631 case ISD::SHL:
14632 return Root->getValueType(0).isScalableVector() &&
14633 Subtarget.hasStdExtZvbb();
14634 case RISCVISD::SHL_VL:
14635 return Subtarget.hasStdExtZvbb();
14636 default:
14637 return false;
14638 }
14639 }
14640
14641 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14642 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14643 const RISCVSubtarget &Subtarget) {
14644 assert(isSupportedRoot(Root, Subtarget) &&
14645 "Trying to build an helper with an "
14646 "unsupported root");
14647 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14649 OrigOperand = Root->getOperand(OperandIdx);
14650
14651 unsigned Opc = Root->getOpcode();
14652 switch (Opc) {
14653 // We consider
14654 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14655 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14656 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14657 case RISCVISD::VWADD_W_VL:
14658 case RISCVISD::VWADDU_W_VL:
14659 case RISCVISD::VWSUB_W_VL:
14660 case RISCVISD::VWSUBU_W_VL:
14661 case RISCVISD::VFWADD_W_VL:
14662 case RISCVISD::VFWSUB_W_VL:
14663 if (OperandIdx == 1) {
14664 SupportsZExt =
14665 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14666 SupportsSExt =
14667 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14668 SupportsFPExt =
14669 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14670 // There's no existing extension here, so we don't have to worry about
14671 // making sure it gets removed.
14672 EnforceOneUse = false;
14673 break;
14674 }
14675 [[fallthrough]];
14676 default:
14677 fillUpExtensionSupport(Root, DAG, Subtarget);
14678 break;
14679 }
14680 }
14681
14682 /// Helper function to get the Mask and VL from \p Root.
14683 static std::pair<SDValue, SDValue>
14684 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14685 const RISCVSubtarget &Subtarget) {
14686 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14687 switch (Root->getOpcode()) {
14688 case ISD::ADD:
14689 case ISD::SUB:
14690 case ISD::MUL:
14691 case ISD::OR:
14692 case ISD::SHL: {
14693 SDLoc DL(Root);
14694 MVT VT = Root->getSimpleValueType(0);
14695 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14696 }
14697 default:
14698 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14699 }
14700 }
14701
14702 /// Helper function to check if \p N is commutative with respect to the
14703 /// foldings that are supported by this class.
14704 static bool isCommutative(const SDNode *N) {
14705 switch (N->getOpcode()) {
14706 case ISD::ADD:
14707 case ISD::MUL:
14708 case ISD::OR:
14709 case RISCVISD::ADD_VL:
14710 case RISCVISD::MUL_VL:
14711 case RISCVISD::VWADD_W_VL:
14712 case RISCVISD::VWADDU_W_VL:
14713 case RISCVISD::FADD_VL:
14714 case RISCVISD::FMUL_VL:
14715 case RISCVISD::VFWADD_W_VL:
14716 return true;
14717 case ISD::SUB:
14718 case RISCVISD::SUB_VL:
14719 case RISCVISD::VWSUB_W_VL:
14720 case RISCVISD::VWSUBU_W_VL:
14721 case RISCVISD::FSUB_VL:
14722 case RISCVISD::VFWSUB_W_VL:
14723 case ISD::SHL:
14724 case RISCVISD::SHL_VL:
14725 return false;
14726 default:
14727 llvm_unreachable("Unexpected opcode");
14728 }
14729 }
14730
14731 /// Get a list of combine to try for folding extensions in \p Root.
14732 /// Note that each returned CombineToTry function doesn't actually modify
14733 /// anything. Instead they produce an optional CombineResult that if not None,
14734 /// need to be materialized for the combine to be applied.
14735 /// \see CombineResult::materialize.
14736 /// If the related CombineToTry function returns std::nullopt, that means the
14737 /// combine didn't match.
14738 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14739};
14740
14741/// Helper structure that holds all the necessary information to materialize a
14742/// combine that does some extension folding.
14743struct CombineResult {
14744 /// Opcode to be generated when materializing the combine.
14745 unsigned TargetOpcode;
14746 // No value means no extension is needed.
14747 std::optional<ExtKind> LHSExt;
14748 std::optional<ExtKind> RHSExt;
14749 /// Root of the combine.
14750 SDNode *Root;
14751 /// LHS of the TargetOpcode.
14752 NodeExtensionHelper LHS;
14753 /// RHS of the TargetOpcode.
14754 NodeExtensionHelper RHS;
14755
14756 CombineResult(unsigned TargetOpcode, SDNode *Root,
14757 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14758 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14759 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14760 LHS(LHS), RHS(RHS) {}
14761
14762 /// Return a value that uses TargetOpcode and that can be used to replace
14763 /// Root.
14764 /// The actual replacement is *not* done in that method.
14765 SDValue materialize(SelectionDAG &DAG,
14766 const RISCVSubtarget &Subtarget) const {
14767 SDValue Mask, VL, Merge;
14768 std::tie(Mask, VL) =
14769 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14770 switch (Root->getOpcode()) {
14771 default:
14772 Merge = Root->getOperand(2);
14773 break;
14774 case ISD::ADD:
14775 case ISD::SUB:
14776 case ISD::MUL:
14777 case ISD::OR:
14778 case ISD::SHL:
14779 Merge = DAG.getUNDEF(Root->getValueType(0));
14780 break;
14781 }
14782 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14783 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14784 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14785 Merge, Mask, VL);
14786 }
14787};
14788
14789/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14790/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14791/// are zext) and LHS and RHS can be folded into Root.
14792/// AllowExtMask define which form `ext` can take in this pattern.
14793///
14794/// \note If the pattern can match with both zext and sext, the returned
14795/// CombineResult will feature the zext result.
14796///
14797/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14798/// can be used to apply the pattern.
14799static std::optional<CombineResult>
14800canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14801 const NodeExtensionHelper &RHS,
14802 uint8_t AllowExtMask, SelectionDAG &DAG,
14803 const RISCVSubtarget &Subtarget) {
14804 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14805 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14806 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14807 /*RHSExt=*/{ExtKind::ZExt});
14808 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14809 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14810 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14811 /*RHSExt=*/{ExtKind::SExt});
14812 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14813 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14814 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14815 /*RHSExt=*/{ExtKind::FPExt});
14816 return std::nullopt;
14817}
14818
14819/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14820/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14821/// are zext) and LHS and RHS can be folded into Root.
14822///
14823/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14824/// can be used to apply the pattern.
14825static std::optional<CombineResult>
14826canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14827 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14828 const RISCVSubtarget &Subtarget) {
14829 return canFoldToVWWithSameExtensionImpl(
14830 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14831 Subtarget);
14832}
14833
14834/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14835///
14836/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14837/// can be used to apply the pattern.
14838static std::optional<CombineResult>
14839canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14840 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14841 const RISCVSubtarget &Subtarget) {
14842 if (RHS.SupportsFPExt)
14843 return CombineResult(
14844 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14845 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14846
14847 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14848 // sext/zext?
14849 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14850 // purposes.
14851 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14852 return CombineResult(
14853 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14854 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14855 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14856 return CombineResult(
14857 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14858 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14859 return std::nullopt;
14860}
14861
14862/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14863///
14864/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14865/// can be used to apply the pattern.
14866static std::optional<CombineResult>
14867canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14868 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14869 const RISCVSubtarget &Subtarget) {
14870 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14871 Subtarget);
14872}
14873
14874/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14875///
14876/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14877/// can be used to apply the pattern.
14878static std::optional<CombineResult>
14879canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14880 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14881 const RISCVSubtarget &Subtarget) {
14882 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14883 Subtarget);
14884}
14885
14886/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14887///
14888/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14889/// can be used to apply the pattern.
14890static std::optional<CombineResult>
14891canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14892 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14893 const RISCVSubtarget &Subtarget) {
14894 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14895 Subtarget);
14896}
14897
14898/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14899///
14900/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14901/// can be used to apply the pattern.
14902static std::optional<CombineResult>
14903canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14904 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14905 const RISCVSubtarget &Subtarget) {
14906
14907 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14908 return std::nullopt;
14909 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14910 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14911 /*RHSExt=*/{ExtKind::ZExt});
14912}
14913
14914 SmallVector<NodeExtensionHelper::CombineToTry>
14915 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14916 SmallVector<CombineToTry> Strategies;
14917 switch (Root->getOpcode()) {
14918 case ISD::ADD:
14919 case ISD::SUB:
14920 case ISD::OR:
14921 case RISCVISD::ADD_VL:
14922 case RISCVISD::SUB_VL:
14923 case RISCVISD::FADD_VL:
14924 case RISCVISD::FSUB_VL:
14925 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14926 Strategies.push_back(canFoldToVWWithSameExtension);
14927 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
14928 Strategies.push_back(canFoldToVW_W);
14929 break;
14930 case RISCVISD::FMUL_VL:
14931 Strategies.push_back(canFoldToVWWithSameExtension);
14932 break;
14933 case ISD::MUL:
14934 case RISCVISD::MUL_VL:
14935 // mul -> vwmul(u)
14936 Strategies.push_back(canFoldToVWWithSameExtension);
14937 // mul -> vwmulsu
14938 Strategies.push_back(canFoldToVW_SU);
14939 break;
14940 case ISD::SHL:
14941 case RISCVISD::SHL_VL:
14942 // shl -> vwsll
14943 Strategies.push_back(canFoldToVWWithZEXT);
14944 break;
14945 case RISCVISD::VWADD_W_VL:
14946 case RISCVISD::VWSUB_W_VL:
14947 // vwadd_w|vwsub_w -> vwadd|vwsub
14948 Strategies.push_back(canFoldToVWWithSEXT);
14949 break;
14950 case RISCVISD::VWADDU_W_VL:
14951 case RISCVISD::VWSUBU_W_VL:
14952 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14953 Strategies.push_back(canFoldToVWWithZEXT);
14954 break;
14955 case RISCVISD::VFWADD_W_VL:
14956 case RISCVISD::VFWSUB_W_VL:
14957 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14958 Strategies.push_back(canFoldToVWWithFPEXT);
14959 break;
14960 default:
14961 llvm_unreachable("Unexpected opcode");
14962 }
14963 return Strategies;
14964}
14965} // End anonymous namespace.
14966
14967/// Combine a binary operation to its equivalent VW or VW_W form.
14968/// The supported combines are:
14969/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14970/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14971/// mul | mul_vl -> vwmul(u) | vwmul_su
14972/// shl | shl_vl -> vwsll
14973/// fadd_vl -> vfwadd | vfwadd_w
14974/// fsub_vl -> vfwsub | vfwsub_w
14975/// fmul_vl -> vfwmul
14976/// vwadd_w(u) -> vwadd(u)
14977/// vwsub_w(u) -> vwsub(u)
14978/// vfwadd_w -> vfwadd
14979/// vfwsub_w -> vfwsub
14980 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14981 TargetLowering::DAGCombinerInfo &DCI,
14982 const RISCVSubtarget &Subtarget) {
14983 SelectionDAG &DAG = DCI.DAG;
14984 if (DCI.isBeforeLegalize())
14985 return SDValue();
14986
14987 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14988 return SDValue();
14989
14990 SmallVector<SDNode *> Worklist;
14991 SmallSet<SDNode *, 8> Inserted;
14992 Worklist.push_back(N);
14993 Inserted.insert(N);
14994 SmallVector<CombineResult> CombinesToApply;
14995
14996 while (!Worklist.empty()) {
14997 SDNode *Root = Worklist.pop_back_val();
14998 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14999 return SDValue();
15000
15001 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15002 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15003 auto AppendUsersIfNeeded = [&Worklist,
15004 &Inserted](const NodeExtensionHelper &Op) {
15005 if (Op.needToPromoteOtherUsers()) {
15006 for (SDNode *TheUse : Op.OrigOperand->uses()) {
15007 if (Inserted.insert(TheUse).second)
15008 Worklist.push_back(TheUse);
15009 }
15010 }
15011 };
15012
15013 // Control the compile time by limiting the number of nodes we look at in
15014 // total.
15015 if (Inserted.size() > ExtensionMaxWebSize)
15016 return SDValue();
15018 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15019 NodeExtensionHelper::getSupportedFoldings(Root);
15020
15021 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15022 bool Matched = false;
15023 for (int Attempt = 0;
15024 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15025 ++Attempt) {
15026
15027 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15028 FoldingStrategies) {
15029 std::optional<CombineResult> Res =
15030 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15031 if (Res) {
15032 Matched = true;
15033 CombinesToApply.push_back(*Res);
15034 // All the inputs that are extended need to be folded, otherwise
15035 // we would be leaving the old input (since it may still be used),
15036 // and the new one.
15037 if (Res->LHSExt.has_value())
15038 AppendUsersIfNeeded(LHS);
15039 if (Res->RHSExt.has_value())
15040 AppendUsersIfNeeded(RHS);
15041 break;
15042 }
15043 }
15044 std::swap(LHS, RHS);
15045 }
15046 // Right now we do an all or nothing approach.
15047 if (!Matched)
15048 return SDValue();
15049 }
15050 // Store the value for the replacement of the input node separately.
15051 SDValue InputRootReplacement;
15052 // We do the RAUW after we materialize all the combines, because some replaced
15053 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15054 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15055 // yet-to-be-visited CombinesToApply roots.
15056 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15057 ValuesToReplace.reserve(CombinesToApply.size());
15058 for (CombineResult Res : CombinesToApply) {
15059 SDValue NewValue = Res.materialize(DAG, Subtarget);
15060 if (!InputRootReplacement) {
15061 assert(Res.Root == N &&
15062 "First element is expected to be the current node");
15063 InputRootReplacement = NewValue;
15064 } else {
15065 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15066 }
15067 }
15068 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15069 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15070 DCI.AddToWorklist(OldNewValues.second.getNode());
15071 }
15072 return InputRootReplacement;
15073}
15074
15075// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15076// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15077// y will be the Passthru and cond will be the Mask.
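// This is sound because in lanes where cond is false the vmerge yields 0 and
// y + 0 == y (or y - 0 == y); using y as the passthru with cond as the mask
// produces the same lanes, assuming the original op was unmasked (all-ones
// mask), which is checked below.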
15078 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15079 unsigned Opc = N->getOpcode();
15080 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15081 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15082
15083 SDValue Y = N->getOperand(0);
15084 SDValue MergeOp = N->getOperand(1);
15085 unsigned MergeOpc = MergeOp.getOpcode();
15086
15087 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15088 return SDValue();
15089
15090 SDValue X = MergeOp->getOperand(1);
15091
15092 if (!MergeOp.hasOneUse())
15093 return SDValue();
15094
15095 // Passthru should be undef
15096 SDValue Passthru = N->getOperand(2);
15097 if (!Passthru.isUndef())
15098 return SDValue();
15099
15100 // Mask should be all ones
15101 SDValue Mask = N->getOperand(3);
15102 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15103 return SDValue();
15104
15105 // False value of MergeOp should be all zeros
15106 SDValue Z = MergeOp->getOperand(2);
15107
15108 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15109 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15110 Z = Z.getOperand(1);
15111
15112 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15113 return SDValue();
15114
15115 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15116 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15117 N->getFlags());
15118}
15119
15120 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15121 TargetLowering::DAGCombinerInfo &DCI,
15122 const RISCVSubtarget &Subtarget) {
15123 [[maybe_unused]] unsigned Opc = N->getOpcode();
15124 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15125 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15126
15127 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15128 return V;
15129
15130 return combineVWADDSUBWSelect(N, DCI.DAG);
15131}
15132
15133// Helper function for performMemPairCombine.
15134// Try to combine the memory loads/stores LSNode1 and LSNode2
15135// into a single memory pair operation.
15136 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15137 LSBaseSDNode *LSNode2, SDValue BasePtr,
15138 uint64_t Imm) {
15139 SmallPtrSet<const SDNode *, 32> Visited;
15140 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15141
15142 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15143 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15144 return SDValue();
15145
15146 MachineFunction &MF = DAG.getMachineFunction();
15147 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15148
15149 // The new operation has twice the width.
15150 MVT XLenVT = Subtarget.getXLenVT();
15151 EVT MemVT = LSNode1->getMemoryVT();
15152 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15153 MachineMemOperand *MMO = LSNode1->getMemOperand();
15154 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15155 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15156
15157 if (LSNode1->getOpcode() == ISD::LOAD) {
15158 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15159 unsigned Opcode;
15160 if (MemVT == MVT::i32)
15161 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15162 else
15163 Opcode = RISCVISD::TH_LDD;
15164
15165 SDValue Res = DAG.getMemIntrinsicNode(
15166 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15167 {LSNode1->getChain(), BasePtr,
15168 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15169 NewMemVT, NewMMO);
15170
15171 SDValue Node1 =
15172 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15173 SDValue Node2 =
15174 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15175
15176 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15177 return Node1;
15178 } else {
15179 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15180
15181 SDValue Res = DAG.getMemIntrinsicNode(
15182 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15183 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15184 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15185 NewMemVT, NewMMO);
15186
15187 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15188 return Res;
15189 }
15190}
15191
15192// Try to combine two adjacent loads/stores to a single pair instruction from
15193// the XTHeadMemPair vendor extension.
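// For example, assuming two i64 loads from Base+16 and Base+24, they can be
// merged into a single paired load (th.ldd) that produces both values.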
15194 static SDValue performMemPairCombine(SDNode *N,
15195 TargetLowering::DAGCombinerInfo &DCI) {
15196 SelectionDAG &DAG = DCI.DAG;
15197 MachineFunction &MF = DAG.getMachineFunction();
15198 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15199
15200 // Target does not support load/store pair.
15201 if (!Subtarget.hasVendorXTHeadMemPair())
15202 return SDValue();
15203
15204 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15205 EVT MemVT = LSNode1->getMemoryVT();
15206 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15207
15208 // No volatile, indexed or atomic loads/stores.
15209 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15210 return SDValue();
15211
15212 // Function to get a base + constant representation from a memory value.
15213 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15214 if (Ptr->getOpcode() == ISD::ADD)
15215 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15216 return {Ptr->getOperand(0), C1->getZExtValue()};
15217 return {Ptr, 0};
15218 };
15219
15220 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15221
15222 SDValue Chain = N->getOperand(0);
15223 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
15224 UI != UE; ++UI) {
15225 SDUse &Use = UI.getUse();
15226 if (Use.getUser() != N && Use.getResNo() == 0 &&
15227 Use.getUser()->getOpcode() == N->getOpcode()) {
15228 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
15229
15230 // No volatile, indexed or atomic loads/stores.
15231 if (!LSNode2->isSimple() || LSNode2->isIndexed())
15232 continue;
15233
15234 // Check if LSNode1 and LSNode2 have the same type and extension.
15235 if (LSNode1->getOpcode() == ISD::LOAD)
15236 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
15237 cast<LoadSDNode>(LSNode1)->getExtensionType())
15238 continue;
15239
15240 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
15241 continue;
15242
15243 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
15244
15245 // Check if the base pointer is the same for both instructions.
15246 if (Base1 != Base2)
15247 continue;
15248
15249 // Check if the offsets match the XTHeadMemPair encoding constraints.
15250 bool Valid = false;
15251 if (MemVT == MVT::i32) {
15252 // Check for adjacent i32 values and a 2-bit index.
15253 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
15254 Valid = true;
15255 } else if (MemVT == MVT::i64) {
15256 // Check for adjacent i64 values and a 2-bit index.
15257 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
15258 Valid = true;
15259 }
15260
15261 if (!Valid)
15262 continue;
15263
15264 // Try to combine.
15265 if (SDValue Res =
15266 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
15267 return Res;
15268 }
15269 }
15270
15271 return SDValue();
15272}
15273
15274// Fold
15275// (fp_to_int (froundeven X)) -> fcvt X, rne
15276// (fp_to_int (ftrunc X)) -> fcvt X, rtz
15277// (fp_to_int (ffloor X)) -> fcvt X, rdn
15278// (fp_to_int (fceil X)) -> fcvt X, rup
15279// (fp_to_int (fround X)) -> fcvt X, rmm
15280// (fp_to_int (frint X)) -> fcvt X
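// For example, assuming (i64 (fp_to_sint (ffloor X))) with X of type f64 on
// RV64, this folds to a single fcvt.l.d using the rdn rounding mode instead of
// a separate floor and convert.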
15281 static SDValue performFP_TO_INTCombine(SDNode *N,
15282 TargetLowering::DAGCombinerInfo &DCI,
15283 const RISCVSubtarget &Subtarget) {
15284 SelectionDAG &DAG = DCI.DAG;
15285 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15286 MVT XLenVT = Subtarget.getXLenVT();
15287
15288 SDValue Src = N->getOperand(0);
15289
15290 // Don't do this for strict-fp Src.
15291 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15292 return SDValue();
15293
15294 // Ensure the FP type is legal.
15295 if (!TLI.isTypeLegal(Src.getValueType()))
15296 return SDValue();
15297
15298 // Don't do this for f16 with Zfhmin and not Zfh.
15299 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15300 return SDValue();
15301
15302 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15303 // If the result is invalid, we didn't find a foldable instruction.
15304 if (FRM == RISCVFPRndMode::Invalid)
15305 return SDValue();
15306
15307 SDLoc DL(N);
15308 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
15309 EVT VT = N->getValueType(0);
15310
15311 if (VT.isVector() && TLI.isTypeLegal(VT)) {
15312 MVT SrcVT = Src.getSimpleValueType();
15313 MVT SrcContainerVT = SrcVT;
15314 MVT ContainerVT = VT.getSimpleVT();
15315 SDValue XVal = Src.getOperand(0);
15316
15317 // For widening and narrowing conversions we just combine it into a
15318 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
15319 // end up getting lowered to their appropriate pseudo instructions based on
15320 // their operand types
15321 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
15322 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
15323 return SDValue();
15324
15325 // Make fixed-length vectors scalable first
15326 if (SrcVT.isFixedLengthVector()) {
15327 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
15328 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
15329 ContainerVT =
15330 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
15331 }
15332
15333 auto [Mask, VL] =
15334 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
15335
15336 SDValue FpToInt;
15337 if (FRM == RISCVFPRndMode::RTZ) {
15338 // Use the dedicated trunc static rounding mode if we're truncating so we
15339 // don't need to generate calls to fsrmi/fsrm
15340 unsigned Opc =
15341 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
15342 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15343 } else if (FRM == RISCVFPRndMode::DYN) {
15344 unsigned Opc =
15345 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
15346 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15347 } else {
15348 unsigned Opc =
15349 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
15350 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
15351 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
15352 }
15353
15354 // If converted from fixed-length to scalable, convert back
15355 if (VT.isFixedLengthVector())
15356 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
15357
15358 return FpToInt;
15359 }
15360
15361 // Only handle XLen or i32 types. Other types narrower than XLen will
15362 // eventually be legalized to XLenVT.
15363 if (VT != MVT::i32 && VT != XLenVT)
15364 return SDValue();
15365
15366 unsigned Opc;
15367 if (VT == XLenVT)
15368 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15369 else
15370 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15371
15372 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
15373 DAG.getTargetConstant(FRM, DL, XLenVT));
15374 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
15375}
15376
15377// Fold
15378// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
15379// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
15380// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
15381// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
15382// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
15383// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
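// The scalar fcvt instructions already saturate out-of-range inputs to the
// destination's min/max, so only a NaN input needs the extra select to force
// the result to zero.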
15384 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
15385 TargetLowering::DAGCombinerInfo &DCI,
15386 const RISCVSubtarget &Subtarget) {
15387 SelectionDAG &DAG = DCI.DAG;
15388 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15389 MVT XLenVT = Subtarget.getXLenVT();
15390
15391 // Only handle XLen types. Other types narrower than XLen will eventually be
15392 // legalized to XLenVT.
15393 EVT DstVT = N->getValueType(0);
15394 if (DstVT != XLenVT)
15395 return SDValue();
15396
15397 SDValue Src = N->getOperand(0);
15398
15399 // Don't do this for strict-fp Src.
15400 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15401 return SDValue();
15402
15403 // Ensure the FP type is also legal.
15404 if (!TLI.isTypeLegal(Src.getValueType()))
15405 return SDValue();
15406
15407 // Don't do this for f16 with Zfhmin and not Zfh.
15408 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15409 return SDValue();
15410
15411 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15412
15413 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15414 if (FRM == RISCVFPRndMode::Invalid)
15415 return SDValue();
15416
15417 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15418
15419 unsigned Opc;
15420 if (SatVT == DstVT)
15421 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15422 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15423 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15424 else
15425 return SDValue();
15426 // FIXME: Support other SatVTs by clamping before or after the conversion.
15427
15428 Src = Src.getOperand(0);
15429
15430 SDLoc DL(N);
15431 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15432 DAG.getTargetConstant(FRM, DL, XLenVT));
15433
15434 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15435 // extend.
15436 if (Opc == RISCVISD::FCVT_WU_RV64)
15437 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15438
15439 // RISC-V FP-to-int conversions saturate to the destination register size, but
15440 // don't produce 0 for nan.
15441 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15442 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15443}
15444
15445// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15446// smaller than XLenVT.
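// bswap reverses the byte order and bitreverse then reverses every bit, so the
// composition keeps the bytes in place but reverses the bits within each byte,
// which is exactly what brev8 (GREVI with shamt 7) computes.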
15447 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15448 const RISCVSubtarget &Subtarget) {
15449 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15450
15451 SDValue Src = N->getOperand(0);
15452 if (Src.getOpcode() != ISD::BSWAP)
15453 return SDValue();
15454
15455 EVT VT = N->getValueType(0);
15456 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15457 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15458 return SDValue();
15459
15460 SDLoc DL(N);
15461 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15462}
15463
15464// Convert from one FMA opcode to another based on whether we are negating the
15465// multiply result and/or the accumulator.
15466// NOTE: Only supports RVV operations with VL.
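// For example, negating the product of VFMADD_VL (a*b + c) gives -(a*b) + c,
// which is VFNMSUB_VL; negating only its accumulator gives a*b - c, which is
// VFMSUB_VL.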
15467static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15468 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15469 if (NegMul) {
15470 // clang-format off
15471 switch (Opcode) {
15472 default: llvm_unreachable("Unexpected opcode");
15473 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15474 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15475 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15476 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15477 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15478 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15479 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15480 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15481 }
15482 // clang-format on
15483 }
15484
15485 // Negating the accumulator changes ADD<->SUB.
15486 if (NegAcc) {
15487 // clang-format off
15488 switch (Opcode) {
15489 default: llvm_unreachable("Unexpected opcode");
15490 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15491 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15492 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15493 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15494 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15495 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15496 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15497 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15498 }
15499 // clang-format on
15500 }
15501
15502 return Opcode;
15503}
15504
15505 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15506 // Fold FNEG_VL into FMA opcodes.
15507 // The first operand of strict-fp is chain.
15508 unsigned Offset = N->isTargetStrictFPOpcode();
15509 SDValue A = N->getOperand(0 + Offset);
15510 SDValue B = N->getOperand(1 + Offset);
15511 SDValue C = N->getOperand(2 + Offset);
15512 SDValue Mask = N->getOperand(3 + Offset);
15513 SDValue VL = N->getOperand(4 + Offset);
15514
15515 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15516 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15517 V.getOperand(2) == VL) {
15518 // Return the negated input.
15519 V = V.getOperand(0);
15520 return true;
15521 }
15522
15523 return false;
15524 };
15525
15526 bool NegA = invertIfNegative(A);
15527 bool NegB = invertIfNegative(B);
15528 bool NegC = invertIfNegative(C);
15529
15530 // If no operands are negated, we're done.
15531 if (!NegA && !NegB && !NegC)
15532 return SDValue();
15533
15534 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15535 if (N->isTargetStrictFPOpcode())
15536 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15537 {N->getOperand(0), A, B, C, Mask, VL});
15538 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15539 VL);
15540}
15541
15542 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
15543 const RISCVSubtarget &Subtarget) {
15544 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15545 return V;
15546
15547 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
15548 !Subtarget.hasVInstructionsF16())
15549 return SDValue();
15550
15551 // FIXME: Ignore strict opcodes for now.
15552 if (N->isTargetStrictFPOpcode())
15553 return SDValue();
15554
15555 // Try to form widening FMA.
15556 SDValue Op0 = N->getOperand(0);
15557 SDValue Op1 = N->getOperand(1);
15558 SDValue Mask = N->getOperand(3);
15559 SDValue VL = N->getOperand(4);
15560
15561 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
15562 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
15563 return SDValue();
15564
15565 // TODO: Refactor to handle more complex cases similar to
15566 // combineBinOp_VLToVWBinOp_VL.
15567 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
15568 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
15569 return SDValue();
15570
15571 // Check the mask and VL are the same.
15572 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15573 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15574 return SDValue();
15575
15576 unsigned NewOpc;
15577 switch (N->getOpcode()) {
15578 default:
15579 llvm_unreachable("Unexpected opcode");
15580 case RISCVISD::VFMADD_VL:
15581 NewOpc = RISCVISD::VFWMADD_VL;
15582 break;
15583 case RISCVISD::VFNMSUB_VL:
15584 NewOpc = RISCVISD::VFWNMSUB_VL;
15585 break;
15586 case RISCVISD::VFNMADD_VL:
15587 NewOpc = RISCVISD::VFWNMADD_VL;
15588 break;
15589 case RISCVISD::VFMSUB_VL:
15590 NewOpc = RISCVISD::VFWMSUB_VL;
15591 break;
15592 }
15593
15594 Op0 = Op0.getOperand(0);
15595 Op1 = Op1.getOperand(0);
15596
15597 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15598 N->getOperand(2), Mask, VL);
15599}
15600
15601 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15602 const RISCVSubtarget &Subtarget) {
15603 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15604
15605 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15606 return SDValue();
15607
15608 if (!isa<ConstantSDNode>(N->getOperand(1)))
15609 return SDValue();
15610 uint64_t ShAmt = N->getConstantOperandVal(1);
15611 if (ShAmt > 32)
15612 return SDValue();
15613
15614 SDValue N0 = N->getOperand(0);
15615
15616 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15617 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15618 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
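// For example, assuming (sra (sext_inreg (shl X, 3), i32), 2), the result is
// (sra (shl X, 35), 34); SLLI and SRAI have compressed encodings while
// SLLIW/SRAIW do not.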
15619 if (ShAmt < 32 &&
15620 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15621 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15622 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15623 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15624 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15625 if (LShAmt < 32) {
15626 SDLoc ShlDL(N0.getOperand(0));
15627 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15628 N0.getOperand(0).getOperand(0),
15629 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15630 SDLoc DL(N);
15631 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15632 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15633 }
15634 }
15635
15636 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15637 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15638 //
15639 // Also try these folds where an add or sub is in the middle.
15640 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15641 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
15642 SDValue Shl;
15643 ConstantSDNode *AddC = nullptr;
15644
15645 // We might have an ADD or SUB between the SRA and SHL.
15646 bool IsAdd = N0.getOpcode() == ISD::ADD;
15647 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15648 // Other operand needs to be a constant we can modify.
15649 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15650 if (!AddC)
15651 return SDValue();
15652
15653 // AddC needs to have at least 32 trailing zeros.
15654 if (AddC->getAPIntValue().countr_zero() < 32)
15655 return SDValue();
15656
15657 // All users should be a shift by constant less than or equal to 32. This
15658 // ensures we'll do this optimization for each of them to produce an
15659 // add/sub+sext_inreg they can all share.
15660 for (SDNode *U : N0->uses()) {
15661 if (U->getOpcode() != ISD::SRA ||
15662 !isa<ConstantSDNode>(U->getOperand(1)) ||
15663 U->getConstantOperandVal(1) > 32)
15664 return SDValue();
15665 }
15666
15667 Shl = N0.getOperand(IsAdd ? 0 : 1);
15668 } else {
15669 // Not an ADD or SUB.
15670 Shl = N0;
15671 }
15672
15673 // Look for a shift left by 32.
15674 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15675 Shl.getConstantOperandVal(1) != 32)
15676 return SDValue();
15677
15678 // If we didn't look through an add/sub, then the shl should have one use.
15679 // If we did look through an add/sub, the sext_inreg we create is free so
15680 // we're only creating 2 new instructions. It's enough to only remove the
15681 // original sra+add/sub.
15682 if (!AddC && !Shl.hasOneUse())
15683 return SDValue();
15684
15685 SDLoc DL(N);
15686 SDValue In = Shl.getOperand(0);
15687
15688 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15689 // constant.
15690 if (AddC) {
15691 SDValue ShiftedAddC =
15692 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15693 if (IsAdd)
15694 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15695 else
15696 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15697 }
15698
15699 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15700 DAG.getValueType(MVT::i32));
15701 if (ShAmt == 32)
15702 return SExt;
15703
15704 return DAG.getNode(
15705 ISD::SHL, DL, MVT::i64, SExt,
15706 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15707}
15708
15709 // Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
15710 // the result is used as the condition of a br_cc or select_cc we can invert,
15711// inverting the setcc is free, and Z is 0/1. Caller will invert the
15712// br_cc/select_cc.
15713 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15714 bool IsAnd = Cond.getOpcode() == ISD::AND;
15715 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15716 return SDValue();
15717
15718 if (!Cond.hasOneUse())
15719 return SDValue();
15720
15721 SDValue Setcc = Cond.getOperand(0);
15722 SDValue Xor = Cond.getOperand(1);
15723 // Canonicalize setcc to LHS.
15724 if (Setcc.getOpcode() != ISD::SETCC)
15725 std::swap(Setcc, Xor);
15726 // LHS should be a setcc and RHS should be an xor.
15727 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15728 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15729 return SDValue();
15730
15731 // If the condition is an And, SimplifyDemandedBits may have changed
15732 // (xor Z, 1) to (not Z).
15733 SDValue Xor1 = Xor.getOperand(1);
15734 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15735 return SDValue();
15736
15737 EVT VT = Cond.getValueType();
15738 SDValue Xor0 = Xor.getOperand(0);
15739
15740 // The LHS of the xor needs to be 0/1.
15741 APInt Mask = APInt::getBitsSetFrom(Xor0.getValueSizeInBits(), 1);
15742 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15743 return SDValue();
15744
15745 // We can only invert integer setccs.
15746 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15747 if (!SetCCOpVT.isScalarInteger())
15748 return SDValue();
15749
15750 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15751 if (ISD::isIntEqualitySetCC(CCVal)) {
15752 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15753 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15754 Setcc.getOperand(1), CCVal);
15755 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15756 // Invert (setlt 0, X) by converting to (setlt X, 1).
15757 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15758 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15759 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15760 // Invert (setlt X, 1) by converting to (setlt 0, X).
15761 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15762 DAG.getConstant(0, SDLoc(Setcc), VT),
15763 Setcc.getOperand(0), CCVal);
15764 } else
15765 return SDValue();
15766
15767 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15768 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15769}
15770
15771 // Perform common combines for BR_CC and SELECT_CC conditions.
15772static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15773 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15774 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15775
15776 // An arithmetic right shift preserves the sign bit, so the shift can be
15777 // omitted here.
15778 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15779 // setge (sra X, N), 0 -> setge X, 0
15780 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15781 LHS.getOpcode() == ISD::SRA) {
15782 LHS = LHS.getOperand(0);
15783 return true;
15784 }
15785
15786 if (!ISD::isIntEqualitySetCC(CCVal))
15787 return false;
15788
15789 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15790 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15791 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15792 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15793 // If we're looking for eq 0 instead of ne 0, we need to invert the
15794 // condition.
15795 bool Invert = CCVal == ISD::SETEQ;
15796 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15797 if (Invert)
15798 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15799
15800 RHS = LHS.getOperand(1);
15801 LHS = LHS.getOperand(0);
15802 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15803
15804 CC = DAG.getCondCode(CCVal);
15805 return true;
15806 }
15807
15808 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15809 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15810 RHS = LHS.getOperand(1);
15811 LHS = LHS.getOperand(0);
15812 return true;
15813 }
15814
15815 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15816 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15817 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15818 SDValue LHS0 = LHS.getOperand(0);
15819 if (LHS0.getOpcode() == ISD::AND &&
15820 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15821 uint64_t Mask = LHS0.getConstantOperandVal(1);
15822 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15823 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15824 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15825 CC = DAG.getCondCode(CCVal);
15826
15827 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15828 LHS = LHS0.getOperand(0);
15829 if (ShAmt != 0)
15830 LHS =
15831 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15832 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15833 return true;
15834 }
15835 }
15836 }
15837
15838 // Fold (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15839 // This can occur when legalizing some floating point comparisons.
15840 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15841 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15842 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15843 CC = DAG.getCondCode(CCVal);
15844 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15845 return true;
15846 }
15847
15848 if (isNullConstant(RHS)) {
15849 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15850 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15851 CC = DAG.getCondCode(CCVal);
15852 LHS = NewCond;
15853 return true;
15854 }
15855 }
15856
15857 return false;
15858}
15859
15860// Fold
15861// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15862// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15863// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15864// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15865 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15866 SDValue TrueVal, SDValue FalseVal,
15867 bool Swapped) {
15868 bool Commutative = true;
15869 unsigned Opc = TrueVal.getOpcode();
15870 switch (Opc) {
15871 default:
15872 return SDValue();
15873 case ISD::SHL:
15874 case ISD::SRA:
15875 case ISD::SRL:
15876 case ISD::SUB:
15877 Commutative = false;
15878 break;
15879 case ISD::ADD:
15880 case ISD::OR:
15881 case ISD::XOR:
15882 break;
15883 }
15884
15885 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15886 return SDValue();
15887
15888 unsigned OpToFold;
15889 if (FalseVal == TrueVal.getOperand(0))
15890 OpToFold = 0;
15891 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15892 OpToFold = 1;
15893 else
15894 return SDValue();
15895
15896 EVT VT = N->getValueType(0);
15897 SDLoc DL(N);
15898 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15899 EVT OtherOpVT = OtherOp.getValueType();
15900 SDValue IdentityOperand =
15901 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
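// For the non-commutative ops above, the folded operand is always the LHS, so
// the remaining operand is the RHS, where 0 acts as the identity (x - 0,
// x << 0, x >> 0); getNeutralElement is only relied on for the commutative ops.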
15902 if (!Commutative)
15903 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15904 assert(IdentityOperand && "No identity operand!");
15905
15906 if (Swapped)
15907 std::swap(OtherOp, IdentityOperand);
15908 SDValue NewSel =
15909 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15910 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15911}
15912
15913// This tries to get rid of `select` and `icmp` that are being used to handle
15914// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
15915 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15916 SDValue Cond = N->getOperand(0);
15917
15918 // This represents either CTTZ or CTLZ instruction.
15919 SDValue CountZeroes;
15920
15921 SDValue ValOnZero;
15922
15923 if (Cond.getOpcode() != ISD::SETCC)
15924 return SDValue();
15925
15926 if (!isNullConstant(Cond->getOperand(1)))
15927 return SDValue();
15928
15929 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15930 if (CCVal == ISD::CondCode::SETEQ) {
15931 CountZeroes = N->getOperand(2);
15932 ValOnZero = N->getOperand(1);
15933 } else if (CCVal == ISD::CondCode::SETNE) {
15934 CountZeroes = N->getOperand(1);
15935 ValOnZero = N->getOperand(2);
15936 } else {
15937 return SDValue();
15938 }
15939
15940 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15941 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15942 CountZeroes = CountZeroes.getOperand(0);
15943
15944 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15945 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15946 CountZeroes.getOpcode() != ISD::CTLZ &&
15947 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15948 return SDValue();
15949
15950 if (!isNullConstant(ValOnZero))
15951 return SDValue();
15952
15953 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15954 if (Cond->getOperand(0) != CountZeroesArgument)
15955 return SDValue();
15956
15957 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15958 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15959 CountZeroes.getValueType(), CountZeroesArgument);
15960 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15961 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15962 CountZeroes.getValueType(), CountZeroesArgument);
15963 }
15964
15965 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15966 SDValue BitWidthMinusOne =
15967 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15968
15969 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15970 CountZeroes, BitWidthMinusOne);
15971 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15972}
15973
15974 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15975 const RISCVSubtarget &Subtarget) {
15976 SDValue Cond = N->getOperand(0);
15977 SDValue True = N->getOperand(1);
15978 SDValue False = N->getOperand(2);
15979 SDLoc DL(N);
15980 EVT VT = N->getValueType(0);
15981 EVT CondVT = Cond.getValueType();
15982
15983 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15984 return SDValue();
15985
15986 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
15987 // BEXTI, where C is a power of 2.
15988 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15989 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15990 SDValue LHS = Cond.getOperand(0);
15991 SDValue RHS = Cond.getOperand(1);
15992 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
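// The transform below only fires for a single-bit mask that is too large to be
// encoded as a simm12 ANDI immediate, which is the case where BEXTI pays off.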
15993 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15994 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15995 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15996 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15997 return DAG.getSelect(DL, VT,
15998 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15999 False, True);
16000 }
16001 }
16002 return SDValue();
16003}
16004
16005 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16006 const RISCVSubtarget &Subtarget) {
16007 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16008 return Folded;
16009
16010 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16011 return V;
16012
16013 if (Subtarget.hasConditionalMoveFusion())
16014 return SDValue();
16015
16016 SDValue TrueVal = N->getOperand(1);
16017 SDValue FalseVal = N->getOperand(2);
16018 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16019 return V;
16020 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16021}
16022
16023/// If we have a build_vector where each lane is binop X, C, where C
16024/// is a constant (but not necessarily the same constant on all lanes),
16025/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16026/// We assume that materializing a constant build vector will be no more
16027 /// expensive than performing O(n) binops.
16028 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16029 const RISCVSubtarget &Subtarget,
16030 const RISCVTargetLowering &TLI) {
16031 SDLoc DL(N);
16032 EVT VT = N->getValueType(0);
16033
16034 assert(!VT.isScalableVector() && "unexpected build vector");
16035
16036 if (VT.getVectorNumElements() == 1)
16037 return SDValue();
16038
16039 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16040 if (!TLI.isBinOp(Opcode))
16041 return SDValue();
16042
16043 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16044 return SDValue();
16045
16046 // This BUILD_VECTOR involves an implicit truncation, and sinking
16047 // truncates through binops is non-trivial.
16048 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16049 return SDValue();
16050
16051 SmallVector<SDValue> LHSOps;
16052 SmallVector<SDValue> RHSOps;
16053 for (SDValue Op : N->ops()) {
16054 if (Op.isUndef()) {
16055 // We can't form a divide or remainder from undef.
16056 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16057 return SDValue();
16058
16059 LHSOps.push_back(Op);
16060 RHSOps.push_back(Op);
16061 continue;
16062 }
16063
16064 // TODO: We can handle operations which have a neutral rhs value
16065 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16066 // of profit in a more explicit manner.
16067 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16068 return SDValue();
16069
16070 LHSOps.push_back(Op.getOperand(0));
16071 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16072 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16073 return SDValue();
16074 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16075 // have different LHS and RHS types.
16076 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16077 return SDValue();
16078
16079 RHSOps.push_back(Op.getOperand(1));
16080 }
16081
16082 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16083 DAG.getBuildVector(VT, DL, RHSOps));
16084}
16085
16086 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
16087 const RISCVSubtarget &Subtarget,
16088 const RISCVTargetLowering &TLI) {
16089 SDValue InVec = N->getOperand(0);
16090 SDValue InVal = N->getOperand(1);
16091 SDValue EltNo = N->getOperand(2);
16092 SDLoc DL(N);
16093
16094 EVT VT = InVec.getValueType();
16095 if (VT.isScalableVector())
16096 return SDValue();
16097
16098 if (!InVec.hasOneUse())
16099 return SDValue();
16100
16101 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16102 // move the insert_vector_elts into the arms of the binop. Note that
16103 // the new RHS must be a constant.
16104 const unsigned InVecOpcode = InVec->getOpcode();
16105 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16106 InVal.hasOneUse()) {
16107 SDValue InVecLHS = InVec->getOperand(0);
16108 SDValue InVecRHS = InVec->getOperand(1);
16109 SDValue InValLHS = InVal->getOperand(0);
16110 SDValue InValRHS = InVal->getOperand(1);
16111
16112 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
16113 return SDValue();
16114 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16115 return SDValue();
16116 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16117 // have different LHS and RHS types.
16118 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16119 return SDValue();
16120 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16121 InVecLHS, InValLHS, EltNo);
16122 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16123 InVecRHS, InValRHS, EltNo);
16124 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16125 }
16126
16127 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16128 // move the insert_vector_elt to the source operand of the concat_vector.
16129 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16130 return SDValue();
16131
16132 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16133 if (!IndexC)
16134 return SDValue();
16135 unsigned Elt = IndexC->getZExtValue();
16136
16137 EVT ConcatVT = InVec.getOperand(0).getValueType();
16138 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16139 return SDValue();
16140 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16141 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16142
16143 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16144 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16145 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16146 ConcatOp, InVal, NewIdx);
16147
16148 SmallVector<SDValue> ConcatOps;
16149 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16150 ConcatOps[ConcatOpIdx] = ConcatOp;
16151 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16152}
16153
16154// If we're concatenating a series of vector loads like
16155// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16156// Then we can turn this into a strided load by widening the vector elements
16157// vlse32 p, stride=n
16158 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
16159 const RISCVSubtarget &Subtarget,
16160 const RISCVTargetLowering &TLI) {
16161 SDLoc DL(N);
16162 EVT VT = N->getValueType(0);
16163
16164 // Only perform this combine on legal MVTs.
16165 if (!TLI.isTypeLegal(VT))
16166 return SDValue();
16167
16168 // TODO: Potentially extend this to scalable vectors
16169 if (VT.isScalableVector())
16170 return SDValue();
16171
16172 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16173 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16174 !SDValue(BaseLd, 0).hasOneUse())
16175 return SDValue();
16176
16177 EVT BaseLdVT = BaseLd->getValueType(0);
16178
16179 // Go through the loads and check that they're strided
16180 SmallVector<LoadSDNode *> Lds;
16181 Lds.push_back(BaseLd);
16182 Align Align = BaseLd->getAlign();
16183 for (SDValue Op : N->ops().drop_front()) {
16184 auto *Ld = dyn_cast<LoadSDNode>(Op);
16185 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16186 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16187 Ld->getValueType(0) != BaseLdVT)
16188 return SDValue();
16189
16190 Lds.push_back(Ld);
16191
16192 // The common alignment is the most restrictive (smallest) of all the loads
16193 Align = std::min(Align, Ld->getAlign());
16194 }
16195
16196 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16197 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16198 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16199 // If the load ptrs can be decomposed into a common (Base + Index) with a
16200 // common constant stride, then return the constant stride.
16201 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16202 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16203 if (BIO1.equalBaseIndex(BIO2, DAG))
16204 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16205
16206 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16207 SDValue P1 = Ld1->getBasePtr();
16208 SDValue P2 = Ld2->getBasePtr();
16209 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16210 return {{P2.getOperand(1), false}};
16211 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16212 return {{P1.getOperand(1), true}};
16213
16214 return std::nullopt;
16215 };
16216
16217 // Get the distance between the first and second loads
16218 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16219 if (!BaseDiff)
16220 return SDValue();
16221
16222 // Check all the loads are the same distance apart
16223 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16224 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16225 return SDValue();
16226
16227 // TODO: At this point, we've successfully matched a generalized gather
16228 // load. Maybe we should emit that, and then move the specialized
16229 // matchers above and below into a DAG combine?
16230
16231 // Get the widened scalar type, e.g. v4i8 -> i64
16232 unsigned WideScalarBitWidth =
16233 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16234 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16235
16236 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
16237 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16238 if (!TLI.isTypeLegal(WideVecVT))
16239 return SDValue();
16240
16241 // Check that the operation is legal
16242 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16243 return SDValue();
16244
16245 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16246 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
16247 ? std::get<SDValue>(StrideVariant)
16248 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
16249 Lds[0]->getOffset().getValueType());
16250 if (MustNegateStride)
16251 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16252
16253 SDValue AllOneMask =
16254 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16255 DAG.getConstant(1, DL, MVT::i1));
16256
16257 uint64_t MemSize;
16258 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16259 ConstStride && ConstStride->getSExtValue() >= 0)
16260 // total size = (elsize * n) + (stride - elsize) * (n-1)
16261 // = elsize + stride * (n-1)
16262 MemSize = WideScalarVT.getSizeInBits() +
16263 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16264 else
16265 // If Stride isn't constant, then we can't know how much it will load
16266 MemSize = MemoryLocation::UnknownSize;
16267
16268 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
16269 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
16270 Align);
16271
16272 SDValue StridedLoad = DAG.getStridedLoadVP(
16273 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
16274 AllOneMask,
16275 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
16276
16277 for (SDValue Ld : N->ops())
16278 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
16279
16280 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
16281}
16282
16283 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
16284 const RISCVSubtarget &Subtarget) {
16285
16286 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16287
16288 if (N->getValueType(0).isFixedLengthVector())
16289 return SDValue();
16290
16291 SDValue Addend = N->getOperand(0);
16292 SDValue MulOp = N->getOperand(1);
16293
16294 if (N->getOpcode() == RISCVISD::ADD_VL) {
16295 SDValue AddMergeOp = N->getOperand(2);
16296 if (!AddMergeOp.isUndef())
16297 return SDValue();
16298 }
16299
16300 auto IsVWMulOpc = [](unsigned Opc) {
16301 switch (Opc) {
16302 case RISCVISD::VWMUL_VL:
16303 case RISCVISD::VWMULU_VL:
16304 case RISCVISD::VWMULSU_VL:
16305 return true;
16306 default:
16307 return false;
16308 }
16309 };
16310
16311 if (!IsVWMulOpc(MulOp.getOpcode()))
16312 std::swap(Addend, MulOp);
16313
16314 if (!IsVWMulOpc(MulOp.getOpcode()))
16315 return SDValue();
16316
16317 SDValue MulMergeOp = MulOp.getOperand(2);
16318
16319 if (!MulMergeOp.isUndef())
16320 return SDValue();
16321
16322 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
16323 const RISCVSubtarget &Subtarget) {
16324 if (N->getOpcode() == ISD::ADD) {
16325 SDLoc DL(N);
16326 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
16327 Subtarget);
16328 }
16329 return std::make_pair(N->getOperand(3), N->getOperand(4));
16330 }(N, DAG, Subtarget);
16331
16332 SDValue MulMask = MulOp.getOperand(3);
16333 SDValue MulVL = MulOp.getOperand(4);
16334
16335 if (AddMask != MulMask || AddVL != MulVL)
16336 return SDValue();
16337
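// The VWMACC*_VL opcodes are laid out in the same relative order as the
// VWMUL*_VL opcodes, so the multiply opcode can be translated by a fixed
// offset; the static_asserts below verify that ordering.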
16338 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
16339 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
16340 "Unexpected opcode after VWMACC_VL");
16341 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
16342 "Unexpected opcode after VWMACC_VL!");
16343 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
16344 "Unexpected opcode after VWMUL_VL!");
16345 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
16346 "Unexpected opcode after VWMUL_VL!");
16347
16348 SDLoc DL(N);
16349 EVT VT = N->getValueType(0);
16350 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
16351 AddVL};
16352 return DAG.getNode(Opc, DL, VT, Ops);
16353}
16354
16355 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
16356 ISD::MemIndexType &IndexType,
16357 RISCVTargetLowering::DAGCombinerInfo &DCI) {
16358 if (!DCI.isBeforeLegalize())
16359 return false;
16360
16361 SelectionDAG &DAG = DCI.DAG;
16362 const MVT XLenVT =
16363 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
16364
16365 const EVT IndexVT = Index.getValueType();
16366
16367 // RISC-V indexed loads only support the "unsigned unscaled" addressing
16368 // mode, so anything else must be manually legalized.
16369 if (!isIndexTypeSigned(IndexType))
16370 return false;
16371
16372 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
16373 // Any index legalization should first promote to XLenVT, so we don't lose
16374 // bits when scaling. This may create an illegal index type so we let
16375 // LLVM's legalization take care of the splitting.
16376 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
16377 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
16378 IndexVT.changeVectorElementType(XLenVT), Index);
16379 }
16380 IndexType = ISD::UNSIGNED_SCALED;
16381 return true;
16382}
16383
16384/// Match the index vector of a scatter or gather node as the shuffle mask
16385/// which performs the rearrangement if possible. Will only match if
16386/// all lanes are touched, and thus replacing the scatter or gather with
16387/// a unit strided access and shuffle is legal.
16388 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
16389 SmallVector<int> &ShuffleMask) {
16390 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16391 return false;
16392 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
16393 return false;
16394
16395 const unsigned ElementSize = VT.getScalarStoreSize();
16396 const unsigned NumElems = VT.getVectorNumElements();
16397
16398 // Create the shuffle mask and check all bits active
16399 assert(ShuffleMask.empty());
16400 BitVector ActiveLanes(NumElems);
16401 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16402 // TODO: We've found an active bit of UB, and could be
16403 // more aggressive here if desired.
16404 if (Index->getOperand(i)->isUndef())
16405 return false;
16406 uint64_t C = Index->getConstantOperandVal(i);
16407 if (C % ElementSize != 0)
16408 return false;
16409 C = C / ElementSize;
16410 if (C >= NumElems)
16411 return false;
16412 ShuffleMask.push_back(C);
16413 ActiveLanes.set(C);
16414 }
16415 return ActiveLanes.all();
16416}
16417
16418/// Match the index of a gather or scatter operation as an operation
16419/// with twice the element width and half the number of elements. This is
16420/// generally profitable (if legal) because these operations are linear
16421/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
16422/// come out ahead.
16423 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16424 Align BaseAlign, const RISCVSubtarget &ST) {
16425 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16426 return false;
16427 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
16428 return false;
16429
16430 // Attempt a doubling. If we can use an element type 4x or 8x in
16431 // size, this will happen via multiple iterations of the transform.
16432 const unsigned NumElems = VT.getVectorNumElements();
16433 if (NumElems % 2 != 0)
16434 return false;
16435
16436 const unsigned ElementSize = VT.getScalarStoreSize();
16437 const unsigned WiderElementSize = ElementSize * 2;
16438 if (WiderElementSize > ST.getELen()/8)
16439 return false;
16440
16441 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16442 return false;
16443
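// Each even/odd pair of indices must describe one contiguous, naturally
// aligned element of the wider type: even lanes must be multiples of the wider
// element size, and each odd lane must follow its predecessor by ElementSize.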
16444 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16445 // TODO: We've found an active bit of UB, and could be
16446 // more aggressive here if desired.
16447 if (Index->getOperand(i)->isUndef())
16448 return false;
16449 // TODO: This offset check is too strict if we support fully
16450 // misaligned memory operations.
16451 uint64_t C = Index->getConstantOperandVal(i);
16452 if (i % 2 == 0) {
16453 if (C % WiderElementSize != 0)
16454 return false;
16455 continue;
16456 }
16457 uint64_t Last = Index->getConstantOperandVal(i-1);
16458 if (C != Last + ElementSize)
16459 return false;
16460 }
16461 return true;
16462}
16463
16464// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16465 // This benefits the cases where X and Y are both low-precision vectors of the
16466 // same value type. Since the truncate would be lowered into n levels of
16467 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such a
16468 // pattern would be expanded into a series of "vsetvli" and "vnsrl"
16469 // instructions later to reach this point.
16470 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
16471 SDValue Mask = N->getOperand(1);
16472 SDValue VL = N->getOperand(2);
16473
16474 bool IsVLMAX = isAllOnesConstant(VL) ||
16475 (isa<RegisterSDNode>(VL) &&
16476 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16477 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16478 Mask.getOperand(0) != VL)
16479 return SDValue();
16480
16481 auto IsTruncNode = [&](SDValue V) {
16482 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16483 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16484 };
16485
16486 SDValue Op = N->getOperand(0);
16487
16488 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16489 // to distinguish such a pattern.
16490 while (IsTruncNode(Op)) {
16491 if (!Op.hasOneUse())
16492 return SDValue();
16493 Op = Op.getOperand(0);
16494 }
16495
16496 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16497 return SDValue();
16498
16499 SDValue N0 = Op.getOperand(0);
16500 SDValue N1 = Op.getOperand(1);
16501 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16502 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16503 return SDValue();
16504
16505 SDValue N00 = N0.getOperand(0);
16506 SDValue N10 = N1.getOperand(0);
16507 if (!N00.getValueType().isVector() ||
16508 N00.getValueType() != N10.getValueType() ||
16509 N->getValueType(0) != N10.getValueType())
16510 return SDValue();
16511
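// Clamp the shift amount to scalarsize - 1 so that the sra on the narrow type
// never shifts by more than the element width allows, per the fold above.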
16512 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16513 SDValue SMin =
16514 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16515 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16516 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16517}
16518
16519// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16520// maximum value for the truncated type.
16521// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16522// is the signed maximum value for the truncated type and C2 is the signed
16523// minimum value.
16524 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
16525 const RISCVSubtarget &Subtarget) {
16526 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16527
16528 MVT VT = N->getSimpleValueType(0);
16529
16530 SDValue Mask = N->getOperand(1);
16531 SDValue VL = N->getOperand(2);
16532
16533 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16534 APInt &SplatVal) {
16535 if (V.getOpcode() != Opc &&
16536 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16537 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16538 return SDValue();
16539
16540 SDValue Op = V.getOperand(1);
16541
16542 // Peek through conversion between fixed and scalable vectors.
16543 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16544 isNullConstant(Op.getOperand(2)) &&
16545 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16546 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16547 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16548 isNullConstant(Op.getOperand(1).getOperand(1)))
16549 Op = Op.getOperand(1).getOperand(0);
16550
16551 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16552 return V.getOperand(0);
16553
16554 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16555 Op.getOperand(2) == VL) {
16556 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16557 SplatVal =
16558 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16559 return V.getOperand(0);
16560 }
16561 }
16562
16563 return SDValue();
16564 };
16565
16566 SDLoc DL(N);
16567
16568 auto DetectUSatPattern = [&](SDValue V) {
16569 APInt LoC, HiC;
16570
16571 // Simple case, V is a UMIN.
16572 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16573 if (HiC.isMask(VT.getScalarSizeInBits()))
16574 return UMinOp;
16575
16576 // If we have an SMAX that removes negative numbers first, then we can match
16577 // SMIN instead of UMIN.
16578 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16579 if (SDValue SMaxOp =
16580 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16581 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16582 return SMinOp;
16583
16584 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16585 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16586 // first.
16587 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16588 if (SDValue SMinOp =
16589 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16590 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16591 HiC.uge(LoC))
16592 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16593 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16594 Mask, VL);
16595
16596 return SDValue();
16597 };
16598
16599 auto DetectSSatPattern = [&](SDValue V) {
16600 unsigned NumDstBits = VT.getScalarSizeInBits();
16601 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16602 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16603 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16604
16605 APInt HiC, LoC;
16606 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16607 if (SDValue SMaxOp =
16608 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16609 if (HiC == SignedMax && LoC == SignedMin)
16610 return SMaxOp;
16611
16612 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16613 if (SDValue SMinOp =
16614 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16615 if (HiC == SignedMax && LoC == SignedMin)
16616 return SMinOp;
16617
16618 return SDValue();
16619 };
16620
16621 SDValue Src = N->getOperand(0);
16622
16623 // Look through multiple layers of truncates.
16624 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16625 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16626 Src.hasOneUse())
16627 Src = Src.getOperand(0);
16628
16629 SDValue Val;
16630 unsigned ClipOpc;
16631 if ((Val = DetectUSatPattern(Src)))
16632 ClipOpc = RISCVISD::VNCLIPU_VL;
16633 else if ((Val = DetectSSatPattern(Src)))
16634 ClipOpc = RISCVISD::VNCLIP_VL;
16635 else
16636 return SDValue();
16637
16638 MVT ValVT = Val.getSimpleValueType();
16639
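// Each vnclip(u) step narrows the element type by half, so loop until the
// destination element width is reached.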
16640 do {
16641 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16642 ValVT = ValVT.changeVectorElementType(ValEltVT);
16643 // Rounding mode here is arbitrary since we aren't shifting out any bits.
16644 Val = DAG.getNode(
16645 ClipOpc, DL, ValVT,
16646 {Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask,
16648 VL});
16649 } while (ValVT != VT);
16650
16651 return Val;
16652}
16653
16654 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
16655 DAGCombinerInfo &DCI) const {
16656 SelectionDAG &DAG = DCI.DAG;
16657 const MVT XLenVT = Subtarget.getXLenVT();
16658 SDLoc DL(N);
16659
16660 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16661 // bits are demanded. N will be added to the Worklist if it was not deleted.
16662 // Caller should return SDValue(N, 0) if this returns true.
16663 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16664 SDValue Op = N->getOperand(OpNo);
16665 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16666 if (!SimplifyDemandedBits(Op, Mask, DCI))
16667 return false;
16668
16669 if (N->getOpcode() != ISD::DELETED_NODE)
16670 DCI.AddToWorklist(N);
16671 return true;
16672 };
16673
16674 switch (N->getOpcode()) {
16675 default:
16676 break;
16677 case RISCVISD::SplitF64: {
16678 SDValue Op0 = N->getOperand(0);
16679 // If the input to SplitF64 is just BuildPairF64 then the operation is
16680 // redundant. Instead, use BuildPairF64's operands directly.
16681 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16682 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16683
16684 if (Op0->isUndef()) {
16685 SDValue Lo = DAG.getUNDEF(MVT::i32);
16686 SDValue Hi = DAG.getUNDEF(MVT::i32);
16687 return DCI.CombineTo(N, Lo, Hi);
16688 }
16689
16690 // It's cheaper to materialise two 32-bit integers than to load a double
16691 // from the constant pool and transfer it to integer registers through the
16692 // stack.
16693 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16694 APInt V = C->getValueAPF().bitcastToAPInt();
16695 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16696 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16697 return DCI.CombineTo(N, Lo, Hi);
16698 }
16699
16700 // This is a target-specific version of a DAGCombine performed in
16701 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16702 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16703 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16704 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16705 !Op0.getNode()->hasOneUse())
16706 break;
16707 SDValue NewSplitF64 =
16708 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16709 Op0.getOperand(0));
16710 SDValue Lo = NewSplitF64.getValue(0);
16711 SDValue Hi = NewSplitF64.getValue(1);
16712 APInt SignBit = APInt::getSignMask(32);
16713 if (Op0.getOpcode() == ISD::FNEG) {
16714 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16715 DAG.getConstant(SignBit, DL, MVT::i32));
16716 return DCI.CombineTo(N, Lo, NewHi);
16717 }
16718 assert(Op0.getOpcode() == ISD::FABS);
16719 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16720 DAG.getConstant(~SignBit, DL, MVT::i32));
16721 return DCI.CombineTo(N, Lo, NewHi);
16722 }
16723 case RISCVISD::SLLW:
16724 case RISCVISD::SRAW:
16725 case RISCVISD::SRLW:
16726 case RISCVISD::RORW:
16727 case RISCVISD::ROLW: {
16728 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16729 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16730 SimplifyDemandedLowBitsHelper(1, 5))
16731 return SDValue(N, 0);
16732
16733 break;
16734 }
16735 case RISCVISD::CLZW:
16736 case RISCVISD::CTZW: {
16737 // Only the lower 32 bits of the first operand are read
16738 if (SimplifyDemandedLowBitsHelper(0, 32))
16739 return SDValue(N, 0);
16740 break;
16741 }
16742 case RISCVISD::FMV_W_X_RV64: {
16743 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16744 // conversion is unnecessary and can be replaced with the
16745 // FMV_X_ANYEXTW_RV64 operand.
16746 SDValue Op0 = N->getOperand(0);
16747 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16748 return Op0.getOperand(0);
16749 break;
16750 }
16751 case RISCVISD::FMV_X_ANYEXTH:
16752 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16753 SDLoc DL(N);
16754 SDValue Op0 = N->getOperand(0);
16755 MVT VT = N->getSimpleValueType(0);
16756 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16757 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16758 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16759 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16760 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16761 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16762 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16763 assert(Op0.getOperand(0).getValueType() == VT &&
16764 "Unexpected value type!");
16765 return Op0.getOperand(0);
16766 }
16767
16768 // This is a target-specific version of a DAGCombine performed in
16769 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16770 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16771 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16772 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16773 !Op0.getNode()->hasOneUse())
16774 break;
16775 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16776 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16777 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16778 if (Op0.getOpcode() == ISD::FNEG)
16779 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16780 DAG.getConstant(SignBit, DL, VT));
16781
16782 assert(Op0.getOpcode() == ISD::FABS);
16783 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16784 DAG.getConstant(~SignBit, DL, VT));
16785 }
16786 case ISD::ABS: {
16787 EVT VT = N->getValueType(0);
16788 SDValue N0 = N->getOperand(0);
16789 // abs (sext) -> zext (abs)
16790 // abs (zext) -> zext (handled elsewhere)
16791 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16792 SDValue Src = N0.getOperand(0);
16793 SDLoc DL(N);
16794 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16795 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16796 }
16797 break;
16798 }
16799 case ISD::ADD: {
16800 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16801 return V;
16802 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16803 return V;
16804 return performADDCombine(N, DCI, Subtarget);
16805 }
16806 case ISD::SUB: {
16807 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16808 return V;
16809 return performSUBCombine(N, DAG, Subtarget);
16810 }
16811 case ISD::AND:
16812 return performANDCombine(N, DCI, Subtarget);
16813 case ISD::OR: {
16814 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16815 return V;
16816 return performORCombine(N, DCI, Subtarget);
16817 }
16818 case ISD::XOR:
16819 return performXORCombine(N, DAG, Subtarget);
16820 case ISD::MUL:
16821 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16822 return V;
16823 return performMULCombine(N, DAG, DCI, Subtarget);
16824 case ISD::SDIV:
16825 case ISD::UDIV:
16826 case ISD::SREM:
16827 case ISD::UREM:
16828 if (SDValue V = combineBinOpOfZExt(N, DAG))
16829 return V;
16830 break;
16831 case ISD::FADD:
16832 case ISD::UMAX:
16833 case ISD::UMIN:
16834 case ISD::SMAX:
16835 case ISD::SMIN:
16836 case ISD::FMAXNUM:
16837 case ISD::FMINNUM: {
16838 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16839 return V;
16840 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16841 return V;
16842 return SDValue();
16843 }
16844 case ISD::SETCC:
16845 return performSETCCCombine(N, DAG, Subtarget);
16846 case ISD::SIGN_EXTEND_INREG:
16847 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16848 case ISD::ZERO_EXTEND:
16849 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16850 // type legalization. This is safe because fp_to_uint produces poison if
16851 // it overflows.
16852 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16853 SDValue Src = N->getOperand(0);
16854 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16855 isTypeLegal(Src.getOperand(0).getValueType()))
16856 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16857 Src.getOperand(0));
16858 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16859 isTypeLegal(Src.getOperand(1).getValueType())) {
16860 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16861 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16862 Src.getOperand(0), Src.getOperand(1));
16863 DCI.CombineTo(N, Res);
16864 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16865 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16866 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16867 }
16868 }
16869 return SDValue();
16870 case RISCVISD::TRUNCATE_VECTOR_VL:
16871 if (SDValue V = combineTruncOfSraSext(N, DAG))
16872 return V;
16873 return combineTruncToVnclip(N, DAG, Subtarget);
16874 case ISD::TRUNCATE:
16875 return performTRUNCATECombine(N, DAG, Subtarget);
16876 case ISD::SELECT:
16877 return performSELECTCombine(N, DAG, Subtarget);
16878 case RISCVISD::CZERO_EQZ:
16879 case RISCVISD::CZERO_NEZ: {
16880 SDValue Val = N->getOperand(0);
16881 SDValue Cond = N->getOperand(1);
16882
16883 unsigned Opc = N->getOpcode();
16884
16885 // czero_eqz x, x -> x
16886 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16887 return Val;
16888
16889 unsigned InvOpc =
16890 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16891
16892 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16893 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16894 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16895 SDValue NewCond = Cond.getOperand(0);
16896 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16897 if (DAG.MaskedValueIsZero(NewCond, Mask))
16898 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16899 }
16900 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16901 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16902 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16903 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16904 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16905 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16906 if (ISD::isIntEqualitySetCC(CCVal))
16907 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16908 N->getValueType(0), Val, Cond.getOperand(0));
16909 }
16910 return SDValue();
16911 }
16912 case RISCVISD::SELECT_CC: {
16913 // Transform
16914 SDValue LHS = N->getOperand(0);
16915 SDValue RHS = N->getOperand(1);
16916 SDValue CC = N->getOperand(2);
16917 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16918 SDValue TrueV = N->getOperand(3);
16919 SDValue FalseV = N->getOperand(4);
16920 SDLoc DL(N);
16921 EVT VT = N->getValueType(0);
16922
16923 // If the True and False values are the same, we don't need a select_cc.
16924 if (TrueV == FalseV)
16925 return TrueV;
16926
16927 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16928 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
16929 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16930 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16931 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16932 if (CCVal == ISD::CondCode::SETGE)
16933 std::swap(TrueV, FalseV);
16934
16935 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16936 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16937 // Only handle simm12; if it is not in this range, it can be considered as
16938 // a register.
16939 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16940 isInt<12>(TrueSImm - FalseSImm)) {
16941 SDValue SRA =
16942 DAG.getNode(ISD::SRA, DL, VT, LHS,
16943 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16944 SDValue AND =
16945 DAG.getNode(ISD::AND, DL, VT, SRA,
16946 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16947 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16948 }
16949
16950 if (CCVal == ISD::CondCode::SETGE)
16951 std::swap(TrueV, FalseV);
16952 }
16953
16954 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16955 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16956 {LHS, RHS, CC, TrueV, FalseV});
16957
16958 if (!Subtarget.hasConditionalMoveFusion()) {
16959 // (select c, -1, y) -> -c | y
16960 if (isAllOnesConstant(TrueV)) {
16961 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16962 SDValue Neg = DAG.getNegative(C, DL, VT);
16963 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16964 }
16965 // (select c, y, -1) -> -!c | y
16966 if (isAllOnesConstant(FalseV)) {
16967 SDValue C =
16968 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16969 SDValue Neg = DAG.getNegative(C, DL, VT);
16970 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16971 }
16972
16973 // (select c, 0, y) -> -!c & y
16974 if (isNullConstant(TrueV)) {
16975 SDValue C =
16976 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16977 SDValue Neg = DAG.getNegative(C, DL, VT);
16978 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16979 }
16980 // (select c, y, 0) -> -c & y
16981 if (isNullConstant(FalseV)) {
16982 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16983 SDValue Neg = DAG.getNegative(C, DL, VT);
16984 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16985 }
16986 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16987 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16988 if (((isOneConstant(FalseV) && LHS == TrueV &&
16989 CCVal == ISD::CondCode::SETNE) ||
16990 (isOneConstant(TrueV) && LHS == FalseV &&
16991 CCVal == ISD::CondCode::SETEQ)) &&
16992 isNullConstant(RHS)) {
16993 // freeze it to be safe.
16994 LHS = DAG.getFreeze(LHS);
16995 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16996 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16997 }
16998 }
16999
17000 // If both true/false are an xor with 1, pull through the select.
17001 // This can occur after op legalization if both operands are setccs that
17002 // require an xor to invert.
17003 // FIXME: Generalize to other binary ops with identical operand?
17004 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17005 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17006 isOneConstant(TrueV.getOperand(1)) &&
17007 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17008 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17009 TrueV.getOperand(0), FalseV.getOperand(0));
17010 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17011 }
17012
17013 return SDValue();
17014 }
17015 case RISCVISD::BR_CC: {
17016 SDValue LHS = N->getOperand(1);
17017 SDValue RHS = N->getOperand(2);
17018 SDValue CC = N->getOperand(3);
17019 SDLoc DL(N);
17020
17021 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17022 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17023 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17024
17025 return SDValue();
17026 }
17027 case ISD::BITREVERSE:
17028 return performBITREVERSECombine(N, DAG, Subtarget);
17029 case ISD::FP_TO_SINT:
17030 case ISD::FP_TO_UINT:
17031 return performFP_TO_INTCombine(N, DCI, Subtarget);
17032 case ISD::FP_TO_SINT_SAT:
17033 case ISD::FP_TO_UINT_SAT:
17034 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17035 case ISD::FCOPYSIGN: {
17036 EVT VT = N->getValueType(0);
17037 if (!VT.isVector())
17038 break;
17039 // There is a form of VFSGNJ which injects the negated sign of its second
17040 // operand. Try and bubble any FNEG up after the extend/round to produce
17041 // this optimized pattern. Avoid modifying cases where FP_ROUND and
17042 // TRUNC=1.
17043 SDValue In2 = N->getOperand(1);
17044 // Avoid cases where the extend/round has multiple uses, as duplicating
17045 // those is typically more expensive than removing a fneg.
17046 if (!In2.hasOneUse())
17047 break;
17048 if (In2.getOpcode() != ISD::FP_EXTEND &&
17049 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17050 break;
17051 In2 = In2.getOperand(0);
17052 if (In2.getOpcode() != ISD::FNEG)
17053 break;
17054 SDLoc DL(N);
17055 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17056 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17057 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17058 }
17059 case ISD::MGATHER: {
17060 const auto *MGN = cast<MaskedGatherSDNode>(N);
17061 const EVT VT = N->getValueType(0);
17062 SDValue Index = MGN->getIndex();
17063 SDValue ScaleOp = MGN->getScale();
17064 ISD::MemIndexType IndexType = MGN->getIndexType();
17065 assert(!MGN->isIndexScaled() &&
17066 "Scaled gather/scatter should not be formed");
17067
17068 SDLoc DL(N);
17069 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17070 return DAG.getMaskedGather(
17071 N->getVTList(), MGN->getMemoryVT(), DL,
17072 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17073 MGN->getBasePtr(), Index, ScaleOp},
17074 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17075
17076 if (narrowIndex(Index, IndexType, DAG))
17077 return DAG.getMaskedGather(
17078 N->getVTList(), MGN->getMemoryVT(), DL,
17079 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17080 MGN->getBasePtr(), Index, ScaleOp},
17081 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17082
17083 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17084 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17085 // The sequence will be XLenVT, not the type of Index. Tell
17086 // isSimpleVIDSequence this so we avoid overflow.
17087 if (std::optional<VIDSequence> SimpleVID =
17088 isSimpleVIDSequence(Index, Subtarget.getXLen());
17089 SimpleVID && SimpleVID->StepDenominator == 1) {
17090 const int64_t StepNumerator = SimpleVID->StepNumerator;
17091 const int64_t Addend = SimpleVID->Addend;
17092
17093 // Note: We don't need to check alignment here since (by assumption
17094 // from the existence of the gather), our offsets must be sufficiently
17095 // aligned.
17096
17097 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17098 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17099 assert(IndexType == ISD::UNSIGNED_SCALED);
17100 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17101 DAG.getConstant(Addend, DL, PtrVT));
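// The VID addend has been folded into the base pointer above; the step
// numerator becomes the byte stride of the strided load below.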
17102
17103 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17104 VT.getVectorElementCount());
17105 SDValue StridedLoad =
17106 DAG.getStridedLoadVP(VT, DL, MGN->getChain(), BasePtr,
17107 DAG.getConstant(StepNumerator, DL, XLenVT),
17108 MGN->getMask(), EVL, MGN->getMemOperand());
17109 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17110 StridedLoad, MGN->getPassThru(), EVL);
17111 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17112 DL);
17113 }
17114 }
17115
17116 SmallVector<int> ShuffleMask;
17117 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17118 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17119 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17120 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17121 MGN->getMask(), DAG.getUNDEF(VT),
17122 MGN->getMemoryVT(), MGN->getMemOperand(),
17123 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17124 SDValue Shuffle =
17125 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17126 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17127 }
17128
17129 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17130 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17131 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
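// Keep only the even-lane indices and gather elements of twice the width,
// then bitcast the result back to the original vector type.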
17132 SmallVector<SDValue> NewIndices;
17133 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17134 NewIndices.push_back(Index.getOperand(i));
17135 EVT IndexVT = Index.getValueType()
17136 .getHalfNumVectorElementsVT(*DAG.getContext());
17137 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17138
17139 unsigned ElementSize = VT.getScalarStoreSize();
17140 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17141 auto EltCnt = VT.getVectorElementCount();
17142 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17143 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17144 EltCnt.divideCoefficientBy(2));
17145 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17146 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17147 EltCnt.divideCoefficientBy(2));
17148 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17149
17150 SDValue Gather =
17151 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17152 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17153 Index, ScaleOp},
17154 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17155 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17156 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17157 }
17158 break;
17159 }
17160 case ISD::MSCATTER:{
17161 const auto *MSN = cast<MaskedScatterSDNode>(N);
17162 SDValue Index = MSN->getIndex();
17163 SDValue ScaleOp = MSN->getScale();
17164 ISD::MemIndexType IndexType = MSN->getIndexType();
17165 assert(!MSN->isIndexScaled() &&
17166 "Scaled gather/scatter should not be formed");
17167
17168 SDLoc DL(N);
17169 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17170 return DAG.getMaskedScatter(
17171 N->getVTList(), MSN->getMemoryVT(), DL,
17172 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17173 Index, ScaleOp},
17174 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17175
17176 if (narrowIndex(Index, IndexType, DAG))
17177 return DAG.getMaskedScatter(
17178 N->getVTList(), MSN->getMemoryVT(), DL,
17179 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17180 Index, ScaleOp},
17181 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17182
17183 EVT VT = MSN->getValue()->getValueType(0);
17184 SmallVector<int> ShuffleMask;
17185 if (!MSN->isTruncatingStore() &&
17186 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17187 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17188 DAG.getUNDEF(VT), ShuffleMask);
17189 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17190 DAG.getUNDEF(XLenVT), MSN->getMask(),
17191 MSN->getMemoryVT(), MSN->getMemOperand(),
17192 ISD::UNINDEXED, false);
17193 }
17194 break;
17195 }
17196 case ISD::VP_GATHER: {
17197 const auto *VPGN = cast<VPGatherSDNode>(N);
17198 SDValue Index = VPGN->getIndex();
17199 SDValue ScaleOp = VPGN->getScale();
17200 ISD::MemIndexType IndexType = VPGN->getIndexType();
17201 assert(!VPGN->isIndexScaled() &&
17202 "Scaled gather/scatter should not be formed");
17203
17204 SDLoc DL(N);
17205 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17206 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17207 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17208 ScaleOp, VPGN->getMask(),
17209 VPGN->getVectorLength()},
17210 VPGN->getMemOperand(), IndexType);
17211
17212 if (narrowIndex(Index, IndexType, DAG))
17213 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17214 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17215 ScaleOp, VPGN->getMask(),
17216 VPGN->getVectorLength()},
17217 VPGN->getMemOperand(), IndexType);
17218
17219 break;
17220 }
17221 case ISD::VP_SCATTER: {
17222 const auto *VPSN = cast<VPScatterSDNode>(N);
17223 SDValue Index = VPSN->getIndex();
17224 SDValue ScaleOp = VPSN->getScale();
17225 ISD::MemIndexType IndexType = VPSN->getIndexType();
17226 assert(!VPSN->isIndexScaled() &&
17227 "Scaled gather/scatter should not be formed");
17228
17229 SDLoc DL(N);
17230 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17231 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17232 {VPSN->getChain(), VPSN->getValue(),
17233 VPSN->getBasePtr(), Index, ScaleOp,
17234 VPSN->getMask(), VPSN->getVectorLength()},
17235 VPSN->getMemOperand(), IndexType);
17236
17237 if (narrowIndex(Index, IndexType, DAG))
17238 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17239 {VPSN->getChain(), VPSN->getValue(),
17240 VPSN->getBasePtr(), Index, ScaleOp,
17241 VPSN->getMask(), VPSN->getVectorLength()},
17242 VPSN->getMemOperand(), IndexType);
17243 break;
17244 }
17245 case RISCVISD::SHL_VL:
17246 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17247 return V;
17248 [[fallthrough]];
17249 case RISCVISD::SRA_VL:
17250 case RISCVISD::SRL_VL: {
17251    SDValue ShAmt = N->getOperand(1);
17252    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
17253 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17254 SDLoc DL(N);
17255 SDValue VL = N->getOperand(4);
17256 EVT VT = N->getValueType(0);
17257 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17258 ShAmt.getOperand(1), VL);
17259 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
17260 N->getOperand(2), N->getOperand(3), N->getOperand(4));
17261 }
17262 break;
17263 }
17264 case ISD::SRA:
17265 if (SDValue V = performSRACombine(N, DAG, Subtarget))
17266 return V;
17267 [[fallthrough]];
17268 case ISD::SRL:
17269 case ISD::SHL: {
17270 if (N->getOpcode() == ISD::SHL) {
17271 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17272 return V;
17273 }
17274    SDValue ShAmt = N->getOperand(1);
17275    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
17276 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17277 SDLoc DL(N);
17278 EVT VT = N->getValueType(0);
17279 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17280 ShAmt.getOperand(1),
17281 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
17282 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
17283 }
17284 break;
17285 }
17286 case RISCVISD::ADD_VL:
17287 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17288 return V;
17289    return combineToVWMACC(N, DAG, Subtarget);
17290  case RISCVISD::VWADD_W_VL:
17291  case RISCVISD::VWADDU_W_VL:
17292  case RISCVISD::VWSUB_W_VL:
17293  case RISCVISD::VWSUBU_W_VL:
17294 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
17295 case RISCVISD::SUB_VL:
17296 case RISCVISD::MUL_VL:
17297    return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
17298  case RISCVISD::VFMADD_VL:
17299  case RISCVISD::VFNMADD_VL:
17300  case RISCVISD::VFMSUB_VL:
17301  case RISCVISD::VFNMSUB_VL:
17302  case RISCVISD::STRICT_VFMADD_VL:
17303  case RISCVISD::STRICT_VFNMADD_VL:
17304  case RISCVISD::STRICT_VFMSUB_VL:
17305  case RISCVISD::STRICT_VFNMSUB_VL:
17306 return performVFMADD_VLCombine(N, DAG, Subtarget);
17307 case RISCVISD::FADD_VL:
17308 case RISCVISD::FSUB_VL:
17309  case RISCVISD::FMUL_VL:
17310  case RISCVISD::VFWADD_W_VL:
17311 case RISCVISD::VFWSUB_W_VL: {
17312 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
17313 !Subtarget.hasVInstructionsF16())
17314 return SDValue();
17315 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
17316 }
17317 case ISD::LOAD:
17318 case ISD::STORE: {
17319 if (DCI.isAfterLegalizeDAG())
17320 if (SDValue V = performMemPairCombine(N, DCI))
17321 return V;
17322
17323 if (N->getOpcode() != ISD::STORE)
17324 break;
17325
17326 auto *Store = cast<StoreSDNode>(N);
17327 SDValue Chain = Store->getChain();
17328 EVT MemVT = Store->getMemoryVT();
17329 SDValue Val = Store->getValue();
17330 SDLoc DL(N);
17331
17332 bool IsScalarizable =
17333 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
17334 Store->isSimple() &&
17335 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
17336 isPowerOf2_64(MemVT.getSizeInBits()) &&
17337 MemVT.getSizeInBits() <= Subtarget.getXLen();
17338
17339 // If sufficiently aligned we can scalarize stores of constant vectors of
17340 // any power-of-two size up to XLen bits, provided that they aren't too
17341 // expensive to materialize.
17342 // vsetivli zero, 2, e8, m1, ta, ma
17343 // vmv.v.i v8, 4
17344 // vse64.v v8, (a0)
17345 // ->
17346 // li a1, 1028
17347 // sh a1, 0(a0)
17348    if (DCI.isBeforeLegalize() && IsScalarizable &&
17349        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
17350 // Get the constant vector bits
17351 APInt NewC(Val.getValueSizeInBits(), 0);
17352 uint64_t EltSize = Val.getScalarValueSizeInBits();
17353 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
17354 if (Val.getOperand(i).isUndef())
17355 continue;
17356 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
17357 i * EltSize);
17358 }
17359 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17360
17361 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
17362                                     true) <= 2 &&
17363          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17364 NewVT, *Store->getMemOperand())) {
17365 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
17366 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
17367 Store->getPointerInfo(), Store->getOriginalAlign(),
17368 Store->getMemOperand()->getFlags());
17369 }
17370 }
17371
17372 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
17373 // vsetivli zero, 2, e16, m1, ta, ma
17374 // vle16.v v8, (a0)
17375 // vse16.v v8, (a1)
17376 if (auto *L = dyn_cast<LoadSDNode>(Val);
17377 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
17378 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
17379 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
17380 L->getMemoryVT() == MemVT) {
17381      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17382      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17383                                         NewVT, *Store->getMemOperand()) &&
17384          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17385 NewVT, *L->getMemOperand())) {
17386 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17387 L->getPointerInfo(), L->getOriginalAlign(),
17388 L->getMemOperand()->getFlags());
17389 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17390 Store->getPointerInfo(), Store->getOriginalAlign(),
17391 Store->getMemOperand()->getFlags());
17392 }
17393 }
17394
17395 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17396 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17397 // any illegal types.
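    // Illustrative example: rather than
    //   vmv.x.s a1, v8
    //   sd a1, 0(a0)
    // this emits a unit-length vector store, e.g.
    //   vsetivli zero, 1, e64, m1, ta, ma
    //   vse64.v v8, (a0)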
17398 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17399        (DCI.isAfterLegalizeDAG() &&
17400         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17401 isNullConstant(Val.getOperand(1)))) {
17402 SDValue Src = Val.getOperand(0);
17403 MVT VecVT = Src.getSimpleValueType();
17404 // VecVT should be scalable and memory VT should match the element type.
17405 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17406 MemVT == VecVT.getVectorElementType()) {
17407 SDLoc DL(N);
17408 MVT MaskVT = getMaskTypeFor(VecVT);
17409 return DAG.getStoreVP(
17410 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17411 DAG.getConstant(1, DL, MaskVT),
17412 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17413 Store->getMemOperand(), Store->getAddressingMode(),
17414 Store->isTruncatingStore(), /*IsCompress*/ false);
17415 }
17416 }
17417
17418 break;
17419 }
17420 case ISD::SPLAT_VECTOR: {
17421 EVT VT = N->getValueType(0);
17422 // Only perform this combine on legal MVT types.
17423 if (!isTypeLegal(VT))
17424 break;
17425 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17426 DAG, Subtarget))
17427 return Gather;
17428 break;
17429 }
17430 case ISD::BUILD_VECTOR:
17431 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17432 return V;
17433    break;
17434  case ISD::CONCAT_VECTORS:
17435 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17436 return V;
17437    break;
17438  case ISD::INSERT_VECTOR_ELT:
17439 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17440 return V;
17441 break;
17442 case RISCVISD::VFMV_V_F_VL: {
17443 const MVT VT = N->getSimpleValueType(0);
17444 SDValue Passthru = N->getOperand(0);
17445 SDValue Scalar = N->getOperand(1);
17446 SDValue VL = N->getOperand(2);
17447
17448 // If VL is 1, we can use vfmv.s.f.
17449 if (isOneConstant(VL))
17450 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17451 break;
17452 }
17453 case RISCVISD::VMV_V_X_VL: {
17454 const MVT VT = N->getSimpleValueType(0);
17455 SDValue Passthru = N->getOperand(0);
17456 SDValue Scalar = N->getOperand(1);
17457 SDValue VL = N->getOperand(2);
17458
17459 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17460 // scalar input.
17461 unsigned ScalarSize = Scalar.getValueSizeInBits();
17462 unsigned EltWidth = VT.getScalarSizeInBits();
17463 if (ScalarSize > EltWidth && Passthru.isUndef())
17464 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17465 return SDValue(N, 0);
17466
17467 // If VL is 1 and the scalar value won't benefit from immediate, we can
17468 // use vmv.s.x.
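    // For example (illustrative), with VL = 1 and a scalar in a1 that does
    // not fit a 5-bit immediate, vmv.v.x v8, a1 can instead be emitted as
    // vmv.s.x v8, a1, since only the first element is written.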
17469 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17470 if (isOneConstant(VL) &&
17471 (!Const || Const->isZero() ||
17472 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17473 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17474
17475 break;
17476 }
17477 case RISCVISD::VFMV_S_F_VL: {
17478 SDValue Src = N->getOperand(1);
17479 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17480 // into an undef vector.
17481 // TODO: Could use a vslide or vmv.v.v for non-undef.
17482 if (N->getOperand(0).isUndef() &&
17483 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17484 isNullConstant(Src.getOperand(1)) &&
17485 Src.getOperand(0).getValueType().isScalableVector()) {
17486 EVT VT = N->getValueType(0);
17487      EVT SrcVT = Src.getOperand(0).getValueType();
17488      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
17489 // Widths match, just return the original vector.
17490 if (SrcVT == VT)
17491 return Src.getOperand(0);
17492 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17493 }
17494 [[fallthrough]];
17495 }
17496 case RISCVISD::VMV_S_X_VL: {
17497 const MVT VT = N->getSimpleValueType(0);
17498 SDValue Passthru = N->getOperand(0);
17499 SDValue Scalar = N->getOperand(1);
17500 SDValue VL = N->getOperand(2);
17501
17502 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17503 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17504 return Scalar.getOperand(0);
17505
17506 // Use M1 or smaller to avoid over constraining register allocation
17507 const MVT M1VT = getLMUL1VT(VT);
17508 if (M1VT.bitsLT(VT)) {
17509 SDValue M1Passthru =
17510 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17511 DAG.getVectorIdxConstant(0, DL));
17512 SDValue Result =
17513 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17514 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17515 DAG.getVectorIdxConstant(0, DL));
17516 return Result;
17517 }
17518
17519 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
17520 // higher would involve overly constraining the register allocator for
17521 // no purpose.
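    // For example (illustrative), vmv.s.x v8, a0 with a0 holding the constant
    // 3, an undef passthru and LMUL <= 1 can instead be emitted as
    // vmv.v.i v8, 3, avoiding the scalar register.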
17522 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17523 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17524 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17525 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17526
17527 break;
17528 }
17529 case RISCVISD::VMV_X_S: {
17530 SDValue Vec = N->getOperand(0);
17531 MVT VecVT = N->getOperand(0).getSimpleValueType();
17532 const MVT M1VT = getLMUL1VT(VecVT);
17533 if (M1VT.bitsLT(VecVT)) {
17534 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17535 DAG.getVectorIdxConstant(0, DL));
17536 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17537 }
17538 break;
17539  }
17540  case ISD::INTRINSIC_VOID:
17541  case ISD::INTRINSIC_W_CHAIN:
17542  case ISD::INTRINSIC_WO_CHAIN: {
17543 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17544 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17545 switch (IntNo) {
17546 // By default we do not combine any intrinsic.
17547 default:
17548 return SDValue();
17549 case Intrinsic::riscv_masked_strided_load: {
17550 MVT VT = N->getSimpleValueType(0);
17551 auto *Load = cast<MemIntrinsicSDNode>(N);
17552 SDValue PassThru = N->getOperand(2);
17553 SDValue Base = N->getOperand(3);
17554 SDValue Stride = N->getOperand(4);
17555 SDValue Mask = N->getOperand(5);
17556
17557 // If the stride is equal to the element size in bytes, we can use
17558 // a masked.load.
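      // For example, a strided load of i32 elements with a 4-byte stride reads
      // consecutive words, so it is equivalent to a regular masked load.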
17559 const unsigned ElementSize = VT.getScalarStoreSize();
17560 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17561 StrideC && StrideC->getZExtValue() == ElementSize)
17562 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
17563 DAG.getUNDEF(XLenVT), Mask, PassThru,
17564                                 Load->getMemoryVT(), Load->getMemOperand(),
17565                                 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17566 return SDValue();
17567 }
17568 case Intrinsic::riscv_masked_strided_store: {
17569 auto *Store = cast<MemIntrinsicSDNode>(N);
17570 SDValue Value = N->getOperand(2);
17571 SDValue Base = N->getOperand(3);
17572 SDValue Stride = N->getOperand(4);
17573 SDValue Mask = N->getOperand(5);
17574
17575 // If the stride is equal to the element size in bytes, we can use
17576 // a masked.store.
17577 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
17578 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17579 StrideC && StrideC->getZExtValue() == ElementSize)
17580 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
17581 DAG.getUNDEF(XLenVT), Mask,
17582 Value.getValueType(), Store->getMemOperand(),
17583 ISD::UNINDEXED, false);
17584 return SDValue();
17585 }
17586 case Intrinsic::riscv_vcpop:
17587 case Intrinsic::riscv_vcpop_mask:
17588 case Intrinsic::riscv_vfirst:
17589 case Intrinsic::riscv_vfirst_mask: {
17590 SDValue VL = N->getOperand(2);
17591 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17592 IntNo == Intrinsic::riscv_vfirst_mask)
17593 VL = N->getOperand(3);
17594 if (!isNullConstant(VL))
17595 return SDValue();
17596 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17597 SDLoc DL(N);
17598 EVT VT = N->getValueType(0);
17599 if (IntNo == Intrinsic::riscv_vfirst ||
17600 IntNo == Intrinsic::riscv_vfirst_mask)
17601 return DAG.getConstant(-1, DL, VT);
17602 return DAG.getConstant(0, DL, VT);
17603 }
17604 }
17605 }
17606  case ISD::BITCAST: {
17607    assert(Subtarget.useRVVForFixedLengthVectors());
17608 SDValue N0 = N->getOperand(0);
17609 EVT VT = N->getValueType(0);
17610 EVT SrcVT = N0.getValueType();
17611 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17612 // type, widen both sides to avoid a trip through memory.
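    // For example, a bitcast of v2i1 to an (illegal) i2 becomes, roughly:
    //   concat_vectors (v2i1 x, undef, undef, undef) -> v8i1
    //   bitcast v8i1                                 -> i8
    //   truncate i8                                  -> i2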
17613 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17614 VT.isScalarInteger()) {
17615 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17616 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17617 Ops[0] = N0;
17618 SDLoc DL(N);
17619 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17620 N0 = DAG.getBitcast(MVT::i8, N0);
17621 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17622 }
17623
17624 return SDValue();
17625 }
17626 }
17627
17628 return SDValue();
17629}
17630
17631bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17632 EVT XVT, unsigned KeptBits) const {
17633  // For vectors, we don't have a preference.
17634 if (XVT.isVector())
17635 return false;
17636
17637 if (XVT != MVT::i32 && XVT != MVT::i64)
17638 return false;
17639
17640 // We can use sext.w for RV64 or an srai 31 on RV32.
17641 if (KeptBits == 32 || KeptBits == 64)
17642 return true;
17643
17644 // With Zbb we can use sext.h/sext.b.
17645 return Subtarget.hasStdExtZbb() &&
17646 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17647 KeptBits == 16);
17648}
17649
17650bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17651 const SDNode *N, CombineLevel Level) const {
17652 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17653 N->getOpcode() == ISD::SRL) &&
17654 "Expected shift op");
17655
17656 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17657 // materialised in fewer instructions than `(OP _, c1)`:
17658 //
17659 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17660 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
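  // As an illustration: with c1 = 1 and c2 = 3, both 1 and 1 << 3 = 8 fit in
  // an ADDI immediate, so the fold is allowed; with c1 = 2047 and c2 = 12,
  // c1 << c2 = 0x7FF000 no longer fits in simm12 while c1 does, so the fold
  // is blocked below.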
17661 SDValue N0 = N->getOperand(0);
17662 EVT Ty = N0.getValueType();
17663 if (Ty.isScalarInteger() &&
17664 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17665 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17666 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17667 if (C1 && C2) {
17668 const APInt &C1Int = C1->getAPIntValue();
17669 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17670
17671 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17672 // and the combine should happen, to potentially allow further combines
17673 // later.
17674 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17675 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17676 return true;
17677
17678 // We can materialise `c1` in an add immediate, so it's "free", and the
17679 // combine should be prevented.
17680      if (C1Int.getSignificantBits() <= 64 &&
17681          isLegalAddImmediate(C1Int.getSExtValue()))
17682 return false;
17683
17684 // Neither constant will fit into an immediate, so find materialisation
17685 // costs.
17686 int C1Cost =
17687 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17688 /*CompressionCost*/ true);
17689 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17690 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17691 /*CompressionCost*/ true);
17692
17693 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17694 // combine should be prevented.
17695 if (C1Cost < ShiftedC1Cost)
17696 return false;
17697 }
17698 }
17699 return true;
17700}
17701
17702bool RISCVTargetLowering::targetShrinkDemandedConstant(
17703 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17704 TargetLoweringOpt &TLO) const {
17705 // Delay this optimization as late as possible.
17706 if (!TLO.LegalOps)
17707 return false;
17708
17709 EVT VT = Op.getValueType();
17710 if (VT.isVector())
17711 return false;
17712
17713 unsigned Opcode = Op.getOpcode();
17714 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17715 return false;
17716
17717 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17718 if (!C)
17719 return false;
17720
17721 const APInt &Mask = C->getAPIntValue();
17722
17723 // Clear all non-demanded bits initially.
17724 APInt ShrunkMask = Mask & DemandedBits;
17725
17726 // Try to make a smaller immediate by setting undemanded bits.
17727
17728 APInt ExpandedMask = Mask | ~DemandedBits;
17729
17730 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17731 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17732 };
17733 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17734 if (NewMask == Mask)
17735 return true;
17736 SDLoc DL(Op);
17737 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17738 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17739 Op.getOperand(0), NewC);
17740 return TLO.CombineTo(Op, NewOp);
17741 };
17742
17743 // If the shrunk mask fits in sign extended 12 bits, let the target
17744 // independent code apply it.
17745 if (ShrunkMask.isSignedIntN(12))
17746 return false;
17747
17748 // And has a few special cases for zext.
17749 if (Opcode == ISD::AND) {
17750 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17751 // otherwise use SLLI + SRLI.
17752 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17753 if (IsLegalMask(NewMask))
17754 return UseMask(NewMask);
17755
17756 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17757 if (VT == MVT::i64) {
17758 APInt NewMask = APInt(64, 0xffffffff);
17759 if (IsLegalMask(NewMask))
17760 return UseMask(NewMask);
17761 }
17762 }
17763
17764 // For the remaining optimizations, we need to be able to make a negative
17765 // number through a combination of mask and undemanded bits.
17766 if (!ExpandedMask.isNegative())
17767 return false;
17768
17769 // What is the fewest number of bits we need to represent the negative number.
17770 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17771
17772 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17773 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17774 // If we can't create a simm12, we shouldn't change opaque constants.
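  // Worked example (illustrative): on RV64, for (and X, 0xFFFFFFF0) where only
  // the low 32 bits are demanded, ShrunkMask = 0xFFFFFFF0 is not a simm12, but
  // setting the undemanded upper bits yields 0xFFFFFFFFFFFFFFF0 (-16), which
  // is, so the operation can use a single ANDI.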
17775 APInt NewMask = ShrunkMask;
17776 if (MinSignedBits <= 12)
17777 NewMask.setBitsFrom(11);
17778 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17779 NewMask.setBitsFrom(31);
17780 else
17781 return false;
17782
17783 // Check that our new mask is a subset of the demanded mask.
17784 assert(IsLegalMask(NewMask));
17785 return UseMask(NewMask);
17786}
17787
17788static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17789 static const uint64_t GREVMasks[] = {
17790 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17791 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17792
17793 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17794 unsigned Shift = 1 << Stage;
17795 if (ShAmt & Shift) {
17796 uint64_t Mask = GREVMasks[Stage];
17797 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17798 if (IsGORC)
17799 Res |= x;
17800 x = Res;
17801 }
17802 }
17803
17804 return x;
17805}
17806
17807void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17808 KnownBits &Known,
17809 const APInt &DemandedElts,
17810 const SelectionDAG &DAG,
17811 unsigned Depth) const {
17812 unsigned BitWidth = Known.getBitWidth();
17813 unsigned Opc = Op.getOpcode();
17814 assert((Opc >= ISD::BUILTIN_OP_END ||
17815 Opc == ISD::INTRINSIC_WO_CHAIN ||
17816 Opc == ISD::INTRINSIC_W_CHAIN ||
17817 Opc == ISD::INTRINSIC_VOID) &&
17818 "Should use MaskedValueIsZero if you don't know whether Op"
17819 " is a target node!");
17820
17821 Known.resetAll();
17822 switch (Opc) {
17823 default: break;
17824 case RISCVISD::SELECT_CC: {
17825 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17826 // If we don't know any bits, early out.
17827 if (Known.isUnknown())
17828 break;
17829 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17830
17831 // Only known if known in both the LHS and RHS.
17832 Known = Known.intersectWith(Known2);
17833 break;
17834  }
17835  case RISCVISD::CZERO_EQZ:
17836  case RISCVISD::CZERO_NEZ:
17837 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17838 // Result is either all zero or operand 0. We can propagate zeros, but not
17839 // ones.
17840 Known.One.clearAllBits();
17841 break;
17842 case RISCVISD::REMUW: {
17843 KnownBits Known2;
17844 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17845 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17846 // We only care about the lower 32 bits.
17847 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17848 // Restore the original width by sign extending.
17849 Known = Known.sext(BitWidth);
17850 break;
17851 }
17852 case RISCVISD::DIVUW: {
17853 KnownBits Known2;
17854 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17855 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17856 // We only care about the lower 32 bits.
17857 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17858 // Restore the original width by sign extending.
17859 Known = Known.sext(BitWidth);
17860 break;
17861 }
17862 case RISCVISD::SLLW: {
17863 KnownBits Known2;
17864 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17865 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17866 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17867 // Restore the original width by sign extending.
17868 Known = Known.sext(BitWidth);
17869 break;
17870 }
17871 case RISCVISD::CTZW: {
17872 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17873 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17874 unsigned LowBits = llvm::bit_width(PossibleTZ);
17875 Known.Zero.setBitsFrom(LowBits);
17876 break;
17877 }
17878 case RISCVISD::CLZW: {
17879 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17880 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17881 unsigned LowBits = llvm::bit_width(PossibleLZ);
17882 Known.Zero.setBitsFrom(LowBits);
17883 break;
17884 }
17885 case RISCVISD::BREV8:
17886 case RISCVISD::ORC_B: {
17887 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17888 // control value of 7 is equivalent to brev8 and orc.b.
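    // For example, brev8 maps 0x01 in every byte to 0x80, while orc.b maps
    // each non-zero byte to 0xFF and leaves zero bytes as 0x00.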
17889 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17890 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17891 // To compute zeros, we need to invert the value and invert it back after.
17892 Known.Zero =
17893 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17894 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17895 break;
17896 }
17897 case RISCVISD::READ_VLENB: {
17898 // We can use the minimum and maximum VLEN values to bound VLENB. We
17899 // know VLEN must be a power of two.
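    // For example, with a minimum VLEN of 128 and a maximum of 65536, VLENB is
    // in [16, 8192]: the low 4 bits and all bits above bit 13 are known zero.
    // If the minimum and maximum agree, VLENB is known exactly.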
17900 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17901 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17902 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17903 Known.Zero.setLowBits(Log2_32(MinVLenB));
17904 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17905 if (MaxVLenB == MinVLenB)
17906 Known.One.setBit(Log2_32(MinVLenB));
17907 break;
17908 }
17909 case RISCVISD::FCLASS: {
17910 // fclass will only set one of the low 10 bits.
17911 Known.Zero.setBitsFrom(10);
17912 break;
17913  }
17914  case ISD::INTRINSIC_W_CHAIN:
17915  case ISD::INTRINSIC_WO_CHAIN: {
17916 unsigned IntNo =
17917 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17918 switch (IntNo) {
17919 default:
17920 // We can't do anything for most intrinsics.
17921 break;
17922 case Intrinsic::riscv_vsetvli:
17923 case Intrinsic::riscv_vsetvlimax: {
17924 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17925 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17926 RISCVII::VLMUL VLMUL =
17927 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17928 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17929 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17930 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17931 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17932
17933 // Result of vsetvli must be not larger than AVL.
17934 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17935 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17936
17937 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17938 if (BitWidth > KnownZeroFirstBit)
17939 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17940 break;
17941 }
17942 }
17943 break;
17944 }
17945 }
17946}
17947
17948unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17949 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17950 unsigned Depth) const {
17951 switch (Op.getOpcode()) {
17952 default:
17953 break;
17954 case RISCVISD::SELECT_CC: {
17955 unsigned Tmp =
17956 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17957 if (Tmp == 1) return 1; // Early out.
17958 unsigned Tmp2 =
17959 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17960 return std::min(Tmp, Tmp2);
17961  }
17962  case RISCVISD::CZERO_EQZ:
17963  case RISCVISD::CZERO_NEZ:
17964 // Output is either all zero or operand 0. We can propagate sign bit count
17965 // from operand 0.
17966 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17967 case RISCVISD::ABSW: {
17968 // We expand this at isel to negw+max. The result will have 33 sign bits
17969 // if the input has at least 33 sign bits.
17970 unsigned Tmp =
17971 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17972 if (Tmp < 33) return 1;
17973 return 33;
17974 }
17975 case RISCVISD::SLLW:
17976 case RISCVISD::SRAW:
17977 case RISCVISD::SRLW:
17978 case RISCVISD::DIVW:
17979 case RISCVISD::DIVUW:
17980 case RISCVISD::REMUW:
17981 case RISCVISD::ROLW:
17982  case RISCVISD::RORW:
17983  case RISCVISD::FCVT_W_RV64:
17984  case RISCVISD::FCVT_WU_RV64:
17985  case RISCVISD::STRICT_FCVT_W_RV64:
17986  case RISCVISD::STRICT_FCVT_WU_RV64:
17987 // TODO: As the result is sign-extended, this is conservatively correct. A
17988 // more precise answer could be calculated for SRAW depending on known
17989 // bits in the shift amount.
17990 return 33;
17991 case RISCVISD::VMV_X_S: {
17992 // The number of sign bits of the scalar result is computed by obtaining the
17993 // element type of the input vector operand, subtracting its width from the
17994 // XLEN, and then adding one (sign bit within the element type). If the
17995 // element type is wider than XLen, the least-significant XLEN bits are
17996 // taken.
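    // For example, on RV64 an extract from a vector of i8 elements yields
    // 64 - 8 + 1 = 57 known sign bits.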
17997 unsigned XLen = Subtarget.getXLen();
17998 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17999 if (EltBits <= XLen)
18000 return XLen - EltBits + 1;
18001 break;
18002  }
18003  case ISD::INTRINSIC_W_CHAIN: {
18004 unsigned IntNo = Op.getConstantOperandVal(1);
18005 switch (IntNo) {
18006 default:
18007 break;
18008 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
18009 case Intrinsic::riscv_masked_atomicrmw_add_i64:
18010 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
18011 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
18012 case Intrinsic::riscv_masked_atomicrmw_max_i64:
18013 case Intrinsic::riscv_masked_atomicrmw_min_i64:
18014 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
18015 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
18016 case Intrinsic::riscv_masked_cmpxchg_i64:
18017 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
18018 // narrow atomic operation. These are implemented using atomic
18019 // operations at the minimum supported atomicrmw/cmpxchg width whose
18020 // result is then sign extended to XLEN. With +A, the minimum width is
18021 // 32 for both 64 and 32.
18022      assert(Subtarget.getXLen() == 64);
18023      assert(getMinCmpXchgSizeInBits() == 32);
18024 assert(Subtarget.hasStdExtA());
18025 return 33;
18026 }
18027 break;
18028 }
18029 }
18030
18031 return 1;
18032}
18033
18034bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
18035 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18036 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
18037
18038 // TODO: Add more target nodes.
18039  switch (Op.getOpcode()) {
18040  case RISCVISD::SELECT_CC: {
18041 // Integer select_cc cannot create poison.
18042 // TODO: What are the FP poison semantics?
18043 // TODO: This instruction blocks poison from the unselected operand, can
18044 // we do anything with that?
18045 return !Op.getValueType().isInteger();
18046  }
18047  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
18048 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
18049}
18050
18051const Constant *
18052RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
18053 assert(Ld && "Unexpected null LoadSDNode");
18054 if (!ISD::isNormalLoad(Ld))
18055 return nullptr;
18056
18057 SDValue Ptr = Ld->getBasePtr();
18058
18059 // Only constant pools with no offset are supported.
18060 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
18061 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
18062 if (!CNode || CNode->isMachineConstantPoolEntry() ||
18063 CNode->getOffset() != 0)
18064 return nullptr;
18065
18066 return CNode;
18067 };
18068
18069 // Simple case, LLA.
18070 if (Ptr.getOpcode() == RISCVISD::LLA) {
18071 auto *CNode = GetSupportedConstantPool(Ptr);
18072 if (!CNode || CNode->getTargetFlags() != 0)
18073 return nullptr;
18074
18075 return CNode->getConstVal();
18076 }
18077
18078 // Look for a HI and ADD_LO pair.
18079 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
18080 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
18081 return nullptr;
18082
18083 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
18084 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
18085
18086 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
18087 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
18088 return nullptr;
18089
18090 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
18091 return nullptr;
18092
18093 return CNodeLo->getConstVal();
18094}
18095
18096static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
18097 MachineBasicBlock *BB) {
18098 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
18099
18100 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
18101 // Should the count have wrapped while it was being read, we need to try
18102 // again.
18103 // For example:
18104 // ```
18105 // read:
18106 // csrrs x3, counterh # load high word of counter
18107 // csrrs x2, counter # load low word of counter
18108 // csrrs x4, counterh # load high word of counter
18109 // bne x3, x4, read # check if high word reads match, otherwise try again
18110 // ```
18111
18112 MachineFunction &MF = *BB->getParent();
18113  const BasicBlock *LLVMBB = BB->getBasicBlock();
18114  MachineFunction::iterator It = ++BB->getIterator();
18115
18116 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
18117 MF.insert(It, LoopMBB);
18118
18119 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
18120 MF.insert(It, DoneMBB);
18121
18122 // Transfer the remainder of BB and its successor edges to DoneMBB.
18123 DoneMBB->splice(DoneMBB->begin(), BB,
18124                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
18125  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
18126
18127 BB->addSuccessor(LoopMBB);
18128
18129  MachineRegisterInfo &RegInfo = MF.getRegInfo();
18130 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18131 Register LoReg = MI.getOperand(0).getReg();
18132 Register HiReg = MI.getOperand(1).getReg();
18133 int64_t LoCounter = MI.getOperand(2).getImm();
18134 int64_t HiCounter = MI.getOperand(3).getImm();
18135 DebugLoc DL = MI.getDebugLoc();
18136  DebugLoc DL = MI.getDebugLoc();
18137  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
18138 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
18139 .addImm(HiCounter)
18140 .addReg(RISCV::X0);
18141 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
18142 .addImm(LoCounter)
18143 .addReg(RISCV::X0);
18144 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
18145 .addImm(HiCounter)
18146 .addReg(RISCV::X0);
18147
18148 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
18149 .addReg(HiReg)
18150 .addReg(ReadAgainReg)
18151 .addMBB(LoopMBB);
18152
18153 LoopMBB->addSuccessor(LoopMBB);
18154 LoopMBB->addSuccessor(DoneMBB);
18155
18156 MI.eraseFromParent();
18157
18158 return DoneMBB;
18159}
18160
18161static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
18162                                             MachineBasicBlock *BB,
18163 const RISCVSubtarget &Subtarget) {
18164 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18165
18166 MachineFunction &MF = *BB->getParent();
18167  DebugLoc DL = MI.getDebugLoc();
18168  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
18169  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
18170 Register LoReg = MI.getOperand(0).getReg();
18171 Register HiReg = MI.getOperand(1).getReg();
18172 Register SrcReg = MI.getOperand(2).getReg();
18173
18174 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18175 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18176
18177 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
18178                          RI, Register());
18179  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18180  MachineMemOperand *MMOLo =
18181      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
18182  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18183      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
18184 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
18185 .addFrameIndex(FI)
18186 .addImm(0)
18187 .addMemOperand(MMOLo);
18188 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
18189 .addFrameIndex(FI)
18190 .addImm(4)
18191 .addMemOperand(MMOHi);
18192 MI.eraseFromParent(); // The pseudo instruction is gone now.
18193 return BB;
18194}
18195
18196static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
18197                                                 MachineBasicBlock *BB,
18198 const RISCVSubtarget &Subtarget) {
18199 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
18200 "Unexpected instruction");
18201
18202 MachineFunction &MF = *BB->getParent();
18203  DebugLoc DL = MI.getDebugLoc();
18204  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
18205  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
18206 Register DstReg = MI.getOperand(0).getReg();
18207 Register LoReg = MI.getOperand(1).getReg();
18208 Register HiReg = MI.getOperand(2).getReg();
18209
18210 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
18211 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18212
18213  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18214  MachineMemOperand *MMOLo =
18215      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
18216  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18217      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
18218 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18219 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
18220 .addFrameIndex(FI)
18221 .addImm(0)
18222 .addMemOperand(MMOLo);
18223 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18224 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
18225 .addFrameIndex(FI)
18226 .addImm(4)
18227 .addMemOperand(MMOHi);
18228 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
18229 MI.eraseFromParent(); // The pseudo instruction is gone now.
18230 return BB;
18231}
18232
18233static bool isSelectPseudo(MachineInstr &MI) {
18234 switch (MI.getOpcode()) {
18235 default:
18236 return false;
18237 case RISCV::Select_GPR_Using_CC_GPR:
18238 case RISCV::Select_GPR_Using_CC_Imm:
18239 case RISCV::Select_FPR16_Using_CC_GPR:
18240 case RISCV::Select_FPR16INX_Using_CC_GPR:
18241 case RISCV::Select_FPR32_Using_CC_GPR:
18242 case RISCV::Select_FPR32INX_Using_CC_GPR:
18243 case RISCV::Select_FPR64_Using_CC_GPR:
18244 case RISCV::Select_FPR64INX_Using_CC_GPR:
18245 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18246 return true;
18247 }
18248}
18249
18250static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
18251 unsigned RelOpcode, unsigned EqOpcode,
18252 const RISCVSubtarget &Subtarget) {
18253 DebugLoc DL = MI.getDebugLoc();
18254 Register DstReg = MI.getOperand(0).getReg();
18255 Register Src1Reg = MI.getOperand(1).getReg();
18256  Register Src2Reg = MI.getOperand(2).getReg();
18257  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
18258  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18259  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
18260
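  // The emitted sequence is roughly (illustrative, for PseudoQuietFLE_S):
  //   frflags t0               # save FFLAGS
  //   fle.s   rd, rs1, rs2
  //   fsflags t0               # restore FFLAGS
  //   feq.s   zero, rs1, rs2   # raise invalid for signaling NaNs only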
18261 // Save the current FFLAGS.
18262 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
18263
18264 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
18265 .addReg(Src1Reg)
18266                 .addReg(Src2Reg);
18267  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18268    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18269
18270 // Restore the FFLAGS.
18271 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18272 .addReg(SavedFFlags, RegState::Kill);
18273
18274 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
18275 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
18276 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
18277                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
18278  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18279    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
18280
18281 // Erase the pseudoinstruction.
18282 MI.eraseFromParent();
18283 return BB;
18284}
18285
18286static MachineBasicBlock *
18287EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
18288 MachineBasicBlock *ThisMBB,
18289 const RISCVSubtarget &Subtarget) {
18290  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18291 // Without this, custom-inserter would have generated:
18292 //
18293 // A
18294 // | \
18295 // | B
18296 // | /
18297 // C
18298 // | \
18299 // | D
18300 // | /
18301 // E
18302 //
18303 // A: X = ...; Y = ...
18304 // B: empty
18305 // C: Z = PHI [X, A], [Y, B]
18306 // D: empty
18307 // E: PHI [X, C], [Z, D]
18308 //
18309 // If we lower both Select_FPRX_ in a single step, we can instead generate:
18310 //
18311 // A
18312 // | \
18313 // | C
18314 // | /|
18315 // |/ |
18316 // | |
18317 // | D
18318 // | /
18319 // E
18320 //
18321 // A: X = ...; Y = ...
18322 // D: empty
18323 // E: PHI [X, A], [X, C], [Y, D]
18324
18325 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18326 const DebugLoc &DL = First.getDebugLoc();
18327 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
18328 MachineFunction *F = ThisMBB->getParent();
18329 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
18330 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
18331 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
18332 MachineFunction::iterator It = ++ThisMBB->getIterator();
18333 F->insert(It, FirstMBB);
18334 F->insert(It, SecondMBB);
18335 F->insert(It, SinkMBB);
18336
18337 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
18338  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
18339                  std::next(MachineBasicBlock::iterator(First)),
18340 ThisMBB->end());
18341 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
18342
18343 // Fallthrough block for ThisMBB.
18344 ThisMBB->addSuccessor(FirstMBB);
18345 // Fallthrough block for FirstMBB.
18346 FirstMBB->addSuccessor(SecondMBB);
18347 ThisMBB->addSuccessor(SinkMBB);
18348 FirstMBB->addSuccessor(SinkMBB);
18349 // This is fallthrough.
18350 SecondMBB->addSuccessor(SinkMBB);
18351
18352 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
18353 Register FLHS = First.getOperand(1).getReg();
18354 Register FRHS = First.getOperand(2).getReg();
18355 // Insert appropriate branch.
18356 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
18357 .addReg(FLHS)
18358 .addReg(FRHS)
18359 .addMBB(SinkMBB);
18360
18361 Register SLHS = Second.getOperand(1).getReg();
18362 Register SRHS = Second.getOperand(2).getReg();
18363 Register Op1Reg4 = First.getOperand(4).getReg();
18364 Register Op1Reg5 = First.getOperand(5).getReg();
18365
18366 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
18367 // Insert appropriate branch.
18368 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
18369 .addReg(SLHS)
18370 .addReg(SRHS)
18371 .addMBB(SinkMBB);
18372
18373 Register DestReg = Second.getOperand(0).getReg();
18374 Register Op2Reg4 = Second.getOperand(4).getReg();
18375 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
18376 .addReg(Op2Reg4)
18377 .addMBB(ThisMBB)
18378 .addReg(Op1Reg4)
18379 .addMBB(FirstMBB)
18380 .addReg(Op1Reg5)
18381 .addMBB(SecondMBB);
18382
18383 // Now remove the Select_FPRX_s.
18384 First.eraseFromParent();
18385 Second.eraseFromParent();
18386 return SinkMBB;
18387}
18388
18389static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
18390                                           MachineBasicBlock *BB,
18391 const RISCVSubtarget &Subtarget) {
18392 // To "insert" Select_* instructions, we actually have to insert the triangle
18393 // control-flow pattern. The incoming instructions know the destination vreg
18394 // to set, the condition code register to branch on, the true/false values to
18395 // select between, and the condcode to use to select the appropriate branch.
18396 //
18397 // We produce the following control flow:
18398 // HeadMBB
18399 // | \
18400 // | IfFalseMBB
18401 // | /
18402 // TailMBB
18403 //
18404 // When we find a sequence of selects we attempt to optimize their emission
18405 // by sharing the control flow. Currently we only handle cases where we have
18406 // multiple selects with the exact same condition (same LHS, RHS and CC).
18407 // The selects may be interleaved with other instructions if the other
18408 // instructions meet some requirements we deem safe:
18409 // - They are not pseudo instructions.
18410 // - They are debug instructions. Otherwise,
18411 // - They do not have side-effects, do not access memory and their inputs do
18412 // not depend on the results of the select pseudo-instructions.
18413 // The TrueV/FalseV operands of the selects cannot depend on the result of
18414 // previous selects in the sequence.
18415 // These conditions could be further relaxed. See the X86 target for a
18416 // related approach and more information.
18417 //
18418 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18419 // is checked here and handled by a separate function -
18420 // EmitLoweredCascadedSelect.
18421 Register LHS = MI.getOperand(1).getReg();
18422 Register RHS;
18423 if (MI.getOperand(2).isReg())
18424 RHS = MI.getOperand(2).getReg();
18425 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
18426
18427 SmallVector<MachineInstr *, 4> SelectDebugValues;
18428 SmallSet<Register, 4> SelectDests;
18429 SelectDests.insert(MI.getOperand(0).getReg());
18430
18431 MachineInstr *LastSelectPseudo = &MI;
18432 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
18433 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
18434 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
18435 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
18436 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
18437 Next->getOperand(5).isKill()) {
18438 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
18439 }
18440
18441 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
18442 SequenceMBBI != E; ++SequenceMBBI) {
18443 if (SequenceMBBI->isDebugInstr())
18444 continue;
18445 if (isSelectPseudo(*SequenceMBBI)) {
18446 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
18447 !SequenceMBBI->getOperand(2).isReg() ||
18448 SequenceMBBI->getOperand(2).getReg() != RHS ||
18449 SequenceMBBI->getOperand(3).getImm() != CC ||
18450 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
18451 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
18452 break;
18453 LastSelectPseudo = &*SequenceMBBI;
18454 SequenceMBBI->collectDebugValues(SelectDebugValues);
18455 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
18456 continue;
18457 }
18458 if (SequenceMBBI->hasUnmodeledSideEffects() ||
18459 SequenceMBBI->mayLoadOrStore() ||
18460 SequenceMBBI->usesCustomInsertionHook())
18461 break;
18462 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
18463 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
18464 }))
18465 break;
18466 }
18467
18468 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18469 const BasicBlock *LLVM_BB = BB->getBasicBlock();
18470  DebugLoc DL = MI.getDebugLoc();
18471  MachineFunction::iterator I = ++BB->getIterator();
18472
18473 MachineBasicBlock *HeadMBB = BB;
18474 MachineFunction *F = BB->getParent();
18475 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
18476 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
18477
18478 F->insert(I, IfFalseMBB);
18479 F->insert(I, TailMBB);
18480
18481 // Transfer debug instructions associated with the selects to TailMBB.
18482 for (MachineInstr *DebugInstr : SelectDebugValues) {
18483 TailMBB->push_back(DebugInstr->removeFromParent());
18484 }
18485
18486 // Move all instructions after the sequence to TailMBB.
18487 TailMBB->splice(TailMBB->end(), HeadMBB,
18488 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
18489 // Update machine-CFG edges by transferring all successors of the current
18490 // block to the new block which will contain the Phi nodes for the selects.
18491 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
18492 // Set the successors for HeadMBB.
18493 HeadMBB->addSuccessor(IfFalseMBB);
18494 HeadMBB->addSuccessor(TailMBB);
18495
18496 // Insert appropriate branch.
18497 if (MI.getOperand(2).isImm())
18498 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
18499 .addReg(LHS)
18500 .addImm(MI.getOperand(2).getImm())
18501 .addMBB(TailMBB);
18502 else
18503 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
18504 .addReg(LHS)
18505 .addReg(RHS)
18506 .addMBB(TailMBB);
18507
18508 // IfFalseMBB just falls through to TailMBB.
18509 IfFalseMBB->addSuccessor(TailMBB);
18510
18511 // Create PHIs for all of the select pseudo-instructions.
18512 auto SelectMBBI = MI.getIterator();
18513 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
18514 auto InsertionPoint = TailMBB->begin();
18515 while (SelectMBBI != SelectEnd) {
18516 auto Next = std::next(SelectMBBI);
18517 if (isSelectPseudo(*SelectMBBI)) {
18518 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
18519 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
18520 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
18521 .addReg(SelectMBBI->getOperand(4).getReg())
18522 .addMBB(HeadMBB)
18523 .addReg(SelectMBBI->getOperand(5).getReg())
18524 .addMBB(IfFalseMBB);
18525 SelectMBBI->eraseFromParent();
18526 }
18527 SelectMBBI = Next;
18528 }
18529
18530 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18531 return TailMBB;
18532}
18533
18534// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18535static const RISCV::RISCVMaskedPseudoInfo *
18536lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18537  const RISCVVInversePseudosTable::PseudoInfo *Inverse =
18538 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18539  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18540  const RISCV::RISCVMaskedPseudoInfo *Masked =
18541 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18542 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18543 return Masked;
18544}
18545
18546static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18547                                                    MachineBasicBlock *BB,
18548 unsigned CVTXOpc) {
18549 DebugLoc DL = MI.getDebugLoc();
18550
18551  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
18552
18553  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
18554 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18555
18556 // Save the old value of FFLAGS.
18557 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18558
18559 assert(MI.getNumOperands() == 7);
18560
18561 // Emit a VFCVT_X_F
18562  const TargetRegisterInfo *TRI =
18563      BB->getParent()->getSubtarget().getRegisterInfo();
18564 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18565 Register Tmp = MRI.createVirtualRegister(RC);
18566 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18567 .add(MI.getOperand(1))
18568 .add(MI.getOperand(2))
18569 .add(MI.getOperand(3))
18570 .add(MachineOperand::CreateImm(7)) // frm = DYN
18571 .add(MI.getOperand(4))
18572 .add(MI.getOperand(5))
18573 .add(MI.getOperand(6))
18574 .add(MachineOperand::CreateReg(RISCV::FRM,
18575 /*IsDef*/ false,
18576 /*IsImp*/ true));
18577
18578 // Emit a VFCVT_F_X
18579 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18580 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18581 // There is no E8 variant for VFCVT_F_X.
18582 assert(Log2SEW >= 4);
18583 unsigned CVTFOpc =
18584 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18585 ->MaskedPseudo;
18586
18587 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18588 .add(MI.getOperand(0))
18589 .add(MI.getOperand(1))
18590 .addReg(Tmp)
18591 .add(MI.getOperand(3))
18592 .add(MachineOperand::CreateImm(7)) // frm = DYN
18593 .add(MI.getOperand(4))
18594 .add(MI.getOperand(5))
18595 .add(MI.getOperand(6))
18596 .add(MachineOperand::CreateReg(RISCV::FRM,
18597 /*IsDef*/ false,
18598 /*IsImp*/ true));
18599
18600 // Restore FFLAGS.
18601 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18602 .addReg(SavedFFLAGS, RegState::Kill);
18603
18604 // Erase the pseudoinstruction.
18605 MI.eraseFromParent();
18606 return BB;
18607}
18608
18609static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18610 const RISCVSubtarget &Subtarget) {
18611 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18612 const TargetRegisterClass *RC;
18613 switch (MI.getOpcode()) {
18614 default:
18615 llvm_unreachable("Unexpected opcode");
18616 case RISCV::PseudoFROUND_H:
18617 CmpOpc = RISCV::FLT_H;
18618 F2IOpc = RISCV::FCVT_W_H;
18619 I2FOpc = RISCV::FCVT_H_W;
18620 FSGNJOpc = RISCV::FSGNJ_H;
18621 FSGNJXOpc = RISCV::FSGNJX_H;
18622 RC = &RISCV::FPR16RegClass;
18623 break;
18624 case RISCV::PseudoFROUND_H_INX:
18625 CmpOpc = RISCV::FLT_H_INX;
18626 F2IOpc = RISCV::FCVT_W_H_INX;
18627 I2FOpc = RISCV::FCVT_H_W_INX;
18628 FSGNJOpc = RISCV::FSGNJ_H_INX;
18629 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18630 RC = &RISCV::GPRF16RegClass;
18631 break;
18632 case RISCV::PseudoFROUND_S:
18633 CmpOpc = RISCV::FLT_S;
18634 F2IOpc = RISCV::FCVT_W_S;
18635 I2FOpc = RISCV::FCVT_S_W;
18636 FSGNJOpc = RISCV::FSGNJ_S;
18637 FSGNJXOpc = RISCV::FSGNJX_S;
18638 RC = &RISCV::FPR32RegClass;
18639 break;
18640 case RISCV::PseudoFROUND_S_INX:
18641 CmpOpc = RISCV::FLT_S_INX;
18642 F2IOpc = RISCV::FCVT_W_S_INX;
18643 I2FOpc = RISCV::FCVT_S_W_INX;
18644 FSGNJOpc = RISCV::FSGNJ_S_INX;
18645 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18646 RC = &RISCV::GPRF32RegClass;
18647 break;
18648 case RISCV::PseudoFROUND_D:
18649 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18650 CmpOpc = RISCV::FLT_D;
18651 F2IOpc = RISCV::FCVT_L_D;
18652 I2FOpc = RISCV::FCVT_D_L;
18653 FSGNJOpc = RISCV::FSGNJ_D;
18654 FSGNJXOpc = RISCV::FSGNJX_D;
18655 RC = &RISCV::FPR64RegClass;
18656 break;
18657 case RISCV::PseudoFROUND_D_INX:
18658 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18659 CmpOpc = RISCV::FLT_D_INX;
18660 F2IOpc = RISCV::FCVT_L_D_INX;
18661 I2FOpc = RISCV::FCVT_D_L_INX;
18662 FSGNJOpc = RISCV::FSGNJ_D_INX;
18663 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18664 RC = &RISCV::GPRRegClass;
18665 break;
18666 }
18667
18668 const BasicBlock *BB = MBB->getBasicBlock();
18669  DebugLoc DL = MI.getDebugLoc();
18670  MachineFunction::iterator I = ++MBB->getIterator();
18671
18672  MachineFunction *F = MBB->getParent();
18673 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18674 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18675
18676 F->insert(I, CvtMBB);
18677 F->insert(I, DoneMBB);
18678 // Move all instructions after the sequence to DoneMBB.
18679 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18680 MBB->end());
18681 // Update machine-CFG edges by transferring all successors of the current
18682  // block to the new block which will contain the Phi nodes for the selects.
18683  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
18684 // Set the successors for MBB.
18685 MBB->addSuccessor(CvtMBB);
18686 MBB->addSuccessor(DoneMBB);
18687
18688 Register DstReg = MI.getOperand(0).getReg();
18689 Register SrcReg = MI.getOperand(1).getReg();
18690 Register MaxReg = MI.getOperand(2).getReg();
18691 int64_t FRM = MI.getOperand(3).getImm();
18692
18693  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18694  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
18695
18696 Register FabsReg = MRI.createVirtualRegister(RC);
18697 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18698
18699 // Compare the FP value to the max value.
18700 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18701 auto MIB =
18702      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18703  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18704    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18705
18706 // Insert branch.
18707 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18708 .addReg(CmpReg)
18709 .addReg(RISCV::X0)
18710 .addMBB(DoneMBB);
18711
18712 CvtMBB->addSuccessor(DoneMBB);
18713
18714 // Convert to integer.
18715 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18716  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18717  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18718    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18719
18720 // Convert back to FP.
18721 Register I2FReg = MRI.createVirtualRegister(RC);
18722  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18723  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18724    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18725
18726 // Restore the sign bit.
18727 Register CvtReg = MRI.createVirtualRegister(RC);
18728 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18729
18730 // Merge the results.
18731 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18732 .addReg(SrcReg)
18733 .addMBB(MBB)
18734 .addReg(CvtReg)
18735 .addMBB(CvtMBB);
18736
18737 MI.eraseFromParent();
18738 return DoneMBB;
18739}
18740
18741MachineBasicBlock *
18742RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18743 MachineBasicBlock *BB) const {
18744 switch (MI.getOpcode()) {
18745 default:
18746 llvm_unreachable("Unexpected instr type to insert");
18747 case RISCV::ReadCounterWide:
18748 assert(!Subtarget.is64Bit() &&
18749 "ReadCounterWide is only to be used on riscv32");
18750 return emitReadCounterWidePseudo(MI, BB);
18751 case RISCV::Select_GPR_Using_CC_GPR:
18752 case RISCV::Select_GPR_Using_CC_Imm:
18753 case RISCV::Select_FPR16_Using_CC_GPR:
18754 case RISCV::Select_FPR16INX_Using_CC_GPR:
18755 case RISCV::Select_FPR32_Using_CC_GPR:
18756 case RISCV::Select_FPR32INX_Using_CC_GPR:
18757 case RISCV::Select_FPR64_Using_CC_GPR:
18758 case RISCV::Select_FPR64INX_Using_CC_GPR:
18759 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18760 return emitSelectPseudo(MI, BB, Subtarget);
18761 case RISCV::BuildPairF64Pseudo:
18762 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18763 case RISCV::SplitF64Pseudo:
18764 return emitSplitF64Pseudo(MI, BB, Subtarget);
18765 case RISCV::PseudoQuietFLE_H:
18766 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18767 case RISCV::PseudoQuietFLE_H_INX:
18768 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18769 case RISCV::PseudoQuietFLT_H:
18770 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18771 case RISCV::PseudoQuietFLT_H_INX:
18772 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18773 case RISCV::PseudoQuietFLE_S:
18774 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18775 case RISCV::PseudoQuietFLE_S_INX:
18776 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18777 case RISCV::PseudoQuietFLT_S:
18778 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18779 case RISCV::PseudoQuietFLT_S_INX:
18780 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18781 case RISCV::PseudoQuietFLE_D:
18782 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18783 case RISCV::PseudoQuietFLE_D_INX:
18784 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18785 case RISCV::PseudoQuietFLE_D_IN32X:
18786 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18787 Subtarget);
18788 case RISCV::PseudoQuietFLT_D:
18789 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18790 case RISCV::PseudoQuietFLT_D_INX:
18791 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18792 case RISCV::PseudoQuietFLT_D_IN32X:
18793 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18794 Subtarget);
18795
18796 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18797 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18798 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18799 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18800 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18801 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18802 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18803 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18804 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18805 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18806 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18807 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18808 case RISCV::PseudoFROUND_H:
18809 case RISCV::PseudoFROUND_H_INX:
18810 case RISCV::PseudoFROUND_S:
18811 case RISCV::PseudoFROUND_S_INX:
18812 case RISCV::PseudoFROUND_D:
18813 case RISCV::PseudoFROUND_D_INX:
18814 case RISCV::PseudoFROUND_D_IN32X:
18815 return emitFROUND(MI, BB, Subtarget);
18816 case TargetOpcode::STATEPOINT:
18817 // STATEPOINT is a pseudo instruction that has no implicit defs/uses,
18818 // while the jal call instruction (to which the statepoint is eventually
18819 // lowered) has an implicit def. That def is early-clobber as it is set at
18820 // the moment of the call, before any use is read.
18821 // Add this implicit dead def here as a workaround.
18822 MI.addOperand(*MI.getMF(),
18824 RISCV::X1, /*isDef*/ true,
18825 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18826 /*isUndef*/ false, /*isEarlyClobber*/ true));
18827 [[fallthrough]];
18828 case TargetOpcode::STACKMAP:
18829 case TargetOpcode::PATCHPOINT:
18830 if (!Subtarget.is64Bit())
18831 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18832 "supported on 64-bit targets");
18833 return emitPatchPoint(MI, BB);
18834 }
18835}
18836
18838 SDNode *Node) const {
18839 // Add FRM dependency to any instructions with dynamic rounding mode.
18840 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18841 if (Idx < 0) {
18842 // Vector pseudos have FRM index indicated by TSFlags.
18843 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18844 if (Idx < 0)
18845 return;
18846 }
18847 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18848 return;
18849 // If the instruction already reads FRM, don't add another read.
18850 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18851 return;
18852 MI.addOperand(
18853 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18854}
18855
18856// Calling Convention Implementation.
18857// The expectations for frontend ABI lowering vary from target to target.
18858// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18859// details, but this is a longer term goal. For now, we simply try to keep the
18860// role of the frontend as simple and well-defined as possible. The rules can
18861// be summarised as:
18862// * Never split up large scalar arguments. We handle them here.
18863// * If a hardfloat calling convention is being used, and the struct may be
18864// passed in a pair of registers (fp+fp, int+fp), and both registers are
18865// available, then pass as two separate arguments. If either the GPRs or FPRs
18866// are exhausted, then pass according to the rule below.
18867// * If a struct could never be passed in registers or directly in a stack
18868// slot (as it is larger than 2*XLEN and the floating point rules don't
18869// apply), then pass it using a pointer with the byval attribute.
18870// * If a struct is less than 2*XLEN, then coerce to either a two-element
18871// word-sized array or a 2*XLEN scalar (depending on alignment).
18872// * The frontend can determine whether a struct is returned by reference or
18873// not based on its size and fields. If it will be returned by reference, the
18874// frontend must modify the prototype so a pointer with the sret annotation is
18875// passed as the first argument. This is not necessary for large scalar
18876// returns.
18877// * Struct return values and varargs should be coerced to structs containing
18878// register-size fields in the same situations they would be for fixed
18879// arguments.
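//
// As a purely illustrative example of the rules above (not normative ABI
// text): under a hard-float ABI, a small struct containing one double and one
// int can be passed as two separate arguments (one FPR, one GPR) while both
// register files still have space; once either is exhausted, the integer
// rules apply, and a struct larger than 2*XLEN that doesn't qualify for the
// floating-point rules is passed byval through a pointer.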
18880
18881static const MCPhysReg ArgFPR16s[] = {
18882 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18883 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18884};
18885static const MCPhysReg ArgFPR32s[] = {
18886 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18887 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18888};
18889static const MCPhysReg ArgFPR64s[] = {
18890 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18891 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18892};
18893// This is an interim calling convention and it may be changed in the future.
18894static const MCPhysReg ArgVRs[] = {
18895 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18896 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18897 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18898static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18899 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18900 RISCV::V20M2, RISCV::V22M2};
18901static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18902 RISCV::V20M4};
18903static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18904
18906 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18907 // the ILP32E/LP64E ABIs.
18908 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18909 RISCV::X13, RISCV::X14, RISCV::X15,
18910 RISCV::X16, RISCV::X17};
18911 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18912 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18913 RISCV::X13, RISCV::X14, RISCV::X15};
18914
18915 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18916 return ArrayRef(ArgEGPRs);
18917
18918 return ArrayRef(ArgIGPRs);
18919}
18920
18922 // The GPRs used for passing arguments in FastCC. X5 and X6 might be used by
18923 // the save-restore libcalls, so we don't use them.
18924 // Don't use X7 for FastCC, since Zicfilp uses X7 as the label register.
18925 static const MCPhysReg FastCCIGPRs[] = {
18926 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
18927 RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
18928
18929 // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
18930 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18931 RISCV::X13, RISCV::X14, RISCV::X15};
18932
18933 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18934 return ArrayRef(FastCCEGPRs);
18935
18936 return ArrayRef(FastCCIGPRs);
18937}
18938
18939// Pass a 2*XLEN argument that has been split into two XLEN values through
18940// registers or the stack as necessary.
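// For example (illustrative only): an i64 argument on RV32 reaches this
// function as two i32 halves. If two GPRs are free, both halves are passed in
// registers; if only one is free, the first half takes the register and the
// second half is placed on the stack; if none are free, both halves go on the
// stack with the alignment computed below.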
18941static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18942 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18943 MVT ValVT2, MVT LocVT2,
18944 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18945 unsigned XLenInBytes = XLen / 8;
18946 const RISCVSubtarget &STI =
18949
18950 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18951 // At least one half can be passed via register.
18952 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18953 VA1.getLocVT(), CCValAssign::Full));
18954 } else {
18955 // Both halves must be passed on the stack, with proper alignment.
18956 // TODO: To be compatible with GCC's behavior, we force them to have 4-byte
18957 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18958 Align StackAlign(XLenInBytes);
18959 if (!EABI || XLen != 32)
18960 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18961 State.addLoc(
18963 State.AllocateStack(XLenInBytes, StackAlign),
18964 VA1.getLocVT(), CCValAssign::Full));
18966 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18967 LocVT2, CCValAssign::Full));
18968 return false;
18969 }
18970
18971 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18972 // The second half can also be passed via register.
18973 State.addLoc(
18974 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18975 } else {
18976 // The second half is passed via the stack, without additional alignment.
18978 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18979 LocVT2, CCValAssign::Full));
18980 }
18981
18982 return false;
18983}
18984
18985// Implements the RISC-V calling convention. Returns true upon failure.
18986bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18987 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18988 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18989 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18990 RVVArgDispatcher &RVVDispatcher) {
18991 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18992 assert(XLen == 32 || XLen == 64);
18993 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18994
18995 // The static chain parameter must not be passed in normal argument registers,
18996 // so we assign t2 to it, as done in GCC's __builtin_call_with_static_chain.
18997 if (ArgFlags.isNest()) {
18998 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18999 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19000 return false;
19001 }
19002 }
19003
19004 // Any return value split into more than two values can't be returned
19005 // directly. Vectors are returned via the available vector registers.
19006 if (!LocVT.isVector() && IsRet && ValNo > 1)
19007 return true;
19008
19009 // UseGPRForF16_F32 is true if targeting a soft-float ABI, if passing a
19010 // variadic argument, or if no F16/F32 argument registers are available.
19011 bool UseGPRForF16_F32 = true;
19012 // UseGPRForF64 is true if targeting a soft-float or FLEN=32 ABI, if passing
19013 // a variadic argument, or if no F64 argument registers are available.
19014 bool UseGPRForF64 = true;
19015
19016 switch (ABI) {
19017 default:
19018 llvm_unreachable("Unexpected ABI");
19021 case RISCVABI::ABI_LP64:
19023 break;
19026 UseGPRForF16_F32 = !IsFixed;
19027 break;
19030 UseGPRForF16_F32 = !IsFixed;
19031 UseGPRForF64 = !IsFixed;
19032 break;
19033 }
19034
19035 // FPR16, FPR32, and FPR64 alias each other.
19036 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
19037 UseGPRForF16_F32 = true;
19038 UseGPRForF64 = true;
19039 }
19040
19041 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
19042 // similar local variables rather than directly checking against the target
19043 // ABI.
19044
19045 if (UseGPRForF16_F32 &&
19046 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
19047 LocVT = XLenVT;
19048 LocInfo = CCValAssign::BCvt;
19049 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
19050 LocVT = MVT::i64;
19051 LocInfo = CCValAssign::BCvt;
19052 }
19053
19055
19056 // If this is a variadic argument, the RISC-V calling convention requires
19057 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
19058 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
19059 // be used regardless of whether the original argument was split during
19060 // legalisation or not. The argument will not be passed by registers if the
19061 // original type is larger than 2*XLEN, so the register alignment rule does
19062 // not apply.
19063 // TODO: To be compatible with GCC's behavior, we don't currently align
19064 // registers when using the ILP32E calling convention. This behavior may be
19065 // changed when RV32E/ILP32E is ratified.
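// For example (illustrative): a variadic i64 or double on RV32 must start in
// an even-indexed register pair such as a2/a3 or a4/a5; if the next free
// register would be an odd one (say a3), it is skipped below so the value
// starts at a4.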
19066 unsigned TwoXLenInBytes = (2 * XLen) / 8;
19067 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
19068 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
19069 ABI != RISCVABI::ABI_ILP32E) {
19070 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
19071 // Skip 'odd' register if necessary.
19072 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
19073 State.AllocateReg(ArgGPRs);
19074 }
19075
19076 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
19077 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
19078 State.getPendingArgFlags();
19079
19080 assert(PendingLocs.size() == PendingArgFlags.size() &&
19081 "PendingLocs and PendingArgFlags out of sync");
19082
19083 // Handle passing f64 on RV32D with a soft float ABI or when floating point
19084 // registers are exhausted.
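// Illustrative example: for a call f(int, ..., double d) on RV32 with the
// ILP32 soft-float ABI, if only a7 is still free when d is reached, the first
// half of d goes in a7 and the second half on the stack (the "split" case
// handled below); with no GPRs left, the whole f64 goes on the stack.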
19085 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
19086 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
19087 // Depending on available argument GPRs, f64 may be passed in a pair of
19088 // GPRs, split between a GPR and the stack, or passed completely on the
19089 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
19090 // cases.
19091 Register Reg = State.AllocateReg(ArgGPRs);
19092 if (!Reg) {
19093 unsigned StackOffset = State.AllocateStack(8, Align(8));
19094 State.addLoc(
19095 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19096 return false;
19097 }
19098 LocVT = MVT::i32;
19099 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19100 Register HiReg = State.AllocateReg(ArgGPRs);
19101 if (HiReg) {
19102 State.addLoc(
19103 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
19104 } else {
19105 unsigned StackOffset = State.AllocateStack(4, Align(4));
19106 State.addLoc(
19107 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19108 }
19109 return false;
19110 }
19111
19112 // Fixed-length vectors are located in the corresponding scalable-vector
19113 // container types.
19114 if (ValVT.isFixedLengthVector())
19115 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19116
19117 // Split arguments might be passed indirectly, so keep track of the pending
19118 // values. Split vectors are passed via a mix of registers and indirectly, so
19119 // treat them as we would any other argument.
19120 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
19121 LocVT = XLenVT;
19122 LocInfo = CCValAssign::Indirect;
19123 PendingLocs.push_back(
19124 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
19125 PendingArgFlags.push_back(ArgFlags);
19126 if (!ArgFlags.isSplitEnd()) {
19127 return false;
19128 }
19129 }
19130
19131 // If the split argument only had two elements, it should be passed directly
19132 // in registers or on the stack.
19133 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
19134 PendingLocs.size() <= 2) {
19135 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
19136 // Apply the normal calling convention rules to the first half of the
19137 // split argument.
19138 CCValAssign VA = PendingLocs[0];
19139 ISD::ArgFlagsTy AF = PendingArgFlags[0];
19140 PendingLocs.clear();
19141 PendingArgFlags.clear();
19142 return CC_RISCVAssign2XLen(
19143 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
19144 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
19145 }
19146
19147 // Allocate to a register if possible, or else a stack slot.
19148 Register Reg;
19149 unsigned StoreSizeBytes = XLen / 8;
19150 Align StackAlign = Align(XLen / 8);
19151
19152 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
19153 Reg = State.AllocateReg(ArgFPR16s);
19154 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
19155 Reg = State.AllocateReg(ArgFPR32s);
19156 else if (ValVT == MVT::f64 && !UseGPRForF64)
19157 Reg = State.AllocateReg(ArgFPR64s);
19158 else if (ValVT.isVector()) {
19159 Reg = RVVDispatcher.getNextPhysReg();
19160 if (!Reg) {
19161 // For return values, the vector must be passed fully via registers or
19162 // via the stack.
19163 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
19164 // but we're using all of them.
19165 if (IsRet)
19166 return true;
19167 // Try using a GPR to pass the address
19168 if ((Reg = State.AllocateReg(ArgGPRs))) {
19169 LocVT = XLenVT;
19170 LocInfo = CCValAssign::Indirect;
19171 } else if (ValVT.isScalableVector()) {
19172 LocVT = XLenVT;
19173 LocInfo = CCValAssign::Indirect;
19174 } else {
19175 // Pass fixed-length vectors on the stack.
19176 LocVT = ValVT;
19177 StoreSizeBytes = ValVT.getStoreSize();
19178 // Align vectors to their element sizes, being careful for vXi1
19179 // vectors.
19180 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19181 }
19182 }
19183 } else {
19184 Reg = State.AllocateReg(ArgGPRs);
19185 }
19186
19187 unsigned StackOffset =
19188 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
19189
19190 // If we reach this point and PendingLocs is non-empty, we must be at the
19191 // end of a split argument that must be passed indirectly.
19192 if (!PendingLocs.empty()) {
19193 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
19194 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
19195
19196 for (auto &It : PendingLocs) {
19197 if (Reg)
19198 It.convertToReg(Reg);
19199 else
19200 It.convertToMem(StackOffset);
19201 State.addLoc(It);
19202 }
19203 PendingLocs.clear();
19204 PendingArgFlags.clear();
19205 return false;
19206 }
19207
19208 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
19209 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
19210 "Expected an XLenVT or vector types at this stage");
19211
19212 if (Reg) {
19213 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19214 return false;
19215 }
19216
19217 // When a scalar floating-point value is passed on the stack, no
19218 // bit-conversion is needed.
19219 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
19220 assert(!ValVT.isVector());
19221 LocVT = ValVT;
19222 LocInfo = CCValAssign::Full;
19223 }
19224 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19225 return false;
19226}
19227
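// Return the index of the first vector argument whose element type is i1
// (i.e. a mask vector), if any. This is a best-effort summary of intent: the
// result is used by RVVArgDispatcher so the mask argument can be given
// special register treatment (V0).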
19228template <typename ArgTy>
19229static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
19230 for (const auto &ArgIdx : enumerate(Args)) {
19231 MVT ArgVT = ArgIdx.value().VT;
19232 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
19233 return ArgIdx.index();
19234 }
19235 return std::nullopt;
19236}
19237
19238void RISCVTargetLowering::analyzeInputArgs(
19239 MachineFunction &MF, CCState &CCInfo,
19240 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19241 RISCVCCAssignFn Fn) const {
19242 unsigned NumArgs = Ins.size();
19244
19245 RVVArgDispatcher Dispatcher;
19246 if (IsRet) {
19247 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
19248 } else {
19249 SmallVector<Type *, 4> TypeList;
19250 for (const Argument &Arg : MF.getFunction().args())
19251 TypeList.push_back(Arg.getType());
19252 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
19253 }
19254
19255 for (unsigned i = 0; i != NumArgs; ++i) {
19256 MVT ArgVT = Ins[i].VT;
19257 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19258
19259 Type *ArgTy = nullptr;
19260 if (IsRet)
19261 ArgTy = FType->getReturnType();
19262 else if (Ins[i].isOrigArg())
19263 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19264
19266 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
19267 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
19268 Dispatcher)) {
19269 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19270 << ArgVT << '\n');
19271 llvm_unreachable(nullptr);
19272 }
19273 }
19274}
19275
19276void RISCVTargetLowering::analyzeOutputArgs(
19277 MachineFunction &MF, CCState &CCInfo,
19278 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19279 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19280 unsigned NumArgs = Outs.size();
19281
19282 SmallVector<Type *, 4> TypeList;
19283 if (IsRet)
19284 TypeList.push_back(MF.getFunction().getReturnType());
19285 else if (CLI)
19286 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
19287 TypeList.push_back(Arg.Ty);
19288 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
19289
19290 for (unsigned i = 0; i != NumArgs; i++) {
19291 MVT ArgVT = Outs[i].VT;
19292 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19293 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19294
19296 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
19297 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
19298 Dispatcher)) {
19299 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19300 << ArgVT << "\n");
19301 llvm_unreachable(nullptr);
19302 }
19303 }
19304}
19305
19306// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19307// values.
19309 const CCValAssign &VA, const SDLoc &DL,
19310 const RISCVSubtarget &Subtarget) {
19311 switch (VA.getLocInfo()) {
19312 default:
19313 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19314 case CCValAssign::Full:
19316 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19317 break;
19318 case CCValAssign::BCvt:
19319 if (VA.getLocVT().isInteger() &&
19320 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19321 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19322 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
19323 if (RV64LegalI32) {
19324 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
19325 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
19326 } else {
19327 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19328 }
19329 } else {
19330 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19331 }
19332 break;
19333 }
19334 return Val;
19335}
19336
19337// The caller is responsible for loading the full value if the argument is
19338// passed with CCValAssign::Indirect.
19340 const CCValAssign &VA, const SDLoc &DL,
19341 const ISD::InputArg &In,
19342 const RISCVTargetLowering &TLI) {
19345 EVT LocVT = VA.getLocVT();
19346 SDValue Val;
19347 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19348 Register VReg = RegInfo.createVirtualRegister(RC);
19349 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19350 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19351
19352 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19353 if (In.isOrigArg()) {
19354 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19355 if (OrigArg->getType()->isIntegerTy()) {
19356 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19357 // An input zero extended from i31 can also be considered sign extended.
19358 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19359 (BitWidth < 32 && In.Flags.isZExt())) {
19361 RVFI->addSExt32Register(VReg);
19362 }
19363 }
19364 }
19365
19367 return Val;
19368
19369 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19370}
19371
19373 const CCValAssign &VA, const SDLoc &DL,
19374 const RISCVSubtarget &Subtarget) {
19375 EVT LocVT = VA.getLocVT();
19376
19377 switch (VA.getLocInfo()) {
19378 default:
19379 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19380 case CCValAssign::Full:
19381 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19382 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
19383 break;
19384 case CCValAssign::BCvt:
19385 if (LocVT.isInteger() &&
19386 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19387 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19388 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
19389 if (RV64LegalI32) {
19390 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
19391 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
19392 } else {
19393 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19394 }
19395 } else {
19396 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19397 }
19398 break;
19399 }
19400 return Val;
19401}
19402
19403// The caller is responsible for loading the full value if the argument is
19404// passed with CCValAssign::Indirect.
19406 const CCValAssign &VA, const SDLoc &DL) {
19408 MachineFrameInfo &MFI = MF.getFrameInfo();
19409 EVT LocVT = VA.getLocVT();
19410 EVT ValVT = VA.getValVT();
19412 if (ValVT.isScalableVector()) {
19413 // When the value is a scalable vector, what is saved on the stack is a
19414 // pointer to the scalable vector value, so ValVT here becomes the pointer
19415 // type rather than the scalable vector type.
19416 ValVT = LocVT;
19417 }
19418 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19419 /*IsImmutable=*/true);
19420 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19421 SDValue Val;
19422
19423 ISD::LoadExtType ExtType;
19424 switch (VA.getLocInfo()) {
19425 default:
19426 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19427 case CCValAssign::Full:
19429 case CCValAssign::BCvt:
19430 ExtType = ISD::NON_EXTLOAD;
19431 break;
19432 }
19433 Val = DAG.getExtLoad(
19434 ExtType, DL, LocVT, Chain, FIN,
19436 return Val;
19437}
19438
19440 const CCValAssign &VA,
19441 const CCValAssign &HiVA,
19442 const SDLoc &DL) {
19443 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19444 "Unexpected VA");
19446 MachineFrameInfo &MFI = MF.getFrameInfo();
19448
19449 assert(VA.isRegLoc() && "Expected register VA assignment");
19450
19451 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19452 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19453 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19454 SDValue Hi;
19455 if (HiVA.isMemLoc()) {
19456 // Second half of f64 is passed on the stack.
19457 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19458 /*IsImmutable=*/true);
19459 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19460 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19462 } else {
19463 // Second half of f64 is passed in another GPR.
19464 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19465 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19466 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19467 }
19468 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19469}
19470
19471 // FastCC gives less than a 1% performance improvement on some particular
19472 // benchmarks, but it may theoretically benefit more cases.
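// Roughly speaking (a characterization, not a specification), FastCC differs
// from CC_RISCV above mainly by allocating from longer register lists: the
// temporary FPRs (ft0-ft7, ft8-ft11) are handed out after fa0-fa7, and the
// extra GPRs t3-t6 are available via getFastCCArgGPRs, before anything is
// placed on the stack.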
19474 unsigned ValNo, MVT ValVT, MVT LocVT,
19475 CCValAssign::LocInfo LocInfo,
19476 ISD::ArgFlagsTy ArgFlags, CCState &State,
19477 bool IsFixed, bool IsRet, Type *OrigTy,
19478 const RISCVTargetLowering &TLI,
19479 RVVArgDispatcher &RVVDispatcher) {
19480 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19481 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19482 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19483 return false;
19484 }
19485 }
19486
19487 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
19488
19489 if (LocVT == MVT::f16 &&
19490 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
19491 static const MCPhysReg FPR16List[] = {
19492 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
19493 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
19494 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
19495 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
19496 if (unsigned Reg = State.AllocateReg(FPR16List)) {
19497 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19498 return false;
19499 }
19500 }
19501
19502 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19503 static const MCPhysReg FPR32List[] = {
19504 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
19505 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
19506 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
19507 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
19508 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19509 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19510 return false;
19511 }
19512 }
19513
19514 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19515 static const MCPhysReg FPR64List[] = {
19516 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
19517 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
19518 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
19519 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
19520 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19521 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19522 return false;
19523 }
19524 }
19525
19526 // Check if there is an available GPR before hitting the stack.
19527 if ((LocVT == MVT::f16 &&
19528 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19529 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19530 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19531 Subtarget.hasStdExtZdinx())) {
19532 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19533 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19534 return false;
19535 }
19536 }
19537
19538 if (LocVT == MVT::f16) {
19539 unsigned Offset2 = State.AllocateStack(2, Align(2));
19540 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19541 return false;
19542 }
19543
19544 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19545 unsigned Offset4 = State.AllocateStack(4, Align(4));
19546 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19547 return false;
19548 }
19549
19550 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19551 unsigned Offset5 = State.AllocateStack(8, Align(8));
19552 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19553 return false;
19554 }
19555
19556 if (LocVT.isVector()) {
19557 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19558 if (AllocatedVReg) {
19559 // Fixed-length vectors are located in the corresponding scalable-vector
19560 // container types.
19561 if (ValVT.isFixedLengthVector())
19562 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19563 State.addLoc(
19564 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19565 } else {
19566 // Try to pass the address via a "fast" GPR.
19567 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19568 LocInfo = CCValAssign::Indirect;
19569 LocVT = TLI.getSubtarget().getXLenVT();
19570 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19571 } else if (ValVT.isFixedLengthVector()) {
19572 auto StackAlign =
19574 unsigned StackOffset =
19575 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19576 State.addLoc(
19577 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19578 } else {
19579 // Can't pass scalable vectors on the stack.
19580 return true;
19581 }
19582 }
19583
19584 return false;
19585 }
19586
19587 return true; // CC didn't match.
19588}
19589
19590bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19591 CCValAssign::LocInfo LocInfo,
19592 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19593 if (ArgFlags.isNest()) {
19595 "Attribute 'nest' is not supported in GHC calling convention");
19596 }
19597
19598 static const MCPhysReg GPRList[] = {
19599 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19600 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19601
19602 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19603 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19604 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19605 if (unsigned Reg = State.AllocateReg(GPRList)) {
19606 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19607 return false;
19608 }
19609 }
19610
19611 const RISCVSubtarget &Subtarget =
19613
19614 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19615 // Pass in STG registers: F1, ..., F6
19616 // fs0 ... fs5
19617 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19618 RISCV::F18_F, RISCV::F19_F,
19619 RISCV::F20_F, RISCV::F21_F};
19620 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19621 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19622 return false;
19623 }
19624 }
19625
19626 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19627 // Pass in STG registers: D1, ..., D6
19628 // fs6 ... fs11
19629 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19630 RISCV::F24_D, RISCV::F25_D,
19631 RISCV::F26_D, RISCV::F27_D};
19632 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19633 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19634 return false;
19635 }
19636 }
19637
19638 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19639 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19640 Subtarget.is64Bit())) {
19641 if (unsigned Reg = State.AllocateReg(GPRList)) {
19642 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19643 return false;
19644 }
19645 }
19646
19647 report_fatal_error("No registers left in GHC calling convention");
19648 return true;
19649}
19650
19651// Transform physical registers into virtual registers.
19653 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19654 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19655 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19656
19658
19659 switch (CallConv) {
19660 default:
19661 report_fatal_error("Unsupported calling convention");
19662 case CallingConv::C:
19663 case CallingConv::Fast:
19665 case CallingConv::GRAAL:
19667 break;
19668 case CallingConv::GHC:
19669 if (Subtarget.hasStdExtE())
19670 report_fatal_error("GHC calling convention is not supported on RVE!");
19671 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19672 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19673 "(Zdinx/D) instruction set extensions");
19674 }
19675
19676 const Function &Func = MF.getFunction();
19677 if (Func.hasFnAttribute("interrupt")) {
19678 if (!Func.arg_empty())
19680 "Functions with the interrupt attribute cannot have arguments!");
19681
19682 StringRef Kind =
19683 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19684
19685 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19687 "Function interrupt attribute argument not supported!");
19688 }
19689
19690 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19691 MVT XLenVT = Subtarget.getXLenVT();
19692 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19693 // Used with varargs to accumulate store chains.
19694 std::vector<SDValue> OutChains;
19695
19696 // Assign locations to all of the incoming arguments.
19698 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19699
19700 if (CallConv == CallingConv::GHC)
19702 else
19703 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19705 : RISCV::CC_RISCV);
19706
19707 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19708 CCValAssign &VA = ArgLocs[i];
19709 SDValue ArgValue;
19710 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19711 // case.
19712 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19713 assert(VA.needsCustom());
19714 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19715 } else if (VA.isRegLoc())
19716 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19717 else
19718 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19719
19720 if (VA.getLocInfo() == CCValAssign::Indirect) {
19721 // If the original argument was split and passed by reference (e.g. i128
19722 // on RV32), we need to load all parts of it here (using the same
19723 // address). Vectors may be partly split to registers and partly to the
19724 // stack, in which case the base address is partly offset and subsequent
19725 // stores are relative to that.
19726 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19728 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19729 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19730 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19731 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19732 CCValAssign &PartVA = ArgLocs[i + 1];
19733 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19734 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19735 if (PartVA.getValVT().isScalableVector())
19736 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19737 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19738 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19740 ++i;
19741 ++InsIdx;
19742 }
19743 continue;
19744 }
19745 InVals.push_back(ArgValue);
19746 }
19747
19748 if (any_of(ArgLocs,
19749 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19750 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19751
19752 if (IsVarArg) {
19753 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19754 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19755 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19756 MachineFrameInfo &MFI = MF.getFrameInfo();
19757 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19759
19760 // Size of the vararg save area. For now, the varargs save area is either
19761 // zero or large enough to hold a0-a7.
19762 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19763 int FI;
19764
19765 // If all registers are allocated, then all varargs must be passed on the
19766 // stack and we don't need to save any argregs.
19767 if (VarArgsSaveSize == 0) {
19768 int VaArgOffset = CCInfo.getStackSize();
19769 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19770 } else {
19771 int VaArgOffset = -VarArgsSaveSize;
19772 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19773
19774 // If saving an odd number of registers, create an extra stack slot to
19775 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19776 // offsets to even-numbered registers remain 2*XLEN-aligned.
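// For example (illustrative): on RV64, if a5 is the first unallocated
// register, a5-a7 are saved (24 bytes) and Idx == 5 is odd, so the extra
// 8-byte slot below keeps the save area 16-byte aligned.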
19777 if (Idx % 2) {
19779 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19780 VarArgsSaveSize += XLenInBytes;
19781 }
19782
19783 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19784
19785 // Copy the integer registers that may have been used for passing varargs
19786 // to the vararg save area.
19787 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19788 const Register Reg = RegInfo.createVirtualRegister(RC);
19789 RegInfo.addLiveIn(ArgRegs[I], Reg);
19790 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19791 SDValue Store = DAG.getStore(
19792 Chain, DL, ArgValue, FIN,
19793 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19794 OutChains.push_back(Store);
19795 FIN =
19796 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19797 }
19798 }
19799
19800 // Record the frame index of the first variable argument,
19801 // which is needed for lowering VASTART.
19802 RVFI->setVarArgsFrameIndex(FI);
19803 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19804 }
19805
19806 // All stores are grouped in one node so that the number of InVals matches
19807 // the number of Ins. This only happens for vararg functions.
19808 if (!OutChains.empty()) {
19809 OutChains.push_back(Chain);
19810 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19811 }
19812
19813 return Chain;
19814}
19815
19816/// isEligibleForTailCallOptimization - Check whether the call is eligible
19817/// for tail call optimization.
19818/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
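/// In brief, a call is not considered eligible if the caller has the
/// "interrupt" attribute, any argument is passed on the stack or indirectly,
/// either side uses sret, the callee does not preserve at least the registers
/// the caller preserves, or any argument is byval (a summary of the checks
/// performed below).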
19819bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19820 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19821 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19822
19823 auto CalleeCC = CLI.CallConv;
19824 auto &Outs = CLI.Outs;
19825 auto &Caller = MF.getFunction();
19826 auto CallerCC = Caller.getCallingConv();
19827
19828 // Exception-handling functions need a special set of instructions to
19829 // indicate a return to the hardware. Tail-calling another function would
19830 // probably break this.
19831 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19832 // should be expanded as new function attributes are introduced.
19833 if (Caller.hasFnAttribute("interrupt"))
19834 return false;
19835
19836 // Do not tail call opt if the stack is used to pass parameters.
19837 if (CCInfo.getStackSize() != 0)
19838 return false;
19839
19840 // Do not tail call opt if any parameters need to be passed indirectly.
19841 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19842 // passed indirectly: the address of the value is passed in a register or,
19843 // if no register is available, on the stack. Passing indirectly often
19844 // requires allocating stack space to store the value, so the
19845 // CCInfo.getStackSize() != 0 check above is not sufficient and we also need
19846 // to check whether any CCValAssign in ArgLocs is passed
19847 // CCValAssign::Indirect.
19848 for (auto &VA : ArgLocs)
19849 if (VA.getLocInfo() == CCValAssign::Indirect)
19850 return false;
19851
19852 // Do not tail call opt if either caller or callee uses struct return
19853 // semantics.
19854 auto IsCallerStructRet = Caller.hasStructRetAttr();
19855 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19856 if (IsCallerStructRet || IsCalleeStructRet)
19857 return false;
19858
19859 // The callee has to preserve all registers the caller needs to preserve.
19860 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19861 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19862 if (CalleeCC != CallerCC) {
19863 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19864 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19865 return false;
19866 }
19867
19868 // Byval parameters hand the function a pointer directly into the stack area
19869 // we want to reuse during a tail call. Working around this *is* possible
19870 // but less efficient and uglier in LowerCall.
19871 for (auto &Arg : Outs)
19872 if (Arg.Flags.isByVal())
19873 return false;
19874
19875 return true;
19876}
19877
19879 return DAG.getDataLayout().getPrefTypeAlign(
19880 VT.getTypeForEVT(*DAG.getContext()));
19881}
19882
19883// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19884// and output parameter nodes.
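// A rough outline of the node sequence built below: CALLSEQ_START, stores for
// byval/indirect/stack arguments, a glued chain of CopyToReg nodes for
// register arguments, then RISCVISD::CALL (or RISCVISD::TAIL for tail calls,
// which ends the sequence), CALLSEQ_END, and CopyFromReg nodes for the return
// values.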
19886 SmallVectorImpl<SDValue> &InVals) const {
19887 SelectionDAG &DAG = CLI.DAG;
19888 SDLoc &DL = CLI.DL;
19890 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19892 SDValue Chain = CLI.Chain;
19893 SDValue Callee = CLI.Callee;
19894 bool &IsTailCall = CLI.IsTailCall;
19895 CallingConv::ID CallConv = CLI.CallConv;
19896 bool IsVarArg = CLI.IsVarArg;
19897 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19898 MVT XLenVT = Subtarget.getXLenVT();
19899
19901
19902 // Analyze the operands of the call, assigning locations to each operand.
19904 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19905
19906 if (CallConv == CallingConv::GHC) {
19907 if (Subtarget.hasStdExtE())
19908 report_fatal_error("GHC calling convention is not supported on RVE!");
19910 } else
19911 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19913 : RISCV::CC_RISCV);
19914
19915 // Check if it's really possible to do a tail call.
19916 if (IsTailCall)
19917 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19918
19919 if (IsTailCall)
19920 ++NumTailCalls;
19921 else if (CLI.CB && CLI.CB->isMustTailCall())
19922 report_fatal_error("failed to perform tail call elimination on a call "
19923 "site marked musttail");
19924
19925 // Get a count of how many bytes are to be pushed on the stack.
19926 unsigned NumBytes = ArgCCInfo.getStackSize();
19927
19928 // Create local copies for byval args
19929 SmallVector<SDValue, 8> ByValArgs;
19930 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19931 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19932 if (!Flags.isByVal())
19933 continue;
19934
19935 SDValue Arg = OutVals[i];
19936 unsigned Size = Flags.getByValSize();
19937 Align Alignment = Flags.getNonZeroByValAlign();
19938
19939 int FI =
19940 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19941 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19942 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19943
19944 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19945 /*IsVolatile=*/false,
19946 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
19948 ByValArgs.push_back(FIPtr);
19949 }
19950
19951 if (!IsTailCall)
19952 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19953
19954 // Copy argument values to their designated locations.
19956 SmallVector<SDValue, 8> MemOpChains;
19957 SDValue StackPtr;
19958 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19959 ++i, ++OutIdx) {
19960 CCValAssign &VA = ArgLocs[i];
19961 SDValue ArgValue = OutVals[OutIdx];
19962 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19963
19964 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19965 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19966 assert(VA.isRegLoc() && "Expected register VA assignment");
19967 assert(VA.needsCustom());
19968 SDValue SplitF64 = DAG.getNode(
19969 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19970 SDValue Lo = SplitF64.getValue(0);
19971 SDValue Hi = SplitF64.getValue(1);
19972
19973 Register RegLo = VA.getLocReg();
19974 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19975
19976 // Get the CCValAssign for the Hi part.
19977 CCValAssign &HiVA = ArgLocs[++i];
19978
19979 if (HiVA.isMemLoc()) {
19980 // Second half of f64 is passed on the stack.
19981 if (!StackPtr.getNode())
19982 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19984 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19985 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19986 // Emit the store.
19987 MemOpChains.push_back(
19988 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19989 } else {
19990 // Second half of f64 is passed in another GPR.
19991 Register RegHigh = HiVA.getLocReg();
19992 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19993 }
19994 continue;
19995 }
19996
19997 // Promote the value if needed.
19998 // For now, only handle fully promoted and indirect arguments.
19999 if (VA.getLocInfo() == CCValAssign::Indirect) {
20000 // Store the argument in a stack slot and pass its address.
20001 Align StackAlign =
20002 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20003 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20004 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20005 // If the original argument was split (e.g. i128), we need
20006 // to store the required parts of it here (and pass just one address).
20007 // Vectors may be partly split to registers and partly to the stack, in
20008 // which case the base address is partly offset and subsequent stores are
20009 // relative to that.
20010 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20011 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20012 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20013 // Calculate the total size to store. We don't know exactly what we're
20014 // storing without walking the remaining parts in the loop below and
20015 // collecting the info.
20017 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20018 SDValue PartValue = OutVals[OutIdx + 1];
20019 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20020 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20021 EVT PartVT = PartValue.getValueType();
20022 if (PartVT.isScalableVector())
20023 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20024 StoredSize += PartVT.getStoreSize();
20025 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20026 Parts.push_back(std::make_pair(PartValue, Offset));
20027 ++i;
20028 ++OutIdx;
20029 }
20030 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20031 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20032 MemOpChains.push_back(
20033 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20035 for (const auto &Part : Parts) {
20036 SDValue PartValue = Part.first;
20037 SDValue PartOffset = Part.second;
20039 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20040 MemOpChains.push_back(
20041 DAG.getStore(Chain, DL, PartValue, Address,
20043 }
20044 ArgValue = SpillSlot;
20045 } else {
20046 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20047 }
20048
20049 // Use local copy if it is a byval arg.
20050 if (Flags.isByVal())
20051 ArgValue = ByValArgs[j++];
20052
20053 if (VA.isRegLoc()) {
20054 // Queue up the argument copies and emit them at the end.
20055 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20056 } else {
20057 assert(VA.isMemLoc() && "Argument not register or memory");
20058 assert(!IsTailCall && "Tail call not allowed if stack is used "
20059 "for passing parameters");
20060
20061 // Work out the address of the stack slot.
20062 if (!StackPtr.getNode())
20063 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20065 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20067
20068 // Emit the store.
20069 MemOpChains.push_back(
20070 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
20071 }
20072 }
20073
20074 // Join the stores, which are independent of one another.
20075 if (!MemOpChains.empty())
20076 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20077
20078 SDValue Glue;
20079
20080 // Build a sequence of copy-to-reg nodes, chained and glued together.
20081 for (auto &Reg : RegsToPass) {
20082 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20083 Glue = Chain.getValue(1);
20084 }
20085
20086 // Validate that none of the argument registers have been marked as
20087 // reserved; if so, report an error. Do the same for the return address if
20088 // this is not a tail call.
20089 validateCCReservedRegs(RegsToPass, MF);
20090 if (!IsTailCall &&
20093 MF.getFunction(),
20094 "Return address register required, but has been reserved."});
20095
20096 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20097 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20098 // split it, and then the direct call can be matched by PseudoCALL.
20099 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20100 const GlobalValue *GV = S->getGlobal();
20101 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20102 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20103 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20104 }
20105
20106 // The first call operand is the chain and the second is the target address.
20108 Ops.push_back(Chain);
20109 Ops.push_back(Callee);
20110
20111 // Add argument registers to the end of the list so that they are
20112 // known live into the call.
20113 for (auto &Reg : RegsToPass)
20114 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20115
20116 if (!IsTailCall) {
20117 // Add a register mask operand representing the call-preserved registers.
20118 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20119 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20120 assert(Mask && "Missing call preserved mask for calling convention");
20121 Ops.push_back(DAG.getRegisterMask(Mask));
20122 }
20123
20124 // Glue the call to the argument copies, if any.
20125 if (Glue.getNode())
20126 Ops.push_back(Glue);
20127
20128 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20129 "Unexpected CFI type for a direct call");
20130
20131 // Emit the call.
20132 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20133
20134 if (IsTailCall) {
20136 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
20137 if (CLI.CFIType)
20138 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20139 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20140 return Ret;
20141 }
20142
20143 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
20144 if (CLI.CFIType)
20145 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20146 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20147 Glue = Chain.getValue(1);
20148
20149 // Mark the end of the call, which is glued to the call itself.
20150 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20151 Glue = Chain.getValue(1);
20152
20153 // Assign locations to each value returned by this call.
20155 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20156 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
20157
20158 // Copy all of the result registers out of their specified physreg.
20159 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20160 auto &VA = RVLocs[i];
20161 // Copy the value out
20162 SDValue RetValue =
20163 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20164 // Glue the RetValue to the end of the call sequence
20165 Chain = RetValue.getValue(1);
20166 Glue = RetValue.getValue(2);
20167
20168 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20169 assert(VA.needsCustom());
20170 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20171 MVT::i32, Glue);
20172 Chain = RetValue2.getValue(1);
20173 Glue = RetValue2.getValue(2);
20174 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20175 RetValue2);
20176 }
20177
20178 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20179
20180 InVals.push_back(RetValue);
20181 }
20182
20183 return Chain;
20184}
20185
20187 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20188 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20190 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20191
20192 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
20193
20194 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20195 MVT VT = Outs[i].VT;
20196 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20197 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
20198 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
20199 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
20200 nullptr, *this, Dispatcher))
20201 return false;
20202 }
20203 return true;
20204}
20205
20206SDValue
20208 bool IsVarArg,
20210 const SmallVectorImpl<SDValue> &OutVals,
20211 const SDLoc &DL, SelectionDAG &DAG) const {
20213 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20214
20215 // Stores the assignment of the return value to a location.
20216 SmallVector<CCValAssign, 16> RVLocs;
20217
20218 // Info about the registers and stack slot.
20219 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20220 *DAG.getContext());
20221
20222 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20223 nullptr, RISCV::CC_RISCV);
20224
20225 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20226 report_fatal_error("GHC functions return void only");
20227
20228 SDValue Glue;
20229 SmallVector<SDValue, 4> RetOps(1, Chain);
20230
20231 // Copy the result values into the output registers.
20232 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20233 SDValue Val = OutVals[OutIdx];
20234 CCValAssign &VA = RVLocs[i];
20235 assert(VA.isRegLoc() && "Can only return in registers!");
20236
20237 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20238 // Handle returning f64 on RV32D with a soft float ABI.
20239 assert(VA.isRegLoc() && "Expected return via registers");
20240 assert(VA.needsCustom());
20241 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20242 DAG.getVTList(MVT::i32, MVT::i32), Val);
20243 SDValue Lo = SplitF64.getValue(0);
20244 SDValue Hi = SplitF64.getValue(1);
20245 Register RegLo = VA.getLocReg();
20246 Register RegHi = RVLocs[++i].getLocReg();
20247
20248 if (STI.isRegisterReservedByUser(RegLo) ||
20249 STI.isRegisterReservedByUser(RegHi))
20250 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20251 MF.getFunction(),
20252 "Return value register required, but has been reserved."});
20253
20254 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20255 Glue = Chain.getValue(1);
20256 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20257 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20258 Glue = Chain.getValue(1);
20259 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20260 } else {
20261 // Handle a 'normal' return.
20262 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20263 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20264
20265 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20266 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20267 MF.getFunction(),
20268 "Return value register required, but has been reserved."});
20269
20270 // Guarantee that all emitted copies are stuck together.
20271 Glue = Chain.getValue(1);
20272 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20273 }
20274 }
20275
20276 RetOps[0] = Chain; // Update chain.
20277
20278 // Add the glue node if we have it.
20279 if (Glue.getNode()) {
20280 RetOps.push_back(Glue);
20281 }
20282
20283 if (any_of(RVLocs,
20284 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20285 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20286
20287 unsigned RetOpc = RISCVISD::RET_GLUE;
20288 // Interrupt service routines use different return instructions.
20289 const Function &Func = DAG.getMachineFunction().getFunction();
20290 if (Func.hasFnAttribute("interrupt")) {
20291 if (!Func.getReturnType()->isVoidTy())
20293 "Functions with the interrupt attribute must have void return type!");
20294
20296 StringRef Kind =
20297 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20298
20299 if (Kind == "supervisor")
20300 RetOpc = RISCVISD::SRET_GLUE;
20301 else
20302 RetOpc = RISCVISD::MRET_GLUE;
20303 }
20304
20305 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20306}
20307
20308void RISCVTargetLowering::validateCCReservedRegs(
20309 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20310 MachineFunction &MF) const {
20311 const Function &F = MF.getFunction();
20312 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20313
20314 if (llvm::any_of(Regs, [&STI](auto Reg) {
20315 return STI.isRegisterReservedByUser(Reg.first);
20316 }))
20317 F.getContext().diagnose(DiagnosticInfoUnsupported{
20318 F, "Argument register required, but has been reserved."});
20319}
20320
20321// Check if the result of the node is only used as a return value, as
20322// otherwise we can't perform a tail-call.
20323bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20324 if (N->getNumValues() != 1)
20325 return false;
20326 if (!N->hasNUsesOfValue(1, 0))
20327 return false;
20328
20329 SDNode *Copy = *N->use_begin();
20330
20331 if (Copy->getOpcode() == ISD::BITCAST) {
20332 return isUsedByReturnOnly(Copy, Chain);
20333 }
20334
20335 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20336 // with soft float ABIs.
20337 if (Copy->getOpcode() != ISD::CopyToReg) {
20338 return false;
20339 }
20340
20341 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20342 // isn't safe to perform a tail call.
20343 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20344 return false;
20345
20346 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20347 bool HasRet = false;
20348 for (SDNode *Node : Copy->uses()) {
20349 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20350 return false;
20351 HasRet = true;
20352 }
20353 if (!HasRet)
20354 return false;
20355
20356 Chain = Copy->getOperand(0);
20357 return true;
20358}
20359
20360bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20361 return CI->isTailCall();
20362}
20363
20364const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20365#define NODE_NAME_CASE(NODE) \
20366 case RISCVISD::NODE: \
20367 return "RISCVISD::" #NODE;
20368 // clang-format off
20369 switch ((RISCVISD::NodeType)Opcode) {
20370 case RISCVISD::FIRST_NUMBER:
20371 break;
20372 NODE_NAME_CASE(RET_GLUE)
20373 NODE_NAME_CASE(SRET_GLUE)
20374 NODE_NAME_CASE(MRET_GLUE)
20375 NODE_NAME_CASE(CALL)
20376 NODE_NAME_CASE(SELECT_CC)
20377 NODE_NAME_CASE(BR_CC)
20378 NODE_NAME_CASE(BuildPairF64)
20379 NODE_NAME_CASE(SplitF64)
20380 NODE_NAME_CASE(TAIL)
20381 NODE_NAME_CASE(ADD_LO)
20382 NODE_NAME_CASE(HI)
20383 NODE_NAME_CASE(LLA)
20384 NODE_NAME_CASE(ADD_TPREL)
20385 NODE_NAME_CASE(MULHSU)
20386 NODE_NAME_CASE(SHL_ADD)
20387 NODE_NAME_CASE(SLLW)
20388 NODE_NAME_CASE(SRAW)
20389 NODE_NAME_CASE(SRLW)
20390 NODE_NAME_CASE(DIVW)
20391 NODE_NAME_CASE(DIVUW)
20392 NODE_NAME_CASE(REMUW)
20393 NODE_NAME_CASE(ROLW)
20394 NODE_NAME_CASE(RORW)
20395 NODE_NAME_CASE(CLZW)
20396 NODE_NAME_CASE(CTZW)
20397 NODE_NAME_CASE(ABSW)
20398 NODE_NAME_CASE(FMV_H_X)
20399 NODE_NAME_CASE(FMV_X_ANYEXTH)
20400 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20401 NODE_NAME_CASE(FMV_W_X_RV64)
20402 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20403 NODE_NAME_CASE(FCVT_X)
20404 NODE_NAME_CASE(FCVT_XU)
20405 NODE_NAME_CASE(FCVT_W_RV64)
20406 NODE_NAME_CASE(FCVT_WU_RV64)
20407 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20408 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20409 NODE_NAME_CASE(FP_ROUND_BF16)
20410 NODE_NAME_CASE(FP_EXTEND_BF16)
20411 NODE_NAME_CASE(FROUND)
20412 NODE_NAME_CASE(FCLASS)
20413 NODE_NAME_CASE(FMAX)
20414 NODE_NAME_CASE(FMIN)
20415 NODE_NAME_CASE(READ_COUNTER_WIDE)
20416 NODE_NAME_CASE(BREV8)
20417 NODE_NAME_CASE(ORC_B)
20418 NODE_NAME_CASE(ZIP)
20419 NODE_NAME_CASE(UNZIP)
20420 NODE_NAME_CASE(CLMUL)
20421 NODE_NAME_CASE(CLMULH)
20422 NODE_NAME_CASE(CLMULR)
20423 NODE_NAME_CASE(MOPR)
20424 NODE_NAME_CASE(MOPRR)
20425 NODE_NAME_CASE(SHA256SIG0)
20426 NODE_NAME_CASE(SHA256SIG1)
20427 NODE_NAME_CASE(SHA256SUM0)
20428 NODE_NAME_CASE(SHA256SUM1)
20429 NODE_NAME_CASE(SM4KS)
20430 NODE_NAME_CASE(SM4ED)
20431 NODE_NAME_CASE(SM3P0)
20432 NODE_NAME_CASE(SM3P1)
20433 NODE_NAME_CASE(TH_LWD)
20434 NODE_NAME_CASE(TH_LWUD)
20435 NODE_NAME_CASE(TH_LDD)
20436 NODE_NAME_CASE(TH_SWD)
20437 NODE_NAME_CASE(TH_SDD)
20438 NODE_NAME_CASE(VMV_V_V_VL)
20439 NODE_NAME_CASE(VMV_V_X_VL)
20440 NODE_NAME_CASE(VFMV_V_F_VL)
20441 NODE_NAME_CASE(VMV_X_S)
20442 NODE_NAME_CASE(VMV_S_X_VL)
20443 NODE_NAME_CASE(VFMV_S_F_VL)
20444 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20445 NODE_NAME_CASE(READ_VLENB)
20446 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20447 NODE_NAME_CASE(VSLIDEUP_VL)
20448 NODE_NAME_CASE(VSLIDE1UP_VL)
20449 NODE_NAME_CASE(VSLIDEDOWN_VL)
20450 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20451 NODE_NAME_CASE(VFSLIDE1UP_VL)
20452 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20453 NODE_NAME_CASE(VID_VL)
20454 NODE_NAME_CASE(VFNCVT_ROD_VL)
20455 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20456 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20457 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20458 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20459 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20460 NODE_NAME_CASE(VECREDUCE_AND_VL)
20461 NODE_NAME_CASE(VECREDUCE_OR_VL)
20462 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20463 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20464 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20465 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20466 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20467 NODE_NAME_CASE(ADD_VL)
20468 NODE_NAME_CASE(AND_VL)
20469 NODE_NAME_CASE(MUL_VL)
20470 NODE_NAME_CASE(OR_VL)
20471 NODE_NAME_CASE(SDIV_VL)
20472 NODE_NAME_CASE(SHL_VL)
20473 NODE_NAME_CASE(SREM_VL)
20474 NODE_NAME_CASE(SRA_VL)
20475 NODE_NAME_CASE(SRL_VL)
20476 NODE_NAME_CASE(ROTL_VL)
20477 NODE_NAME_CASE(ROTR_VL)
20478 NODE_NAME_CASE(SUB_VL)
20479 NODE_NAME_CASE(UDIV_VL)
20480 NODE_NAME_CASE(UREM_VL)
20481 NODE_NAME_CASE(XOR_VL)
20482 NODE_NAME_CASE(AVGFLOORS_VL)
20483 NODE_NAME_CASE(AVGFLOORU_VL)
20484 NODE_NAME_CASE(AVGCEILS_VL)
20485 NODE_NAME_CASE(AVGCEILU_VL)
20486 NODE_NAME_CASE(SADDSAT_VL)
20487 NODE_NAME_CASE(UADDSAT_VL)
20488 NODE_NAME_CASE(SSUBSAT_VL)
20489 NODE_NAME_CASE(USUBSAT_VL)
20490 NODE_NAME_CASE(VNCLIP_VL)
20491 NODE_NAME_CASE(VNCLIPU_VL)
20492 NODE_NAME_CASE(FADD_VL)
20493 NODE_NAME_CASE(FSUB_VL)
20494 NODE_NAME_CASE(FMUL_VL)
20495 NODE_NAME_CASE(FDIV_VL)
20496 NODE_NAME_CASE(FNEG_VL)
20497 NODE_NAME_CASE(FABS_VL)
20498 NODE_NAME_CASE(FSQRT_VL)
20499 NODE_NAME_CASE(FCLASS_VL)
20500 NODE_NAME_CASE(VFMADD_VL)
20501 NODE_NAME_CASE(VFNMADD_VL)
20502 NODE_NAME_CASE(VFMSUB_VL)
20503 NODE_NAME_CASE(VFNMSUB_VL)
20504 NODE_NAME_CASE(VFWMADD_VL)
20505 NODE_NAME_CASE(VFWNMADD_VL)
20506 NODE_NAME_CASE(VFWMSUB_VL)
20507 NODE_NAME_CASE(VFWNMSUB_VL)
20508 NODE_NAME_CASE(FCOPYSIGN_VL)
20509 NODE_NAME_CASE(SMIN_VL)
20510 NODE_NAME_CASE(SMAX_VL)
20511 NODE_NAME_CASE(UMIN_VL)
20512 NODE_NAME_CASE(UMAX_VL)
20513 NODE_NAME_CASE(BITREVERSE_VL)
20514 NODE_NAME_CASE(BSWAP_VL)
20515 NODE_NAME_CASE(CTLZ_VL)
20516 NODE_NAME_CASE(CTTZ_VL)
20517 NODE_NAME_CASE(CTPOP_VL)
20518 NODE_NAME_CASE(VFMIN_VL)
20519 NODE_NAME_CASE(VFMAX_VL)
20520 NODE_NAME_CASE(MULHS_VL)
20521 NODE_NAME_CASE(MULHU_VL)
20522 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20523 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20524 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20525 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20526 NODE_NAME_CASE(VFCVT_X_F_VL)
20527 NODE_NAME_CASE(VFCVT_XU_F_VL)
20528 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20529 NODE_NAME_CASE(SINT_TO_FP_VL)
20530 NODE_NAME_CASE(UINT_TO_FP_VL)
20531 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20532 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20533 NODE_NAME_CASE(FP_EXTEND_VL)
20534 NODE_NAME_CASE(FP_ROUND_VL)
20535 NODE_NAME_CASE(STRICT_FADD_VL)
20536 NODE_NAME_CASE(STRICT_FSUB_VL)
20537 NODE_NAME_CASE(STRICT_FMUL_VL)
20538 NODE_NAME_CASE(STRICT_FDIV_VL)
20539 NODE_NAME_CASE(STRICT_FSQRT_VL)
20540 NODE_NAME_CASE(STRICT_VFMADD_VL)
20541 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20542 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20543 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20544 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20545 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20546 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20547 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20548 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20549 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20550 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20551 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20552 NODE_NAME_CASE(STRICT_FSETCC_VL)
20553 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20554 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20555 NODE_NAME_CASE(VWMUL_VL)
20556 NODE_NAME_CASE(VWMULU_VL)
20557 NODE_NAME_CASE(VWMULSU_VL)
20558 NODE_NAME_CASE(VWADD_VL)
20559 NODE_NAME_CASE(VWADDU_VL)
20560 NODE_NAME_CASE(VWSUB_VL)
20561 NODE_NAME_CASE(VWSUBU_VL)
20562 NODE_NAME_CASE(VWADD_W_VL)
20563 NODE_NAME_CASE(VWADDU_W_VL)
20564 NODE_NAME_CASE(VWSUB_W_VL)
20565 NODE_NAME_CASE(VWSUBU_W_VL)
20566 NODE_NAME_CASE(VWSLL_VL)
20567 NODE_NAME_CASE(VFWMUL_VL)
20568 NODE_NAME_CASE(VFWADD_VL)
20569 NODE_NAME_CASE(VFWSUB_VL)
20570 NODE_NAME_CASE(VFWADD_W_VL)
20571 NODE_NAME_CASE(VFWSUB_W_VL)
20572 NODE_NAME_CASE(VWMACC_VL)
20573 NODE_NAME_CASE(VWMACCU_VL)
20574 NODE_NAME_CASE(VWMACCSU_VL)
20575 NODE_NAME_CASE(VNSRL_VL)
20576 NODE_NAME_CASE(SETCC_VL)
20577 NODE_NAME_CASE(VMERGE_VL)
20578 NODE_NAME_CASE(VMAND_VL)
20579 NODE_NAME_CASE(VMOR_VL)
20580 NODE_NAME_CASE(VMXOR_VL)
20581 NODE_NAME_CASE(VMCLR_VL)
20582 NODE_NAME_CASE(VMSET_VL)
20583 NODE_NAME_CASE(VRGATHER_VX_VL)
20584 NODE_NAME_CASE(VRGATHER_VV_VL)
20585 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20586 NODE_NAME_CASE(VSEXT_VL)
20587 NODE_NAME_CASE(VZEXT_VL)
20588 NODE_NAME_CASE(VCPOP_VL)
20589 NODE_NAME_CASE(VFIRST_VL)
20590 NODE_NAME_CASE(READ_CSR)
20591 NODE_NAME_CASE(WRITE_CSR)
20592 NODE_NAME_CASE(SWAP_CSR)
20593 NODE_NAME_CASE(CZERO_EQZ)
20594 NODE_NAME_CASE(CZERO_NEZ)
20595 NODE_NAME_CASE(SW_GUARDED_BRIND)
20596 NODE_NAME_CASE(SF_VC_XV_SE)
20597 NODE_NAME_CASE(SF_VC_IV_SE)
20598 NODE_NAME_CASE(SF_VC_VV_SE)
20599 NODE_NAME_CASE(SF_VC_FV_SE)
20600 NODE_NAME_CASE(SF_VC_XVV_SE)
20601 NODE_NAME_CASE(SF_VC_IVV_SE)
20602 NODE_NAME_CASE(SF_VC_VVV_SE)
20603 NODE_NAME_CASE(SF_VC_FVV_SE)
20604 NODE_NAME_CASE(SF_VC_XVW_SE)
20605 NODE_NAME_CASE(SF_VC_IVW_SE)
20606 NODE_NAME_CASE(SF_VC_VVW_SE)
20607 NODE_NAME_CASE(SF_VC_FVW_SE)
20608 NODE_NAME_CASE(SF_VC_V_X_SE)
20609 NODE_NAME_CASE(SF_VC_V_I_SE)
20610 NODE_NAME_CASE(SF_VC_V_XV_SE)
20611 NODE_NAME_CASE(SF_VC_V_IV_SE)
20612 NODE_NAME_CASE(SF_VC_V_VV_SE)
20613 NODE_NAME_CASE(SF_VC_V_FV_SE)
20614 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20615 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20616 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20617 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20618 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20619 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20620 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20621 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20622 }
20623 // clang-format on
20624 return nullptr;
20625#undef NODE_NAME_CASE
20626}
20627
20628/// getConstraintType - Given a constraint letter, return the type of
20629/// constraint it is for this target.
20630RISCVTargetLowering::ConstraintType
20631RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20632 if (Constraint.size() == 1) {
20633 switch (Constraint[0]) {
20634 default:
20635 break;
20636 case 'f':
20637 return C_RegisterClass;
20638 case 'I':
20639 case 'J':
20640 case 'K':
20641 return C_Immediate;
20642 case 'A':
20643 return C_Memory;
20644 case 's':
20645 case 'S': // A symbolic address
20646 return C_Other;
20647 }
20648 } else {
20649 if (Constraint == "vr" || Constraint == "vm")
20650 return C_RegisterClass;
20651 }
20652 return TargetLowering::getConstraintType(Constraint);
20653}
20654
20655std::pair<unsigned, const TargetRegisterClass *>
20656RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20657 StringRef Constraint,
20658 MVT VT) const {
20659 // First, see if this is a constraint that directly corresponds to a RISC-V
20660 // register class.
20661 if (Constraint.size() == 1) {
20662 switch (Constraint[0]) {
20663 case 'r':
20664 // TODO: Support fixed vectors up to XLen for P extension?
20665 if (VT.isVector())
20666 break;
20667 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20668 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20669 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20670 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20671 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20672 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20673 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20674 case 'f':
20675 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20676 return std::make_pair(0U, &RISCV::FPR16RegClass);
20677 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20678 return std::make_pair(0U, &RISCV::FPR32RegClass);
20679 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20680 return std::make_pair(0U, &RISCV::FPR64RegClass);
20681 break;
20682 default:
20683 break;
20684 }
20685 } else if (Constraint == "vr") {
20686 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20687 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20688 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20689 return std::make_pair(0U, RC);
20690 }
20691 } else if (Constraint == "vm") {
20692 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20693 return std::make_pair(0U, &RISCV::VMV0RegClass);
20694 }
20695
20696 // Clang will correctly decode the usage of register name aliases into their
20697 // official names. However, other frontends like `rustc` do not. This allows
20698 // users of these frontends to use the ABI names for registers in LLVM-style
20699 // register constraints.
20700 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20701 .Case("{zero}", RISCV::X0)
20702 .Case("{ra}", RISCV::X1)
20703 .Case("{sp}", RISCV::X2)
20704 .Case("{gp}", RISCV::X3)
20705 .Case("{tp}", RISCV::X4)
20706 .Case("{t0}", RISCV::X5)
20707 .Case("{t1}", RISCV::X6)
20708 .Case("{t2}", RISCV::X7)
20709 .Cases("{s0}", "{fp}", RISCV::X8)
20710 .Case("{s1}", RISCV::X9)
20711 .Case("{a0}", RISCV::X10)
20712 .Case("{a1}", RISCV::X11)
20713 .Case("{a2}", RISCV::X12)
20714 .Case("{a3}", RISCV::X13)
20715 .Case("{a4}", RISCV::X14)
20716 .Case("{a5}", RISCV::X15)
20717 .Case("{a6}", RISCV::X16)
20718 .Case("{a7}", RISCV::X17)
20719 .Case("{s2}", RISCV::X18)
20720 .Case("{s3}", RISCV::X19)
20721 .Case("{s4}", RISCV::X20)
20722 .Case("{s5}", RISCV::X21)
20723 .Case("{s6}", RISCV::X22)
20724 .Case("{s7}", RISCV::X23)
20725 .Case("{s8}", RISCV::X24)
20726 .Case("{s9}", RISCV::X25)
20727 .Case("{s10}", RISCV::X26)
20728 .Case("{s11}", RISCV::X27)
20729 .Case("{t3}", RISCV::X28)
20730 .Case("{t4}", RISCV::X29)
20731 .Case("{t5}", RISCV::X30)
20732 .Case("{t6}", RISCV::X31)
20733 .Default(RISCV::NoRegister);
20734 if (XRegFromAlias != RISCV::NoRegister)
20735 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20736
20737 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20738 // TableGen record rather than the AsmName to choose registers for InlineAsm
20739 // constraints, plus we want to match those names to the widest floating point
20740 // register type available, manually select floating point registers here.
20741 //
20742 // The second case is the ABI name of the register, so that frontends can also
20743 // use the ABI names in register constraint lists.
20744 if (Subtarget.hasStdExtF()) {
20745 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20746 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20747 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20748 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20749 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20750 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20751 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20752 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20753 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20754 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20755 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20756 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20757 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20758 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20759 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20760 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20761 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20762 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20763 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20764 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20765 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20766 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20767 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20768 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20769 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20770 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20771 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20772 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20773 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20774 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20775 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20776 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20777 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20778 .Default(RISCV::NoRegister);
20779 if (FReg != RISCV::NoRegister) {
20780 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20781 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20782 unsigned RegNo = FReg - RISCV::F0_F;
20783 unsigned DReg = RISCV::F0_D + RegNo;
20784 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20785 }
20786 if (VT == MVT::f32 || VT == MVT::Other)
20787 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20788 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20789 unsigned RegNo = FReg - RISCV::F0_F;
20790 unsigned HReg = RISCV::F0_H + RegNo;
20791 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20792 }
20793 }
20794 }
20795
20796 if (Subtarget.hasVInstructions()) {
20797 Register VReg = StringSwitch<Register>(Constraint.lower())
20798 .Case("{v0}", RISCV::V0)
20799 .Case("{v1}", RISCV::V1)
20800 .Case("{v2}", RISCV::V2)
20801 .Case("{v3}", RISCV::V3)
20802 .Case("{v4}", RISCV::V4)
20803 .Case("{v5}", RISCV::V5)
20804 .Case("{v6}", RISCV::V6)
20805 .Case("{v7}", RISCV::V7)
20806 .Case("{v8}", RISCV::V8)
20807 .Case("{v9}", RISCV::V9)
20808 .Case("{v10}", RISCV::V10)
20809 .Case("{v11}", RISCV::V11)
20810 .Case("{v12}", RISCV::V12)
20811 .Case("{v13}", RISCV::V13)
20812 .Case("{v14}", RISCV::V14)
20813 .Case("{v15}", RISCV::V15)
20814 .Case("{v16}", RISCV::V16)
20815 .Case("{v17}", RISCV::V17)
20816 .Case("{v18}", RISCV::V18)
20817 .Case("{v19}", RISCV::V19)
20818 .Case("{v20}", RISCV::V20)
20819 .Case("{v21}", RISCV::V21)
20820 .Case("{v22}", RISCV::V22)
20821 .Case("{v23}", RISCV::V23)
20822 .Case("{v24}", RISCV::V24)
20823 .Case("{v25}", RISCV::V25)
20824 .Case("{v26}", RISCV::V26)
20825 .Case("{v27}", RISCV::V27)
20826 .Case("{v28}", RISCV::V28)
20827 .Case("{v29}", RISCV::V29)
20828 .Case("{v30}", RISCV::V30)
20829 .Case("{v31}", RISCV::V31)
20830 .Default(RISCV::NoRegister);
20831 if (VReg != RISCV::NoRegister) {
20832 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20833 return std::make_pair(VReg, &RISCV::VMRegClass);
20834 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20835 return std::make_pair(VReg, &RISCV::VRRegClass);
20836 for (const auto *RC :
20837 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20838 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20839 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20840 return std::make_pair(VReg, RC);
20841 }
20842 }
20843 }
20844 }
20845
20846 std::pair<Register, const TargetRegisterClass *> Res =
20847 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20848
20849 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20850 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20851 // Subtarget into account.
20852 if (Res.second == &RISCV::GPRF16RegClass ||
20853 Res.second == &RISCV::GPRF32RegClass ||
20854 Res.second == &RISCV::GPRPairRegClass)
20855 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20856
20857 return Res;
20858}
20859
20860InlineAsm::ConstraintCode
20861RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20862 // Currently only support length 1 constraints.
20863 if (ConstraintCode.size() == 1) {
20864 switch (ConstraintCode[0]) {
20865 case 'A':
20866 return InlineAsm::ConstraintCode::A;
20867 default:
20868 break;
20869 }
20870 }
20871
20872 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20873}
20874
20875void RISCVTargetLowering::LowerAsmOperandForConstraint(
20876 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20877 SelectionDAG &DAG) const {
20878 // Currently only support length 1 constraints.
20879 if (Constraint.size() == 1) {
20880 switch (Constraint[0]) {
20881 case 'I':
20882 // Validate & create a 12-bit signed immediate operand.
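// Illustrative usage from C (a sketch, not taken from this file): for
//   asm volatile("addi %0, %1, %2" : "=r"(dst) : "r"(src), "I"(16));
// the constant 16 is accepted below because it fits in a signed 12-bit
// immediate; a value such as 4096 is not pushed into Ops and the
// constraint fails to match.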
20883 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20884 uint64_t CVal = C->getSExtValue();
20885 if (isInt<12>(CVal))
20886 Ops.push_back(
20887 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20888 }
20889 return;
20890 case 'J':
20891 // Validate & create an integer zero operand.
20892 if (isNullConstant(Op))
20893 Ops.push_back(
20894 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20895 return;
20896 case 'K':
20897 // Validate & create a 5-bit unsigned immediate operand.
20898 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20899 uint64_t CVal = C->getZExtValue();
20900 if (isUInt<5>(CVal))
20901 Ops.push_back(
20902 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20903 }
20904 return;
20905 case 'S':
20906 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20907 return;
20908 default:
20909 break;
20910 }
20911 }
20912 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20913}
20914
20915Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20916 Instruction *Inst,
20917 AtomicOrdering Ord) const {
20918 if (Subtarget.hasStdExtZtso()) {
20919 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20920 return Builder.CreateFence(Ord);
20921 return nullptr;
20922 }
20923
20924 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20925 return Builder.CreateFence(Ord);
20926 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20927 return Builder.CreateFence(AtomicOrdering::Release);
20928 return nullptr;
20929}
20930
20931Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20932 Instruction *Inst,
20933 AtomicOrdering Ord) const {
20934 if (Subtarget.hasStdExtZtso()) {
20935 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20936 return Builder.CreateFence(Ord);
20937 return nullptr;
20938 }
20939
20940 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20941 return Builder.CreateFence(AtomicOrdering::Acquire);
20942 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20943 Ord == AtomicOrdering::SequentiallyConsistent)
20944 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20945 return nullptr;
20946}
20947
20948TargetLowering::AtomicExpansionKind
20949RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20950 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20951 // point operations can't be used in an lr/sc sequence without breaking the
20952 // forward-progress guarantee.
20953 if (AI->isFloatingPointOperation() ||
20954 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20955 AI->getOperation() == AtomicRMWInst::UDecWrap)
20956 return AtomicExpansionKind::CmpXChg;
20957
20958 // Don't expand forced atomics, we want to have __sync libcalls instead.
20959 if (Subtarget.hasForcedAtomics())
20960 return AtomicExpansionKind::None;
20961
20962 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20963 if (AI->getOperation() == AtomicRMWInst::Nand) {
20964 if (Subtarget.hasStdExtZacas() &&
20965 (Size >= 32 || Subtarget.hasStdExtZabha()))
20966 return AtomicExpansionKind::CmpXChg;
20967 if (Size < 32)
20968 return AtomicExpansionKind::MaskedIntrinsic;
20969 }
20970
20971 if (Size < 32 && !Subtarget.hasStdExtZabha())
20972 return AtomicExpansionKind::MaskedIntrinsic;
20973
20974 return AtomicExpansionKind::None;
20975}
20976
20977static Intrinsic::ID
20978getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20979 if (XLen == 32) {
20980 switch (BinOp) {
20981 default:
20982 llvm_unreachable("Unexpected AtomicRMW BinOp");
20984 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20985 case AtomicRMWInst::Add:
20986 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20987 case AtomicRMWInst::Sub:
20988 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20990 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20991 case AtomicRMWInst::Max:
20992 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20993 case AtomicRMWInst::Min:
20994 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20996 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20998 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20999 }
21000 }
21001
21002 if (XLen == 64) {
21003 switch (BinOp) {
21004 default:
21005 llvm_unreachable("Unexpected AtomicRMW BinOp");
21007 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21008 case AtomicRMWInst::Add:
21009 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21010 case AtomicRMWInst::Sub:
21011 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21013 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21014 case AtomicRMWInst::Max:
21015 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21016 case AtomicRMWInst::Min:
21017 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21019 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21021 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21022 }
21023 }
21024
21025 llvm_unreachable("Unexpected XLen\n");
21026}
21027
21029 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21030 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21031 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21032 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21033 // mask, as this produces better code than the LR/SC loop emitted by
21034 // int_riscv_masked_atomicrmw_xchg.
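// Illustrative sketch (not taken from the sources): an i8
//   atomicrmw xchg ptr %p, i8 0 seq_cst
// becomes, after the rewrite below, a word-sized
//   atomicrmw and ptr %AlignedAddr, %InvMask seq_cst
// while an exchange with -1 becomes
//   atomicrmw or ptr %AlignedAddr, %Mask seq_cst
// so only the byte selected by %Mask is modified.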
21035 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21036 isa<ConstantInt>(AI->getValOperand())) {
21037 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21038 if (CVal->isZero())
21039 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21040 Builder.CreateNot(Mask, "Inv_Mask"),
21041 AI->getAlign(), Ord);
21042 if (CVal->isMinusOne())
21043 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21044 AI->getAlign(), Ord);
21045 }
21046
21047 unsigned XLen = Subtarget.getXLen();
21048 Value *Ordering =
21049 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21050 Type *Tys[] = {AlignedAddr->getType()};
21051 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
21052 AI->getModule(),
21053 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
21054
21055 if (XLen == 64) {
21056 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21057 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21058 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21059 }
21060
21061 Value *Result;
21062
21063 // Must pass the shift amount needed to sign extend the loaded value prior
21064 // to performing a signed comparison for min/max. ShiftAmt is the number of
21065 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21066 // is the number of bits to left+right shift the value in order to
21067 // sign-extend.
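// Worked example (illustrative only): an i8 min on RV32 whose byte sits at
// ShiftAmt = 8 with ValWidth = 8 passes SextShamt = 32 - 8 - 8 = 16, so the
// LR/SC loop can sign-extend the field with a left then arithmetic right
// shift by 16 before performing the signed comparison.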
21068 if (AI->getOperation() == AtomicRMWInst::Min ||
21069 AI->getOperation() == AtomicRMWInst::Max) {
21070 const DataLayout &DL = AI->getDataLayout();
21071 unsigned ValWidth =
21072 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21073 Value *SextShamt =
21074 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21075 Result = Builder.CreateCall(LrwOpScwLoop,
21076 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21077 } else {
21078 Result =
21079 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21080 }
21081
21082 if (XLen == 64)
21083 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21084 return Result;
21085}
21086
21087TargetLowering::AtomicExpansionKind
21088RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
21089 AtomicCmpXchgInst *CI) const {
21090 // Don't expand forced atomics, we want to have __sync libcalls instead.
21091 if (Subtarget.hasForcedAtomics())
21093
21095 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21096 (Size == 8 || Size == 16))
21099}
21100
21102 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21103 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21104 unsigned XLen = Subtarget.getXLen();
21105 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21106 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21107 if (XLen == 64) {
21108 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21109 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21110 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21111 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21112 }
21113 Type *Tys[] = {AlignedAddr->getType()};
21114 Function *MaskedCmpXchg =
21115 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
21116 Value *Result = Builder.CreateCall(
21117 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21118 if (XLen == 64)
21119 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21120 return Result;
21121}
21122
21123bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
21124 EVT DataVT) const {
21125 // We have indexed loads for all supported EEW types. Indices are always
21126 // zero extended.
21127 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21128 isTypeLegal(Extend.getValueType()) &&
21129 isTypeLegal(Extend.getOperand(0).getValueType()) &&
21130 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21131}
21132
21133bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
21134 EVT VT) const {
21135 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21136 return false;
21137
21138 switch (FPVT.getSimpleVT().SimpleTy) {
21139 case MVT::f16:
21140 return Subtarget.hasStdExtZfhmin();
21141 case MVT::f32:
21142 return Subtarget.hasStdExtF();
21143 case MVT::f64:
21144 return Subtarget.hasStdExtD();
21145 default:
21146 return false;
21147 }
21148}
21149
21150unsigned RISCVTargetLowering::getJumpTableEncoding() const {
21151 // If we are using the small code model, we can reduce size of jump table
21152 // entry to 4 bytes.
21153 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21154 getTargetMachine().getCodeModel() == CodeModel::Small) {
21155 return MachineJumpTableInfo::EK_Custom32;
21156 }
21157 return TargetLowering::getJumpTableEncoding();
21158}
21159
21160const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21161 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21162 unsigned uid, MCContext &Ctx) const {
21163 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21165 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21166}
21167
21168bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
21169 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21170 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21171 // a power of two as well.
21172 // FIXME: This doesn't work for zve32, but that's already broken
21173 // elsewhere for the same reason.
21174 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21175 static_assert(RISCV::RVVBitsPerBlock == 64,
21176 "RVVBitsPerBlock changed, audit needed");
21177 return true;
21178}
21179
21180bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
21181 SDValue &Offset,
21182 ISD::MemIndexedMode &AM,
21183 SelectionDAG &DAG) const {
21184 // Target does not support indexed loads.
21185 if (!Subtarget.hasVendorXTHeadMemIdx())
21186 return false;
21187
21188 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21189 return false;
21190
21191 Base = Op->getOperand(0);
21192 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21193 int64_t RHSC = RHS->getSExtValue();
21194 if (Op->getOpcode() == ISD::SUB)
21195 RHSC = -(uint64_t)RHSC;
21196
21197 // The constants that can be encoded in the THeadMemIdx instructions
21198 // are of the form (sign_extend(imm5) << imm2).
21199 bool isLegalIndexedOffset = false;
21200 for (unsigned i = 0; i < 4; i++)
21201 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21202 isLegalIndexedOffset = true;
21203 break;
21204 }
21205
21206 if (!isLegalIndexedOffset)
21207 return false;
21208
21209 Offset = Op->getOperand(1);
21210 return true;
21211 }
21212
21213 return false;
21214}
21215
21216bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21217 SDValue &Offset,
21218 ISD::MemIndexedMode &AM,
21219 SelectionDAG &DAG) const {
21220 EVT VT;
21221 SDValue Ptr;
21222 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21223 VT = LD->getMemoryVT();
21224 Ptr = LD->getBasePtr();
21225 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21226 VT = ST->getMemoryVT();
21227 Ptr = ST->getBasePtr();
21228 } else
21229 return false;
21230
21231 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21232 return false;
21233
21234 AM = ISD::PRE_INC;
21235 return true;
21236}
21237
21238bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
21239 SDValue &Base,
21240 SDValue &Offset,
21241 ISD::MemIndexedMode &AM,
21242 SelectionDAG &DAG) const {
21243 if (Subtarget.hasVendorXCVmem()) {
21244 if (Op->getOpcode() != ISD::ADD)
21245 return false;
21246
21247 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21248 Base = LS->getBasePtr();
21249 else
21250 return false;
21251
21252 if (Base == Op->getOperand(0))
21253 Offset = Op->getOperand(1);
21254 else if (Base == Op->getOperand(1))
21255 Offset = Op->getOperand(0);
21256 else
21257 return false;
21258
21259 AM = ISD::POST_INC;
21260 return true;
21261 }
21262
21263 EVT VT;
21264 SDValue Ptr;
21265 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21266 VT = LD->getMemoryVT();
21267 Ptr = LD->getBasePtr();
21268 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21269 VT = ST->getMemoryVT();
21270 Ptr = ST->getBasePtr();
21271 } else
21272 return false;
21273
21274 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21275 return false;
21276 // Post-indexing updates the base, so it's not a valid transform
21277 // if that's not the same as the load's pointer.
21278 if (Ptr != Base)
21279 return false;
21280
21281 AM = ISD::POST_INC;
21282 return true;
21283}
21284
21285bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
21286 EVT VT) const {
21287 EVT SVT = VT.getScalarType();
21288
21289 if (!SVT.isSimple())
21290 return false;
21291
21292 switch (SVT.getSimpleVT().SimpleTy) {
21293 case MVT::f16:
21294 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21295 : Subtarget.hasStdExtZfhOrZhinx();
21296 case MVT::f32:
21297 return Subtarget.hasStdExtFOrZfinx();
21298 case MVT::f64:
21299 return Subtarget.hasStdExtDOrZdinx();
21300 default:
21301 break;
21302 }
21303
21304 return false;
21305}
21306
21308 // Zacas will use amocas.w which does not require extension.
21309 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21310}
21311
21312Register RISCVTargetLowering::getExceptionPointerRegister(
21313 const Constant *PersonalityFn) const {
21314 return RISCV::X10;
21315}
21316
21317Register RISCVTargetLowering::getExceptionSelectorRegister(
21318 const Constant *PersonalityFn) const {
21319 return RISCV::X11;
21320}
21321
21322bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
21323 // Return false to suppress the unnecessary extensions if the LibCall
21324 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21325 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21326 Type.getSizeInBits() < Subtarget.getXLen()))
21327 return false;
21328
21329 return true;
21330}
21331
21332bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
21333 if (Subtarget.is64Bit() && Type == MVT::i32)
21334 return true;
21335
21336 return IsSigned;
21337}
21338
21339bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
21340 SDValue C) const {
21341 // Check integral scalar types.
21342 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21343 if (!VT.isScalarInteger())
21344 return false;
21345
21346 // Omit the optimization if the subtarget has the M (or Zmmul) extension and
21347 // the data size exceeds XLen.
21348 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21349 return false;
21350
21351 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
21352 // Break the MUL to a SLLI and an ADD/SUB.
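// Illustrative decompositions of this form:
//   x * 7 -> (x << 3) - x   (Imm + 1 is a power of 2)
//   x * 9 -> (x << 3) + x   (Imm - 1 is a power of 2)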
21353 const APInt &Imm = ConstNode->getAPIntValue();
21354 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21355 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21356 return true;
21357
21358 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
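// For instance (illustrative): 4100 = 4096 + 4, so
//   x * 4100 -> sh2add x, (slli x, 12)
// because (Imm - 4) is a power of 2 and 4100 does not fit in simm12.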
21359 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21360 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21361 (Imm - 8).isPowerOf2()))
21362 return true;
21363
21364 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21365 // a pair of LUI/ADDI.
21366 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21367 ConstNode->hasOneUse()) {
21368 APInt ImmS = Imm.ashr(Imm.countr_zero());
21369 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21370 (1 - ImmS).isPowerOf2())
21371 return true;
21372 }
21373 }
21374
21375 return false;
21376}
21377
21378bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
21379 SDValue ConstNode) const {
21380 // Let the DAGCombiner decide for vectors.
21381 EVT VT = AddNode.getValueType();
21382 if (VT.isVector())
21383 return true;
21384
21385 // Let the DAGCombiner decide for larger types.
21386 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21387 return true;
21388
21389 // It is worse if c1 is simm12 while c1*c2 is not.
21390 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21391 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21392 const APInt &C1 = C1Node->getAPIntValue();
21393 const APInt &C2 = C2Node->getAPIntValue();
21394 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21395 return false;
21396
21397 // Default to true and let the DAGCombiner decide.
21398 return true;
21399}
21400
21401bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
21402 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21403 unsigned *Fast) const {
21404 if (!VT.isVector()) {
21405 if (Fast)
21406 *Fast = Subtarget.enableUnalignedScalarMem();
21407 return Subtarget.enableUnalignedScalarMem();
21408 }
21409
21410 // All vector implementations must support element alignment
21411 EVT ElemVT = VT.getVectorElementType();
21412 if (Alignment >= ElemVT.getStoreSize()) {
21413 if (Fast)
21414 *Fast = 1;
21415 return true;
21416 }
21417
21418 // Note: We lower an unmasked unaligned vector access to an equally sized
21419 // e8 element type access. Given this, we effectively support all unmasked
21420 // misaligned accesses. TODO: Work through the codegen implications of
21421 // allowing such accesses to be formed, and considered fast.
21422 if (Fast)
21423 *Fast = Subtarget.enableUnalignedVectorMem();
21424 return Subtarget.enableUnalignedVectorMem();
21425}
21426
21427
21428EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
21429 const AttributeList &FuncAttributes) const {
21430 if (!Subtarget.hasVInstructions())
21431 return MVT::Other;
21432
21433 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21434 return MVT::Other;
21435
21436 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21437 // has an expansion threshold, and we want the number of hardware memory
21438 // operations to correspond roughly to that threshold. LMUL>1 operations
21439 // are typically expanded linearly internally, and thus correspond to more
21440 // than one actual memory operation. Note that store merging and load
21441 // combining will typically form larger LMUL operations from the LMUL1
21442 // operations emitted here, and that's okay because combining isn't
21443 // introducing new memory operations; it's just merging existing ones.
21444 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21445 if (Op.size() < MinVLenInBytes)
21446 // TODO: Figure out short memops. For the moment, do the default thing
21447 // which ends up using scalar sequences.
21448 return MVT::Other;
21449
21450 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21451 // a large scalar constant and instead use vmv.v.x/i to do the
21452 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21453 // maximize the chance we can encode the size in the vsetvli.
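// Illustrative consequence (assuming getRealMinVLen() == 256 and ELEN == 64):
// a 32-byte non-zero memset is typed as v32i8 so a single vsetvli + vmv.v.x
// covers it, while a 32-byte aligned memcpy is typed as v4i64 to keep VL small.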
21454 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21455 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21456
21457 // Do we have sufficient alignment for our preferred VT? If not, revert
21458 // to largest size allowed by our alignment criteria.
21459 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21460 Align RequiredAlign(PreferredVT.getStoreSize());
21461 if (Op.isFixedDstAlign())
21462 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21463 if (Op.isMemcpy())
21464 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21465 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21466 }
21467 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21468}
21469
21470bool RISCVTargetLowering::splitValueIntoRegisterParts(
21471 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21472 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21473 bool IsABIRegCopy = CC.has_value();
21474 EVT ValueVT = Val.getValueType();
21475 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21476 PartVT == MVT::f32) {
21477 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21478 // nan, and cast to f32.
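// Concrete bit pattern (illustrative): half 1.0 is 0x3C00, so the value
// handed over in an FPR32 is the NaN-boxed float with bits 0xFFFF3C00.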
21479 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21480 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21481 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21482 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21483 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
21484 Parts[0] = Val;
21485 return true;
21486 }
21487
21488 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21489 LLVMContext &Context = *DAG.getContext();
21490 EVT ValueEltVT = ValueVT.getVectorElementType();
21491 EVT PartEltVT = PartVT.getVectorElementType();
21492 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21493 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21494 if (PartVTBitSize % ValueVTBitSize == 0) {
21495 assert(PartVTBitSize >= ValueVTBitSize);
21496 // If the element types are different, bitcast to the same element type of
21497 // PartVT first.
21498 // Give an example here, we want copy a <vscale x 1 x i8> value to
21499 // <vscale x 4 x i16>.
21500 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
21501 // subvector, then we can bitcast to <vscale x 4 x i16>.
21502 if (ValueEltVT != PartEltVT) {
21503 if (PartVTBitSize > ValueVTBitSize) {
21504 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21505 assert(Count != 0 && "The number of elements should not be zero.");
21506 EVT SameEltTypeVT =
21507 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21508 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21509 DAG.getUNDEF(SameEltTypeVT), Val,
21510 DAG.getVectorIdxConstant(0, DL));
21511 }
21512 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21513 } else {
21514 Val =
21515 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21516 Val, DAG.getVectorIdxConstant(0, DL));
21517 }
21518 Parts[0] = Val;
21519 return true;
21520 }
21521 }
21522 return false;
21523}
21524
21525SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
21526 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21527 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21528 bool IsABIRegCopy = CC.has_value();
21529 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21530 PartVT == MVT::f32) {
21531 SDValue Val = Parts[0];
21532
21533 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21534 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21535 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21536 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21537 return Val;
21538 }
21539
21540 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21541 LLVMContext &Context = *DAG.getContext();
21542 SDValue Val = Parts[0];
21543 EVT ValueEltVT = ValueVT.getVectorElementType();
21544 EVT PartEltVT = PartVT.getVectorElementType();
21545 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21546 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21547 if (PartVTBitSize % ValueVTBitSize == 0) {
21548 assert(PartVTBitSize >= ValueVTBitSize);
21549 EVT SameEltTypeVT = ValueVT;
21550 // If the element types are different, convert it to the same element type
21551 // of PartVT.
21552 // Give an example here, we want copy a <vscale x 1 x i8> value from
21553 // <vscale x 4 x i16>.
21554 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
21555 // then we can extract <vscale x 1 x i8>.
21556 if (ValueEltVT != PartEltVT) {
21557 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21558 assert(Count != 0 && "The number of elements should not be zero.");
21559 SameEltTypeVT =
21560 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21561 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21562 }
21563 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21564 DAG.getVectorIdxConstant(0, DL));
21565 return Val;
21566 }
21567 }
21568 return SDValue();
21569}
21570
21571bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21572 // When aggressively optimizing for code size, we prefer to use a div
21573 // instruction, as it is usually smaller than the alternative sequence.
21574 // TODO: Add vector division?
21575 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21576 return OptSize && !VT.isVector();
21577}
21578
21579bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21580 // Scalarizing zero_ext and sign_ext might prevent matching a widening
21581 // instruction in some situations.
21582 unsigned Opc = N->getOpcode();
21583 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21584 return false;
21585 return true;
21586}
21587
21588static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21589 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21590 Function *ThreadPointerFunc =
21591 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21592 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21593 IRB.CreateCall(ThreadPointerFunc), Offset);
21594}
21595
21596Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21597 // Fuchsia provides a fixed TLS slot for the stack cookie.
21598 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21599 if (Subtarget.isTargetFuchsia())
21600 return useTpOffset(IRB, -0x10);
21601
21602 // Android provides a fixed TLS slot for the stack cookie. See the definition
21603 // of TLS_SLOT_STACK_GUARD in
21604 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21605 if (Subtarget.isTargetAndroid())
21606 return useTpOffset(IRB, -0x18);
21607
21608 return TargetLowering::getIRStackGuard(IRB);
21609}
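// Illustrative IR for an Android target (a sketch of what the hook above
// yields, not copied from a test):
//   %tp = call ptr @llvm.thread.pointer()
//   %guard.slot = getelementptr i8, ptr %tp, i32 -24
// i.e. the stack cookie lives at tp - 0x18 (TLS_SLOT_STACK_GUARD).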
21610
21611bool RISCVTargetLowering::isLegalInterleavedAccessType(
21612 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21613 const DataLayout &DL) const {
21614 EVT VT = getValueType(DL, VTy);
21615 // Don't lower vlseg/vsseg for vector types that can't be split.
21616 if (!isTypeLegal(VT))
21617 return false;
21618
21619 if (!isLegalElementTypeForRVV(VT.getVectorElementType()) ||
21620 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21621 Alignment))
21622 return false;
21623
21624 MVT ContainerVT = VT.getSimpleVT();
21625
21626 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21627 if (!Subtarget.useRVVForFixedLengthVectors())
21628 return false;
21629 // Sometimes the interleaved access pass picks up splats as interleaves of
21630 // one element. Don't lower these.
21631 if (FVTy->getNumElements() < 2)
21632 return false;
21633
21635 } else {
21636 // The intrinsics for scalable vectors are not overloaded on pointer type
21637 // and can only handle the default address space.
21638 if (AddrSpace)
21639 return false;
21640 }
21641
21642 // Need to make sure that EMUL * NFIELDS ≤ 8
21643 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21644 if (Fractional)
21645 return true;
21646 return Factor * LMUL <= 8;
21647}
21648
21649bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
21650 Align Alignment) const {
21651 if (!Subtarget.hasVInstructions())
21652 return false;
21653
21654 // Only support fixed vectors if we know the minimum vector size.
21655 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21656 return false;
21657
21658 EVT ScalarType = DataType.getScalarType();
21659 if (!isLegalElementTypeForRVV(ScalarType))
21660 return false;
21661
21662 if (!Subtarget.enableUnalignedVectorMem() &&
21663 Alignment < ScalarType.getStoreSize())
21664 return false;
21665
21666 return true;
21667}
21668
21669static const Intrinsic::ID FixedVlsegIntrIds[] = {
21670 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21671 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21672 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21673 Intrinsic::riscv_seg8_load};
21674
21675/// Lower an interleaved load into a vlsegN intrinsic.
21676///
21677/// E.g. Lower an interleaved load (Factor = 2):
21678/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21679/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21680/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21681///
21682/// Into:
21683/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21684/// %ptr, i64 4)
21685/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21686/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21687bool RISCVTargetLowering::lowerInterleavedLoad(
21688 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21689 ArrayRef<unsigned> Indices, unsigned Factor) const {
21690 IRBuilder<> Builder(LI);
21691
21692 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21693 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21695 LI->getDataLayout()))
21696 return false;
21697
21698 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21699
21700 Function *VlsegNFunc =
21701 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21702 {VTy, LI->getPointerOperandType(), XLenTy});
21703
21704 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21705
21706 CallInst *VlsegN =
21707 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21708
21709 for (unsigned i = 0; i < Shuffles.size(); i++) {
21710 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21711 Shuffles[i]->replaceAllUsesWith(SubVec);
21712 }
21713
21714 return true;
21715}
21716
21717static const Intrinsic::ID FixedVssegIntrIds[] = {
21718 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21719 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21720 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21721 Intrinsic::riscv_seg8_store};
21722
21723/// Lower an interleaved store into a vssegN intrinsic.
21724///
21725/// E.g. Lower an interleaved store (Factor = 3):
21726/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21727/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21728/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21729///
21730/// Into:
21731/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21732/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21733/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21734/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21735/// %ptr, i32 4)
21736///
21737/// Note that the new shufflevectors will be removed and we'll only generate one
21738/// vsseg3 instruction in CodeGen.
21739bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21740 ShuffleVectorInst *SVI,
21741 unsigned Factor) const {
21742 IRBuilder<> Builder(SI);
21743 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21744 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21745 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21746 ShuffleVTy->getNumElements() / Factor);
21747 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21748 SI->getPointerAddressSpace(),
21749 SI->getDataLayout()))
21750 return false;
21751
21752 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21753
21754 Function *VssegNFunc =
21755 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21756 {VTy, SI->getPointerOperandType(), XLenTy});
21757
21758 auto Mask = SVI->getShuffleMask();
21759 SmallVector<Value *, 10> Ops;
21760
21761 for (unsigned i = 0; i < Factor; i++) {
21762 Value *Shuffle = Builder.CreateShuffleVector(
21763 SVI->getOperand(0), SVI->getOperand(1),
21764 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21765 Ops.push_back(Shuffle);
21766 }
21767 // This VL should be OK (should be executable in one vsseg instruction,
21768 // potentially under larger LMULs) because we checked that the fixed vector
21769 // type fits in isLegalInterleavedAccessType
21770 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21771 Ops.append({SI->getPointerOperand(), VL});
21772
21773 Builder.CreateCall(VssegNFunc, Ops);
21774
21775 return true;
21776}
21777
21778bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21779 LoadInst *LI) const {
21780 assert(LI->isSimple());
21781 IRBuilder<> Builder(LI);
21782
21783 // Only deinterleave2 supported at present.
21784 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21785 return false;
21786
21787 unsigned Factor = 2;
21788
21789 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21790 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21791
21792 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21794 LI->getDataLayout()))
21795 return false;
21796
21797 Function *VlsegNFunc;
21798 Value *VL;
21799 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21800 SmallVector<Value *, 10> Ops;
21801
21802 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21803 VlsegNFunc = Intrinsic::getDeclaration(
21804 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21805 {ResVTy, LI->getPointerOperandType(), XLenTy});
21806 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21807 } else {
21808 static const Intrinsic::ID IntrIds[] = {
21809 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21810 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21811 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21812 Intrinsic::riscv_vlseg8};
21813
21814 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21815 {ResVTy, XLenTy});
21816 VL = Constant::getAllOnesValue(XLenTy);
21817 Ops.append(Factor, PoisonValue::get(ResVTy));
21818 }
21819
21820 Ops.append({LI->getPointerOperand(), VL});
21821
21822 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21823 DI->replaceAllUsesWith(Vlseg);
21824
21825 return true;
21826}
21827
21828bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21829 StoreInst *SI) const {
21830 assert(SI->isSimple());
21831 IRBuilder<> Builder(SI);
21832
21833 // Only interleave2 supported at present.
21834 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21835 return false;
21836
21837 unsigned Factor = 2;
21838
21839 VectorType *VTy = cast<VectorType>(II->getType());
21840 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21841
21842 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21843 SI->getPointerAddressSpace(),
21844 SI->getDataLayout()))
21845 return false;
21846
21847 Function *VssegNFunc;
21848 Value *VL;
21849 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21850
21851 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21852 VssegNFunc = Intrinsic::getDeclaration(
21853 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21854 {InVTy, SI->getPointerOperandType(), XLenTy});
21855 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21856 } else {
21857 static const Intrinsic::ID IntrIds[] = {
21858 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21859 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21860 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21861 Intrinsic::riscv_vsseg8};
21862
21863 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21864 {InVTy, XLenTy});
21865 VL = Constant::getAllOnesValue(XLenTy);
21866 }
21867
21868 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21869 SI->getPointerOperand(), VL});
21870
21871 return true;
21872}
21873
21877 const TargetInstrInfo *TII) const {
21878 assert(MBBI->isCall() && MBBI->getCFIType() &&
21879 "Invalid call instruction for a KCFI check");
21880 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21881 MBBI->getOpcode()));
21882
21883 MachineOperand &Target = MBBI->getOperand(0);
21884 Target.setIsRenamable(false);
21885
21886 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21887 .addReg(Target.getReg())
21888 .addImm(MBBI->getCFIType())
21889 .getInstr();
21890}
21891
21892#define GET_REGISTER_MATCHER
21893#include "RISCVGenAsmMatcher.inc"
21894
21895Register
21896RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21897                                       const MachineFunction &MF) const {
21898  Register Reg = MatchRegisterAltName(RegName);
21899  if (Reg == RISCV::NoRegister)
21900    Reg = MatchRegisterName(RegName);
21901  if (Reg == RISCV::NoRegister)
21902    report_fatal_error(
21903        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21904 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21905 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21906 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21907 StringRef(RegName) + "\"."));
21908 return Reg;
21909}
21910
21911MachineMemOperand::Flags
21912RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21913  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21914
21915  if (NontemporalInfo == nullptr)
21916    return MachineMemOperand::MONone;
21917
21918  // 1 (the default when no domain is given) behaves as __RISCV_NTLH_ALL
21919 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21920 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21921 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21922 // 5 -> __RISCV_NTLH_ALL
21923 int NontemporalLevel = 5;
21924 const MDNode *RISCVNontemporalInfo =
21925 I.getMetadata("riscv-nontemporal-domain");
21926 if (RISCVNontemporalInfo != nullptr)
21927 NontemporalLevel =
21928 cast<ConstantInt>(
21929 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21930 ->getValue())
21931 ->getZExtValue();
21932
21933 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21934 "RISC-V target doesn't support this non-temporal domain.");
21935
21936 NontemporalLevel -= 2;
21937  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21938  if (NontemporalLevel & 0b1)
21939 Flags |= MONontemporalBit0;
21940 if (NontemporalLevel & 0b10)
21941 Flags |= MONontemporalBit1;
21942
21943 return Flags;
21944}
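
The routine above collapses the five source-level non-temporal domains into the two target MMO flag bits. The following is a small self-contained sketch, not part of this file (the helper name is invented, and it returns a boolean pair instead of the real MONontemporalBit0/MONontemporalBit1 flag values), showing the same two-bit encoding:

// Sketch of the domain -> 2-bit code mapping used by getTargetMMOFlags.
#include <cassert>
#include <utility>

static std::pair<bool, bool> encodeNontemporalDomain(int Level) {
  assert(Level >= 1 && Level <= 5 && "unsupported non-temporal domain");
  if (Level == 1)  // plain !nontemporal defaults to __RISCV_NTLH_ALL
    Level = 5;
  Level -= 2;      // 2..5 -> 0..3, a two-bit code
  return {(Level & 0b01) != 0, (Level & 0b10) != 0}; // {bit0, bit1}
}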
21945
21946MachineMemOperand::Flags
21947RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21948
21949  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21950  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21951  TargetFlags |= (NodeFlags & MONontemporalBit0);
21952 TargetFlags |= (NodeFlags & MONontemporalBit1);
21953 return TargetFlags;
21954}
21955
21956bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21957    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21958 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21959}
21960
21961bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21962  if (VT.isScalableVector())
21963 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21964 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21965 return true;
21966 return Subtarget.hasStdExtZbb() &&
21967 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21968}
21969
21970unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21971                                                 ISD::CondCode Cond) const {
21972 return isCtpopFast(VT) ? 0 : 1;
21973}
21974
21975bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21976
21977 // GISel support is in progress or complete for these opcodes.
21978 unsigned Op = Inst.getOpcode();
21979 if (Op == Instruction::Add || Op == Instruction::Sub ||
21980 Op == Instruction::And || Op == Instruction::Or ||
21981 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21982 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
21983 Op == Instruction::Freeze || Op == Instruction::Store)
21984 return false;
21985
21986 if (Inst.getType()->isScalableTy())
21987 return true;
21988
21989 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21990 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21991 !isa<ReturnInst>(&Inst))
21992 return true;
21993
21994 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21995 if (AI->getAllocatedType()->isScalableTy())
21996 return true;
21997 }
21998
21999 return false;
22000}
22001
22002SDValue
22003RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22004 SelectionDAG &DAG,
22005 SmallVectorImpl<SDNode *> &Created) const {
22006  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
22007  if (isIntDivCheap(N->getValueType(0), Attr))
22008 return SDValue(N, 0); // Lower SDIV as SDIV
22009
22010 // Only perform this transform if short forward branch opt is supported.
22011 if (!Subtarget.hasShortForwardBranchOpt())
22012 return SDValue();
22013 EVT VT = N->getValueType(0);
22014 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22015 return SDValue();
22016
22017 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22018 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22019 return SDValue();
22020 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22021}
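
BuildSDIVPow2 above defers to the CMov/short-forward-branch expansion. The sketch below (not part of this file; the function name is invented) shows the scalar identity that expansion relies on: signed division by 2^k rounds toward zero, which equals an arithmetic shift once a bias of 2^k - 1 is added to negative dividends. The Divisor range check above keeps that bias within a single addi/addiw immediate.

// Illustrative scalar identity behind the SDIV-by-power-of-two expansion.
#include <cstdint>

static int64_t sdivByPowerOfTwo(int64_t X, unsigned K) {
  int64_t Bias = (int64_t(1) << K) - 1;
  if (X < 0)      // the conditionally executed addi in the SFB form
    X += Bias;
  return X >> K;  // arithmetic shift right
}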
22022
22023bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22024 EVT VT, const APInt &AndMask) const {
22025 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22026 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22027  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
22028}
22029
22030unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22031 return Subtarget.getMinimumJumpTableEntries();
22032}
22033
22034// Handle single arg such as return value.
22035template <typename Arg>
22036void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
22037  // This lambda determines whether an argument is made up of multiple
22038  // identical scalable vector parts (a homogeneous vector tuple).
22039 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
22040 // First, extract the first element in the argument type.
22041 auto It = ArgList.begin();
22042 MVT FirstArgRegType = It->VT;
22043
22044    // Bail out if there are no parts (e.g. a void return) or the type needs splitting.
22045 if (It == ArgList.end() || It->Flags.isSplit())
22046 return false;
22047
22048 ++It;
22049
22050 // Return if this argument type contains only 1 element, or it's not a
22051 // vector type.
22052 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
22053 return false;
22054
22055 // Second, check if the following elements in this argument type are all the
22056 // same.
22057 for (; It != ArgList.end(); ++It)
22058 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
22059 return false;
22060
22061 return true;
22062 };
22063
22064 if (isHomogeneousScalableVectorType(ArgList)) {
22065 // Handle as tuple type
22066 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
22067 } else {
22068 // Handle as normal vector type
22069 bool FirstVMaskAssigned = false;
22070 for (const auto &OutArg : ArgList) {
22071 MVT RegisterVT = OutArg.VT;
22072
22073 // Skip non-RVV register type
22074 if (!RegisterVT.isVector())
22075 continue;
22076
22077 if (RegisterVT.isFixedLengthVector())
22078 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
22079
22080 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
22081 RVVArgInfos.push_back({1, RegisterVT, true});
22082 FirstVMaskAssigned = true;
22083 continue;
22084 }
22085
22086 RVVArgInfos.push_back({1, RegisterVT, false});
22087 }
22088 }
22089}
22090
22091// Handle multiple args.
22092template <>
22093void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
22094 const DataLayout &DL = MF->getDataLayout();
22095 const Function &F = MF->getFunction();
22096 LLVMContext &Context = F.getContext();
22097
22098 bool FirstVMaskAssigned = false;
22099 for (Type *Ty : TypeList) {
22100 StructType *STy = dyn_cast<StructType>(Ty);
22101 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
22102 Type *ElemTy = STy->getTypeAtIndex(0U);
22103 EVT VT = TLI->getValueType(DL, ElemTy);
22104 MVT RegisterVT =
22105 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
22106 unsigned NumRegs =
22107 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
22108
22109 RVVArgInfos.push_back(
22110 {NumRegs * STy->getNumElements(), RegisterVT, false});
22111 } else {
22112 SmallVector<EVT, 4> ValueVTs;
22113 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
22114
22115 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
22116 ++Value) {
22117 EVT VT = ValueVTs[Value];
22118 MVT RegisterVT =
22119 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
22120 unsigned NumRegs =
22121 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
22122
22123 // Skip non-RVV register type
22124 if (!RegisterVT.isVector())
22125 continue;
22126
22127 if (RegisterVT.isFixedLengthVector())
22128 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
22129
22130 if (!FirstVMaskAssigned &&
22131 RegisterVT.getVectorElementType() == MVT::i1) {
22132 RVVArgInfos.push_back({1, RegisterVT, true});
22133 FirstVMaskAssigned = true;
22134 --NumRegs;
22135 }
22136
22137 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
22138 }
22139 }
22140 }
22141}
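
The struct branch above treats a struct made entirely of one scalable vector type as a single tuple argument whose NF is the member count times the registers per member. A hedged sketch of that predicate, with an invented helper name and not part of the dispatcher, might look like this:

// Sketch of the "homogeneous scalable struct -> RVV tuple" check used above.
#include "llvm/IR/DerivedTypes.h"

static bool isRVVTupleCandidate(llvm::Type *Ty) {
  auto *STy = llvm::dyn_cast<llvm::StructType>(Ty);
  return STy && STy->containsHomogeneousScalableVectorTypes();
}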
22142
22143void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
22144 unsigned StartReg) {
22145 assert((StartReg % LMul) == 0 &&
22146 "Start register number should be multiple of lmul");
22147 const MCPhysReg *VRArrays;
22148 switch (LMul) {
22149 default:
22150 report_fatal_error("Invalid lmul");
22151 case 1:
22152 VRArrays = ArgVRs;
22153 break;
22154 case 2:
22155 VRArrays = ArgVRM2s;
22156 break;
22157 case 4:
22158 VRArrays = ArgVRM4s;
22159 break;
22160 case 8:
22161 VRArrays = ArgVRM8s;
22162 break;
22163 }
22164
22165 for (unsigned i = 0; i < NF; ++i)
22166 if (StartReg)
22167 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
22168 else
22169 AllocatedPhysRegs.push_back(MCPhysReg());
22170}
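
As a worked sketch of the indexing above (the helper name is invented; it assumes the per-LMUL tables list the argument register groups starting at v8 in ascending order): field i of a segment whose first physical register is StartReg maps to table entry (StartReg - 8) / LMul + i. For LMul = 2 and StartReg = 12, field 0 selects entry 2, the third LMUL=2 group.

// Pure-arithmetic mirror of the VRArrays index computed in allocatePhysReg.
static unsigned segmentFieldTableIndex(unsigned StartReg, unsigned LMul,
                                       unsigned Field) {
  return (StartReg - 8) / LMul + Field;
}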
22171
22172/// This function determines whether each RVV argument is passed by register.
22173/// If an argument can be assigned to a VR, it is given a specific register;
22174/// otherwise it is assigned 0, which is an invalid MCPhysReg.
22175void RVVArgDispatcher::compute() {
22176 uint32_t AssignedMap = 0;
22177 auto allocate = [&](const RVVArgInfo &ArgInfo) {
22178 // Allocate first vector mask argument to V0.
22179 if (ArgInfo.FirstVMask) {
22180 AllocatedPhysRegs.push_back(RISCV::V0);
22181 return;
22182 }
22183
22184 unsigned RegsNeeded = divideCeil(
22185 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
22186 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
22187 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
22188 StartReg += RegsNeeded) {
22189 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
22190 if ((AssignedMap & Map) == 0) {
22191 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
22192 AssignedMap |= Map;
22193 return;
22194 }
22195 }
22196
22197 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
22198 };
22199
22200 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
22201 allocate(RVVArgInfos[i]);
22202}
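
The allocate lambda in compute() performs a first-fit scan over a 16-bit occupancy mask. Below is a minimal sketch, not part of RVVArgDispatcher (the helper name and the std::optional return are invented): bit i of the map stands for argument register v(8 + i), and a candidate placement is accepted only if none of its TotalRegsNeeded bits are already assigned; on failure the real routine records an invalid MCPhysReg instead.

// Sketch of the first-fit bitmask scan used by RVVArgDispatcher::compute().
#include <cstdint>
#include <optional>

static std::optional<unsigned> firstFitStartReg(uint32_t AssignedMap,
                                                unsigned TotalRegsNeeded,
                                                unsigned RegsPerField,
                                                unsigned NumArgVRs = 16) {
  for (unsigned Start = 0; Start + TotalRegsNeeded <= NumArgVRs;
       Start += RegsPerField) {
    uint32_t Map = ((1u << TotalRegsNeeded) - 1u) << Start;
    if ((AssignedMap & Map) == 0)
      return Start + 8; // argument vector registers begin at v8
  }
  return std::nullopt;  // no contiguous block of VRs is free
}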
22203
22205 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
22206 return AllocatedPhysRegs[CurIdx++];
22207}
22208
22209SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
22210                                                    SDValue Value, SDValue Addr,
22211                                                    int JTI,
22212 SelectionDAG &DAG) const {
22213 if (Subtarget.hasStdExtZicfilp()) {
22214    // When Zicfilp is enabled, we need to use a software-guarded branch for
22215    // the jump table branch.
22216 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
22217 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
22218 Addr);
22219 }
22220 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22221}
22222
22223namespace llvm::RISCVVIntrinsicsTable {
22224
22225#define GET_RISCVVIntrinsicsTable_IMPL
22226#include "RISCVGenSearchableTables.inc"
22227
22228} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1243
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1235
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1015
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1366
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1181
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1377
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:415
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1491
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isMask(unsigned numBits) const
Definition: APInt.h:468
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:314
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1110
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1369
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:266
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:61
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
bool isFloatingPointOperation() const
Definition: Instructions.h:864
BinOp getOperation() const
Definition: Instructions.h:787
Value * getValOperand()
Definition: Instructions.h:856
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
iterator_range< arg_iterator > args()
Definition: Function.h:855
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:745
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Argument * getArg(unsigned i) const
Definition: Function.h:849
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1884
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2521
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1839
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2038
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2499
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1859
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
bool isSimple() const
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:403
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
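A short sketch combining createVirtualRegister and addLiveIn, assuming MF is the current MachineFunction; the register class and physical register are illustrative:
MachineRegisterInfo &MRI = MF.getRegInfo();
// New virtual register in the GPR class, tied to the physical argument register X10.
Register VReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
MRI.addLiveIn(RISCV::X10, VReg);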
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1852
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y --> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether a given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if this node is an UNDEF value.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:734
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
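A one-line sketch of getSetCC, assuming DAG, DL, LHS, RHS and XLenVT are already in scope:
// Signed "LHS < RHS" comparison producing an XLenVT boolean.
SDValue Cond = DAG.getSetCC(DL, XLenVT, LHS, RHS, ISD::SETLT);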
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
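Sketch of getNOT alongside its logical counterpart (getLogicalNOT, listed further below); Val, Cond and the types are assumed to be in scope:
SDValue BitNot = DAG.getNOT(DL, Val, VT);            // (xor Val, -1)
SDValue LogNot = DAG.getLogicalNOT(DL, Cond, CCVT);  // xor with the boolean-true constant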
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:390
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:744
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:840
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
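Sketch of the constant builders, assuming DAG and DL are in scope; the values and types are illustrative:
SDValue C42     = DAG.getConstant(42, DL, MVT::i32);       // ordinary ConstantSDNode
SDValue AllOnes = DAG.getAllOnesConstant(DL, MVT::i32);    // -1 of the requested type
SDValue TC      = DAG.getTargetConstant(7, DL, MVT::i64);  // operand for target-specific nodes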
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:874
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:785
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
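Sketch of getSelectCC, with the operands (LHS, RHS, TrueV, FalseV) assumed to be in scope:
// Equivalent to: (LHS == RHS) ? TrueV : FalseV
SDValue Res = DAG.getSelectCC(DL, LHS, RHS, TrueV, FalseV, ISD::SETEQ);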
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:688
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:780
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:811
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:857
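Sketch of getSplatBuildVector; the fixed element count is illustrative and Scalar is assumed to be in scope:
// BUILD_VECTOR with Scalar repeated in all four i32 lanes.
SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, DL, Scalar);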
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
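Sketch of CreateStackTemporary; the size and alignment are illustrative:
// 16-byte stack temporary aligned to 8 bytes, returned as a frame-index SDValue.
SDValue Slot = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(8));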
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:890
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
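Sketch of the SmallSet pattern, assuming Reg is an unsigned register number in scope:
SmallSet<unsigned, 4> Seen;          // stays inline until it exceeds 4 elements
if (Seen.insert(Reg).second) {
  // First occurrence of Reg.
}
bool AlreadySeen = Seen.count(Reg);  // 1 if present, 0 otherwise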
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
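Sketch of the StringSwitch idiom, assuming Name is a StringRef; the cases and return values are illustrative:
unsigned Kind = StringSwitch<unsigned>(Name)
                    .Case("vr", 1)
                    .Cases("vm", "vd", 2)
                    .Default(0);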
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
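Sketch of how setOperationAction (and the related setters below) appear inside a TargetLowering constructor; the opcode/type/action combinations are illustrative, not a description of what this file configures:
setOperationAction(ISD::SELECT, MVT::i64, Custom);   // route through LowerOperation
setOperationAction(ISD::CTPOP,  MVT::i64, Expand);   // let the legalizer open-code it
setTruncStoreAction(MVT::f64, MVT::f32, Expand);     // no truncating f64->f32 store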
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:778
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1167
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1163
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:751
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1380
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1411
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1310
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:742
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1196
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1313
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1072
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:811
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:818
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1396
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1400
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1269
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1274
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1410
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:941
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1308
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:931
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1309
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1451
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:913
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:802
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1229
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1393
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:750
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1262
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1397
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1029
@ STRICT_LROUND
Definition: ISDOpcodes.h:445
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:958
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1118
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1311
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1097
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:600
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:755
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1412
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1192
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1405
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:908
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:733
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1306
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:808
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1252
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:884
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:770
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1370
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1289
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1314
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1006
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1082
@ STRICT_LRINT
Definition: ISDOpcodes.h:447
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:826
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:605
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:916
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:764
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1413
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:443
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:950
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1304
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1025
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1305
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:864
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1223
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1249
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:650
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:448
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1303
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:897
@ STRICT_LLROUND
Definition: ISDOpcodes.h:446
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:883
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1401
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:814
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1187
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1111
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:791
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:594
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1536
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1536
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1523
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1457
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1574
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1554
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1619
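A minimal sketch (not from this file) of the condition-code helpers documented above; the wrapper function name and the chosen codes are illustrative assumptions.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Hypothetical helper: invert and swap an integer setcc condition code.
static void condCodeHelpersDemo() {
  EVT VT = MVT::i64;
  ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETLT, VT);          // !(a < b)  ==>  a >= b (SETGE)
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(ISD::SETLT);  // (a < b)   ==>  (b > a) (SETGT)
  // isIntEqualitySetCC is true only for SETEQ/SETNE, not for relational codes.
  bool IsEq = ISD::isIntEqualitySetCC(ISD::SETEQ);                   // true
  (void)Inv; (void)Swapped; (void)IsEq;
}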
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
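A minimal sketch (not from this file) of Intrinsic::getDeclaration for an overloaded intrinsic; the helper name emitCttz and the choice of llvm.cttz are illustrative assumptions, not part of this lowering code.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical helper: materialise a declaration of the overloaded llvm.cttz
// intrinsic for the operand's type and emit a call to it.
static CallInst *emitCttz(Module &M, IRBuilder<> &B, Value *X) {
  Function *Cttz = Intrinsic::getDeclaration(&M, Intrinsic::cttz, {X->getType()});
  // Second operand (i1 false): the result is defined even for a zero input.
  return B.CreateCall(Cttz, {X, B.getFalse()});
}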
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
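A minimal sketch (not from this file) combining the matchers above to recognise the insertelement+shufflevector splat idiom; the helper name isSplatOfScalar is an assumption, not an LLVM API.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: match
//   %ins   = insertelement <N x T> poison, T %s, i64 0
//   %splat = shufflevector %ins, poison, zeroinitializer
// and return the splatted scalar through Scalar.
static bool isSplatOfScalar(Value *V, Value *&Scalar) {
  return match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()),
                            m_Undef(), m_ZeroMask()));
}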
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
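A minimal sketch (not from this file) of the SEW/LMUL encode/decode helpers above, assuming the usual in-tree include path for RISCVBaseInfo.h; the values follow the vtype encoding in the RVV spec (SEW = 8*2^vsew).

#include "MCTargetDesc/RISCVBaseInfo.h"
using namespace llvm;

// Round-trip the vtype fields: SEW=32 encodes as vsew=2, LMUL=2 as VLMUL::LMUL_2.
static void vtypeRoundTrip() {
  unsigned VSEW = RISCVVType::encodeSEW(32);                  // 2
  unsigned SEW = RISCVVType::decodeVSEW(VSEW);                // 32
  RISCVII::VLMUL VLMul = RISCVVType::encodeLMUL(2, /*Fractional=*/false);
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);   // {2, false}
  (void)SEW; (void)LMul; (void)Fractional;
}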
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1528
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
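A minimal sketch (not from this file) of the range helpers documented above (any_of, count_if, is_contained, enumerate); the function name is illustrative.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void rangeHelpersDemo() {
  SmallVector<int, 4> V = {1, 2, 3, 4};
  bool HasEven = any_of(V, [](int X) { return X % 2 == 0; });    // true
  auto NumEven = count_if(V, [](int X) { return X % 2 == 0; });  // 2
  bool HasThree = is_contained(V, 3);                            // true
  for (auto [Idx, Val] : enumerate(V))                           // (0,1), (1,2), ...
    (void)Idx, (void)Val;
  (void)HasEven; (void)NumEven; (void)HasThree;
}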
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
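A minimal sketch (not from this file) exercising several of the MathExtras and Alignment helpers listed above; the expected values follow directly from their documented semantics.

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void mathExtrasDemo() {
  assert(isPowerOf2_64(64) && !isPowerOf2_64(0)); // powers of two must be > 0
  assert(Log2_64(64) == 6);
  assert(PowerOf2Ceil(33) == 64);
  assert(isMask_64(0xff));                        // contiguous ones from bit 0
  assert(divideCeil(10, 4) == 3);
  assert(SignExtend64<12>(0xFFF) == -1);          // sign-extend the low 12 bits
  assert(Log2(Align(16)) == 4);
}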
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
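For illustration (not from this file): createSequentialMask builds a shuffle mask of consecutive indices padded with undef (-1) sentinels.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static SmallVector<int, 16> sequentialMaskDemo() {
  // Yields {2, 3, 4, 5, -1, -1}.
  return createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
}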
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:323
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
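A minimal sketch (not from this file) of the EVT queries documented above; the function name and the chosen value types are illustrative.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static void evtDemo(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 8);       // v8i32, fixed length
  bool IsVec = VT.isVector();                        // true
  unsigned NumElts = VT.getVectorNumElements();      // 8
  TypeSize Bits = VT.getSizeInBits();                // 256 fixed bits
  EVT EltVT = VT.getVectorElementType();             // i32
  EVT AsFP = VT.changeVectorElementType(MVT::f32);   // v8f32
  EVT Scalable = EVT::getVectorVT(Ctx, MVT::i32, 8, /*IsScalable=*/true); // nxv8i32
  bool Scal = Scalable.isScalableVector();           // true
  (void)IsVec; (void)NumElts; (void)Bits; (void)EltVT; (void)AsFP; (void)Scal;
}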
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1042
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1002
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
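A minimal sketch (not from this file) of the KnownBits operations above: start from a value known to be masked with 0x00F0, shift it left by one, and query the result; the function name is illustrative.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static void knownBitsDemo() {
  KnownBits K(16);
  // Model `x & 0x00F0`: every bit outside 0x00F0 is known to be zero.
  K.Zero = APInt(16, 0xFF0F);
  KnownBits Sh = KnownBits::shl(K, KnownBits::makeConstant(APInt(16, 1)));
  unsigned MaxActive = Sh.countMaxActiveBits();  // at most 9: the result fits in 9 bits
  KnownBits Wide = Sh.zext(32);                  // the 16 new high bits are known zero
  unsigned Width = Wide.getBitWidth();           // 32
  (void)MaxActive; (void)Width;
}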
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)