1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
 403 // for the 32-bit case is efficient on 64-bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
527 if (!Subtarget.hasStdExtZfa())
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa())
553 else
555 }
556
557 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
559
560 if (Subtarget.hasStdExtDOrZdinx()) {
561 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
562
563 if (!Subtarget.is64Bit())
565
566 if (Subtarget.hasStdExtZfa()) {
567 setOperationAction(FPRndMode, MVT::f64, Legal);
569 } else {
570 if (Subtarget.is64Bit())
571 setOperationAction(FPRndMode, MVT::f64, Custom);
572
574 }
575
578 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
582 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
583 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
584 setOperationAction(FPOpToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
587 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
588 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
592 Subtarget.isSoftFPABI() ? LibCall : Custom);
595 }
596
597 if (Subtarget.is64Bit()) {
600 MVT::i32, Custom);
602 }
603
604 if (Subtarget.hasStdExtFOrZfinx()) {
606 Custom);
607
610 XLenVT, Legal);
611
612 if (RV64LegalI32 && Subtarget.is64Bit())
615 MVT::i32, Legal);
616
619 }
620
623 XLenVT, Custom);
624
626
627 if (Subtarget.is64Bit())
629
 630 // TODO: On M-mode-only targets, the cycle[h]/time[h] CSRs may not be present.
631 // Unfortunately this can't be determined just from the ISA naming string.
633 Subtarget.is64Bit() ? Legal : Custom);
635 Subtarget.is64Bit() ? Legal : Custom);
636
639 if (Subtarget.is64Bit())
641
642 if (Subtarget.hasStdExtZicbop()) {
644 }
645
646 if (Subtarget.hasStdExtA()) {
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
650 else
652 } else if (Subtarget.hasForcedAtomics()) {
654 } else {
656 }
657
659
661
662 if (Subtarget.hasVInstructions()) {
664
666 if (RV64LegalI32 && Subtarget.is64Bit())
668
669 // RVV intrinsics may have illegal operands.
670 // We also need to custom legalize vmv.x.s.
673 {MVT::i8, MVT::i16}, Custom);
674 if (Subtarget.is64Bit())
676 MVT::i32, Custom);
677 else
679 MVT::i64, Custom);
680
682 MVT::Other, Custom);
683
684 static const unsigned IntegerVPOps[] = {
685 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
686 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
687 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
688 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
689 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
690 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
691 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
692 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
693 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
694 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
695 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
696 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
697 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
698 ISD::VP_USUBSAT};
699
700 static const unsigned FloatingPointVPOps[] = {
701 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
702 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
703 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
704 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
705 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
706 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
707 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
708 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
709 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
710 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
711 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
712 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
713 ISD::EXPERIMENTAL_VP_SPLICE};
714
715 static const unsigned IntegerVecReduceOps[] = {
719
720 static const unsigned FloatingPointVecReduceOps[] = {
723
724 if (!Subtarget.is64Bit()) {
725 // We must custom-lower certain vXi64 operations on RV32 due to the vector
726 // element type being illegal.
728 MVT::i64, Custom);
729
730 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
731
732 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
733 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
734 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
735 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
736 MVT::i64, Custom);
737 }
738
739 for (MVT VT : BoolVecVTs) {
740 if (!isTypeLegal(VT))
741 continue;
742
744
745 // Mask VTs are custom-expanded into a series of standard nodes
749 VT, Custom);
750
752 Custom);
753
756 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
757 Expand);
758
759 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
760
763 Custom);
764
766 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
767 Custom);
768
769 // RVV has native int->float & float->int conversions where the
770 // element type sizes are within one power-of-two of each other. Any
771 // wider distances between type sizes have to be lowered as sequences
772 // which progressively narrow the gap in stages.
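  // For example, an i8 -> f64 conversion cannot be done in one step; it is
  // lowered via an intermediate width (roughly i8 -> i32, then a widening
  // i32 -> f64 convert).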
777 VT, Custom);
779 Custom);
780
781 // Expand all extending loads to types larger than this, and truncating
782 // stores from types larger than this.
784 setTruncStoreAction(VT, OtherVT, Expand);
786 OtherVT, Expand);
787 }
788
789 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
790 ISD::VP_TRUNCATE, ISD::VP_SETCC},
791 VT, Custom);
792
795
797
798 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
799 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
800
803 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
804 }
805
806 for (MVT VT : IntVecVTs) {
807 if (!isTypeLegal(VT))
808 continue;
809
812
813 // Vectors implement MULHS/MULHU.
815
816 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
817 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
819
821 Legal);
822
824
825 // Custom-lower extensions and truncations from/to mask types.
827 VT, Custom);
828
829 // RVV has native int->float & float->int conversions where the
830 // element type sizes are within one power-of-two of each other. Any
831 // wider distances between type sizes have to be lowered as sequences
832 // which progressively narrow the gap in stages.
837 VT, Custom);
839 Custom);
842 VT, Legal);
843
844 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
845 // nodes which truncate by one power of two at a time.
847
848 // Custom-lower insert/extract operations to simplify patterns.
850 Custom);
851
852 // Custom-lower reduction operations to set up the corresponding custom
853 // nodes' operands.
854 setOperationAction(IntegerVecReduceOps, VT, Custom);
855
856 setOperationAction(IntegerVPOps, VT, Custom);
857
859
861 VT, Custom);
862
864 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
865 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
866 VT, Custom);
867
870 VT, Custom);
871
874
876
878 setTruncStoreAction(VT, OtherVT, Expand);
880 OtherVT, Expand);
881 }
882
885
886 // Splice
888
889 if (Subtarget.hasStdExtZvkb()) {
891 setOperationAction(ISD::VP_BSWAP, VT, Custom);
892 } else {
893 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
895 }
896
897 if (Subtarget.hasStdExtZvbb()) {
899 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
900 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
901 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
902 VT, Custom);
903 } else {
904 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
906 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
907 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
908 VT, Expand);
909
 910 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
 911 // in the range of f32.
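  // (The idea: convert the element to f32 and read off its biased exponent,
  // which gives floor(log2(x)); the leading/trailing zero count follows.)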
912 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
913 if (isTypeLegal(FloatVT)) {
915 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
916 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
917 VT, Custom);
918 }
919 }
920 }
921
922 // Expand various CCs to best match the RVV ISA, which natively supports UNE
923 // but no other unordered comparisons, and supports all ordered comparisons
924 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
925 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
926 // and we pattern-match those back to the "original", swapping operands once
927 // more. This way we catch both operations and both "vf" and "fv" forms with
928 // fewer patterns.
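  // For example, (setogt x, y) is expanded to (setolt y, x); isel matches that
  // directly as a vector-vector compare (vmflt.vv), or swaps once more to use
  // vmfgt.vf when one side is a scalar splat.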
929 static const ISD::CondCode VFPCCToExpand[] = {
933 };
934
935 // TODO: support more ops.
936 static const unsigned ZvfhminPromoteOps[] = {
944
945 // TODO: support more vp ops.
946 static const unsigned ZvfhminPromoteVPOps[] = {
947 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
948 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
949 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
950 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
951 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
952 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
953 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
954 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
955 ISD::VP_FMAXIMUM};
956
957 // Sets common operation actions on RVV floating-point vector types.
958 const auto SetCommonVFPActions = [&](MVT VT) {
960 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
961 // sizes are within one power-of-two of each other. Therefore conversions
962 // between vXf16 and vXf64 must be lowered as sequences which convert via
963 // vXf32.
966 // Custom-lower insert/extract operations to simplify patterns.
968 Custom);
969 // Expand various condition codes (explained above).
970 setCondCodeAction(VFPCCToExpand, VT, Expand);
971
974
978 VT, Custom);
979
980 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
981
982 // Expand FP operations that need libcalls.
994
996
998
1000 VT, Custom);
1001
1003 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1004 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1005 VT, Custom);
1006
1009
1012 VT, Custom);
1013
1016
1018
1019 setOperationAction(FloatingPointVPOps, VT, Custom);
1020
1022 Custom);
1025 VT, Legal);
1030 VT, Custom);
1031 };
1032
1033 // Sets common extload/truncstore actions on RVV floating-point vector
1034 // types.
1035 const auto SetCommonVFPExtLoadTruncStoreActions =
1036 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1037 for (auto SmallVT : SmallerVTs) {
1038 setTruncStoreAction(VT, SmallVT, Expand);
1039 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1040 }
1041 };
1042
1043 if (Subtarget.hasVInstructionsF16()) {
1044 for (MVT VT : F16VecVTs) {
1045 if (!isTypeLegal(VT))
1046 continue;
1047 SetCommonVFPActions(VT);
1048 }
1049 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1050 for (MVT VT : F16VecVTs) {
1051 if (!isTypeLegal(VT))
1052 continue;
1055 Custom);
1056 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1057 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1058 Custom);
1061 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1062 VT, Custom);
1065 VT, Custom);
1066 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1068 // load/store
1070
 1071 // Custom split nxv32f16 since nxv32f32 is not legal.
1072 if (VT == MVT::nxv32f16) {
1073 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1074 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1075 continue;
1076 }
1077 // Add more promote ops.
1078 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1079 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1080 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1081 }
1082 }
1083
1084 if (Subtarget.hasVInstructionsF32()) {
1085 for (MVT VT : F32VecVTs) {
1086 if (!isTypeLegal(VT))
1087 continue;
1088 SetCommonVFPActions(VT);
1089 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1090 }
1091 }
1092
1093 if (Subtarget.hasVInstructionsF64()) {
1094 for (MVT VT : F64VecVTs) {
1095 if (!isTypeLegal(VT))
1096 continue;
1097 SetCommonVFPActions(VT);
1098 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1099 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1100 }
1101 }
1102
1103 if (Subtarget.useRVVForFixedLengthVectors()) {
1105 if (!useRVVForFixedLengthVectorVT(VT))
1106 continue;
1107
1108 // By default everything must be expanded.
1109 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1112 setTruncStoreAction(VT, OtherVT, Expand);
1114 OtherVT, Expand);
1115 }
1116
1117 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1118 // expansion to a build_vector of 0s.
1120
1121 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1123 Custom);
1124
1126 Custom);
1127
1129 VT, Custom);
1130
1132
1134
1136
1138
1140
1142
1145 Custom);
1146
1148 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1149 Custom);
1150
1152 {
1161 },
1162 VT, Custom);
1164 Custom);
1165
1167
 1168 // Operations below are different between masks and other vectors.
1169 if (VT.getVectorElementType() == MVT::i1) {
1170 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1171 ISD::OR, ISD::XOR},
1172 VT, Custom);
1173
1174 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1175 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1176 VT, Custom);
1177
1178 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1179 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1180 continue;
1181 }
1182
1183 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1184 // it before type legalization for i64 vectors on RV32. It will then be
1185 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1186 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1187 // improvements first.
1188 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1191 }
1192
1195
1196 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1197 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1198 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1199 ISD::VP_SCATTER},
1200 VT, Custom);
1201
1205 VT, Custom);
1206
1209
1211
1212 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1213 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1215
1218 VT, Custom);
1219
1222
1225
1226 // Custom-lower reduction operations to set up the corresponding custom
1227 // nodes' operands.
1231 VT, Custom);
1232
1233 setOperationAction(IntegerVPOps, VT, Custom);
1234
1235 if (Subtarget.hasStdExtZvkb())
1237
1238 if (Subtarget.hasStdExtZvbb()) {
1241 VT, Custom);
1242 } else {
 1243 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
 1244 // in the range of f32.
1245 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1246 if (isTypeLegal(FloatVT))
1249 Custom);
1250 }
1251 }
1252
1254 // There are no extending loads or truncating stores.
1255 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1256 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1257 setTruncStoreAction(VT, InnerVT, Expand);
1258 }
1259
1260 if (!useRVVForFixedLengthVectorVT(VT))
1261 continue;
1262
1263 // By default everything must be expanded.
1264 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1266
1267 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1268 // expansion to a build_vector of 0s.
1270
1271 if (VT.getVectorElementType() == MVT::f16 &&
1272 !Subtarget.hasVInstructionsF16()) {
1275 Custom);
1276 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1278 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1279 Custom);
1281 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1282 VT, Custom);
1285 VT, Custom);
1288 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1289 // Don't promote f16 vector operations to f32 if f32 vector type is
1290 // not legal.
1291 // TODO: could split the f16 vector into two vectors and do promotion.
1292 if (!isTypeLegal(F32VecVT))
1293 continue;
1294 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1295 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1296 continue;
1297 }
1298
1299 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1301 Custom);
1302
1306 VT, Custom);
1307
1310 VT, Custom);
1311
1312 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1313 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1314 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1315 ISD::VP_SCATTER},
1316 VT, Custom);
1317
1322 VT, Custom);
1323
1325
1328 VT, Custom);
1329
1330 setCondCodeAction(VFPCCToExpand, VT, Expand);
1331
1335
1337
1338 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1339
1340 setOperationAction(FloatingPointVPOps, VT, Custom);
1341
1343 Custom);
1350 VT, Custom);
1351 }
1352
1353 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1354 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1355 Custom);
1356 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1358 if (Subtarget.hasStdExtFOrZfinx())
1360 if (Subtarget.hasStdExtDOrZdinx())
1362 }
1363 }
1364
1365 if (Subtarget.hasStdExtA()) {
1367 if (RV64LegalI32 && Subtarget.is64Bit())
1369 }
1370
1371 if (Subtarget.hasForcedAtomics()) {
1372 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1378 XLenVT, LibCall);
1379 }
1380
1381 if (Subtarget.hasVendorXTHeadMemIdx()) {
1382 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1383 setIndexedLoadAction(im, MVT::i8, Legal);
1384 setIndexedStoreAction(im, MVT::i8, Legal);
1385 setIndexedLoadAction(im, MVT::i16, Legal);
1386 setIndexedStoreAction(im, MVT::i16, Legal);
1387 setIndexedLoadAction(im, MVT::i32, Legal);
1388 setIndexedStoreAction(im, MVT::i32, Legal);
1389
1390 if (Subtarget.is64Bit()) {
1391 setIndexedLoadAction(im, MVT::i64, Legal);
1392 setIndexedStoreAction(im, MVT::i64, Legal);
1393 }
1394 }
1395 }
1396
1397 // Function alignments.
1398 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1399 setMinFunctionAlignment(FunctionAlignment);
1400 // Set preferred alignments.
1403
1407 if (Subtarget.is64Bit())
1409
1410 if (Subtarget.hasStdExtFOrZfinx())
1412
1413 if (Subtarget.hasStdExtZbb())
1415
1416 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1418
1419 if (Subtarget.hasStdExtZbkb())
1421 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1423 if (Subtarget.hasStdExtFOrZfinx())
1426 if (Subtarget.hasVInstructions())
1428 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1431 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1434 if (Subtarget.hasVendorXTHeadMemPair())
1436 if (Subtarget.useRVVForFixedLengthVectors())
1438
1439 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1440 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1441
1442 // Disable strict node mutation.
1443 IsStrictFPEnabled = true;
1444}
1445
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
 1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
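  // With RVVBitsPerBlock = 64 this evaluates to (64 / 8) * 8 = 64.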
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(VF);
1486}
1487
1488bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getModule()->getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(0)->getType();
1513 } else {
 1514 // Use the return type. If it's a segment load, the return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1525 Info.flags |=
1527 return true;
1528 };
1529
1530 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1532
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1553 return true;
1554 case Intrinsic::riscv_masked_strided_load:
1555 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1556 /*IsUnitStrided*/ false);
1557 case Intrinsic::riscv_masked_strided_store:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1559 /*IsUnitStrided*/ false);
1560 case Intrinsic::riscv_seg2_load:
1561 case Intrinsic::riscv_seg3_load:
1562 case Intrinsic::riscv_seg4_load:
1563 case Intrinsic::riscv_seg5_load:
1564 case Intrinsic::riscv_seg6_load:
1565 case Intrinsic::riscv_seg7_load:
1566 case Intrinsic::riscv_seg8_load:
1567 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1568 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1569 case Intrinsic::riscv_seg2_store:
1570 case Intrinsic::riscv_seg3_store:
1571 case Intrinsic::riscv_seg4_store:
1572 case Intrinsic::riscv_seg5_store:
1573 case Intrinsic::riscv_seg6_store:
1574 case Intrinsic::riscv_seg7_store:
1575 case Intrinsic::riscv_seg8_store:
1576 // Operands are (vec, ..., vec, ptr, vl)
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1578 /*IsStore*/ true,
1579 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1580 case Intrinsic::riscv_vle:
1581 case Intrinsic::riscv_vle_mask:
1582 case Intrinsic::riscv_vleff:
1583 case Intrinsic::riscv_vleff_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ false,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vse:
1589 case Intrinsic::riscv_vse_mask:
1590 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1591 /*IsStore*/ true,
1592 /*IsUnitStrided*/ true,
1593 /*UsePtrVal*/ true);
1594 case Intrinsic::riscv_vlse:
1595 case Intrinsic::riscv_vlse_mask:
1596 case Intrinsic::riscv_vloxei:
1597 case Intrinsic::riscv_vloxei_mask:
1598 case Intrinsic::riscv_vluxei:
1599 case Intrinsic::riscv_vluxei_mask:
1600 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1601 /*IsStore*/ false,
1602 /*IsUnitStrided*/ false);
1603 case Intrinsic::riscv_vsse:
1604 case Intrinsic::riscv_vsse_mask:
1605 case Intrinsic::riscv_vsoxei:
1606 case Intrinsic::riscv_vsoxei_mask:
1607 case Intrinsic::riscv_vsuxei:
1608 case Intrinsic::riscv_vsuxei_mask:
1609 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1610 /*IsStore*/ true,
1611 /*IsUnitStrided*/ false);
1612 case Intrinsic::riscv_vlseg2:
1613 case Intrinsic::riscv_vlseg3:
1614 case Intrinsic::riscv_vlseg4:
1615 case Intrinsic::riscv_vlseg5:
1616 case Intrinsic::riscv_vlseg6:
1617 case Intrinsic::riscv_vlseg7:
1618 case Intrinsic::riscv_vlseg8:
1619 case Intrinsic::riscv_vlseg2ff:
1620 case Intrinsic::riscv_vlseg3ff:
1621 case Intrinsic::riscv_vlseg4ff:
1622 case Intrinsic::riscv_vlseg5ff:
1623 case Intrinsic::riscv_vlseg6ff:
1624 case Intrinsic::riscv_vlseg7ff:
1625 case Intrinsic::riscv_vlseg8ff:
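    // Operands are (passthru, ..., passthru, ptr, vl)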
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1627 /*IsStore*/ false,
1628 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1629 case Intrinsic::riscv_vlseg2_mask:
1630 case Intrinsic::riscv_vlseg3_mask:
1631 case Intrinsic::riscv_vlseg4_mask:
1632 case Intrinsic::riscv_vlseg5_mask:
1633 case Intrinsic::riscv_vlseg6_mask:
1634 case Intrinsic::riscv_vlseg7_mask:
1635 case Intrinsic::riscv_vlseg8_mask:
1636 case Intrinsic::riscv_vlseg2ff_mask:
1637 case Intrinsic::riscv_vlseg3ff_mask:
1638 case Intrinsic::riscv_vlseg4ff_mask:
1639 case Intrinsic::riscv_vlseg5ff_mask:
1640 case Intrinsic::riscv_vlseg6ff_mask:
1641 case Intrinsic::riscv_vlseg7ff_mask:
1642 case Intrinsic::riscv_vlseg8ff_mask:
1643 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1644 /*IsStore*/ false,
1645 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1646 case Intrinsic::riscv_vlsseg2:
1647 case Intrinsic::riscv_vlsseg3:
1648 case Intrinsic::riscv_vlsseg4:
1649 case Intrinsic::riscv_vlsseg5:
1650 case Intrinsic::riscv_vlsseg6:
1651 case Intrinsic::riscv_vlsseg7:
1652 case Intrinsic::riscv_vlsseg8:
1653 case Intrinsic::riscv_vloxseg2:
1654 case Intrinsic::riscv_vloxseg3:
1655 case Intrinsic::riscv_vloxseg4:
1656 case Intrinsic::riscv_vloxseg5:
1657 case Intrinsic::riscv_vloxseg6:
1658 case Intrinsic::riscv_vloxseg7:
1659 case Intrinsic::riscv_vloxseg8:
1660 case Intrinsic::riscv_vluxseg2:
1661 case Intrinsic::riscv_vluxseg3:
1662 case Intrinsic::riscv_vluxseg4:
1663 case Intrinsic::riscv_vluxseg5:
1664 case Intrinsic::riscv_vluxseg6:
1665 case Intrinsic::riscv_vluxseg7:
1666 case Intrinsic::riscv_vluxseg8:
1667 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1668 /*IsStore*/ false,
1669 /*IsUnitStrided*/ false);
1670 case Intrinsic::riscv_vlsseg2_mask:
1671 case Intrinsic::riscv_vlsseg3_mask:
1672 case Intrinsic::riscv_vlsseg4_mask:
1673 case Intrinsic::riscv_vlsseg5_mask:
1674 case Intrinsic::riscv_vlsseg6_mask:
1675 case Intrinsic::riscv_vlsseg7_mask:
1676 case Intrinsic::riscv_vlsseg8_mask:
1677 case Intrinsic::riscv_vloxseg2_mask:
1678 case Intrinsic::riscv_vloxseg3_mask:
1679 case Intrinsic::riscv_vloxseg4_mask:
1680 case Intrinsic::riscv_vloxseg5_mask:
1681 case Intrinsic::riscv_vloxseg6_mask:
1682 case Intrinsic::riscv_vloxseg7_mask:
1683 case Intrinsic::riscv_vloxseg8_mask:
1684 case Intrinsic::riscv_vluxseg2_mask:
1685 case Intrinsic::riscv_vluxseg3_mask:
1686 case Intrinsic::riscv_vluxseg4_mask:
1687 case Intrinsic::riscv_vluxseg5_mask:
1688 case Intrinsic::riscv_vluxseg6_mask:
1689 case Intrinsic::riscv_vluxseg7_mask:
1690 case Intrinsic::riscv_vluxseg8_mask:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false);
1694 case Intrinsic::riscv_vsseg2:
1695 case Intrinsic::riscv_vsseg3:
1696 case Intrinsic::riscv_vsseg4:
1697 case Intrinsic::riscv_vsseg5:
1698 case Intrinsic::riscv_vsseg6:
1699 case Intrinsic::riscv_vsseg7:
1700 case Intrinsic::riscv_vsseg8:
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false);
1704 case Intrinsic::riscv_vsseg2_mask:
1705 case Intrinsic::riscv_vsseg3_mask:
1706 case Intrinsic::riscv_vsseg4_mask:
1707 case Intrinsic::riscv_vsseg5_mask:
1708 case Intrinsic::riscv_vsseg6_mask:
1709 case Intrinsic::riscv_vsseg7_mask:
1710 case Intrinsic::riscv_vsseg8_mask:
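    // Operands are (vec, ..., vec, ptr, mask, vl)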
1711 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1712 /*IsStore*/ true,
1713 /*IsUnitStrided*/ false);
1714 case Intrinsic::riscv_vssseg2:
1715 case Intrinsic::riscv_vssseg3:
1716 case Intrinsic::riscv_vssseg4:
1717 case Intrinsic::riscv_vssseg5:
1718 case Intrinsic::riscv_vssseg6:
1719 case Intrinsic::riscv_vssseg7:
1720 case Intrinsic::riscv_vssseg8:
1721 case Intrinsic::riscv_vsoxseg2:
1722 case Intrinsic::riscv_vsoxseg3:
1723 case Intrinsic::riscv_vsoxseg4:
1724 case Intrinsic::riscv_vsoxseg5:
1725 case Intrinsic::riscv_vsoxseg6:
1726 case Intrinsic::riscv_vsoxseg7:
1727 case Intrinsic::riscv_vsoxseg8:
1728 case Intrinsic::riscv_vsuxseg2:
1729 case Intrinsic::riscv_vsuxseg3:
1730 case Intrinsic::riscv_vsuxseg4:
1731 case Intrinsic::riscv_vsuxseg5:
1732 case Intrinsic::riscv_vsuxseg6:
1733 case Intrinsic::riscv_vsuxseg7:
1734 case Intrinsic::riscv_vsuxseg8:
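    // Operands are (vec, ..., vec, ptr, stride or index, vl)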
1735 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1736 /*IsStore*/ true,
1737 /*IsUnitStrided*/ false);
1738 case Intrinsic::riscv_vssseg2_mask:
1739 case Intrinsic::riscv_vssseg3_mask:
1740 case Intrinsic::riscv_vssseg4_mask:
1741 case Intrinsic::riscv_vssseg5_mask:
1742 case Intrinsic::riscv_vssseg6_mask:
1743 case Intrinsic::riscv_vssseg7_mask:
1744 case Intrinsic::riscv_vssseg8_mask:
1745 case Intrinsic::riscv_vsoxseg2_mask:
1746 case Intrinsic::riscv_vsoxseg3_mask:
1747 case Intrinsic::riscv_vsoxseg4_mask:
1748 case Intrinsic::riscv_vsoxseg5_mask:
1749 case Intrinsic::riscv_vsoxseg6_mask:
1750 case Intrinsic::riscv_vsoxseg7_mask:
1751 case Intrinsic::riscv_vsoxseg8_mask:
1752 case Intrinsic::riscv_vsuxseg2_mask:
1753 case Intrinsic::riscv_vsuxseg3_mask:
1754 case Intrinsic::riscv_vsuxseg4_mask:
1755 case Intrinsic::riscv_vsuxseg5_mask:
1756 case Intrinsic::riscv_vsuxseg6_mask:
1757 case Intrinsic::riscv_vsuxseg7_mask:
1758 case Intrinsic::riscv_vsuxseg8_mask:
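    // Operands are (vec, ..., vec, ptr, stride or index, mask, vl)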
1759 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1760 /*IsStore*/ true,
1761 /*IsUnitStrided*/ false);
1762 }
1763}
1764
1766 const AddrMode &AM, Type *Ty,
1767 unsigned AS,
1768 Instruction *I) const {
1769 // No global is ever allowed as a base.
1770 if (AM.BaseGV)
1771 return false;
1772
1773 // RVV instructions only support register addressing.
1774 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1775 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1776
1777 // Require a 12-bit signed offset.
1778 if (!isInt<12>(AM.BaseOffs))
1779 return false;
1780
1781 switch (AM.Scale) {
1782 case 0: // "r+i" or just "i", depending on HasBaseReg.
1783 break;
1784 case 1:
1785 if (!AM.HasBaseReg) // allow "r+i".
1786 break;
1787 return false; // disallow "r+r" or "r+r+i".
1788 default:
1789 return false;
1790 }
1791
1792 return true;
1793}
1794
1796 return isInt<12>(Imm);
1797}
1798
1800 return isInt<12>(Imm);
1801}
1802
1803// On RV32, 64-bit integers are split into their high and low parts and held
1804// in two different registers, so the trunc is free since the low register can
1805// just be used.
1806// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1807// isTruncateFree?
1809 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1810 return false;
1811 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1812 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1813 return (SrcBits == 64 && DestBits == 32);
1814}
1815
1817 // We consider i64->i32 free on RV64 since we have good selection of W
1818 // instructions that make promoting operations back to i64 free in many cases.
1819 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1820 !DstVT.isInteger())
1821 return false;
1822 unsigned SrcBits = SrcVT.getSizeInBits();
1823 unsigned DestBits = DstVT.getSizeInBits();
1824 return (SrcBits == 64 && DestBits == 32);
1825}
1826
1828 // Zexts are free if they can be combined with a load.
1829 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1830 // poorly with type legalization of compares preferring sext.
1831 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1832 EVT MemVT = LD->getMemoryVT();
1833 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1834 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1835 LD->getExtensionType() == ISD::ZEXTLOAD))
1836 return true;
1837 }
1838
1839 return TargetLowering::isZExtFree(Val, VT2);
1840}
1841
1843 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1844}
1845
1847 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1848}
1849
1851 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1852}
1853
1855 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1856 Subtarget.hasVendorXCVbitmanip();
1857}
1858
1860 const Instruction &AndI) const {
1861 // We expect to be able to match a bit extraction instruction if the Zbs
1862 // extension is supported and the mask is a power of two. However, we
1863 // conservatively return false if the mask would fit in an ANDI instruction,
1864 // on the basis that it's possible the sinking+duplication of the AND in
1865 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1866 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1867 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1868 return false;
1869 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1870 if (!Mask)
1871 return false;
1872 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1873}
1874
1876 EVT VT = Y.getValueType();
1877
1878 // FIXME: Support vectors once we have tests.
1879 if (VT.isVector())
1880 return false;
1881
1882 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1883 !isa<ConstantSDNode>(Y);
1884}
1885
1887 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1888 if (Subtarget.hasStdExtZbs())
1889 return X.getValueType().isScalarInteger();
1890 auto *C = dyn_cast<ConstantSDNode>(Y);
 1891 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1892 if (Subtarget.hasVendorXTHeadBs())
1893 return C != nullptr;
1894 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
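  // (1 << Y) must fit in ANDI's 12-bit signed immediate, so Y can be at most 10.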
1895 return C && C->getAPIntValue().ule(10);
1896}
1897
1899 EVT VT) const {
1900 // Only enable for rvv.
1901 if (!VT.isVector() || !Subtarget.hasVInstructions())
1902 return false;
1903
1904 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1905 return false;
1906
1907 return true;
1908}
1909
1911 Type *Ty) const {
1912 assert(Ty->isIntegerTy());
1913
1914 unsigned BitSize = Ty->getIntegerBitWidth();
1915 if (BitSize > Subtarget.getXLen())
1916 return false;
1917
1918 // Fast path, assume 32-bit immediates are cheap.
1919 int64_t Val = Imm.getSExtValue();
1920 if (isInt<32>(Val))
1921 return true;
1922
 1923 // A constant pool entry may be more aligned than the load we're trying to
1924 // replace. If we don't support unaligned scalar mem, prefer the constant
1925 // pool.
1926 // TODO: Can the caller pass down the alignment?
1927 if (!Subtarget.enableUnalignedScalarMem())
1928 return true;
1929
1930 // Prefer to keep the load if it would require many instructions.
1931 // This uses the same threshold we use for constant pools but doesn't
1932 // check useConstantPoolForLargeInts.
1933 // TODO: Should we keep the load only when we're definitely going to emit a
1934 // constant pool?
1935
1937 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1938}
1939
1943 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1944 SelectionDAG &DAG) const {
1945 // One interesting pattern that we'd want to form is 'bit extract':
1946 // ((1 >> Y) & 1) ==/!= 0
1947 // But we also need to be careful not to try to reverse that fold.
1948
1949 // Is this '((1 >> Y) & 1)'?
1950 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1951 return false; // Keep the 'bit extract' pattern.
1952
1953 // Will this be '((1 >> Y) & 1)' after the transform?
1954 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1955 return true; // Do form the 'bit extract' pattern.
1956
1957 // If 'X' is a constant, and we transform, then we will immediately
1958 // try to undo the fold, thus causing endless combine loop.
1959 // So only do the transform if X is not a constant. This matches the default
1960 // implementation of this function.
1961 return !XC;
1962}
1963
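// Returns true if operand \p Operand of opcode \p Opcode can be folded as a
// scalar splat into a .vx/.vf instruction form. Shifts, divides and remainders
// are not commutative, and only their second operand has a scalar form (e.g.
// vsll.vx takes the shift amount from rs1), so only Operand == 1 qualifies.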
1964bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1965 switch (Opcode) {
1966 case Instruction::Add:
1967 case Instruction::Sub:
1968 case Instruction::Mul:
1969 case Instruction::And:
1970 case Instruction::Or:
1971 case Instruction::Xor:
1972 case Instruction::FAdd:
1973 case Instruction::FSub:
1974 case Instruction::FMul:
1975 case Instruction::FDiv:
1976 case Instruction::ICmp:
1977 case Instruction::FCmp:
1978 return true;
1979 case Instruction::Shl:
1980 case Instruction::LShr:
1981 case Instruction::AShr:
1982 case Instruction::UDiv:
1983 case Instruction::SDiv:
1984 case Instruction::URem:
1985 case Instruction::SRem:
1986 return Operand == 1;
1987 default:
1988 return false;
1989 }
1990}
1991
1992
1994 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1995 return false;
1996
1997 if (canSplatOperand(I->getOpcode(), Operand))
1998 return true;
1999
2000 auto *II = dyn_cast<IntrinsicInst>(I);
2001 if (!II)
2002 return false;
2003
2004 switch (II->getIntrinsicID()) {
2005 case Intrinsic::fma:
2006 case Intrinsic::vp_fma:
2007 return Operand == 0 || Operand == 1;
2008 case Intrinsic::vp_shl:
2009 case Intrinsic::vp_lshr:
2010 case Intrinsic::vp_ashr:
2011 case Intrinsic::vp_udiv:
2012 case Intrinsic::vp_sdiv:
2013 case Intrinsic::vp_urem:
2014 case Intrinsic::vp_srem:
2015 case Intrinsic::ssub_sat:
2016 case Intrinsic::vp_ssub_sat:
2017 case Intrinsic::usub_sat:
2018 case Intrinsic::vp_usub_sat:
2019 return Operand == 1;
2020 // These intrinsics are commutative.
2021 case Intrinsic::vp_add:
2022 case Intrinsic::vp_mul:
2023 case Intrinsic::vp_and:
2024 case Intrinsic::vp_or:
2025 case Intrinsic::vp_xor:
2026 case Intrinsic::vp_fadd:
2027 case Intrinsic::vp_fmul:
2028 case Intrinsic::vp_icmp:
2029 case Intrinsic::vp_fcmp:
2030 case Intrinsic::smin:
2031 case Intrinsic::vp_smin:
2032 case Intrinsic::umin:
2033 case Intrinsic::vp_umin:
2034 case Intrinsic::smax:
2035 case Intrinsic::vp_smax:
2036 case Intrinsic::umax:
2037 case Intrinsic::vp_umax:
2038 case Intrinsic::sadd_sat:
2039 case Intrinsic::vp_sadd_sat:
2040 case Intrinsic::uadd_sat:
2041 case Intrinsic::vp_uadd_sat:
2042 // These intrinsics have 'vr' versions.
2043 case Intrinsic::vp_sub:
2044 case Intrinsic::vp_fsub:
2045 case Intrinsic::vp_fdiv:
2046 return Operand == 0 || Operand == 1;
2047 default:
2048 return false;
2049 }
2050}
2051
2052/// Check if sinking \p I's operands to I's basic block is profitable, because
2053/// the operands can be folded into a target instruction, e.g.
2054/// splats of scalars can fold into vector instructions.
2056 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2057 using namespace llvm::PatternMatch;
2058
2059 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2060 return false;
2061
 2062 // Don't sink splat operands if the target prefers not to. Some targets require
 2063 // S2V transfer buffers and we can run out of them copying the same value
2064 // repeatedly.
2065 // FIXME: It could still be worth doing if it would improve vector register
2066 // pressure and prevent a vector spill.
2067 if (!Subtarget.sinkSplatOperands())
2068 return false;
2069
2070 for (auto OpIdx : enumerate(I->operands())) {
2071 if (!canSplatOperand(I, OpIdx.index()))
2072 continue;
2073
2074 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2075 // Make sure we are not already sinking this operand
2076 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2077 continue;
2078
2079 // We are looking for a splat that can be sunk.
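    // (i.e. an insertelement into lane 0 that is broadcast by a shufflevector
    // with an all-zeros mask).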
2081 m_Undef(), m_ZeroMask())))
2082 continue;
2083
2084 // Don't sink i1 splats.
2085 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2086 continue;
2087
 2088 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
 2089 // and vector registers.
2090 for (Use &U : Op->uses()) {
2091 Instruction *Insn = cast<Instruction>(U.getUser());
2092 if (!canSplatOperand(Insn, U.getOperandNo()))
2093 return false;
2094 }
2095
2096 Ops.push_back(&Op->getOperandUse(0));
2097 Ops.push_back(&OpIdx.value());
2098 }
2099 return true;
2100}
2101
2103 unsigned Opc = VecOp.getOpcode();
2104
2105 // Assume target opcodes can't be scalarized.
2106 // TODO - do we have any exceptions?
2107 if (Opc >= ISD::BUILTIN_OP_END)
2108 return false;
2109
2110 // If the vector op is not supported, try to convert to scalar.
2111 EVT VecVT = VecOp.getValueType();
2112 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2113 return true;
2114
2115 // If the vector op is supported, but the scalar op is not, the transform may
2116 // not be worthwhile.
 2117 // Permit a vector binary operation to be converted to a scalar binary
 2118 // operation that is custom-lowered with an illegal type.
2119 EVT ScalarVT = VecVT.getScalarType();
2120 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2121 isOperationCustom(Opc, ScalarVT);
2122}
2123
2125 const GlobalAddressSDNode *GA) const {
2126 // In order to maximise the opportunity for common subexpression elimination,
2127 // keep a separate ADD node for the global address offset instead of folding
2128 // it in the global address node. Later peephole optimisations may choose to
2129 // fold it back in when profitable.
2130 return false;
2131}
2132
 2133 // Return one of the following:
2134// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2135// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2136// positive counterpart, which will be materialized from the first returned
 2137 // element. The second returned element indicates that an FNEG should
 2138 // follow.
2139// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2140std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2141 EVT VT) const {
2142 if (!Subtarget.hasStdExtZfa())
2143 return std::make_pair(-1, false);
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return std::make_pair(-1, false);
2157
2159 if (Index < 0 && Imm.isNegative())
2160 // Try the combination of its positive counterpart + FNEG.
2161 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2162 else
2163 return std::make_pair(Index, false);
2164}
2165
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2182 return true;
2183
 2184 // Cannot create a 64-bit floating-point immediate value for RV32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence.
2197 const int Cost =
2198 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2199 Subtarget);
2200 return Cost <= FPImmCost;
2201}
2202
2203// TODO: This is very conservative.
2204 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2205 unsigned Index) const {
2206 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2207 return false;
2208
2209 // Only support extracting a fixed from a fixed vector for now.
2210 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2211 return false;
2212
2213 EVT EltVT = ResVT.getVectorElementType();
2214 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2215
2216 // The smallest type we can slide is i8.
2217 // TODO: We can extract index 0 from a mask vector without a slide.
2218 if (EltVT == MVT::i1)
2219 return false;
2220
2221 unsigned ResElts = ResVT.getVectorNumElements();
2222 unsigned SrcElts = SrcVT.getVectorNumElements();
2223
2224 unsigned MinVLen = Subtarget.getRealMinVLen();
2225 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2226
2227 // If we're extracting only data from the first VLEN bits of the source
2228 // then we can always do this with an m1 vslidedown.vx. Restricting the
2229 // Index ensures we can use a vslidedown.vi.
2230 // TODO: We can generalize this when the exact VLEN is known.
2231 if (Index + ResElts <= MinVLMAX && Index < 31)
2232 return true;
2233
2234 // Conservatively only handle extracting half of a vector.
2235 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2236 // a cheap extract. However, this case is important in practice for
2237 // shuffled extracts of longer vectors. How should this be resolved?
2238 if ((ResElts * 2) != SrcElts)
2239 return false;
2240
2241 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2242 // cheap.
2243 if (Index >= 32)
2244 return false;
2245
2246 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2247 // the upper half of a vector until we have more test coverage.
2248 return Index == 0 || Index == ResElts;
2249}
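// Worked example (assuming Zvl128b, i.e. MinVLen == 128): extracting v2i32
// from v8i32 at index 2 has MinVLMAX == 4, so Index + ResElts <= MinVLMAX
// holds and a single m1 vslidedown.vi suffices. Extracting the upper v4i32
// half at index 4 is also accepted by the final check, whereas the same
// half-width extract at index 3 is rejected.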
2250
2251 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2252 CallingConv::ID CC,
2253 EVT VT) const {
2254 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2255 // We might still end up using a GPR but that will be decided based on ABI.
2256 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2257 !Subtarget.hasStdExtZfhminOrZhinxmin())
2258 return MVT::f32;
2259
2260 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2261
2262 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2263 return MVT::i64;
2264
2265 return PartVT;
2266}
2267
2268 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2269 CallingConv::ID CC,
2270 EVT VT) const {
2271 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2272 // We might still end up using a GPR but that will be decided based on ABI.
2273 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2274 !Subtarget.hasStdExtZfhminOrZhinxmin())
2275 return 1;
2276
2277 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2278}
2279
2280 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2281 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2282 unsigned &NumIntermediates, MVT &RegisterVT) const {
2283 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2284 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2285
2286 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2287 IntermediateVT = MVT::i64;
2288
2289 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2290 RegisterVT = MVT::i64;
2291
2292 return NumRegs;
2293}
2294
2295// Changes the condition code and swaps operands if necessary, so the SetCC
2296// operation matches one of the comparisons supported directly by branches
2297// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2298// with 1/-1.
2299static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2300 ISD::CondCode &CC, SelectionDAG &DAG) {
2301 // If this is a single bit test that can't be handled by ANDI, shift the
2302 // bit to be tested to the MSB and perform a signed compare with 0.
2303 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2304 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2305 isa<ConstantSDNode>(LHS.getOperand(1))) {
2306 uint64_t Mask = LHS.getConstantOperandVal(1);
2307 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2308 unsigned ShAmt = 0;
2309 if (isPowerOf2_64(Mask)) {
2310 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2311 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2312 } else {
2313 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2314 }
2315
2316 LHS = LHS.getOperand(0);
2317 if (ShAmt != 0)
2318 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2319 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2320 return;
2321 }
2322 }
2323
2324 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2325 int64_t C = RHSC->getSExtValue();
2326 switch (CC) {
2327 default: break;
2328 case ISD::SETGT:
2329 // Convert X > -1 to X >= 0.
2330 if (C == -1) {
2331 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2332 CC = ISD::SETGE;
2333 return;
2334 }
2335 break;
2336 case ISD::SETLT:
2337 // Convert X < 1 to 0 >= X.
2338 if (C == 1) {
2339 RHS = LHS;
2340 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2341 CC = ISD::SETGE;
2342 return;
2343 }
2344 break;
2345 }
2346 }
2347
2348 switch (CC) {
2349 default:
2350 break;
2351 case ISD::SETGT:
2352 case ISD::SETLE:
2353 case ISD::SETUGT:
2354 case ISD::SETULE:
2355 CC = ISD::getSetCCSwappedOperands(CC);
2356 std::swap(LHS, RHS);
2357 break;
2358 }
2359}
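// Illustrative rewrites performed above (assuming XLEN-bit GPRs):
//   (seteq (and X, 0x800), 0) -> (setge (shl X, XLEN-12), 0)
//   (setgt X, -1)             -> (setge X, 0)
//   (setlt X, 1)              -> (setge 0, X)
//   (setugt X, Y)             -> (setult Y, X)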
2360
2361 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2362 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2363 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2364 if (VT.getVectorElementType() == MVT::i1)
2365 KnownSize *= 8;
2366
2367 switch (KnownSize) {
2368 default:
2369 llvm_unreachable("Invalid LMUL.");
2370 case 8:
2371 return RISCVII::VLMUL::LMUL_F8;
2372 case 16:
2373 return RISCVII::VLMUL::LMUL_F4;
2374 case 32:
2375 return RISCVII::VLMUL::LMUL_F2;
2376 case 64:
2377 return RISCVII::VLMUL::LMUL_1;
2378 case 128:
2379 return RISCVII::VLMUL::LMUL_2;
2380 case 256:
2381 return RISCVII::VLMUL::LMUL_4;
2382 case 512:
2383 return RISCVII::VLMUL::LMUL_8;
2384 }
2385}
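// Worked examples: nxv4i32 has a known minimum size of 128 bits and maps to
// LMUL_2; nxv1i8 (8 bits) maps to LMUL_F8. Mask vectors are scaled by 8
// first, so nxv8i1 is treated as 64 bits and maps to LMUL_1.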
2386
2387 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2388 switch (LMul) {
2389 default:
2390 llvm_unreachable("Invalid LMUL.");
2391 case RISCVII::VLMUL::LMUL_F8:
2392 case RISCVII::VLMUL::LMUL_F4:
2393 case RISCVII::VLMUL::LMUL_F2:
2394 case RISCVII::VLMUL::LMUL_1:
2395 return RISCV::VRRegClassID;
2396 case RISCVII::VLMUL::LMUL_2:
2397 return RISCV::VRM2RegClassID;
2398 case RISCVII::VLMUL::LMUL_4:
2399 return RISCV::VRM4RegClassID;
2400 case RISCVII::VLMUL::LMUL_8:
2401 return RISCV::VRM8RegClassID;
2402 }
2403}
2404
2405 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2406 RISCVII::VLMUL LMUL = getLMUL(VT);
2407 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2408 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2409 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2410 LMUL == RISCVII::VLMUL::LMUL_1) {
2411 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2412 "Unexpected subreg numbering");
2413 return RISCV::sub_vrm1_0 + Index;
2414 }
2415 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2416 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2417 "Unexpected subreg numbering");
2418 return RISCV::sub_vrm2_0 + Index;
2419 }
2420 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2421 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2422 "Unexpected subreg numbering");
2423 return RISCV::sub_vrm4_0 + Index;
2424 }
2425 llvm_unreachable("Invalid vector type.");
2426}
2427
2428 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2429 if (VT.getVectorElementType() == MVT::i1)
2430 return RISCV::VRRegClassID;
2431 return getRegClassIDForLMUL(getLMUL(VT));
2432}
2433
2434// Attempt to decompose a subvector insert/extract between VecVT and
2435// SubVecVT via subregister indices. Returns the subregister index that
2436// can perform the subvector insert/extract with the given element index, as
2437// well as the index corresponding to any leftover subvectors that must be
2438// further inserted/extracted within the register class for SubVecVT.
2439std::pair<unsigned, unsigned>
2440 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2441 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2442 const RISCVRegisterInfo *TRI) {
2443 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2444 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2445 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2446 "Register classes not ordered");
2447 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2448 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2449 // Try to compose a subregister index that takes us from the incoming
2450 // LMUL>1 register class down to the outgoing one. At each step we half
2451 // the LMUL:
2452 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2453 // Note that this is not guaranteed to find a subregister index, such as
2454 // when we are extracting from one VR type to another.
2455 unsigned SubRegIdx = RISCV::NoSubRegister;
2456 for (const unsigned RCID :
2457 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2458 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2459 VecVT = VecVT.getHalfNumVectorElementsVT();
2460 bool IsHi =
2461 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2462 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2463 getSubregIndexByMVT(VecVT, IsHi));
2464 if (IsHi)
2465 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2466 }
2467 return {SubRegIdx, InsertExtractIdx};
2468}
2469
2470// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2471// stores for those types.
2472bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2473 return !Subtarget.useRVVForFixedLengthVectors() ||
2474 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2475}
2476
2477 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2478 if (!ScalarTy.isSimple())
2479 return false;
2480 switch (ScalarTy.getSimpleVT().SimpleTy) {
2481 case MVT::iPTR:
2482 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2483 case MVT::i8:
2484 case MVT::i16:
2485 case MVT::i32:
2486 return true;
2487 case MVT::i64:
2488 return Subtarget.hasVInstructionsI64();
2489 case MVT::f16:
2490 return Subtarget.hasVInstructionsF16();
2491 case MVT::f32:
2492 return Subtarget.hasVInstructionsF32();
2493 case MVT::f64:
2494 return Subtarget.hasVInstructionsF64();
2495 default:
2496 return false;
2497 }
2498}
2499
2500
2501unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2502 return NumRepeatedDivisors;
2503}
2504
2505 static SDValue getVLOperand(SDValue Op) {
2506 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2507 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2508 "Unexpected opcode");
2509 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2510 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2511 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2512 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2513 if (!II)
2514 return SDValue();
2515 return Op.getOperand(II->VLOperand + 1 + HasChain);
2516}
2517
2518 static bool useRVVForFixedLengthVectorVT(MVT VT,
2519 const RISCVSubtarget &Subtarget) {
2520 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2521 if (!Subtarget.useRVVForFixedLengthVectors())
2522 return false;
2523
2524 // We only support a set of vector types with a consistent maximum fixed size
2525 // across all supported vector element types to avoid legalization issues.
2526 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2527 // fixed-length vector type we support is 1024 bytes.
2528 if (VT.getFixedSizeInBits() > 1024 * 8)
2529 return false;
2530
2531 unsigned MinVLen = Subtarget.getRealMinVLen();
2532
2533 MVT EltVT = VT.getVectorElementType();
2534
2535 // Don't use RVV for vectors we cannot scalarize if required.
2536 switch (EltVT.SimpleTy) {
2537 // i1 is supported but has different rules.
2538 default:
2539 return false;
2540 case MVT::i1:
2541 // Masks can only use a single register.
2542 if (VT.getVectorNumElements() > MinVLen)
2543 return false;
2544 MinVLen /= 8;
2545 break;
2546 case MVT::i8:
2547 case MVT::i16:
2548 case MVT::i32:
2549 break;
2550 case MVT::i64:
2551 if (!Subtarget.hasVInstructionsI64())
2552 return false;
2553 break;
2554 case MVT::f16:
2555 if (!Subtarget.hasVInstructionsF16Minimal())
2556 return false;
2557 break;
2558 case MVT::f32:
2559 if (!Subtarget.hasVInstructionsF32())
2560 return false;
2561 break;
2562 case MVT::f64:
2563 if (!Subtarget.hasVInstructionsF64())
2564 return false;
2565 break;
2566 }
2567
2568 // Reject elements larger than ELEN.
2569 if (EltVT.getSizeInBits() > Subtarget.getELen())
2570 return false;
2571
2572 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2573 // Don't use RVV for types that don't fit.
2574 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2575 return false;
2576
2577 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2578 // the base fixed length RVV support in place.
2579 if (!VT.isPow2VectorType())
2580 return false;
2581
2582 return true;
2583}
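// Illustrative examples (assuming Zvl128b and the default fixed-length LMUL
// cap of 8): v8i32 is 256 bits, needs LMul == 2 and is accepted; v64i32 is
// 2048 bits, would need LMul == 16 and is rejected; v7i32 is rejected by the
// power-of-2 check regardless of size.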
2584
2585bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2586 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2587}
2588
2589// Return the largest legal scalable vector type that matches VT's element type.
2590 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2591 const RISCVSubtarget &Subtarget) {
2592 // This may be called before legal types are setup.
2593 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2594 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2595 "Expected legal fixed length vector!");
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598 unsigned MaxELen = Subtarget.getELen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601 switch (EltVT.SimpleTy) {
2602 default:
2603 llvm_unreachable("unexpected element type for RVV container");
2604 case MVT::i1:
2605 case MVT::i8:
2606 case MVT::i16:
2607 case MVT::i32:
2608 case MVT::i64:
2609 case MVT::f16:
2610 case MVT::f32:
2611 case MVT::f64: {
2612 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2613 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2614 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2615 unsigned NumElts =
2616 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2617 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2618 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2619 return MVT::getScalableVectorVT(EltVT, NumElts);
2620 }
2621 }
2622}
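// Worked example (assuming Zvl128b, MinVLen == 128, MaxELen == 64): for
// v8i8, NumElts = (8 * 64) / 128 = 4, so the container is nxv4i8 (LMUL=1/2
// at VLEN=128); v16i8 maps to nxv8i8 (LMUL=1). The std::max clamp guarantees
// at least RVVBitsPerBlock / MaxELen = 1 element.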
2623
2624 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2625 const RISCVSubtarget &Subtarget) {
2626 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2627 Subtarget);
2628}
2629
2630 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2631 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2632}
2633
2634// Grow V to consume an entire RVV register.
2635 static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2636 const RISCVSubtarget &Subtarget) {
2637 assert(VT.isScalableVector() &&
2638 "Expected to convert into a scalable vector!");
2639 assert(V.getValueType().isFixedLengthVector() &&
2640 "Expected a fixed length vector operand!");
2641 SDLoc DL(V);
2642 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2643 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2644}
2645
2646// Shrink V so it's just big enough to maintain a VT's worth of data.
2647 static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2648 const RISCVSubtarget &Subtarget) {
2649 assert(VT.isFixedLengthVector() &&
2650 "Expected to convert into a fixed length vector!");
2651 assert(V.getValueType().isScalableVector() &&
2652 "Expected a scalable vector operand!");
2653 SDLoc DL(V);
2654 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2655 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2656}
2657
2658/// Return the mask type suitable for masking the provided
2659/// vector type. This is simply an i1 element type vector of the same
2660/// (possibly scalable) length.
2661static MVT getMaskTypeFor(MVT VecVT) {
2662 assert(VecVT.isVector());
2663 ElementCount EC = VecVT.getVectorElementCount();
2664 return MVT::getVectorVT(MVT::i1, EC);
2665}
2666
2667/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2668/// vector length VL.
2669static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2670 SelectionDAG &DAG) {
2671 MVT MaskVT = getMaskTypeFor(VecVT);
2672 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2673}
2674
2675static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2676 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2677 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2678 // canonicalize the representation. InsertVSETVLI will pick the immediate
2679 // encoding later if profitable.
2680 const auto [MinVLMAX, MaxVLMAX] =
2681 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2682 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2683 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2684
2685 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2686}
2687
2688static std::pair<SDValue, SDValue>
2689 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2690 const RISCVSubtarget &Subtarget) {
2691 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2692 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2693 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2694 return {Mask, VL};
2695}
2696
2697static std::pair<SDValue, SDValue>
2698getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2699 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2700 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2701 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2702 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2703 return {Mask, VL};
2704}
2705
2706// Gets the two common "VL" operands: an all-ones mask and the vector length.
2707// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2708// the vector type that the fixed-length vector is contained in. Otherwise if
2709// VecVT is scalable, then ContainerVT should be the same as VecVT.
2710static std::pair<SDValue, SDValue>
2711getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2712 const RISCVSubtarget &Subtarget) {
2713 if (VecVT.isFixedLengthVector())
2714 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2715 Subtarget);
2716 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2717 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2718}
2719
2720 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2721 SelectionDAG &DAG) const {
2722 assert(VecVT.isScalableVector() && "Expected scalable vector");
2723 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2724 VecVT.getVectorElementCount());
2725}
2726
2727std::pair<unsigned, unsigned>
2728 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2729 const RISCVSubtarget &Subtarget) {
2730 assert(VecVT.isScalableVector() && "Expected scalable vector");
2731
2732 unsigned EltSize = VecVT.getScalarSizeInBits();
2733 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2734
2735 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2736 unsigned MaxVLMAX =
2737 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2738
2739 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2740 unsigned MinVLMAX =
2741 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2742
2743 return std::make_pair(MinVLMAX, MaxVLMAX);
2744}
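// Worked example: for nxv4i32, EltSize == 32 and MinSize == 128 (LMUL=2).
// With a real VLEN range of [128, 65536] this returns {8, 4096}; if the
// exact VLEN is known to be 128, both bounds are 8.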
2745
2746// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2747// of either is (currently) supported. This can get us into an infinite loop
2748// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2749// as a ..., etc.
2750// Until either (or both) of these can reliably lower any node, reporting that
2751// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2752// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2753// which is not desirable.
2754 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2755 EVT VT, unsigned DefinedValues) const {
2756 return false;
2757}
2758
2759 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2760 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2761 // implementation-defined.
2762 if (!VT.isVector())
2763 return InstructionCost::getInvalid();
2764 unsigned DLenFactor = Subtarget.getDLenFactor();
2765 unsigned Cost;
2766 if (VT.isScalableVector()) {
2767 unsigned LMul;
2768 bool Fractional;
2769 std::tie(LMul, Fractional) =
2770 RISCVVType::decodeVLMUL(getLMUL(VT));
2771 if (Fractional)
2772 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2773 else
2774 Cost = (LMul * DLenFactor);
2775 } else {
2776 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2777 }
2778 return Cost;
2779}
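// Illustrative costs (assuming VLEN == 128 and DLEN == VLEN / 2, i.e.
// DLenFactor == 2): nxv4i32 is LMUL=2, so the cost is 2 * 2 = 4; nxv1i32 is
// fractional LMUL=1/2, costing DLenFactor / 2 = 1; a fixed v4i32 (128 bits)
// costs divideCeil(128, 128 / 2) = 2.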
2780
2781
2782/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2783/// is generally quadratic in the number of vreg implied by LMUL. Note that
2784/// operand (index and possibly mask) are handled separately.
2785 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2786 return getLMULCost(VT) * getLMULCost(VT);
2787}
2788
2789/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2790/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2791/// or may track the vrgather.vv cost. It is implementation-dependent.
2793 return getLMULCost(VT);
2794}
2795
2796/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2797/// for the type VT. (This does not cover the vslide1up or vslide1down
2798/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2799/// or may track the vrgather.vv cost. It is implementation-dependent.
2800 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2801 return getLMULCost(VT);
2802}
2803
2804/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2805/// for the type VT. (This does not cover the vslide1up or vslide1down
2806/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2807/// or may track the vrgather.vv cost. It is implementation-dependent.
2808 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2809 return getLMULCost(VT);
2810}
2811
2812 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2813 const RISCVSubtarget &Subtarget) {
2814 // RISC-V FP-to-int conversions saturate to the destination register size, but
2815 // don't produce 0 for nan. We can use a conversion instruction and fix the
2816 // nan case with a compare and a select.
2817 SDValue Src = Op.getOperand(0);
2818
2819 MVT DstVT = Op.getSimpleValueType();
2820 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2821
2822 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2823
2824 if (!DstVT.isVector()) {
2825 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2826 // the result.
2827 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2828 Src.getValueType() == MVT::bf16) {
2829 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2830 }
2831
2832 unsigned Opc;
2833 if (SatVT == DstVT)
2834 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2835 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2836 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2837 else
2838 return SDValue();
2839 // FIXME: Support other SatVTs by clamping before or after the conversion.
2840
2841 SDLoc DL(Op);
2842 SDValue FpToInt = DAG.getNode(
2843 Opc, DL, DstVT, Src,
2844 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2845
2846 if (Opc == RISCVISD::FCVT_WU_RV64)
2847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2848
2849 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2850 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2851 ISD::CondCode::SETUO);
2852 }
2853
2854 // Vectors.
2855
2856 MVT DstEltVT = DstVT.getVectorElementType();
2857 MVT SrcVT = Src.getSimpleValueType();
2858 MVT SrcEltVT = SrcVT.getVectorElementType();
2859 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2860 unsigned DstEltSize = DstEltVT.getSizeInBits();
2861
2862 // Only handle saturating to the destination type.
2863 if (SatVT != DstEltVT)
2864 return SDValue();
2865
2866 // FIXME: Don't support narrowing by more than 1 step for now.
2867 if (SrcEltSize > (2 * DstEltSize))
2868 return SDValue();
2869
2870 MVT DstContainerVT = DstVT;
2871 MVT SrcContainerVT = SrcVT;
2872 if (DstVT.isFixedLengthVector()) {
2873 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2874 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2875 assert(DstContainerVT.getVectorElementCount() ==
2876 SrcContainerVT.getVectorElementCount() &&
2877 "Expected same element count");
2878 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2879 }
2880
2881 SDLoc DL(Op);
2882
2883 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2884
2885 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2886 {Src, Src, DAG.getCondCode(ISD::SETNE),
2887 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2888
2889 // Need to widen by more than 1 step, promote the FP type, then do a widening
2890 // convert.
2891 if (DstEltSize > (2 * SrcEltSize)) {
2892 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2893 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2894 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2895 }
2896
2897 unsigned RVVOpc =
2898 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2899 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2900
2901 SDValue SplatZero = DAG.getNode(
2902 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2903 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2904 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2905 Res, DAG.getUNDEF(DstContainerVT), VL);
2906
2907 if (DstVT.isFixedLengthVector())
2908 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2909
2910 return Res;
2911}
2912
2913 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2914 switch (Opc) {
2915 case ISD::FROUNDEVEN:
2916 case ISD::STRICT_FROUNDEVEN:
2917 case ISD::VP_FROUNDEVEN:
2918 return RISCVFPRndMode::RNE;
2919 case ISD::FTRUNC:
2920 case ISD::STRICT_FTRUNC:
2921 case ISD::VP_FROUNDTOZERO:
2922 return RISCVFPRndMode::RTZ;
2923 case ISD::FFLOOR:
2924 case ISD::STRICT_FFLOOR:
2925 case ISD::VP_FFLOOR:
2926 return RISCVFPRndMode::RDN;
2927 case ISD::FCEIL:
2928 case ISD::STRICT_FCEIL:
2929 case ISD::VP_FCEIL:
2930 return RISCVFPRndMode::RUP;
2931 case ISD::FROUND:
2932 case ISD::STRICT_FROUND:
2933 case ISD::VP_FROUND:
2934 return RISCVFPRndMode::RMM;
2935 case ISD::FRINT:
2936 return RISCVFPRndMode::DYN;
2937 }
2938
2939 return RISCVFPRndMode::Invalid;
2940}
2941
2942// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2943// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2944// the integer domain and back. Taking care to avoid converting values that are
2945// nan or already correct.
2946static SDValue
2947 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2948 const RISCVSubtarget &Subtarget) {
2949 MVT VT = Op.getSimpleValueType();
2950 assert(VT.isVector() && "Unexpected type");
2951
2952 SDLoc DL(Op);
2953
2954 SDValue Src = Op.getOperand(0);
2955
2956 MVT ContainerVT = VT;
2957 if (VT.isFixedLengthVector()) {
2958 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2959 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2960 }
2961
2962 SDValue Mask, VL;
2963 if (Op->isVPOpcode()) {
2964 Mask = Op.getOperand(1);
2965 if (VT.isFixedLengthVector())
2966 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2967 Subtarget);
2968 VL = Op.getOperand(2);
2969 } else {
2970 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2971 }
2972
2973 // Freeze the source since we are increasing the number of uses.
2974 Src = DAG.getFreeze(Src);
2975
2976 // We do the conversion on the absolute value and fix the sign at the end.
2977 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2978
2979 // Determine the largest integer that can be represented exactly. This and
2980 // values larger than it don't have any fractional bits so don't need to
2981 // be converted.
2982 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2983 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2984 APFloat MaxVal = APFloat(FltSem);
2985 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2986 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2987 SDValue MaxValNode =
2988 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2989 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2990 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2991
2992 // If abs(Src) was larger than MaxVal or nan, keep it.
2993 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2994 Mask =
2995 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2996 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2997 Mask, Mask, VL});
2998
2999 // Truncate to integer and convert back to FP.
3000 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3001 MVT XLenVT = Subtarget.getXLenVT();
3002 SDValue Truncated;
3003
3004 switch (Op.getOpcode()) {
3005 default:
3006 llvm_unreachable("Unexpected opcode");
3007 case ISD::FCEIL:
3008 case ISD::VP_FCEIL:
3009 case ISD::FFLOOR:
3010 case ISD::VP_FFLOOR:
3011 case ISD::FROUND:
3012 case ISD::FROUNDEVEN:
3013 case ISD::VP_FROUND:
3014 case ISD::VP_FROUNDEVEN:
3015 case ISD::VP_FROUNDTOZERO: {
3016 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3017 assert(FRM != RISCVFPRndMode::Invalid);
3018 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3019 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3020 break;
3021 }
3022 case ISD::FTRUNC:
3023 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3024 Mask, VL);
3025 break;
3026 case ISD::FRINT:
3027 case ISD::VP_FRINT:
3028 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3029 break;
3030 case ISD::FNEARBYINT:
3031 case ISD::VP_FNEARBYINT:
3032 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3033 Mask, VL);
3034 break;
3035 }
3036
3037 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3038 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3039 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3040 Mask, VL);
3041
3042 // Restore the original sign so that -0.0 is preserved.
3043 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3044 Src, Src, Mask, VL);
3045
3046 if (!VT.isFixedLengthVector())
3047 return Truncated;
3048
3049 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3050}
3051
3052// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3053// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3054// qNan and converting the new source to integer and back to FP.
3055static SDValue
3056 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3057 const RISCVSubtarget &Subtarget) {
3058 SDLoc DL(Op);
3059 MVT VT = Op.getSimpleValueType();
3060 SDValue Chain = Op.getOperand(0);
3061 SDValue Src = Op.getOperand(1);
3062
3063 MVT ContainerVT = VT;
3064 if (VT.isFixedLengthVector()) {
3065 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3066 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3067 }
3068
3069 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3070
3071 // Freeze the source since we are increasing the number of uses.
3072 Src = DAG.getFreeze(Src);
3073
3074 // Convert sNan to qNan by executing x + x for all unordered elements x in Src.
3075 MVT MaskVT = Mask.getSimpleValueType();
3076 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3077 DAG.getVTList(MaskVT, MVT::Other),
3078 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3079 DAG.getUNDEF(MaskVT), Mask, VL});
3080 Chain = Unorder.getValue(1);
3081 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3082 DAG.getVTList(ContainerVT, MVT::Other),
3083 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3084 Chain = Src.getValue(1);
3085
3086 // We do the conversion on the absolute value and fix the sign at the end.
3087 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3088
3089 // Determine the largest integer that can be represented exactly. This and
3090 // values larger than it don't have any fractional bits so don't need to
3091 // be converted.
3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3093 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3094 APFloat MaxVal = APFloat(FltSem);
3095 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3096 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3097 SDValue MaxValNode =
3098 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3099 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3100 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3101
3102 // If abs(Src) was larger than MaxVal or nan, keep it.
3103 Mask = DAG.getNode(
3104 RISCVISD::SETCC_VL, DL, MaskVT,
3105 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3106
3107 // Truncate to integer and convert back to FP.
3108 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3109 MVT XLenVT = Subtarget.getXLenVT();
3110 SDValue Truncated;
3111
3112 switch (Op.getOpcode()) {
3113 default:
3114 llvm_unreachable("Unexpected opcode");
3115 case ISD::STRICT_FCEIL:
3116 case ISD::STRICT_FFLOOR:
3117 case ISD::STRICT_FROUND:
3118 case ISD::STRICT_FROUNDEVEN: {
3119 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3120 assert(FRM != RISCVFPRndMode::Invalid);
3121 Truncated = DAG.getNode(
3122 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3123 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3124 break;
3125 }
3126 case ISD::STRICT_FTRUNC:
3127 Truncated =
3128 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3129 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3130 break;
3131 case ISD::STRICT_FNEARBYINT:
3132 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3133 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3134 Mask, VL);
3135 break;
3136 }
3137 Chain = Truncated.getValue(1);
3138
3139 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3140 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3141 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3142 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3143 Truncated, Mask, VL);
3144 Chain = Truncated.getValue(1);
3145 }
3146
3147 // Restore the original sign so that -0.0 is preserved.
3148 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3149 Src, Src, Mask, VL);
3150
3151 if (VT.isFixedLengthVector())
3152 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3153 return DAG.getMergeValues({Truncated, Chain}, DL);
3154}
3155
3156static SDValue
3158 const RISCVSubtarget &Subtarget) {
3159 MVT VT = Op.getSimpleValueType();
3160 if (VT.isVector())
3161 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3162
3163 if (DAG.shouldOptForSize())
3164 return SDValue();
3165
3166 SDLoc DL(Op);
3167 SDValue Src = Op.getOperand(0);
3168
3169 // Create an integer the size of the mantissa with the MSB set. This and all
3170 // values larger than it don't have any fractional bits so don't need to be
3171 // converted.
3172 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3173 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3174 APFloat MaxVal = APFloat(FltSem);
3175 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3176 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3177 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3178
3179 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3180 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3181 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3182}
3183
3184// Expand vector LRINT and LLRINT by converting to the integer domain.
3185 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3186 const RISCVSubtarget &Subtarget) {
3187 MVT VT = Op.getSimpleValueType();
3188 assert(VT.isVector() && "Unexpected type");
3189
3190 SDLoc DL(Op);
3191 SDValue Src = Op.getOperand(0);
3192 MVT ContainerVT = VT;
3193
3194 if (VT.isFixedLengthVector()) {
3195 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3197 }
3198
3199 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3200 SDValue Truncated =
3201 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3202
3203 if (!VT.isFixedLengthVector())
3204 return Truncated;
3205
3206 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3207}
3208
3209static SDValue
3210 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3211 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3212 SDValue Offset, SDValue Mask, SDValue VL,
3213 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3214 if (Merge.isUndef())
3215 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3216 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3217 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3218 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3219}
3220
3221static SDValue
3222getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3223 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3224 SDValue VL,
3225 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3226 if (Merge.isUndef())
3227 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3228 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3229 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3230 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3231}
3232
3233static MVT getLMUL1VT(MVT VT) {
3235 "Unexpected vector MVT");
3239}
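// For example, getLMUL1VT(nxv8i16) is nxv4i16: RVVBitsPerBlock / 16 == 4
// elements, i.e. the LMUL=1 type with the same element type.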
3240
3241 struct VIDSequence {
3242 int64_t StepNumerator;
3243 unsigned StepDenominator;
3244 int64_t Addend;
3245};
3246
3247static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3248 uint32_t BitWidth) {
3249 // We will use a SINT_TO_FP to materialize this constant so we should use a
3250 // signed APSInt here.
3251 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3252 // We use an arbitrary rounding mode here. If a floating-point is an exact
3253 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3254 // the rounding mode changes the output value, then it is not an exact
3255 // integer.
3256 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3257 bool IsExact;
3258 // If it is out of signed integer range, it will return an invalid operation.
3259 // If it is not an exact integer, IsExact is false.
3260 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3261 APFloatBase::opInvalidOp) ||
3262 !IsExact)
3263 return std::nullopt;
3264 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3265}
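// For example, an APFloat of 3.0 with BitWidth 32 yields 3, while 2.5 (not
// an exact integer) or a value outside the signed 32-bit range both yield
// std::nullopt.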
3266
3267// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3268// to the (non-zero) step S and start value X. This can be then lowered as the
3269// RVV sequence (VID * S) + X, for example.
3270// The step S is represented as an integer numerator divided by a positive
3271// denominator. Note that the implementation currently only identifies
3272// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3273// cannot detect 2/3, for example.
3274// Note that this method will also match potentially unappealing index
3275// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3276// determine whether this is worth generating code for.
3277static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3278 unsigned EltSizeInBits) {
3279 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3280 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3281 return std::nullopt;
3282 bool IsInteger = Op.getValueType().isInteger();
3283
3284 std::optional<unsigned> SeqStepDenom;
3285 std::optional<int64_t> SeqStepNum, SeqAddend;
3286 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3287 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3288
3289 // First extract the ops into a list of constant integer values. This may not
3290 // be possible for floats if they're not all representable as integers.
3291 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());
3292 const unsigned OpSize = Op.getScalarValueSizeInBits();
3293 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3294 if (Elt.isUndef()) {
3295 Elts[Idx] = std::nullopt;
3296 continue;
3297 }
3298 if (IsInteger) {
3299 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3300 } else {
3301 auto ExactInteger =
3302 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3303 if (!ExactInteger)
3304 return std::nullopt;
3305 Elts[Idx] = *ExactInteger;
3306 }
3307 }
3308
3309 for (auto [Idx, Elt] : enumerate(Elts)) {
3310 // Assume undef elements match the sequence; we just have to be careful
3311 // when interpolating across them.
3312 if (!Elt)
3313 continue;
3314
3315 if (PrevElt) {
3316 // Calculate the step since the last non-undef element, and ensure
3317 // it's consistent across the entire sequence.
3318 unsigned IdxDiff = Idx - PrevElt->second;
3319 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3320
3321 // A zero value difference means that we're somewhere in the middle
3322 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3323 // step change before evaluating the sequence.
3324 if (ValDiff == 0)
3325 continue;
3326
3327 int64_t Remainder = ValDiff % IdxDiff;
3328 // Normalize the step if it's greater than 1.
3329 if (Remainder != ValDiff) {
3330 // The difference must cleanly divide the element span.
3331 if (Remainder != 0)
3332 return std::nullopt;
3333 ValDiff /= IdxDiff;
3334 IdxDiff = 1;
3335 }
3336
3337 if (!SeqStepNum)
3338 SeqStepNum = ValDiff;
3339 else if (ValDiff != SeqStepNum)
3340 return std::nullopt;
3341
3342 if (!SeqStepDenom)
3343 SeqStepDenom = IdxDiff;
3344 else if (IdxDiff != *SeqStepDenom)
3345 return std::nullopt;
3346 }
3347
3348 // Record this non-undef element for later.
3349 if (!PrevElt || PrevElt->first != *Elt)
3350 PrevElt = std::make_pair(*Elt, Idx);
3351 }
3352
3353 // We need to have logged a step for this to count as a legal index sequence.
3354 if (!SeqStepNum || !SeqStepDenom)
3355 return std::nullopt;
3356
3357 // Loop back through the sequence and validate elements we might have skipped
3358 // while waiting for a valid step. While doing this, log any sequence addend.
3359 for (auto [Idx, Elt] : enumerate(Elts)) {
3360 if (!Elt)
3361 continue;
3362 uint64_t ExpectedVal =
3363 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3364 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3365 if (!SeqAddend)
3366 SeqAddend = Addend;
3367 else if (Addend != SeqAddend)
3368 return std::nullopt;
3369 }
3370
3371 assert(SeqAddend && "Must have an addend if we have a step");
3372
3373 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3374}
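// Worked examples: <3, 5, 7, 9> matches as {StepNumerator=2,
// StepDenominator=1, Addend=3}; <0, 0, 1, 1> matches as {StepNumerator=1,
// StepDenominator=2, Addend=0}; <0, 1, 3, 4> does not match because the
// step is inconsistent.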
3375
3376// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3377// and lower it as a VRGATHER_VX_VL from the source vector.
3378static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3379 SelectionDAG &DAG,
3380 const RISCVSubtarget &Subtarget) {
3381 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3382 return SDValue();
3383 SDValue Vec = SplatVal.getOperand(0);
3384 // Only perform this optimization on vectors of the same size for simplicity.
3385 // Don't perform this optimization for i1 vectors.
3386 // FIXME: Support i1 vectors, maybe by promoting to i8?
3387 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3388 return SDValue();
3389 SDValue Idx = SplatVal.getOperand(1);
3390 // The index must be a legal type.
3391 if (Idx.getValueType() != Subtarget.getXLenVT())
3392 return SDValue();
3393
3394 MVT ContainerVT = VT;
3395 if (VT.isFixedLengthVector()) {
3396 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3398 }
3399
3400 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3401
3402 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3403 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3404
3405 if (!VT.isFixedLengthVector())
3406 return Gather;
3407
3408 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3409}
3410
3411
3412/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3413/// which constitute a large proportion of the elements. In such cases we can
3414/// splat a vector with the dominant element and make up the shortfall with
3415/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3416/// Note that this includes vectors of 2 elements by association. The
3417/// upper-most element is the "dominant" one, allowing us to use a splat to
3418/// "insert" the upper element, and an insert of the lower element at position
3419/// 0, which improves codegen.
3420 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3421 const RISCVSubtarget &Subtarget) {
3422 MVT VT = Op.getSimpleValueType();
3423 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3424
3425 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3426
3427 SDLoc DL(Op);
3428 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3429
3430 MVT XLenVT = Subtarget.getXLenVT();
3431 unsigned NumElts = Op.getNumOperands();
3432
3433 SDValue DominantValue;
3434 unsigned MostCommonCount = 0;
3435 DenseMap<SDValue, unsigned> ValueCounts;
3436 unsigned NumUndefElts =
3437 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3438
3439 // Track the number of scalar loads we know we'd be inserting, estimated as
3440 // any non-zero floating-point constant. Other kinds of element are either
3441 // already in registers or are materialized on demand. The threshold at which
3442 // a vector load is more desirable than several scalar materialization and
3443 // vector-insertion instructions is not known.
3444 unsigned NumScalarLoads = 0;
3445
3446 for (SDValue V : Op->op_values()) {
3447 if (V.isUndef())
3448 continue;
3449
3450 ValueCounts.insert(std::make_pair(V, 0));
3451 unsigned &Count = ValueCounts[V];
3452 if (0 == Count)
3453 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3454 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3455
3456 // Is this value dominant? In case of a tie, prefer the highest element as
3457 // it's cheaper to insert near the beginning of a vector than it is at the
3458 // end.
3459 if (++Count >= MostCommonCount) {
3460 DominantValue = V;
3461 MostCommonCount = Count;
3462 }
3463 }
3464
3465 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3466 unsigned NumDefElts = NumElts - NumUndefElts;
3467 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3468
3469 // Don't perform this optimization when optimizing for size, since
3470 // materializing elements and inserting them tends to cause code bloat.
3471 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3472 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3473 ((MostCommonCount > DominantValueCountThreshold) ||
3474 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3475 // Start by splatting the most common element.
3476 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3477
3478 DenseSet<SDValue> Processed{DominantValue};
3479
3480 // We can handle an insert into the last element (of a splat) via
3481 // v(f)slide1down. This is slightly better than the vslideup insert
3482 // lowering as it avoids the need for a vector group temporary. It
3483 // is also better than using vmerge.vx as it avoids the need to
3484 // materialize the mask in a vector register.
3485 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3486 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3487 LastOp != DominantValue) {
3488 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3490 auto OpCode = VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3491 if (!VT.isFloatingPoint())
3492 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3493 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3494 LastOp, Mask, VL);
3495 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3496 Processed.insert(LastOp);
3497 }
3498
3499 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3500 for (const auto &OpIdx : enumerate(Op->ops())) {
3501 const SDValue &V = OpIdx.value();
3502 if (V.isUndef() || !Processed.insert(V).second)
3503 continue;
3504 if (ValueCounts[V] == 1) {
3505 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3506 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3507 } else {
3508 // Blend in all instances of this value using a VSELECT, using a
3509 // mask where each bit signals whether that element is the one
3510 // we're after.
3512 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3513 return DAG.getConstant(V == V1, DL, XLenVT);
3514 });
3515 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3516 DAG.getBuildVector(SelMaskTy, DL, Ops),
3517 DAG.getSplatBuildVector(VT, DL, V), Vec);
3518 }
3519 }
3520
3521 return Vec;
3522 }
3523
3524 return SDValue();
3525}
3526
3527 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3528 const RISCVSubtarget &Subtarget) {
3529 MVT VT = Op.getSimpleValueType();
3530 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3531
3532 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3533
3534 SDLoc DL(Op);
3535 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3536
3537 MVT XLenVT = Subtarget.getXLenVT();
3538 unsigned NumElts = Op.getNumOperands();
3539
3540 if (VT.getVectorElementType() == MVT::i1) {
3541 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3542 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3543 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3544 }
3545
3546 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3547 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3548 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3549 }
3550
3551 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3552 // scalar integer chunks whose bit-width depends on the number of mask
3553 // bits and XLEN.
3554 // First, determine the most appropriate scalar integer type to use. This
3555 // is at most XLenVT, but may be shrunk to a smaller vector element type
3556 // according to the size of the final vector - use i8 chunks rather than
3557 // XLenVT if we're producing a v8i1. This results in more consistent
3558 // codegen across RV32 and RV64.
3559 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3560 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3561 // If we have to use more than one INSERT_VECTOR_ELT then this
3562 // optimization is likely to increase code size; avoid performing it in
3563 // such a case. We can use a load from a constant pool in this case.
3564 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3565 return SDValue();
3566 // Now we can create our integer vector type. Note that it may be larger
3567 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3568 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3569 MVT IntegerViaVecVT =
3570 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3571 IntegerViaVecElts);
3572
3573 uint64_t Bits = 0;
3574 unsigned BitPos = 0, IntegerEltIdx = 0;
3575 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3576
3577 for (unsigned I = 0; I < NumElts;) {
3578 SDValue V = Op.getOperand(I);
3579 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3580 Bits |= ((uint64_t)BitValue << BitPos);
3581 ++BitPos;
3582 ++I;
3583
3584 // Once we accumulate enough bits to fill our scalar type or process the
3585 // last element, insert into our vector and clear our accumulated data.
3586 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3587 if (NumViaIntegerBits <= 32)
3588 Bits = SignExtend64<32>(Bits);
3589 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3590 Elts[IntegerEltIdx] = Elt;
3591 Bits = 0;
3592 BitPos = 0;
3593 IntegerEltIdx++;
3594 }
3595 }
3596
3597 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3598
3599 if (NumElts < NumViaIntegerBits) {
3600 // If we're producing a smaller vector than our minimum legal integer
3601 // type, bitcast to the equivalent (known-legal) mask type, and extract
3602 // our final mask.
3603 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3604 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3605 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3606 DAG.getConstant(0, DL, XLenVT));
3607 } else {
3608 // Else we must have produced an integer type with the same size as the
3609 // mask type; bitcast for the final result.
3610 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3611 Vec = DAG.getBitcast(VT, Vec);
3612 }
3613
3614 return Vec;
3615 }
3616
3617 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3618 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3619 : RISCVISD::VMV_V_X_VL;
3620 if (!VT.isFloatingPoint())
3621 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3622 Splat =
3623 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3624 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3625 }
3626
3627 // Try and match index sequences, which we can lower to the vid instruction
3628 // with optional modifications. An all-undef vector is matched by
3629 // getSplatValue, above.
3630 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3631 int64_t StepNumerator = SimpleVID->StepNumerator;
3632 unsigned StepDenominator = SimpleVID->StepDenominator;
3633 int64_t Addend = SimpleVID->Addend;
3634
3635 assert(StepNumerator != 0 && "Invalid step");
3636 bool Negate = false;
3637 int64_t SplatStepVal = StepNumerator;
3638 unsigned StepOpcode = ISD::MUL;
3639 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3640 // anyway as the shift of 63 won't fit in uimm5.
3641 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3642 isPowerOf2_64(std::abs(StepNumerator))) {
3643 Negate = StepNumerator < 0;
3644 StepOpcode = ISD::SHL;
3645 SplatStepVal = Log2_64(std::abs(StepNumerator));
3646 }
3647
3648 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3649 // threshold since it's the immediate value many RVV instructions accept.
3650 // There is no vmul.vi instruction so ensure multiply constant can fit in
3651 // a single addi instruction.
3652 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3653 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3654 isPowerOf2_32(StepDenominator) &&
3655 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3656 MVT VIDVT =
3657 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3658 MVT VIDContainerVT =
3659 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3660 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3661 // Convert right out of the scalable type so we can use standard ISD
3662 // nodes for the rest of the computation. If we used scalable types with
3663 // these, we'd lose the fixed-length vector info and generate worse
3664 // vsetvli code.
3665 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3666 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3667 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3668 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3669 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3670 }
3671 if (StepDenominator != 1) {
3672 SDValue SplatStep =
3673 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3674 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3675 }
3676 if (Addend != 0 || Negate) {
3677 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3678 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3679 VID);
3680 }
3681 if (VT.isFloatingPoint()) {
3682 // TODO: Use vfwcvt to reduce register pressure.
3683 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3684 }
3685 return VID;
3686 }
3687 }
3688
3689 // For very small build_vectors, use a single scalar insert of a constant.
3690 // TODO: Base this on constant rematerialization cost, not size.
3691 const unsigned EltBitSize = VT.getScalarSizeInBits();
3692 if (VT.getSizeInBits() <= 32 &&
3693 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3694 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3695 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3696 "Unexpected sequence type");
3697 // If we can use the original VL with the modified element type, this
3698 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3699 // be moved into InsertVSETVLI?
3700 unsigned ViaVecLen =
3701 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3702 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3703
3704 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3705 uint64_t SplatValue = 0;
3706 // Construct the amalgamated value at this larger vector type.
3707 for (const auto &OpIdx : enumerate(Op->op_values())) {
3708 const auto &SeqV = OpIdx.value();
3709 if (!SeqV.isUndef())
3710 SplatValue |=
3711 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3712 }
3713
3714 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3715 // achieve better constant materialization.
3716 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3717 SplatValue = SignExtend64<32>(SplatValue);
3718
3719 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3720 DAG.getUNDEF(ViaVecVT),
3721 DAG.getConstant(SplatValue, DL, XLenVT),
3722 DAG.getVectorIdxConstant(0, DL));
3723 if (ViaVecLen != 1)
3724 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3725 MVT::getVectorVT(ViaIntVT, 1), Vec,
3726 DAG.getConstant(0, DL, XLenVT));
3727 return DAG.getBitcast(VT, Vec);
3728 }
3729
3730
3731 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3732 // when re-interpreted as a vector with a larger element type. For example,
3733 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3734 // could be instead splat as
3735 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3736 // TODO: This optimization could also work on non-constant splats, but it
3737 // would require bit-manipulation instructions to construct the splat value.
3738 SmallVector<SDValue> Sequence;
3739 const auto *BV = cast<BuildVectorSDNode>(Op);
3740 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3741 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3742 BV->getRepeatedSequence(Sequence) &&
3743 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3744 unsigned SeqLen = Sequence.size();
3745 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3746 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3747 ViaIntVT == MVT::i64) &&
3748 "Unexpected sequence type");
3749
3750 // If we can use the original VL with the modified element type, this
3751 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3752 // be moved into InsertVSETVLI?
3753 const unsigned RequiredVL = NumElts / SeqLen;
3754 const unsigned ViaVecLen =
3755 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3756 NumElts : RequiredVL;
3757 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3758
3759 unsigned EltIdx = 0;
3760 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3761 uint64_t SplatValue = 0;
3762 // Construct the amalgamated value which can be splatted as this larger
3763 // vector type.
3764 for (const auto &SeqV : Sequence) {
3765 if (!SeqV.isUndef())
3766 SplatValue |=
3767 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3768 EltIdx++;
3769 }
3770
3771 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3772 // achieve better constant materialization.
3773 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3774 SplatValue = SignExtend64<32>(SplatValue);
3775
3776 // Since we can't introduce illegal i64 types at this stage, we can only
3777 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3778 // way we can use RVV instructions to splat.
3779 assert((ViaIntVT.bitsLE(XLenVT) ||
3780 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3781 "Unexpected bitcast sequence");
3782 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3783 SDValue ViaVL =
3784 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3785 MVT ViaContainerVT =
3786 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3787 SDValue Splat =
3788 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3789 DAG.getUNDEF(ViaContainerVT),
3790 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3791 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3792 if (ViaVecLen != RequiredVL)
3793 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3794 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3795 DAG.getConstant(0, DL, XLenVT));
3796 return DAG.getBitcast(VT, Splat);
3797 }
3798 }
3799
3800 // If the number of sign bits allows, see if we can lower as a <N x i8>.
3801 // Our main goal here is to reduce LMUL (and thus work) required to
3802 // build the constant, but we will also narrow if the resulting
3803 // narrow vector is known to materialize cheaply.
3804 // TODO: We really should be costing the smaller vector. There are
3805 // profitable cases this misses.
3806 if (EltBitSize > 8 && VT.isInteger() &&
3807 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3808 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3809 if (EltBitSize - SignBits < 8) {
3810 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3811 DL, Op->ops());
3812 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3813 Source, DAG, Subtarget);
3814 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3815 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3816 }
3817 }
3818
3819 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3820 return Res;
3821
3822 // For constant vectors, use generic constant pool lowering. Otherwise,
3823 // we'd have to materialize constants in GPRs just to move them into the
3824 // vector.
3825 return SDValue();
3826}
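// For illustration only: a minimal standalone model of the "hidden splat"
// packing performed above, assuming 16-bit elements and the same shift order
// as the (index * EltBitSize) accumulation. The helper name and the fixed
// element width are illustrative, not part of the lowering code itself.
#include <cassert>
#include <cstdint>

static uint64_t packRepeatedSequence(const uint16_t *Seq, unsigned SeqLen) {
  const unsigned EltBits = 16;
  uint64_t Splat = 0;
  for (unsigned i = 0; i != SeqLen; ++i)
    Splat |= uint64_t(Seq[i]) << (i * EltBits);
  return Splat;
}

int main() {
  // v4i16 <0, 1, 0, 1> repeats with period two; the repeated unit packs to
  // 0x00010000, so the whole vector can instead be splat as v2i32.
  const uint16_t Seq[] = {0, 1};
  assert(packRepeatedSequence(Seq, 2) == 0x00010000u);
  return 0;
}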
3827
3828 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3829 const RISCVSubtarget &Subtarget) {
3830 MVT VT = Op.getSimpleValueType();
3831 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3832
3833 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3834 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3835 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3836
3837 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3838
3839 SDLoc DL(Op);
3840 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3841
3842 MVT XLenVT = Subtarget.getXLenVT();
3843
3844 if (VT.getVectorElementType() == MVT::i1) {
3845 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3846 // vector type, we have a legal equivalently-sized i8 type, so we can use
3847 // that.
3848 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3849 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3850
3851 SDValue WideVec;
3852 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3853 // For a splat, perform a scalar truncate before creating the wider
3854 // vector.
3855 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3856 DAG.getConstant(1, DL, Splat.getValueType()));
3857 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3858 } else {
3859 SmallVector<SDValue, 8> Ops(Op->op_values());
3860 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3861 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3862 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3863 }
3864
3865 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3866 }
3867
3868 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3869 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3870 return Gather;
3871 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3872 : RISCVISD::VMV_V_X_VL;
3873 if (!VT.isFloatingPoint())
3874 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3875 Splat =
3876 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3877 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3878 }
3879
3880 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3881 return Res;
3882
3883 // If we're compiling for an exact VLEN value, we can split our work per
3884 // register in the register group.
3885 if (const auto VLen = Subtarget.getRealVLen();
3886 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3887 MVT ElemVT = VT.getVectorElementType();
3888 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3889 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3890 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3891 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3892 assert(M1VT == getLMUL1VT(M1VT));
3893
3894 // The following semantically builds up a fixed length concat_vector
3895 // of the component build_vectors. We eagerly lower to scalable and
3896 // insert_subvector here to avoid DAG combining it back to a large
3897 // build_vector.
3898 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3899 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3900 SDValue Vec = DAG.getUNDEF(ContainerVT);
3901 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3902 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3903 SDValue SubBV =
3904 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3905 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3906 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3907 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3908 DAG.getVectorIdxConstant(InsertIdx, DL));
3909 }
3910 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3911 }
3912
3913 // For m1 vectors, if we have non-undef values in both halves of our vector,
3914 // split the vector into low and high halves, build them separately, then
3915 // use a vselect to combine them. For long vectors, this cuts the critical
3916 // path of the vslide1down sequence in half, and gives us an opportunity
3917 // to special case each half independently. Note that we don't change the
3918 // length of the sub-vectors here, so if both fall back to the generic
3919 // vslide1down path, we should be able to fold the vselect into the final
3920 // vslidedown (for the undef tail) for the first half w/ masking.
3921 unsigned NumElts = VT.getVectorNumElements();
3922 unsigned NumUndefElts =
3923 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3924 unsigned NumDefElts = NumElts - NumUndefElts;
3925 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3926 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3927 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3928 SmallVector<SDValue> MaskVals;
3929 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3930 SubVecAOps.reserve(NumElts);
3931 SubVecBOps.reserve(NumElts);
3932 for (unsigned i = 0; i < NumElts; i++) {
3933 SDValue Elem = Op->getOperand(i);
3934 if (i < NumElts / 2) {
3935 SubVecAOps.push_back(Elem);
3936 SubVecBOps.push_back(UndefElem);
3937 } else {
3938 SubVecAOps.push_back(UndefElem);
3939 SubVecBOps.push_back(Elem);
3940 }
3941 bool SelectMaskVal = (i < NumElts / 2);
3942 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3943 }
3944 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3945 MaskVals.size() == NumElts);
3946
3947 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3948 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3949 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3950 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3951 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3952 }
3953
3954 // Cap the cost at a value linear to the number of elements in the vector.
3955 // The default lowering is to use the stack. The vector store + scalar loads
3956 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3957 // being (at least) linear in LMUL. As a result, using the vslidedown
3958 // lowering for every element ends up being VL*LMUL.
3959 // TODO: Should we be directly costing the stack alternative? Doing so might
3960 // give us a more accurate upper bound.
3961 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3962
3963 // TODO: unify with TTI getSlideCost.
3964 InstructionCost PerSlideCost = 1;
3965 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3966 default: break;
3967 case RISCVII::VLMUL::LMUL_2:
3968 PerSlideCost = 2;
3969 break;
3970 case RISCVII::VLMUL::LMUL_4:
3971 PerSlideCost = 4;
3972 break;
3973 case RISCVII::VLMUL::LMUL_8:
3974 PerSlideCost = 8;
3975 break;
3976 }
3977
3978 // TODO: Should we be using the build instseq then cost + evaluate scheme
3979 // we use for integer constants here?
3980 unsigned UndefCount = 0;
3981 for (const SDValue &V : Op->ops()) {
3982 if (V.isUndef()) {
3983 UndefCount++;
3984 continue;
3985 }
3986 if (UndefCount) {
3987 LinearBudget -= PerSlideCost;
3988 UndefCount = 0;
3989 }
3990 LinearBudget -= PerSlideCost;
3991 }
3992 if (UndefCount) {
3993 LinearBudget -= PerSlideCost;
3994 }
3995
3996 if (LinearBudget < 0)
3997 return SDValue();
3998
3999 assert((!VT.isFloatingPoint() ||
4000 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4001 "Illegal type which will result in reserved encoding");
4002
4003 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4004
4005 SDValue Vec;
4006 UndefCount = 0;
4007 for (SDValue V : Op->ops()) {
4008 if (V.isUndef()) {
4009 UndefCount++;
4010 continue;
4011 }
4012
4013 // Start our sequence with a TA splat in the hopes that hardware is able to
4014 // recognize there's no dependency on the prior value of our temporary
4015 // register.
4016 if (!Vec) {
4017 Vec = DAG.getSplatVector(VT, DL, V);
4018 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4019 UndefCount = 0;
4020 continue;
4021 }
4022
4023 if (UndefCount) {
4024 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4025 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4026 Vec, Offset, Mask, VL, Policy);
4027 UndefCount = 0;
4028 }
4029 auto OpCode =
4030 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4031 if (!VT.isFloatingPoint())
4032 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4033 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4034 V, Mask, VL);
4035 }
4036 if (UndefCount) {
4037 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4038 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4039 Vec, Offset, Mask, VL, Policy);
4040 }
4041 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4042}
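// A rough standalone model of the linear slide budget above: each defined
// element (plus each run of undefs) needs one slide, each slide costs roughly
// its LMUL, and the sequence is only used while the total stays within twice
// the element count. This simplification and its names are illustrative.
#include <cassert>

static bool fitsLinearSlideBudget(unsigned NumElts, unsigned NumSlides,
                                  unsigned PerSlideCost) {
  long Budget = 2L * NumElts; // linear in the number of elements
  return (long)NumSlides * PerSlideCost <= Budget;
}

int main() {
  // 16 defined elements at a per-slide cost of 2 (LMUL=2) just fit; at a cost
  // of 4 the budget is blown and the generic lowering is preferred instead.
  assert(fitsLinearSlideBudget(16, 16, /*PerSlideCost=*/2));
  assert(!fitsLinearSlideBudget(16, 16, /*PerSlideCost=*/4));
  return 0;
}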
4043
4044 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4045 SDValue Lo, SDValue Hi, SDValue VL,
4046 SelectionDAG &DAG) {
4047 if (!Passthru)
4048 Passthru = DAG.getUNDEF(VT);
4049 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4050 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4051 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4052 // If Hi is the sign-extension of Lo (all bits equal Lo's sign bit), lower
4053 // this as a custom node in order to try and match RVV vector/scalar instructions.
4054 if ((LoC >> 31) == HiC)
4055 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4056
4057 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4058 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4059 // vlmax vsetvli or vsetivli to change the VL.
4060 // FIXME: Support larger constants?
4061 // FIXME: Support non-constant VLs by saturating?
4062 if (LoC == HiC) {
4063 SDValue NewVL;
4064 if (isAllOnesConstant(VL) ||
4065 (isa<RegisterSDNode>(VL) &&
4066 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4067 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4068 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4069 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4070
4071 if (NewVL) {
4072 MVT InterVT =
4073 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4074 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4075 DAG.getUNDEF(InterVT), Lo, NewVL);
4076 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4077 }
4078 }
4079 }
4080
4081 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4082 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4083 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4084 Hi.getConstantOperandVal(1) == 31)
4085 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4086
4087 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4088 // even if it might be sign extended.
4089 if (Hi.isUndef())
4090 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4091
4092 // Fall back to a stack store and stride x0 vector load.
4093 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4094 Hi, VL);
4095}
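// For illustration: a scalar restatement of the (LoC >> 31) == HiC test above.
// A 64-bit splat value can be produced from its low 32 bits alone exactly when
// the high word equals the sign-extension of the low word.
#include <cassert>
#include <cstdint>

static bool hiIsSignExtensionOfLo(int32_t Lo, int32_t Hi) {
  return (Lo >> 31) == Hi;
}

int main() {
  assert(hiIsSignExtensionOfLo(-5, -1)); // 0xFFFFFFFFFFFFFFFB
  assert(hiIsSignExtensionOfLo(7, 0));   // 0x0000000000000007
  assert(!hiIsSignExtensionOfLo(7, -1)); // 0xFFFFFFFF00000007 needs both halves
  return 0;
}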
4096
4097// Called by type legalization to handle splat of i64 on RV32.
4098// FIXME: We can optimize this when the type has sign or zero bits in one
4099// of the halves.
4100static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4101 SDValue Scalar, SDValue VL,
4102 SelectionDAG &DAG) {
4103 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4104 SDValue Lo, Hi;
4105 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4106 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4107}
4108
4109// This function lowers a splat of a scalar operand Splat with the vector
4110// length VL. It ensures the final sequence is type legal, which is useful when
4111// lowering a splat after type legalization.
4112static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4113 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4114 const RISCVSubtarget &Subtarget) {
4115 bool HasPassthru = Passthru && !Passthru.isUndef();
4116 if (!HasPassthru && !Passthru)
4117 Passthru = DAG.getUNDEF(VT);
4118 if (VT.isFloatingPoint())
4119 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4120
4121 MVT XLenVT = Subtarget.getXLenVT();
4122
4123 // Simplest case is that the operand needs to be promoted to XLenVT.
4124 if (Scalar.getValueType().bitsLE(XLenVT)) {
4125 // If the operand is a constant, sign extend to increase our chances
4126 // of being able to use a .vi instruction. ANY_EXTEND would become
4127 // a zero extend and the simm5 check in isel would fail.
4128 // FIXME: Should we ignore the upper bits in isel instead?
4129 unsigned ExtOpc =
4130 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4131 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4132 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4133 }
4134
4135 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4136 "Unexpected scalar for splat lowering!");
4137
4138 if (isOneConstant(VL) && isNullConstant(Scalar))
4139 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4140 DAG.getConstant(0, DL, XLenVT), VL);
4141
4142 // Otherwise use the more complicated splatting algorithm.
4143 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4144}
4145
4146// This function lowers an insert of a scalar operand Scalar into lane
4147// 0 of the vector regardless of the value of VL. The contents of the
4148// remaining lanes of the result vector are unspecified. VL is assumed
4149// to be non-zero.
4150 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4151 const SDLoc &DL, SelectionDAG &DAG,
4152 const RISCVSubtarget &Subtarget) {
4153 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4154
4155 const MVT XLenVT = Subtarget.getXLenVT();
4156 SDValue Passthru = DAG.getUNDEF(VT);
4157
4158 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4159 isNullConstant(Scalar.getOperand(1))) {
4160 SDValue ExtractedVal = Scalar.getOperand(0);
4161 // The element types must be the same.
4162 if (ExtractedVal.getValueType().getVectorElementType() ==
4163 VT.getVectorElementType()) {
4164 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4165 MVT ExtractedContainerVT = ExtractedVT;
4166 if (ExtractedContainerVT.isFixedLengthVector()) {
4167 ExtractedContainerVT = getContainerForFixedLengthVector(
4168 DAG, ExtractedContainerVT, Subtarget);
4169 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4170 ExtractedVal, DAG, Subtarget);
4171 }
4172 if (ExtractedContainerVT.bitsLE(VT))
4173 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4174 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4175 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4176 DAG.getVectorIdxConstant(0, DL));
4177 }
4178 }
4179
4180
4181 if (VT.isFloatingPoint())
4182 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4183 DAG.getUNDEF(VT), Scalar, VL);
4184
4185 // Avoid the tricky legalization cases by falling back to using the
4186 // splat code which already handles it gracefully.
4187 if (!Scalar.getValueType().bitsLE(XLenVT))
4188 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4189 DAG.getConstant(1, DL, XLenVT),
4190 VT, DL, DAG, Subtarget);
4191
4192 // If the operand is a constant, sign extend to increase our chances
4193 // of being able to use a .vi instruction. ANY_EXTEND would become
4194 // a zero extend and the simm5 check in isel would fail.
4195 // FIXME: Should we ignore the upper bits in isel instead?
4196 unsigned ExtOpc =
4197 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4198 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4199 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4200 DAG.getUNDEF(VT), Scalar, VL);
4201}
4202
4203 // Does this shuffle extract either the even or odd elements of a vector?
4204// That is, specifically, either (a) or (b) below.
4205// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4206// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4207// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4208// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4209 // Returns {Src Vector, Even Elements} on success.
4210static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4211 SDValue V2, ArrayRef<int> Mask,
4212 const RISCVSubtarget &Subtarget) {
4213 // Need to be able to widen the vector.
4214 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4215 return false;
4216
4217 // Both input must be extracts.
4218 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4219 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4220 return false;
4221
4222 // Extracting from the same source.
4223 SDValue Src = V1.getOperand(0);
4224 if (Src != V2.getOperand(0))
4225 return false;
4226
4227 // Src needs to have twice the number of elements.
4228 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4229 return false;
4230
4231 // The extracts must extract the two halves of the source.
4232 if (V1.getConstantOperandVal(1) != 0 ||
4233 V2.getConstantOperandVal(1) != Mask.size())
4234 return false;
4235
4236 // First index must be the first even or odd element from V1.
4237 if (Mask[0] != 0 && Mask[0] != 1)
4238 return false;
4239
4240 // The others must increase by 2 each time.
4241 // TODO: Support undef elements?
4242 for (unsigned i = 1; i != Mask.size(); ++i)
4243 if (Mask[i] != Mask[i - 1] + 2)
4244 return false;
4245
4246 return true;
4247}
4248
4249/// Is this shuffle interleaving contiguous elements from one vector into the
4250/// even elements and contiguous elements from another vector into the odd
4251/// elements. \p EvenSrc will contain the element that should be in the first
4252/// even element. \p OddSrc will contain the element that should be in the first
4253/// odd element. These can be the first element in a source or the element half
4254/// way through the source.
4255static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4256 int &OddSrc, const RISCVSubtarget &Subtarget) {
4257 // We need to be able to widen elements to the next larger integer type.
4258 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4259 return false;
4260
4261 int Size = Mask.size();
4262 int NumElts = VT.getVectorNumElements();
4263 assert(Size == (int)NumElts && "Unexpected mask size");
4264
4265 SmallVector<unsigned, 2> StartIndexes;
4266 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4267 return false;
4268
4269 EvenSrc = StartIndexes[0];
4270 OddSrc = StartIndexes[1];
4271
4272 // One source should be low half of first vector.
4273 if (EvenSrc != 0 && OddSrc != 0)
4274 return false;
4275
4276 // Subvectors will be extracted either at the start of the two input
4277 // vectors, or at the start and middle of the first vector if it's a unary
4278 // interleave.
4279 // In both cases, HalfNumElts will be extracted.
4280 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4281 // we'll create an illegal extract_subvector.
4282 // FIXME: We could support other values using a slidedown first.
4283 int HalfNumElts = NumElts / 2;
4284 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4285}
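// A small standalone sketch of the interleave-mask shape checked above:
// result[2*i] must come from EvenSrc + i and result[2*i+1] from OddSrc + i,
// one start must be 0, and each start must be 0 or the midpoint of a source so
// the later extract_subvector stays legal. Names here are illustrative.
#include <cassert>
#include <vector>

static bool looksLikeInterleave(const std::vector<int> &Mask, int EvenSrc,
                                int OddSrc) {
  int Half = (int)Mask.size() / 2;
  if (EvenSrc != 0 && OddSrc != 0)
    return false; // one source must be the low half of the first vector
  if ((EvenSrc % Half) != 0 || (OddSrc % Half) != 0)
    return false; // starts must be 0 or halfway through a source
  for (int i = 0; i != Half; ++i)
    if (Mask[2 * i] != EvenSrc + i || Mask[2 * i + 1] != OddSrc + i)
      return false;
  return true;
}

int main() {
  // Interleave the low half of V1 (elements 0..3) with the low half of V2
  // (elements 8..11) for a v8 two-operand shuffle.
  assert(looksLikeInterleave({0, 8, 1, 9, 2, 10, 3, 11}, 0, 8));
  // Unary interleave of the two halves of V1.
  assert(looksLikeInterleave({0, 4, 1, 5, 2, 6, 3, 7}, 0, 4));
  return 0;
}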
4286
4287/// Match shuffles that concatenate two vectors, rotate the concatenation,
4288/// and then extract the original number of elements from the rotated result.
4289/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4290/// returned rotation amount is for a rotate right, where elements move from
4291/// higher elements to lower elements. \p LoSrc indicates the first source
4292/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4293/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4294/// 0 or 1 if a rotation is found.
4295///
4296/// NOTE: We talk about rotate to the right which matches how bit shift and
4297/// rotate instructions are described where LSBs are on the right, but LLVM IR
4298/// and the table below write vectors with the lowest elements on the left.
4299static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4300 int Size = Mask.size();
4301
4302 // We need to detect various ways of spelling a rotation:
4303 // [11, 12, 13, 14, 15, 0, 1, 2]
4304 // [-1, 12, 13, 14, -1, -1, 1, -1]
4305 // [-1, -1, -1, -1, -1, -1, 1, 2]
4306 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4307 // [-1, 4, 5, 6, -1, -1, 9, -1]
4308 // [-1, 4, 5, 6, -1, -1, -1, -1]
4309 int Rotation = 0;
4310 LoSrc = -1;
4311 HiSrc = -1;
4312 for (int i = 0; i != Size; ++i) {
4313 int M = Mask[i];
4314 if (M < 0)
4315 continue;
4316
4317 // Determine where a rotate vector would have started.
4318 int StartIdx = i - (M % Size);
4319 // The identity rotation isn't interesting, stop.
4320 if (StartIdx == 0)
4321 return -1;
4322
4323 // If we found the tail of a vector the rotation must be the missing
4324 // front. If we found the head of a vector, it must be how much of the
4325 // head.
4326 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4327
4328 if (Rotation == 0)
4329 Rotation = CandidateRotation;
4330 else if (Rotation != CandidateRotation)
4331 // The rotations don't match, so we can't match this mask.
4332 return -1;
4333
4334 // Compute which value this mask is pointing at.
4335 int MaskSrc = M < Size ? 0 : 1;
4336
4337 // Compute which of the two target values this index should be assigned to.
4338 // This reflects whether the high elements are remaining or the low elements
4339 // are remaining.
4340 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4341
4342 // Either set up this value if we've not encountered it before, or check
4343 // that it remains consistent.
4344 if (TargetSrc < 0)
4345 TargetSrc = MaskSrc;
4346 else if (TargetSrc != MaskSrc)
4347 // This may be a rotation, but it pulls from the inputs in some
4348 // unsupported interleaving.
4349 return -1;
4350 }
4351
4352 // Check that we successfully analyzed the mask, and normalize the results.
4353 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4354 assert((LoSrc >= 0 || HiSrc >= 0) &&
4355 "Failed to find a rotated input vector!");
4356
4357 return Rotation;
4358}
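// A minimal standalone restatement of the rotation-amount detection above; it
// keeps only the rotation bookkeeping and drops the LoSrc/HiSrc source
// tracking, so it is a sketch of the idea rather than a drop-in replacement.
#include <cassert>
#include <vector>

static int findRotation(const std::vector<int> &Mask) {
  int Size = (int)Mask.size();
  int Rotation = 0;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // undef lanes don't constrain the rotation
    int StartIdx = i - (M % Size);
    if (StartIdx == 0)
      return -1; // the identity rotation isn't interesting
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // inconsistent rotation amounts
  }
  return Rotation;
}

int main() {
  // Both spellings from the comment above describe a rotate right by 3.
  assert(findRotation({11, 12, 13, 14, 15, 0, 1, 2}) == 3);
  assert(findRotation({3, 4, 5, 6, 7, 8, 9, 10}) == 3);
  return 0;
}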
4359
4360// Lower a deinterleave shuffle to vnsrl.
4361// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4362// -> [p, q, r, s] (EvenElts == false)
4363// VT is the type of the vector to return, <[vscale x ]n x ty>
4364// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4365 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4366 bool EvenElts,
4367 const RISCVSubtarget &Subtarget,
4368 SelectionDAG &DAG) {
4369 // The result is a vector of type <m x n x ty>
4370 MVT ContainerVT = VT;
4371 // Convert fixed vectors to scalable if needed
4372 if (ContainerVT.isFixedLengthVector()) {
4373 assert(Src.getSimpleValueType().isFixedLengthVector());
4374 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4375
4376 // The source is a vector of type <m x n*2 x ty>
4377 MVT SrcContainerVT =
4378 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4379 ContainerVT.getVectorElementCount() * 2);
4380 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4381 }
4382
4383 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4384
4385 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4386 // This also converts FP to int.
4387 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4388 MVT WideSrcContainerVT = MVT::getVectorVT(
4389 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4390 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4391
4392 // The integer version of the container type.
4393 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4394
4395 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4396 // the original element size.
4397 unsigned Shift = EvenElts ? 0 : EltBits;
4398 SDValue SplatShift = DAG.getNode(
4399 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4400 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4401 SDValue Res =
4402 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4403 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4404 // Cast back to FP if needed.
4405 Res = DAG.getBitcast(ContainerVT, Res);
4406
4407 if (VT.isFixedLengthVector())
4408 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4409 return Res;
4410}
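// For illustration: a scalar model of the narrowing-shift trick above for
// 16-bit elements. Viewing an adjacent (even, odd) pair as one 32-bit value on
// a little-endian layout, the even element is the low half (shift by 0) and
// the odd element is the high half (shift by the original element size).
#include <cassert>
#include <cstdint>

static uint16_t deinterleaveOne(uint32_t Pair, bool EvenElts) {
  unsigned Shift = EvenElts ? 0 : 16; // models vnsrl with shift 0 or SEW
  return uint16_t(Pair >> Shift);
}

int main() {
  // The pair {even = 0x1111, odd = 0x2222} packs to 0x22221111.
  uint32_t Pair = 0x22221111u;
  assert(deinterleaveOne(Pair, /*EvenElts=*/true) == 0x1111);
  assert(deinterleaveOne(Pair, /*EvenElts=*/false) == 0x2222);
  return 0;
}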
4411
4412// Lower the following shuffle to vslidedown.
4413// a)
4414// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4415// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4416// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4417// b)
4418// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4419// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4420// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4421// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4422// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4423// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4424 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4425 SDValue V1, SDValue V2,
4426 ArrayRef<int> Mask,
4427 const RISCVSubtarget &Subtarget,
4428 SelectionDAG &DAG) {
4429 auto findNonEXTRACT_SUBVECTORParent =
4430 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4431 uint64_t Offset = 0;
4432 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4433 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4434 // a scalable vector. But we don't want to match the case.
4435 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4436 Offset += Parent.getConstantOperandVal(1);
4437 Parent = Parent.getOperand(0);
4438 }
4439 return std::make_pair(Parent, Offset);
4440 };
4441
4442 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4443 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4444
4445 // Extracting from the same source.
4446 SDValue Src = V1Src;
4447 if (Src != V2Src)
4448 return SDValue();
4449
4450 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4451 SmallVector<int, 16> NewMask(Mask);
4452 for (size_t i = 0; i != NewMask.size(); ++i) {
4453 if (NewMask[i] == -1)
4454 continue;
4455
4456 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4457 NewMask[i] = NewMask[i] + V1IndexOffset;
4458 } else {
4459 // Minus NewMask.size() is needed. Otherwise, the b case would be
4460 // <5,6,7,12> instead of <5,6,7,8>.
4461 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4462 }
4463 }
4464
4465 // First index must be known and non-zero. It will be used as the slidedown
4466 // amount.
4467 if (NewMask[0] <= 0)
4468 return SDValue();
4469
4470 // NewMask is also continuous.
4471 for (unsigned i = 1; i != NewMask.size(); ++i)
4472 if (NewMask[i - 1] + 1 != NewMask[i])
4473 return SDValue();
4474
4475 MVT XLenVT = Subtarget.getXLenVT();
4476 MVT SrcVT = Src.getSimpleValueType();
4477 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4478 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4479 SDValue Slidedown =
4480 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4481 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4482 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4483 return DAG.getNode(
4484 ISD::EXTRACT_SUBVECTOR, DL, VT,
4485 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4486 DAG.getConstant(0, DL, XLenVT));
4487}
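// A minimal sketch of the rebuilt-mask check above: once the extract offsets
// are folded back in, the mask must be one contiguous run with a known
// non-zero start, and that start becomes the slidedown amount.
#include <cassert>
#include <vector>

static int slidedownAmount(const std::vector<int> &Mask) {
  if (Mask.empty() || Mask[0] <= 0)
    return -1; // the start must be known and non-zero
  for (size_t i = 1; i != Mask.size(); ++i)
    if (Mask[i - 1] + 1 != Mask[i])
      return -1; // not contiguous
  return Mask[0];
}

int main() {
  assert(slidedownAmount({1, 2, 3, 4, 5, 6, 7, 8}) == 1); // example (a) above
  assert(slidedownAmount({5, 6, 7, 8}) == 5);             // example (b), rebuilt
  assert(slidedownAmount({0, 1, 2, 3}) == -1);            // no slide needed
  return 0;
}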
4488
4489// Because vslideup leaves the destination elements at the start intact, we can
4490// use it to perform shuffles that insert subvectors:
4491//
4492// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4493// ->
4494// vsetvli zero, 8, e8, mf2, ta, ma
4495// vslideup.vi v8, v9, 4
4496//
4497// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4498// ->
4499// vsetvli zero, 5, e8, mf2, tu, ma
4500 // vslideup.vi v8, v9, 2
4501 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4502 SDValue V1, SDValue V2,
4503 ArrayRef<int> Mask,
4504 const RISCVSubtarget &Subtarget,
4505 SelectionDAG &DAG) {
4506 unsigned NumElts = VT.getVectorNumElements();
4507 int NumSubElts, Index;
4508 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4509 Index))
4510 return SDValue();
4511
4512 bool OpsSwapped = Mask[Index] < (int)NumElts;
4513 SDValue InPlace = OpsSwapped ? V2 : V1;
4514 SDValue ToInsert = OpsSwapped ? V1 : V2;
4515
4516 MVT XLenVT = Subtarget.getXLenVT();
4517 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4518 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4519 // We slide up by the index that the subvector is being inserted at, and set
4520 // VL to the index + the number of elements being inserted.
4521 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4522 // If we're adding a suffix to the in-place vector, i.e. inserting right
4523 // up to the very end of it, then we don't actually care about the tail.
4524 if (NumSubElts + Index >= (int)NumElts)
4525 Policy |= RISCVII::TAIL_AGNOSTIC;
4526
4527 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4528 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4529 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4530
4531 SDValue Res;
4532 // If we're inserting into the lowest elements, use a tail undisturbed
4533 // vmv.v.v.
4534 if (Index == 0)
4535 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4536 VL);
4537 else
4538 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4539 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4540 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4541}
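// A standalone sketch of the VL and tail-policy choice above: inserting
// NumSubElts elements at Index needs VL = Index + NumSubElts, and the tail can
// only be left agnostic when the insertion reaches the very end of the vector.
// The struct and helper below are illustrative, not part of the lowering code.
#include <cassert>

struct SlideupParams {
  unsigned VL;
  bool TailAgnostic;
};

static SlideupParams slideupParamsFor(unsigned NumElts, unsigned NumSubElts,
                                      unsigned Index) {
  return {Index + NumSubElts, Index + NumSubElts >= NumElts};
}

int main() {
  // <0,1,2,3,8,9,10,11>: insert 4 elements at index 4 -> vsetvli 8 ... ta.
  SlideupParams P = slideupParamsFor(8, 4, 4);
  assert(P.VL == 8 && P.TailAgnostic);
  // <0,1,8,9,10,5,6,7>: insert 3 elements at index 2 -> vsetvli 5 ... tu.
  P = slideupParamsFor(8, 3, 2);
  assert(P.VL == 5 && !P.TailAgnostic);
  return 0;
}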
4542
4543/// Match v(f)slide1up/down idioms. These operations involve sliding
4544/// N-1 elements to make room for an inserted scalar at one end.
4545 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4546 SDValue V1, SDValue V2,
4547 ArrayRef<int> Mask,
4548 const RISCVSubtarget &Subtarget,
4549 SelectionDAG &DAG) {
4550 bool OpsSwapped = false;
4551 if (!isa<BuildVectorSDNode>(V1)) {
4552 if (!isa<BuildVectorSDNode>(V2))
4553 return SDValue();
4554 std::swap(V1, V2);
4555 OpsSwapped = true;
4556 }
4557 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4558 if (!Splat)
4559 return SDValue();
4560
4561 // Return true if the mask could describe a slide of Mask.size() - 1
4562 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4563 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4564 const unsigned S = (Offset > 0) ? 0 : -Offset;
4565 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4566 for (unsigned i = S; i != E; ++i)
4567 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4568 return false;
4569 return true;
4570 };
4571
4572 const unsigned NumElts = VT.getVectorNumElements();
4573 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4574 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4575 return SDValue();
4576
4577 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4578 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4579 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4580 return SDValue();
4581
4582 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4583 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4584 auto OpCode = IsVSlidedown ?
4585 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4586 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4587 if (!VT.isFloatingPoint())
4588 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4589 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4590 DAG.getUNDEF(ContainerVT),
4591 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4592 Splat, TrueMask, VL);
4593 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4594}
4595
4596// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4597// to create an interleaved vector of <[vscale x] n*2 x ty>.
4598// This requires that the size of ty is less than the subtarget's maximum ELEN.
4599 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4600 const SDLoc &DL, SelectionDAG &DAG,
4601 const RISCVSubtarget &Subtarget) {
4602 MVT VecVT = EvenV.getSimpleValueType();
4603 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4604 // Convert fixed vectors to scalable if needed
4605 if (VecContainerVT.isFixedLengthVector()) {
4606 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4607 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4608 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4609 }
4610
4611 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4612
4613 // We're working with a vector of the same size as the resulting
4614 // interleaved vector, but with half the number of elements and
4615 // twice the SEW (Hence the restriction on not using the maximum
4616 // ELEN)
4617 MVT WideVT =
4618 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4619 VecVT.getVectorElementCount());
4620 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4621 if (WideContainerVT.isFixedLengthVector())
4622 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4623
4624 // Bitcast the input vectors to integers in case they are FP
4625 VecContainerVT = VecContainerVT.changeTypeToInteger();
4626 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4627 OddV = DAG.getBitcast(VecContainerVT, OddV);
4628
4629 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4630 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4631
4632 SDValue Interleaved;
4633 if (OddV.isUndef()) {
4634 // If OddV is undef, this is a zero extend.
4635 // FIXME: Not only does this optimize the code, it fixes some correctness
4636 // issues because MIR does not have freeze.
4637 Interleaved =
4638 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4639 } else if (Subtarget.hasStdExtZvbb()) {
4640 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4641 SDValue OffsetVec =
4642 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4643 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4644 OffsetVec, Passthru, Mask, VL);
4645 if (!EvenV.isUndef())
4646 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4647 Interleaved, EvenV, Passthru, Mask, VL);
4648 } else if (EvenV.isUndef()) {
4649 Interleaved =
4650 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4651
4652 SDValue OffsetVec =
4653 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4654 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4655 Interleaved, OffsetVec, Passthru, Mask, VL);
4656 } else {
4657 // FIXME: We should freeze the odd vector here. We already handled the case
4658 // of provably undef/poison above.
4659
4660 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4661 // vwaddu.vv
4662 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4663 OddV, Passthru, Mask, VL);
4664
4665 // Then multiply OddV by (2^(VecVT.getScalarSizeInBits()) - 1), the all-ones value
4666 SDValue AllOnesVec = DAG.getSplatVector(
4667 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4668 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4669 OddV, AllOnesVec, Passthru, Mask, VL);
4670
4671 // Add the two together so we get
4672 // (OddV * 0xff...ff) + (OddV + EvenV)
4673 // = (OddV * 0x100...00) + EvenV
4674 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4675 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4676 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4677 Interleaved, OddsMul, Passthru, Mask, VL);
4678 }
4679
4680 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4681 MVT ResultContainerVT = MVT::getVectorVT(
4682 VecVT.getVectorElementType(), // Make sure to use original type
4683 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4684 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4685
4686 // Convert back to a fixed vector if needed
4687 MVT ResultVT =
4688 MVT::getVectorVT(VecVT.getVectorElementType(),
4689 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4690 if (ResultVT.isFixedLengthVector())
4691 Interleaved =
4692 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4693
4694 return Interleaved;
4695}
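// For illustration, a scalar check of the identity relied on above for SEW=16:
// (Odd * 0xFFFF) + (Odd + Even) == (Odd << 16) + Even when computed in the
// widened 32-bit type, which models what the vwaddu.vv + vwmaccu.vx sequence
// produces. The helper name and the fixed element width are assumptions.
#include <cassert>
#include <cstdint>

static uint32_t interleavePair(uint16_t Even, uint16_t Odd) {
  uint32_t WidenedAdd = uint32_t(Even) + Odd; // models vwaddu.vv
  uint32_t OddsMul = uint32_t(Odd) * 0xFFFFu; // models the all-ones multiply
  return WidenedAdd + OddsMul;                // == (Odd << 16) + Even
}

int main() {
  assert(interleavePair(0x1234, 0xABCD) == 0xABCD1234u);
  assert(interleavePair(0xFFFF, 0xFFFF) == 0xFFFFFFFFu);
  return 0;
}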
4696
4697// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4698// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4699 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4700 SelectionDAG &DAG,
4701 const RISCVSubtarget &Subtarget) {
4702 SDLoc DL(SVN);
4703 MVT VT = SVN->getSimpleValueType(0);
4704 SDValue V = SVN->getOperand(0);
4705 unsigned NumElts = VT.getVectorNumElements();
4706
4707 assert(VT.getVectorElementType() == MVT::i1);
4708
4708
4709 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4710 SVN->getMask().size()) ||
4711 !SVN->getOperand(1).isUndef())
4712 return SDValue();
4713
4714 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4715 EVT ViaVT = EVT::getVectorVT(
4716 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4717 EVT ViaBitVT =
4718 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4719
4720 // If we don't have zvbb or the larger element type > ELEN, the operation will
4721 // be illegal.
4722 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4723 ViaVT) ||
4724 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4725 return SDValue();
4726
4727 // If the bit vector doesn't fit exactly into the larger element type, we need
4728 // to insert it into the larger vector and then shift up the reversed bits
4729 // afterwards to get rid of the gap introduced.
4730 if (ViaEltSize > NumElts)
4731 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4732 V, DAG.getVectorIdxConstant(0, DL));
4733
4734 SDValue Res =
4735 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4736
4737 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4738 // element type.
4739 if (ViaEltSize > NumElts)
4740 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4741 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4742
4743 Res = DAG.getBitcast(ViaBitVT, Res);
4744
4745 if (ViaEltSize > NumElts)
4746 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4747 DAG.getVectorIdxConstant(0, DL));
4748 return Res;
4749}
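// A scalar model of the i1-reverse trick above, assuming the mask bits sit in
// the low NumElts bits of a 32-bit word: reverse the whole word (the vbrev on
// the via type), then shift right to drop the gap left when NumElts < 32.
#include <cassert>
#include <cstdint>

static uint32_t reverseLowBits(uint32_t V, unsigned NumElts) {
  uint32_t R = 0;
  for (unsigned i = 0; i != 32; ++i)
    R |= ((V >> i) & 1u) << (31 - i); // full 32-bit bit reverse
  return R >> (32 - NumElts);         // shift out the introduced gap
}

int main() {
  // Reversing a v4i1 <1,0,1,1> (bit i = element i, 0b1101) gives <1,1,0,1>.
  assert(reverseLowBits(0b1101u, 4) == 0b1011u);
  return 0;
}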
4750
4751 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4752 SelectionDAG &DAG,
4753 const RISCVSubtarget &Subtarget,
4754 MVT &RotateVT, unsigned &RotateAmt) {
4755 SDLoc DL(SVN);
4756
4757 EVT VT = SVN->getValueType(0);
4758 unsigned NumElts = VT.getVectorNumElements();
4759 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4760 unsigned NumSubElts;
4761 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4762 NumElts, NumSubElts, RotateAmt))
4763 return false;
4764 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4765 NumElts / NumSubElts);
4766
4767 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4768 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4769}
4770
4771// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4772// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4773// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4774 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4775 SelectionDAG &DAG,
4776 const RISCVSubtarget &Subtarget) {
4777 SDLoc DL(SVN);
4778
4779 EVT VT = SVN->getValueType(0);
4780 unsigned RotateAmt;
4781 MVT RotateVT;
4782 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4783 return SDValue();
4784
4785 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4786
4787 SDValue Rotate;
4788 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4789 // so canonicalize to vrev8.
4790 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4791 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4792 else
4793 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4794 DAG.getConstant(RotateAmt, DL, RotateVT));
4795
4796 return DAG.getBitcast(VT, Rotate);
4797}
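// A quick scalar check of the canonicalization above: for 16-bit lanes, a
// rotate by 8 in either direction is the same permutation as a byte swap, so
// emitting BSWAP (vrev8) instead of a rotate loses nothing. The helpers are
// illustrative stand-ins for the generic nodes.
#include <cassert>
#include <cstdint>

static uint16_t rotl16(uint16_t V, unsigned Amt) {
  return uint16_t((V << Amt) | (V >> (16 - Amt)));
}

static uint16_t bswap16(uint16_t V) { return uint16_t((V << 8) | (V >> 8)); }

int main() {
  uint16_t V = 0xABCD;
  assert(rotl16(V, 8) == 0xCDAB);
  assert(bswap16(V) == rotl16(V, 8)); // rotate by 8 == byteswap for i16
  return 0;
}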
4798
4799// If compiling with an exactly known VLEN, see if we can split a
4800// shuffle on m2 or larger into a small number of m1 sized shuffles
4801 // which write each destination register exactly once.
4802 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4803 SelectionDAG &DAG,
4804 const RISCVSubtarget &Subtarget) {
4805 SDLoc DL(SVN);
4806 MVT VT = SVN->getSimpleValueType(0);
4807 SDValue V1 = SVN->getOperand(0);
4808 SDValue V2 = SVN->getOperand(1);
4809 ArrayRef<int> Mask = SVN->getMask();
4810 unsigned NumElts = VT.getVectorNumElements();
4811
4812 // If we don't know the exact data layout, there's not much we can do. If this
4813 // is already m1 or smaller, no point in splitting further.
4814 const auto VLen = Subtarget.getRealVLen();
4815 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4816 return SDValue();
4817
4818 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4819 // expansion for.
4820 unsigned RotateAmt;
4821 MVT RotateVT;
4822 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4823 return SDValue();
4824
4825 MVT ElemVT = VT.getVectorElementType();
4826 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4827 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4828
4829 SmallVector<std::pair<int, SmallVector<int>>>
4830 OutMasks(VRegsPerSrc, {-1, {}});
4831
4832 // Check if our mask can be done as a 1-to-1 mapping from source
4833 // to destination registers in the group without needing to
4834 // write each destination more than once.
4835 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4836 int DstVecIdx = DstIdx / ElemsPerVReg;
4837 int DstSubIdx = DstIdx % ElemsPerVReg;
4838 int SrcIdx = Mask[DstIdx];
4839 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4840 continue;
4841 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4842 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4843 if (OutMasks[DstVecIdx].first == -1)
4844 OutMasks[DstVecIdx].first = SrcVecIdx;
4845 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4846 // Note: This case could easily be handled by keeping track of a chain
4847 // of source values and generating two element shuffles below. This is
4848 // less an implementation question, and more a profitability one.
4849 return SDValue();
4850
4851 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4852 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4853 }
4854
4855 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4856 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4857 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4858 assert(M1VT == getLMUL1VT(M1VT));
4859 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4860 SDValue Vec = DAG.getUNDEF(ContainerVT);
4861 // The following semantically builds up a fixed length concat_vector
4862 // of the component shuffle_vectors. We eagerly lower to scalable here
4863 // to avoid DAG combining it back to a large shuffle_vector again.
4864 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4865 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4866 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4867 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4868 if (SrcVecIdx == -1)
4869 continue;
4870 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4871 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4872 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4873 DAG.getVectorIdxConstant(ExtractIdx, DL));
4874 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4875 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4876 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4877 unsigned InsertIdx = DstVecIdx * NumOpElts;
4878 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4879 DAG.getVectorIdxConstant(InsertIdx, DL));
4880 }
4881 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4882}
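// A standalone sketch of the register-group bookkeeping above: with
// ElemsPerVReg elements per vector register, element Idx lives in register
// Idx / ElemsPerVReg at sub-index Idx % ElemsPerVReg. The struct and the
// concrete values below are illustrative.
#include <cassert>

struct RegCoord {
  int VecIdx;
  int SubIdx;
};

static RegCoord splitIndex(int Idx, int ElemsPerVReg) {
  return {Idx / ElemsPerVReg, Idx % ElemsPerVReg};
}

int main() {
  // v8i32 at VLEN=128 is an m2 value: 4 elements per m1 register.
  const int ElemsPerVReg = 4;
  RegCoord Dst = splitIndex(6, ElemsPerVReg);
  assert(Dst.VecIdx == 1 && Dst.SubIdx == 2); // second register, third lane
  RegCoord Src = splitIndex(3, ElemsPerVReg);
  assert(Src.VecIdx == 0 && Src.SubIdx == 3); // first register, last lane
  return 0;
}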
4883
4884 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4885 const RISCVSubtarget &Subtarget) {
4886 SDValue V1 = Op.getOperand(0);
4887 SDValue V2 = Op.getOperand(1);
4888 SDLoc DL(Op);
4889 MVT XLenVT = Subtarget.getXLenVT();
4890 MVT VT = Op.getSimpleValueType();
4891 unsigned NumElts = VT.getVectorNumElements();
4892 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4893
4894 if (VT.getVectorElementType() == MVT::i1) {
4895 // Lower to a vror.vi of a larger element type if possible before we promote
4896 // i1s to i8s.
4897 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4898 return V;
4899 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4900 return V;
4901
4902 // Promote i1 shuffle to i8 shuffle.
4903 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4904 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4905 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4906 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4907 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4908 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4909 ISD::SETNE);
4910 }
4911
4912 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4913
4914 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4915
4916 if (SVN->isSplat()) {
4917 const int Lane = SVN->getSplatIndex();
4918 if (Lane >= 0) {
4919 MVT SVT = VT.getVectorElementType();
4920
4921 // Turn splatted vector load into a strided load with an X0 stride.
4922 SDValue V = V1;
4923 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4924 // with undef.
4925 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4926 int Offset = Lane;
4927 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4928 int OpElements =
4929 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4930 V = V.getOperand(Offset / OpElements);
4931 Offset %= OpElements;
4932 }
4933
4934 // We need to ensure the load isn't atomic or volatile.
4935 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4936 auto *Ld = cast<LoadSDNode>(V);
4937 Offset *= SVT.getStoreSize();
4938 SDValue NewAddr = DAG.getMemBasePlusOffset(
4939 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4940
4941 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4942 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4943 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4944 SDValue IntID =
4945 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4946 SDValue Ops[] = {Ld->getChain(),
4947 IntID,
4948 DAG.getUNDEF(ContainerVT),
4949 NewAddr,
4950 DAG.getRegister(RISCV::X0, XLenVT),
4951 VL};
4952 SDValue NewLoad = DAG.getMemIntrinsicNode(
4953 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4954 DAG.getMachineFunction().getMachineMemOperand(
4955 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4956 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4957 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4958 }
4959
4960 // Otherwise use a scalar load and splat. This will give the best
4961 // opportunity to fold a splat into the operation. ISel can turn it into
4962 // the x0 strided load if we aren't able to fold away the select.
4963 if (SVT.isFloatingPoint())
4964 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4965 Ld->getPointerInfo().getWithOffset(Offset),
4966 Ld->getOriginalAlign(),
4967 Ld->getMemOperand()->getFlags());
4968 else
4969 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4970 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4971 Ld->getOriginalAlign(),
4972 Ld->getMemOperand()->getFlags());
4973 DAG.makeEquivalentMemoryOrdering(Ld, V);
4974
4975 unsigned Opc =
4976 SVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4977 SDValue Splat =
4978 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4979 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4980 }
4981
4982 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4983 assert(Lane < (int)NumElts && "Unexpected lane!");
4984 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4985 V1, DAG.getConstant(Lane, DL, XLenVT),
4986 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4987 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4988 }
4989 }
4990
4991 // For exact VLEN m2 or greater, try to split to m1 operations if we
4992 // can split cleanly.
4993 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4994 return V;
4995
4996 ArrayRef<int> Mask = SVN->getMask();
4997
4998 if (SDValue V =
4999 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5000 return V;
5001
5002 if (SDValue V =
5003 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5004 return V;
5005
5006 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5007 // available.
5008 if (Subtarget.hasStdExtZvkb())
5009 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5010 return V;
5011
5012 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5013 // be undef which can be handled with a single SLIDEDOWN/UP.
5014 int LoSrc, HiSrc;
5015 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5016 if (Rotation > 0) {
5017 SDValue LoV, HiV;
5018 if (LoSrc >= 0) {
5019 LoV = LoSrc == 0 ? V1 : V2;
5020 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5021 }
5022 if (HiSrc >= 0) {
5023 HiV = HiSrc == 0 ? V1 : V2;
5024 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5025 }
5026
5027 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5028 // to slide LoV up by (NumElts - Rotation).
5029 unsigned InvRotate = NumElts - Rotation;
5030
5031 SDValue Res = DAG.getUNDEF(ContainerVT);
5032 if (HiV) {
5033 // Even though we could use a smaller VL, don't to avoid a vsetivli
5034 // toggle.
5035 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5036 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5037 }
5038 if (LoV)
5039 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5040 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5041 RISCVII::TAIL_AGNOSTIC);
5042
5043 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5044 }
5045
5046 // If this is a deinterleave and we can widen the vector, then we can use
5047 // vnsrl to deinterleave.
5048 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5049 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5050 Subtarget, DAG);
5051 }
5052
5053 if (SDValue V =
5054 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5055 return V;
5056
5057 // Detect an interleave shuffle and lower to
5058 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5059 int EvenSrc, OddSrc;
5060 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5061 // Extract the halves of the vectors.
5062 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5063
5064 int Size = Mask.size();
5065 SDValue EvenV, OddV;
5066 assert(EvenSrc >= 0 && "Undef source?");
5067 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5068 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5069 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5070
5071 assert(OddSrc >= 0 && "Undef source?");
5072 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5073 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5074 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5075
5076 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5077 }
5078
5079
5080 // Handle any remaining single source shuffles
5081 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5082 if (V2.isUndef()) {
5083 // We might be able to express the shuffle as a bitrotate. But even if we
5084 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5085 // shifts and a vor will have a higher throughput than a vrgather.
5086 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5087 return V;
5088
5089 if (VT.getScalarSizeInBits() == 8 &&
5090 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5091 // On such a vector we're unable to use i8 as the index type.
5092 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5093 // may involve vector splitting if we're already at LMUL=8, or our
5094 // user-supplied maximum fixed-length LMUL.
5095 return SDValue();
5096 }
5097
5098 // Base case for the two operand recursion below - handle the worst case
5099 // single source shuffle.
5100 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5101 MVT IndexVT = VT.changeTypeToInteger();
5102 // Since we can't introduce illegal index types at this stage, use i16 and
5103 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5104 // than XLenVT.
5105 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5106 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5107 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5108 }
5109
5110 // If the mask allows, we can do all the index computation in 16 bits. This
5111 // requires less work and less register pressure at high LMUL, and creates
5112 // smaller constants which may be cheaper to materialize.
5113 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5114 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5115 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5116 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5117 }
5118
5119 MVT IndexContainerVT =
5120 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5121
5122 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5123 SmallVector<SDValue> GatherIndicesLHS;
5124 for (int MaskIndex : Mask) {
5125 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5126 GatherIndicesLHS.push_back(IsLHSIndex
5127 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5128 : DAG.getUNDEF(XLenVT));
5129 }
5130 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5131 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5132 Subtarget);
5133 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5134 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5135 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5136 }
5137
5138 // By default we preserve the original operand order, and use a mask to
5139 // select LHS as true and RHS as false. However, since RVV vector selects may
5140 // feature splats but only on the LHS, we may choose to invert our mask and
5141 // instead select between RHS and LHS.
5142 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5143
5144 // Detect shuffles which can be re-expressed as vector selects; these are
5145 // shuffles in which each element in the destination is taken from an element
5146 // at the corresponding index in either source vectors.
5147 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5148 int MaskIndex = MaskIdx.value();
5149 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5150 });
5151 if (IsSelect) {
5152 // Now construct the mask that will be used by the vselect operation.
5153 SmallVector<SDValue> MaskVals;
5154 for (int MaskIndex : Mask) {
5155 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5156 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5157 }
5158
5159 if (SwapOps)
5160 std::swap(V1, V2);
5161
5162 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5163 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5164 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5165 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5166 }
5167
5168 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5169 // merged with a second vrgather.
5170 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5171 SmallVector<SDValue> MaskVals;
5172
5173 // Now construct the mask that will be used by the blended vrgather operation.
 5174   // Construct the appropriate indices into each vector.
5175 for (int MaskIndex : Mask) {
5176 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5177 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5178 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5179 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5180 ? MaskIndex : -1);
5181 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5182 }
5183
5184 if (SwapOps) {
5185 std::swap(V1, V2);
5186 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5187 }
5188
5189 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5190 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5191 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5192
5193 // Recursively invoke lowering for each operand if we had two
5194 // independent single source shuffles, and then combine the result via a
5195 // vselect. Note that the vselect will likely be folded back into the
5196 // second permute (vrgather, or other) by the post-isel combine.
5197 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5198 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5199 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5200}
5201
 5202 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
 5203   // Support splats for any type. These should type legalize well.
5204 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5205 return true;
5206
5207 // Only support legal VTs for other shuffles for now.
5208 if (!isTypeLegal(VT))
5209 return false;
5210
5211 MVT SVT = VT.getSimpleVT();
5212
5213 // Not for i1 vectors.
5214 if (SVT.getScalarType() == MVT::i1)
5215 return false;
5216
5217 int Dummy1, Dummy2;
5218 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5219 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5220}
5221
5222// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5223// the exponent.
5224SDValue
5225RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 MVT VT = Op.getSimpleValueType();
5228 unsigned EltSize = VT.getScalarSizeInBits();
5229 SDValue Src = Op.getOperand(0);
5230 SDLoc DL(Op);
5231 MVT ContainerVT = VT;
5232
5233 SDValue Mask, VL;
5234 if (Op->isVPOpcode()) {
5235 Mask = Op.getOperand(1);
5236 if (VT.isFixedLengthVector())
5237 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5238 Subtarget);
5239 VL = Op.getOperand(2);
5240 }
5241
 5242   // We choose an FP type that can represent the value if possible. Otherwise,
 5243   // we use a round-towards-zero conversion to get the correct result exponent.
5244 // TODO: Use f16 for i8 when possible?
5245 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5246 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5247 FloatEltVT = MVT::f32;
5248 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5249
5250 // Legal types should have been checked in the RISCVTargetLowering
5251 // constructor.
5252 // TODO: Splitting may make sense in some cases.
5253 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5254 "Expected legal float type!");
5255
5256 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5257 // The trailing zero count is equal to log2 of this single bit value.
5258 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5259 SDValue Neg = DAG.getNegative(Src, DL, VT);
5260 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5261 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5262 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5263 Src, Mask, VL);
5264 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5265 }
5266
5267 // We have a legal FP type, convert to it.
5268 SDValue FloatVal;
5269 if (FloatVT.bitsGT(VT)) {
5270 if (Op->isVPOpcode())
5271 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5272 else
5273 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5274 } else {
5275 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(VT);
5278 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5279 }
5280 if (!Op->isVPOpcode())
5281 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5282 SDValue RTZRM =
 5283       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
 5284   MVT ContainerFloatVT =
5285 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5286 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5287 Src, Mask, RTZRM, VL);
5288 if (VT.isFixedLengthVector())
5289 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5290 }
5291 // Bitcast to integer and shift the exponent to the LSB.
5292 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5293 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5294 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5295
5296 SDValue Exp;
5297 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5298 if (Op->isVPOpcode()) {
5299 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5300 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5301 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5302 } else {
5303 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5304 DAG.getConstant(ShiftAmt, DL, IntVT));
5305 if (IntVT.bitsLT(VT))
5306 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5307 else if (IntVT.bitsGT(VT))
5308 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5309 }
5310
5311 // The exponent contains log2 of the value in biased form.
5312 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5313 // For trailing zeros, we just need to subtract the bias.
5314 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5315 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5316 DAG.getConstant(ExponentBias, DL, VT));
5317 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5318 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5319 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5320
5321 // For leading zeros, we need to remove the bias and convert from log2 to
5322 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
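  // Illustrative example (not in the original source): for a 32-bit element
  // holding 16, the f32 conversion has a biased exponent of 127 + 4 = 131, so
  // (127 + 31) - 131 = 27, matching ctlz(16) for an i32 element.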
5323 unsigned Adjust = ExponentBias + (EltSize - 1);
5324 SDValue Res;
5325 if (Op->isVPOpcode())
5326 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5327 Mask, VL);
5328 else
5329 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5330
 5331   // With a zero input, the result above equals Adjust, which is greater than
5332 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5333 if (Op.getOpcode() == ISD::CTLZ)
5334 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5335 else if (Op.getOpcode() == ISD::VP_CTLZ)
5336 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5337 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5338 return Res;
5339}
5340
5341// While RVV has alignment restrictions, we should always be able to load as a
5342// legal equivalently-sized byte-typed vector instead. This method is
 5343// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
 5344// the load is already correctly aligned, it returns SDValue().
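// Illustrative example (not in the original source): an insufficiently aligned
// load of <vscale x 2 x i32> can instead be emitted as a load of
// <vscale x 8 x i8> (the same size in bytes) followed by a bitcast back to the
// original type, which is what the code below does via NewVT.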
5345SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 auto *Load = cast<LoadSDNode>(Op);
5348 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5349
 5350   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
 5351                                      Load->getMemoryVT(),
5352 *Load->getMemOperand()))
5353 return SDValue();
5354
5355 SDLoc DL(Op);
5356 MVT VT = Op.getSimpleValueType();
5357 unsigned EltSizeBits = VT.getScalarSizeInBits();
5358 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5359 "Unexpected unaligned RVV load type");
5360 MVT NewVT =
5361 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5362 assert(NewVT.isValid() &&
5363 "Expecting equally-sized RVV vector types to be legal");
5364 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5365 Load->getPointerInfo(), Load->getOriginalAlign(),
5366 Load->getMemOperand()->getFlags());
5367 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5368}
5369
5370// While RVV has alignment restrictions, we should always be able to store as a
5371// legal equivalently-sized byte-typed vector instead. This method is
 5372// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5373// returns SDValue() if the store is already correctly aligned.
5374SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5375 SelectionDAG &DAG) const {
5376 auto *Store = cast<StoreSDNode>(Op);
5377 assert(Store && Store->getValue().getValueType().isVector() &&
5378 "Expected vector store");
5379
 5380   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
 5381                                      Store->getMemoryVT(),
5382 *Store->getMemOperand()))
5383 return SDValue();
5384
5385 SDLoc DL(Op);
5386 SDValue StoredVal = Store->getValue();
5387 MVT VT = StoredVal.getSimpleValueType();
5388 unsigned EltSizeBits = VT.getScalarSizeInBits();
5389 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5390 "Unexpected unaligned RVV store type");
5391 MVT NewVT =
5392 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5393 assert(NewVT.isValid() &&
5394 "Expecting equally-sized RVV vector types to be legal");
5395 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5396 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5397 Store->getPointerInfo(), Store->getOriginalAlign(),
5398 Store->getMemOperand()->getFlags());
5399}
5400
 5401 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
 5402                              const RISCVSubtarget &Subtarget) {
5403 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5404
5405 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5406
5407 // All simm32 constants should be handled by isel.
5408 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5409 // this check redundant, but small immediates are common so this check
5410 // should have better compile time.
5411 if (isInt<32>(Imm))
5412 return Op;
5413
5414 // We only need to cost the immediate, if constant pool lowering is enabled.
5415 if (!Subtarget.useConstantPoolForLargeInts())
5416 return Op;
5417
 5418   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
 5419   if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5420 return Op;
5421
5422 // Optimizations below are disabled for opt size. If we're optimizing for
5423 // size, use a constant pool.
5424 if (DAG.shouldOptForSize())
5425 return SDValue();
5426
 5427   // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
 5428   // do that if it will avoid a constant pool.
5429 // It will require an extra temporary register though.
5430 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
 5431   // low and high 32 bits are the same and bits 31 and 63 are set.
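  // Illustrative example (not in the original source): with Zba, the constant
  // 0xDEADBEEFDEADBEEF has equal low and high halves with bits 31 and 63 set,
  // so it can be built as X = materialize(0xDEADBEEF); ADD_UW X, (SLLI X, 32),
  // avoiding a constant pool load at the cost of one temporary register.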
5432 unsigned ShiftAmt, AddOpc;
5433 RISCVMatInt::InstSeq SeqLo =
5434 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5435 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5436 return Op;
5437
5438 return SDValue();
5439}
5440
 5441 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
 5442                                  const RISCVSubtarget &Subtarget) {
5443 SDLoc dl(Op);
5444 AtomicOrdering FenceOrdering =
5445 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5446 SyncScope::ID FenceSSID =
5447 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5448
5449 if (Subtarget.hasStdExtZtso()) {
5450 // The only fence that needs an instruction is a sequentially-consistent
5451 // cross-thread fence.
5452 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5453 FenceSSID == SyncScope::System)
5454 return Op;
5455
5456 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5457 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5458 }
5459
5460 // singlethread fences only synchronize with signal handlers on the same
5461 // thread and thus only need to preserve instruction order, not actually
5462 // enforce memory ordering.
5463 if (FenceSSID == SyncScope::SingleThread)
5464 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5465 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5466
5467 return Op;
5468}
5469
 5470 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
 5471   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5472 "Unexpected custom legalisation");
5473
5474 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
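  // Illustrative example (not in the original source): saddsat(0x7fffffff, 1)
  // computed in i64 gives 0x80000000, smin with INT32_MAX clamps it back to
  // 0x7fffffff and smax with INT32_MIN leaves it unchanged, so truncation
  // returns the saturated i32 result.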
5475 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5476 SDLoc DL(Op);
5477 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5478 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5479 SDValue Result =
5480 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5481
5482 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5483 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5484 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5485 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5486 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5487 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5488 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5489}
5490
 5491 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
 5492   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5493 "Unexpected custom legalisation");
5494
5495 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5496 // sign extend allows overflow of the lower 32 bits to be detected on
5497 // the promoted size.
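  // Illustrative example (not in the original source): for uaddsat(0xffffffff, 1),
  // sign extension yields UINT64_MAX + 1, which overflows the promoted i64 op
  // and saturates to UINT64_MAX; truncation gives 0xffffffff, the correct i32
  // result. Zero extension would not overflow and would wrongly yield 0.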
5498 SDLoc DL(Op);
5499 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5500 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5501 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5503}
5504
5505// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
 5506 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
 5507   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5508 "Unexpected custom legalisation");
5509 if (isa<ConstantSDNode>(Op.getOperand(1)))
5510 return SDValue();
5511
5512 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5513 SDLoc DL(Op);
5514 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5515 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5516 SDValue WideOp =
5517 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5518 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5519 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5520 DAG.getValueType(MVT::i32));
5521 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5522 ISD::SETNE);
5523 return DAG.getMergeValues({Res, Ovf}, DL);
5524}
5525
5526// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
 5527 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
 5528   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5529 "Unexpected custom legalisation");
5530 SDLoc DL(Op);
5531 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5532 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5533 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5534 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5535 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5536 DAG.getValueType(MVT::i32));
5537 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5538 ISD::SETNE);
5539 return DAG.getMergeValues({Res, Ovf}, DL);
5540}
5541
5542SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5543 SelectionDAG &DAG) const {
5544 SDLoc DL(Op);
5545 MVT VT = Op.getSimpleValueType();
5546 MVT XLenVT = Subtarget.getXLenVT();
5547 unsigned Check = Op.getConstantOperandVal(1);
5548 unsigned TDCMask = 0;
5549 if (Check & fcSNan)
5550 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5551 if (Check & fcQNan)
5552 TDCMask |= RISCV::FPMASK_Quiet_NaN;
 5553   if (Check & fcPosInf)
 5554     TDCMask |= RISCV::FPMASK_Positive_Infinity;
 5555   if (Check & fcNegInf)
 5556     TDCMask |= RISCV::FPMASK_Negative_Infinity;
 5557   if (Check & fcPosNormal)
 5558     TDCMask |= RISCV::FPMASK_Positive_Normal;
 5559   if (Check & fcNegNormal)
 5560     TDCMask |= RISCV::FPMASK_Negative_Normal;
 5561   if (Check & fcPosSubnormal)
 5562     TDCMask |= RISCV::FPMASK_Positive_Subnormal;
 5563   if (Check & fcNegSubnormal)
 5564     TDCMask |= RISCV::FPMASK_Negative_Subnormal;
 5565   if (Check & fcPosZero)
 5566     TDCMask |= RISCV::FPMASK_Positive_Zero;
 5567   if (Check & fcNegZero)
 5568     TDCMask |= RISCV::FPMASK_Negative_Zero;
5569
5570 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5571
5572 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5573
5574 if (VT.isVector()) {
5575 SDValue Op0 = Op.getOperand(0);
5576 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5577
5578 if (VT.isScalableVector()) {
 5579       MVT DstVT = VT0.changeVectorElementTypeToInteger();
 5580       auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5581 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5582 Mask = Op.getOperand(2);
5583 VL = Op.getOperand(3);
5584 }
5585 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5586 VL, Op->getFlags());
5587 if (IsOneBitMask)
5588 return DAG.getSetCC(DL, VT, FPCLASS,
 5589                             DAG.getConstant(TDCMask, DL, DstVT),
 5590                             ISD::SETEQ);
5591 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5592 DAG.getConstant(TDCMask, DL, DstVT));
5593 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5594 ISD::SETNE);
5595 }
5596
5597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5600 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5601 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5602 Mask = Op.getOperand(2);
5603 MVT MaskContainerVT =
5604 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5605 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5606 VL = Op.getOperand(3);
5607 }
5608 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5609
5610 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5611 Mask, VL, Op->getFlags());
5612
5613 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5614 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5615 if (IsOneBitMask) {
5616 SDValue VMSEQ =
5617 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5618 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5619 DAG.getUNDEF(ContainerVT), Mask, VL});
5620 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5621 }
5622 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5623 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5624
5625 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5626 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5627 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5628
5629 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5630 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5631 DAG.getUNDEF(ContainerVT), Mask, VL});
5632 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5633 }
5634
5635 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5636 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
 5637   SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
 5638                              ISD::SETNE);
5639 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5640}
5641
5642// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5643// operations propagate nans.
 5644 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
 5645                                       const RISCVSubtarget &Subtarget) {
5646 SDLoc DL(Op);
5647 MVT VT = Op.getSimpleValueType();
5648
5649 SDValue X = Op.getOperand(0);
5650 SDValue Y = Op.getOperand(1);
5651
5652 if (!VT.isVector()) {
5653 MVT XLenVT = Subtarget.getXLenVT();
5654
5655 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5656 // ensures that when one input is a nan, the other will also be a nan
5657 // allowing the nan to propagate. If both inputs are nan, this will swap the
 5658     // inputs, which is harmless.
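    // Illustrative example (not in the original source): for fmaximum(NaN, 1.0),
    // X is a nan, so NewY becomes X and the final fmax sees two nans and
    // returns a nan, as fmaximum requires; plain fmax(NaN, 1.0) would return
    // 1.0 instead.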
5659
5660 SDValue NewY = Y;
5661 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5662 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5663 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5664 }
5665
5666 SDValue NewX = X;
5667 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5668 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5669 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5670 }
5671
5672 unsigned Opc =
5673 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5674 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5675 }
5676
 5677   // Check for NaNs before the fixed-length vectors are converted to scalable ones.
5678 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5679 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5680
5681 MVT ContainerVT = VT;
5682 if (VT.isFixedLengthVector()) {
5683 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5684 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5685 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5686 }
5687
5688 SDValue Mask, VL;
5689 if (Op->isVPOpcode()) {
5690 Mask = Op.getOperand(2);
5691 if (VT.isFixedLengthVector())
5692 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5693 Subtarget);
5694 VL = Op.getOperand(3);
5695 } else {
5696 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5697 }
5698
5699 SDValue NewY = Y;
5700 if (!XIsNeverNan) {
5701 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5702 {X, X, DAG.getCondCode(ISD::SETOEQ),
5703 DAG.getUNDEF(ContainerVT), Mask, VL});
5704 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5705 DAG.getUNDEF(ContainerVT), VL);
5706 }
5707
5708 SDValue NewX = X;
5709 if (!YIsNeverNan) {
5710 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5711 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5712 DAG.getUNDEF(ContainerVT), Mask, VL});
5713 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5714 DAG.getUNDEF(ContainerVT), VL);
5715 }
5716
5717 unsigned Opc =
 5718       Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
 5719           ? RISCVISD::VFMAX_VL
 5720           : RISCVISD::VFMIN_VL;
5721 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5722 DAG.getUNDEF(ContainerVT), Mask, VL);
5723 if (VT.isFixedLengthVector())
5724 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5725 return Res;
5726}
5727
5728/// Get a RISC-V target specified VL op for a given SDNode.
5729static unsigned getRISCVVLOp(SDValue Op) {
5730#define OP_CASE(NODE) \
5731 case ISD::NODE: \
5732 return RISCVISD::NODE##_VL;
5733#define VP_CASE(NODE) \
5734 case ISD::VP_##NODE: \
5735 return RISCVISD::NODE##_VL;
5736 // clang-format off
5737 switch (Op.getOpcode()) {
5738 default:
5739 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5740 OP_CASE(ADD)
5741 OP_CASE(SUB)
5742 OP_CASE(MUL)
5743 OP_CASE(MULHS)
5744 OP_CASE(MULHU)
5745 OP_CASE(SDIV)
5746 OP_CASE(SREM)
5747 OP_CASE(UDIV)
5748 OP_CASE(UREM)
5749 OP_CASE(SHL)
5750 OP_CASE(SRA)
5751 OP_CASE(SRL)
5752 OP_CASE(ROTL)
5753 OP_CASE(ROTR)
5754 OP_CASE(BSWAP)
5755 OP_CASE(CTTZ)
5756 OP_CASE(CTLZ)
5757 OP_CASE(CTPOP)
5758 OP_CASE(BITREVERSE)
5759 OP_CASE(SADDSAT)
5760 OP_CASE(UADDSAT)
5761 OP_CASE(SSUBSAT)
5762 OP_CASE(USUBSAT)
5763 OP_CASE(AVGFLOORU)
5764 OP_CASE(AVGCEILU)
5765 OP_CASE(FADD)
5766 OP_CASE(FSUB)
5767 OP_CASE(FMUL)
5768 OP_CASE(FDIV)
5769 OP_CASE(FNEG)
5770 OP_CASE(FABS)
5771 OP_CASE(FSQRT)
5772 OP_CASE(SMIN)
5773 OP_CASE(SMAX)
5774 OP_CASE(UMIN)
5775 OP_CASE(UMAX)
5776 OP_CASE(STRICT_FADD)
5777 OP_CASE(STRICT_FSUB)
5778 OP_CASE(STRICT_FMUL)
5779 OP_CASE(STRICT_FDIV)
5780 OP_CASE(STRICT_FSQRT)
5781 VP_CASE(ADD) // VP_ADD
5782 VP_CASE(SUB) // VP_SUB
5783 VP_CASE(MUL) // VP_MUL
5784 VP_CASE(SDIV) // VP_SDIV
5785 VP_CASE(SREM) // VP_SREM
5786 VP_CASE(UDIV) // VP_UDIV
5787 VP_CASE(UREM) // VP_UREM
5788 VP_CASE(SHL) // VP_SHL
5789 VP_CASE(FADD) // VP_FADD
5790 VP_CASE(FSUB) // VP_FSUB
5791 VP_CASE(FMUL) // VP_FMUL
5792 VP_CASE(FDIV) // VP_FDIV
5793 VP_CASE(FNEG) // VP_FNEG
5794 VP_CASE(FABS) // VP_FABS
5795 VP_CASE(SMIN) // VP_SMIN
5796 VP_CASE(SMAX) // VP_SMAX
5797 VP_CASE(UMIN) // VP_UMIN
5798 VP_CASE(UMAX) // VP_UMAX
5799 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5800 VP_CASE(SETCC) // VP_SETCC
5801 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5802 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5803 VP_CASE(BITREVERSE) // VP_BITREVERSE
5804 VP_CASE(SADDSAT) // VP_SADDSAT
5805 VP_CASE(UADDSAT) // VP_UADDSAT
5806 VP_CASE(SSUBSAT) // VP_SSUBSAT
5807 VP_CASE(USUBSAT) // VP_USUBSAT
5808 VP_CASE(BSWAP) // VP_BSWAP
5809 VP_CASE(CTLZ) // VP_CTLZ
5810 VP_CASE(CTTZ) // VP_CTTZ
5811 VP_CASE(CTPOP) // VP_CTPOP
 5812   case ISD::CTLZ_ZERO_UNDEF:
 5813   case ISD::VP_CTLZ_ZERO_UNDEF:
5814 return RISCVISD::CTLZ_VL;
 5815   case ISD::CTTZ_ZERO_UNDEF:
 5816   case ISD::VP_CTTZ_ZERO_UNDEF:
5817 return RISCVISD::CTTZ_VL;
5818 case ISD::FMA:
5819 case ISD::VP_FMA:
5820 return RISCVISD::VFMADD_VL;
 5821   case ISD::STRICT_FMA:
 5822     return RISCVISD::STRICT_VFMADD_VL;
5823 case ISD::AND:
5824 case ISD::VP_AND:
5825 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5826 return RISCVISD::VMAND_VL;
5827 return RISCVISD::AND_VL;
5828 case ISD::OR:
5829 case ISD::VP_OR:
5830 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5831 return RISCVISD::VMOR_VL;
5832 return RISCVISD::OR_VL;
5833 case ISD::XOR:
5834 case ISD::VP_XOR:
5835 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5836 return RISCVISD::VMXOR_VL;
5837 return RISCVISD::XOR_VL;
5838 case ISD::VP_SELECT:
5839 case ISD::VP_MERGE:
5840 return RISCVISD::VMERGE_VL;
5841 case ISD::VP_ASHR:
5842 return RISCVISD::SRA_VL;
5843 case ISD::VP_LSHR:
5844 return RISCVISD::SRL_VL;
5845 case ISD::VP_SQRT:
5846 return RISCVISD::FSQRT_VL;
5847 case ISD::VP_SIGN_EXTEND:
5848 return RISCVISD::VSEXT_VL;
5849 case ISD::VP_ZERO_EXTEND:
5850 return RISCVISD::VZEXT_VL;
 5851   case ISD::VP_FP_TO_SINT:
 5852     return RISCVISD::VFCVT_RTZ_X_F_VL;
 5853   case ISD::VP_FP_TO_UINT:
 5854     return RISCVISD::VFCVT_RTZ_XU_F_VL;
5855 case ISD::FMINNUM:
5856 case ISD::VP_FMINNUM:
5857 return RISCVISD::VFMIN_VL;
5858 case ISD::FMAXNUM:
5859 case ISD::VP_FMAXNUM:
5860 return RISCVISD::VFMAX_VL;
5861 case ISD::LRINT:
5862 case ISD::VP_LRINT:
5863 case ISD::LLRINT:
 5864   case ISD::VP_LLRINT:
 5865     return RISCVISD::VFCVT_X_F_VL;
5866 }
5867 // clang-format on
5868#undef OP_CASE
5869#undef VP_CASE
5870}
5871
5872/// Return true if a RISC-V target specified op has a merge operand.
5873static bool hasMergeOp(unsigned Opcode) {
 5874   assert(Opcode > RISCVISD::FIRST_NUMBER &&
 5875          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
 5876          "not a RISC-V target specific op");
 5877   assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
 5878              126 &&
 5879          RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
 5880                  ISD::FIRST_TARGET_STRICTFP_OPCODE ==
 5881              21 &&
 5882          "adding target specific op should update this function");
5883 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5884 return true;
5885 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5886 return true;
5887 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5888 return true;
5889 if (Opcode == RISCVISD::SETCC_VL)
5890 return true;
5891 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5892 return true;
5893 if (Opcode == RISCVISD::VMERGE_VL)
5894 return true;
5895 return false;
5896}
5897
5898/// Return true if a RISC-V target specified op has a mask operand.
5899static bool hasMaskOp(unsigned Opcode) {
 5900   assert(Opcode > RISCVISD::FIRST_NUMBER &&
 5901          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
 5902          "not a RISC-V target specific op");
 5903   assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
 5904              126 &&
 5905          RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
 5906                  ISD::FIRST_TARGET_STRICTFP_OPCODE ==
 5907              21 &&
 5908          "adding target specific op should update this function");
5909 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5910 return true;
5911 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5912 return true;
 5913   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
 5914       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5915 return true;
5916 return false;
5917}
5918
 5919 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
 5920   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5921 SDLoc DL(Op);
5922
 5923   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
 5924   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
 5925 
5926 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5927 if (!Op.getOperand(j).getValueType().isVector()) {
5928 LoOperands[j] = Op.getOperand(j);
5929 HiOperands[j] = Op.getOperand(j);
5930 continue;
5931 }
5932 std::tie(LoOperands[j], HiOperands[j]) =
5933 DAG.SplitVector(Op.getOperand(j), DL);
5934 }
5935
5936 SDValue LoRes =
5937 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5938 SDValue HiRes =
5939 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5940
5941 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5942}
5943
 5944 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
 5945   assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5946 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5947 SDLoc DL(Op);
5948
 5949   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
 5950   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
 5951 
5952 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5953 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5954 std::tie(LoOperands[j], HiOperands[j]) =
5955 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5956 continue;
5957 }
5958 if (!Op.getOperand(j).getValueType().isVector()) {
5959 LoOperands[j] = Op.getOperand(j);
5960 HiOperands[j] = Op.getOperand(j);
5961 continue;
5962 }
5963 std::tie(LoOperands[j], HiOperands[j]) =
5964 DAG.SplitVector(Op.getOperand(j), DL);
5965 }
5966
5967 SDValue LoRes =
5968 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5969 SDValue HiRes =
5970 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5971
5972 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5973}
5974
 5975 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
 5976   SDLoc DL(Op);
5977
5978 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5979 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5980 auto [EVLLo, EVLHi] =
5981 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5982
5983 SDValue ResLo =
5984 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5985 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5986 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5987 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5988}
5989
 5990 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
 5991 
5992 assert(Op->isStrictFPOpcode());
5993
5994 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5995
5996 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
5997 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
5998
5999 SDLoc DL(Op);
6000
 6001   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
 6002   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
 6003 
6004 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6005 if (!Op.getOperand(j).getValueType().isVector()) {
6006 LoOperands[j] = Op.getOperand(j);
6007 HiOperands[j] = Op.getOperand(j);
6008 continue;
6009 }
6010 std::tie(LoOperands[j], HiOperands[j]) =
6011 DAG.SplitVector(Op.getOperand(j), DL);
6012 }
6013
6014 SDValue LoRes =
6015 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6016 HiOperands[0] = LoRes.getValue(1);
6017 SDValue HiRes =
6018 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6019
6020 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6021 LoRes.getValue(0), HiRes.getValue(0));
6022 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6023}
6024
 6025 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
 6026                                             SelectionDAG &DAG) const {
6027 switch (Op.getOpcode()) {
6028 default:
6029 report_fatal_error("unimplemented operand");
6030 case ISD::ATOMIC_FENCE:
6031 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6032 case ISD::GlobalAddress:
6033 return lowerGlobalAddress(Op, DAG);
6034 case ISD::BlockAddress:
6035 return lowerBlockAddress(Op, DAG);
6036 case ISD::ConstantPool:
6037 return lowerConstantPool(Op, DAG);
6038 case ISD::JumpTable:
6039 return lowerJumpTable(Op, DAG);
 6040   case ISD::GlobalTLSAddress:
 6041     return lowerGlobalTLSAddress(Op, DAG);
6042 case ISD::Constant:
6043 return lowerConstant(Op, DAG, Subtarget);
6044 case ISD::SELECT:
6045 return lowerSELECT(Op, DAG);
6046 case ISD::BRCOND:
6047 return lowerBRCOND(Op, DAG);
6048 case ISD::VASTART:
6049 return lowerVASTART(Op, DAG);
6050 case ISD::FRAMEADDR:
6051 return lowerFRAMEADDR(Op, DAG);
6052 case ISD::RETURNADDR:
6053 return lowerRETURNADDR(Op, DAG);
6054 case ISD::SADDO:
6055 case ISD::SSUBO:
6056 return lowerSADDO_SSUBO(Op, DAG);
6057 case ISD::SMULO:
6058 return lowerSMULO(Op, DAG);
6059 case ISD::SHL_PARTS:
6060 return lowerShiftLeftParts(Op, DAG);
6061 case ISD::SRA_PARTS:
6062 return lowerShiftRightParts(Op, DAG, true);
6063 case ISD::SRL_PARTS:
6064 return lowerShiftRightParts(Op, DAG, false);
6065 case ISD::ROTL:
6066 case ISD::ROTR:
6067 if (Op.getValueType().isFixedLengthVector()) {
6068 assert(Subtarget.hasStdExtZvkb());
6069 return lowerToScalableOp(Op, DAG);
6070 }
6071 assert(Subtarget.hasVendorXTHeadBb() &&
6072 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6073 "Unexpected custom legalization");
6074 // XTHeadBb only supports rotate by constant.
6075 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6076 return SDValue();
6077 return Op;
6078 case ISD::BITCAST: {
6079 SDLoc DL(Op);
6080 EVT VT = Op.getValueType();
6081 SDValue Op0 = Op.getOperand(0);
6082 EVT Op0VT = Op0.getValueType();
6083 MVT XLenVT = Subtarget.getXLenVT();
6084 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6085 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6086 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6087 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6088 return FPConv;
6089 }
6090 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6091 Subtarget.hasStdExtZfbfmin()) {
6092 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6093 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6094 return FPConv;
6095 }
6096 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6097 Subtarget.hasStdExtFOrZfinx()) {
6098 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6099 SDValue FPConv =
6100 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6101 return FPConv;
6102 }
6103 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6104 SDValue Lo, Hi;
6105 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6106 SDValue RetReg =
6107 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6108 return RetReg;
6109 }
6110
6111 // Consider other scalar<->scalar casts as legal if the types are legal.
6112 // Otherwise expand them.
6113 if (!VT.isVector() && !Op0VT.isVector()) {
6114 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6115 return Op;
6116 return SDValue();
6117 }
6118
6119 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6120 "Unexpected types");
6121
6122 if (VT.isFixedLengthVector()) {
6123 // We can handle fixed length vector bitcasts with a simple replacement
6124 // in isel.
6125 if (Op0VT.isFixedLengthVector())
6126 return Op;
6127 // When bitcasting from scalar to fixed-length vector, insert the scalar
6128 // into a one-element vector of the result type, and perform a vector
6129 // bitcast.
6130 if (!Op0VT.isVector()) {
6131 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6132 if (!isTypeLegal(BVT))
6133 return SDValue();
6134 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6135 DAG.getUNDEF(BVT), Op0,
6136 DAG.getVectorIdxConstant(0, DL)));
6137 }
6138 return SDValue();
6139 }
6140 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6141 // thus: bitcast the vector to a one-element vector type whose element type
6142 // is the same as the result type, and extract the first element.
6143 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6144 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6145 if (!isTypeLegal(BVT))
6146 return SDValue();
6147 SDValue BVec = DAG.getBitcast(BVT, Op0);
6148 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6149 DAG.getVectorIdxConstant(0, DL));
6150 }
6151 return SDValue();
6152 }
 6153   case ISD::INTRINSIC_WO_CHAIN:
 6154     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
 6155   case ISD::INTRINSIC_W_CHAIN:
 6156     return LowerINTRINSIC_W_CHAIN(Op, DAG);
 6157   case ISD::INTRINSIC_VOID:
 6158     return LowerINTRINSIC_VOID(Op, DAG);
6159 case ISD::IS_FPCLASS:
6160 return LowerIS_FPCLASS(Op, DAG);
6161 case ISD::BITREVERSE: {
6162 MVT VT = Op.getSimpleValueType();
6163 if (VT.isFixedLengthVector()) {
6164 assert(Subtarget.hasStdExtZvbb());
6165 return lowerToScalableOp(Op, DAG);
6166 }
6167 SDLoc DL(Op);
6168 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6169 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6170 // Expand bitreverse to a bswap(rev8) followed by brev8.
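    // Illustrative example (not in the original source): for i32 0x12345678,
    // rev8 (bswap) produces 0x78563412 and brev8 then reverses the bits inside
    // each byte, giving 0x1E6A2C48, the full 32-bit bit reversal.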
6171 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6172 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6173 }
6174 case ISD::TRUNCATE:
6175 // Only custom-lower vector truncates
6176 if (!Op.getSimpleValueType().isVector())
6177 return Op;
6178 return lowerVectorTruncLike(Op, DAG);
6179 case ISD::ANY_EXTEND:
6180 case ISD::ZERO_EXTEND:
6181 if (Op.getOperand(0).getValueType().isVector() &&
6182 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6183 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6184 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6185 case ISD::SIGN_EXTEND:
6186 if (Op.getOperand(0).getValueType().isVector() &&
6187 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6188 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6189 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
 6190   case ISD::SPLAT_VECTOR_PARTS:
 6191     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
 6192   case ISD::INSERT_VECTOR_ELT:
 6193     return lowerINSERT_VECTOR_ELT(Op, DAG);
 6194   case ISD::EXTRACT_VECTOR_ELT:
 6195     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6196 case ISD::SCALAR_TO_VECTOR: {
6197 MVT VT = Op.getSimpleValueType();
6198 SDLoc DL(Op);
6199 SDValue Scalar = Op.getOperand(0);
6200 if (VT.getVectorElementType() == MVT::i1) {
6201 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6202 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6203 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6204 }
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector())
6207 ContainerVT = getContainerForFixedLengthVector(VT);
6208 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6209 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6210 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6211 DAG.getUNDEF(ContainerVT), Scalar, VL);
6212 if (VT.isFixedLengthVector())
6213 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6214 return V;
6215 }
6216 case ISD::VSCALE: {
6217 MVT XLenVT = Subtarget.getXLenVT();
6218 MVT VT = Op.getSimpleValueType();
6219 SDLoc DL(Op);
6220 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
 6221     // We define our scalable vector types for lmul=1 to use a 64-bit known
 6222     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
 6223     // vscale as VLENB / 8.
6224 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6225 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6226 report_fatal_error("Support for VLEN==32 is incomplete.");
6227 // We assume VLENB is a multiple of 8. We manually choose the best shift
6228 // here because SimplifyDemandedBits isn't always able to simplify it.
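    // Illustrative example (not in the original source): for vscale * 4, the
    // multiplier is a power of two with Log2 = 2 < 3, so a single SRL of VLENB
    // by 1 suffices, since (VLENB / 8) * 4 == VLENB / 2.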
6229 uint64_t Val = Op.getConstantOperandVal(0);
6230 if (isPowerOf2_64(Val)) {
6231 uint64_t Log2 = Log2_64(Val);
6232 if (Log2 < 3)
6233 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6234 DAG.getConstant(3 - Log2, DL, VT));
6235 else if (Log2 > 3)
6236 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6237 DAG.getConstant(Log2 - 3, DL, XLenVT));
6238 } else if ((Val % 8) == 0) {
6239 // If the multiplier is a multiple of 8, scale it down to avoid needing
6240 // to shift the VLENB value.
6241 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6242 DAG.getConstant(Val / 8, DL, XLenVT));
6243 } else {
6244 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6245 DAG.getConstant(3, DL, XLenVT));
6246 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6247 DAG.getConstant(Val, DL, XLenVT));
6248 }
6249 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6250 }
6251 case ISD::FPOWI: {
6252 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6253 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6254 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6255 Op.getOperand(1).getValueType() == MVT::i32) {
6256 SDLoc DL(Op);
6257 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6258 SDValue Powi =
6259 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6260 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6261 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6262 }
6263 return SDValue();
6264 }
6265 case ISD::FMAXIMUM:
6266 case ISD::FMINIMUM:
6267 if (Op.getValueType() == MVT::nxv32f16 &&
6268 (Subtarget.hasVInstructionsF16Minimal() &&
6269 !Subtarget.hasVInstructionsF16()))
6270 return SplitVectorOp(Op, DAG);
6271 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6272 case ISD::FP_EXTEND: {
6273 SDLoc DL(Op);
6274 EVT VT = Op.getValueType();
6275 SDValue Op0 = Op.getOperand(0);
6276 EVT Op0VT = Op0.getValueType();
6277 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6278 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6279 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6280 SDValue FloatVal =
6281 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6282 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6283 }
6284
6285 if (!Op.getValueType().isVector())
6286 return Op;
6287 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6288 }
6289 case ISD::FP_ROUND: {
6290 SDLoc DL(Op);
6291 EVT VT = Op.getValueType();
6292 SDValue Op0 = Op.getOperand(0);
6293 EVT Op0VT = Op0.getValueType();
6294 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6295 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6296 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6297 Subtarget.hasStdExtDOrZdinx()) {
6298 SDValue FloatVal =
6299 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6300 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6301 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6302 }
6303
6304 if (!Op.getValueType().isVector())
6305 return Op;
6306 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6307 }
 6308   case ISD::STRICT_FP_ROUND:
 6309   case ISD::STRICT_FP_EXTEND:
 6310     return lowerStrictFPExtendOrRoundLike(Op, DAG);
6311 case ISD::SINT_TO_FP:
6312 case ISD::UINT_TO_FP:
6313 if (Op.getValueType().isVector() &&
6314 Op.getValueType().getScalarType() == MVT::f16 &&
6315 (Subtarget.hasVInstructionsF16Minimal() &&
6316 !Subtarget.hasVInstructionsF16())) {
6317 if (Op.getValueType() == MVT::nxv32f16)
6318 return SplitVectorOp(Op, DAG);
6319 // int -> f32
6320 SDLoc DL(Op);
6321 MVT NVT =
6322 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6323 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6324 // f32 -> f16
6325 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6326 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6327 }
6328 [[fallthrough]];
6329 case ISD::FP_TO_SINT:
6330 case ISD::FP_TO_UINT:
6331 if (SDValue Op1 = Op.getOperand(0);
6332 Op1.getValueType().isVector() &&
6333 Op1.getValueType().getScalarType() == MVT::f16 &&
6334 (Subtarget.hasVInstructionsF16Minimal() &&
6335 !Subtarget.hasVInstructionsF16())) {
6336 if (Op1.getValueType() == MVT::nxv32f16)
6337 return SplitVectorOp(Op, DAG);
6338 // f16 -> f32
6339 SDLoc DL(Op);
6340 MVT NVT = MVT::getVectorVT(MVT::f32,
6341 Op1.getValueType().getVectorElementCount());
6342 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6343 // f32 -> int
6344 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6345 }
6346 [[fallthrough]];
 6347   case ISD::STRICT_FP_TO_SINT:
 6348   case ISD::STRICT_FP_TO_UINT:
 6349   case ISD::STRICT_SINT_TO_FP:
 6350   case ISD::STRICT_UINT_TO_FP: {
 6351     // RVV can only do fp<->int conversions to types half/double the size as
6352 // the source. We custom-lower any conversions that do two hops into
6353 // sequences.
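    // Illustrative example (not in the original source): a <4 x f16> ->
    // <4 x i64> conversion quadruples the element size, so it is emitted as an
    // f16 -> f32 fp_extend followed by a single f32 -> i64 conversion, per the
    // widening path below.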
6354 MVT VT = Op.getSimpleValueType();
6355 if (!VT.isVector())
6356 return Op;
6357 SDLoc DL(Op);
6358 bool IsStrict = Op->isStrictFPOpcode();
6359 SDValue Src = Op.getOperand(0 + IsStrict);
6360 MVT EltVT = VT.getVectorElementType();
6361 MVT SrcVT = Src.getSimpleValueType();
6362 MVT SrcEltVT = SrcVT.getVectorElementType();
6363 unsigned EltSize = EltVT.getSizeInBits();
6364 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6365 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6366 "Unexpected vector element types");
6367
6368 bool IsInt2FP = SrcEltVT.isInteger();
6369 // Widening conversions
6370 if (EltSize > (2 * SrcEltSize)) {
6371 if (IsInt2FP) {
6372 // Do a regular integer sign/zero extension then convert to float.
6373 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6375 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6376 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6379 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6380 if (IsStrict)
6381 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6382 Op.getOperand(0), Ext);
6383 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6384 }
6385 // FP2Int
6386 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6387 // Do one doubling fp_extend then complete the operation by converting
6388 // to int.
6389 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6390 if (IsStrict) {
6391 auto [FExt, Chain] =
6392 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6393 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6394 }
6395 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6396 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6397 }
6398
6399 // Narrowing conversions
6400 if (SrcEltSize > (2 * EltSize)) {
6401 if (IsInt2FP) {
6402 // One narrowing int_to_fp, then an fp_round.
6403 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6404 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6405 if (IsStrict) {
6406 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6407 DAG.getVTList(InterimFVT, MVT::Other),
6408 Op.getOperand(0), Src);
6409 SDValue Chain = Int2FP.getValue(1);
6410 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6411 }
6412 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6413 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6414 }
6415 // FP2Int
6416 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6417 // representable by the integer, the result is poison.
6418 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6420 if (IsStrict) {
6421 SDValue FP2Int =
6422 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6423 Op.getOperand(0), Src);
6424 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6425 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6426 }
6427 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6428 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6429 }
6430
6431 // Scalable vectors can exit here. Patterns will handle equally-sized
6432 // conversions halving/doubling ones.
6433 if (!VT.isFixedLengthVector())
6434 return Op;
6435
6436 // For fixed-length vectors we lower to a custom "VL" node.
6437 unsigned RVVOpc = 0;
6438 switch (Op.getOpcode()) {
6439 default:
6440 llvm_unreachable("Impossible opcode");
6441 case ISD::FP_TO_SINT:
 6442       RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
 6443       break;
6444 case ISD::FP_TO_UINT:
 6445       RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
 6446       break;
6447 case ISD::SINT_TO_FP:
6448 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6449 break;
6450 case ISD::UINT_TO_FP:
6451 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6452 break;
 6453     case ISD::STRICT_FP_TO_SINT:
 6454       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
 6455       break;
 6456     case ISD::STRICT_FP_TO_UINT:
 6457       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
 6458       break;
 6459     case ISD::STRICT_SINT_TO_FP:
 6460       RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
 6461       break;
 6462     case ISD::STRICT_UINT_TO_FP:
 6463       RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
 6464       break;
6465 }
6466
6467 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6468 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6469 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6470 "Expected same element count");
6471
6472 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6473
6474 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6475 if (IsStrict) {
6476 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6477 Op.getOperand(0), Src, Mask, VL);
6478 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6479 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6480 }
6481 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6482 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6483 }
 6484   case ISD::FP_TO_SINT_SAT:
 6485   case ISD::FP_TO_UINT_SAT:
 6486     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6487 case ISD::FP_TO_BF16: {
6488 // Custom lower to ensure the libcall return is passed in an FPR on hard
6489 // float ABIs.
6490 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6491 SDLoc DL(Op);
6492 MakeLibCallOptions CallOptions;
6493 RTLIB::Libcall LC =
6494 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6495 SDValue Res =
6496 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6497 if (Subtarget.is64Bit() && !RV64LegalI32)
6498 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6499 return DAG.getBitcast(MVT::i32, Res);
6500 }
6501 case ISD::BF16_TO_FP: {
6502 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6503 MVT VT = Op.getSimpleValueType();
6504 SDLoc DL(Op);
6505 Op = DAG.getNode(
6506 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6507 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6508 SDValue Res = Subtarget.is64Bit()
6509 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6510 : DAG.getBitcast(MVT::f32, Op);
6511 // fp_extend if the target VT is bigger than f32.
6512 if (VT != MVT::f32)
6513 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6514 return Res;
6515 }
6516 case ISD::FP_TO_FP16: {
6517 // Custom lower to ensure the libcall return is passed in an FPR on hard
6518 // float ABIs.
6519 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6520 SDLoc DL(Op);
6521 MakeLibCallOptions CallOptions;
6522 RTLIB::Libcall LC =
6523 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6524 SDValue Res =
6525 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6526 if (Subtarget.is64Bit() && !RV64LegalI32)
6527 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6528 return DAG.getBitcast(MVT::i32, Res);
6529 }
6530 case ISD::FP16_TO_FP: {
6531 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6532 // float ABIs.
6533 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6534 SDLoc DL(Op);
6535 MakeLibCallOptions CallOptions;
6536 SDValue Arg = Subtarget.is64Bit()
6537 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6538 Op.getOperand(0))
6539 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6540 SDValue Res =
6541 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6542 .first;
6543 return Res;
6544 }
6545 case ISD::FTRUNC:
6546 case ISD::FCEIL:
6547 case ISD::FFLOOR:
6548 case ISD::FNEARBYINT:
6549 case ISD::FRINT:
6550 case ISD::FROUND:
6551 case ISD::FROUNDEVEN:
6552 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6553 case ISD::LRINT:
6554 case ISD::LLRINT:
6555 return lowerVectorXRINT(Op, DAG, Subtarget);
6556 case ISD::VECREDUCE_ADD:
 6557   case ISD::VECREDUCE_UMAX:
 6558   case ISD::VECREDUCE_SMAX:
 6559   case ISD::VECREDUCE_UMIN:
 6560   case ISD::VECREDUCE_SMIN:
 6561     return lowerVECREDUCE(Op, DAG);
6562 case ISD::VECREDUCE_AND:
6563 case ISD::VECREDUCE_OR:
6564 case ISD::VECREDUCE_XOR:
6565 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6566 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6567 return lowerVECREDUCE(Op, DAG);
 6568   case ISD::VECREDUCE_FADD:
 6569   case ISD::VECREDUCE_SEQ_FADD:
 6570   case ISD::VECREDUCE_FMIN:
 6571   case ISD::VECREDUCE_FMAX:
 6572   case ISD::VECREDUCE_FMAXIMUM:
 6573   case ISD::VECREDUCE_FMINIMUM:
 6574     return lowerFPVECREDUCE(Op, DAG);
6575 case ISD::VP_REDUCE_ADD:
6576 case ISD::VP_REDUCE_UMAX:
6577 case ISD::VP_REDUCE_SMAX:
6578 case ISD::VP_REDUCE_UMIN:
6579 case ISD::VP_REDUCE_SMIN:
6580 case ISD::VP_REDUCE_FADD:
6581 case ISD::VP_REDUCE_SEQ_FADD:
6582 case ISD::VP_REDUCE_FMIN:
6583 case ISD::VP_REDUCE_FMAX:
6584 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6585 (Subtarget.hasVInstructionsF16Minimal() &&
6586 !Subtarget.hasVInstructionsF16()))
6587 return SplitVectorReductionOp(Op, DAG);
6588 return lowerVPREDUCE(Op, DAG);
6589 case ISD::VP_REDUCE_AND:
6590 case ISD::VP_REDUCE_OR:
6591 case ISD::VP_REDUCE_XOR:
6592 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6593 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6594 return lowerVPREDUCE(Op, DAG);
6595 case ISD::UNDEF: {
6596 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6597 return convertFromScalableVector(Op.getSimpleValueType(),
6598 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6599 }
 6600   case ISD::INSERT_SUBVECTOR:
 6601     return lowerINSERT_SUBVECTOR(Op, DAG);
 6602   case ISD::EXTRACT_SUBVECTOR:
 6603     return lowerEXTRACT_SUBVECTOR(Op, DAG);
 6604   case ISD::VECTOR_DEINTERLEAVE:
 6605     return lowerVECTOR_DEINTERLEAVE(Op, DAG);
 6606   case ISD::VECTOR_INTERLEAVE:
 6607     return lowerVECTOR_INTERLEAVE(Op, DAG);
6608 case ISD::STEP_VECTOR:
6609 return lowerSTEP_VECTOR(Op, DAG);
 6610   case ISD::VECTOR_REVERSE:
 6611     return lowerVECTOR_REVERSE(Op, DAG);
6612 case ISD::VECTOR_SPLICE:
6613 return lowerVECTOR_SPLICE(Op, DAG);
6614 case ISD::BUILD_VECTOR:
6615 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6616 case ISD::SPLAT_VECTOR:
6617 if (Op.getValueType().getScalarType() == MVT::f16 &&
6618 (Subtarget.hasVInstructionsF16Minimal() &&
6619 !Subtarget.hasVInstructionsF16())) {
6620 if (Op.getValueType() == MVT::nxv32f16)
6621 return SplitVectorOp(Op, DAG);
6622 SDLoc DL(Op);
6623 SDValue NewScalar =
6624 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6625 SDValue NewSplat = DAG.getNode(
 6626         ISD::SPLAT_VECTOR, DL,
 6627         MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6628 NewScalar);
6629 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6630 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6631 }
6632 if (Op.getValueType().getVectorElementType() == MVT::i1)
6633 return lowerVectorMaskSplat(Op, DAG);
6634 return SDValue();
 6635   case ISD::VECTOR_SHUFFLE:
 6636     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6637 case ISD::CONCAT_VECTORS: {
6638 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6639 // better than going through the stack, as the default expansion does.
6640 SDLoc DL(Op);
6641 MVT VT = Op.getSimpleValueType();
6642 MVT ContainerVT = VT;
6643 if (VT.isFixedLengthVector())
6644 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6645
6646 // Recursively split concat_vectors with more than 2 operands:
6647 //
6648 // concat_vector op1, op2, op3, op4
6649 // ->
6650 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6651 //
6652 // This reduces the length of the chain of vslideups and allows us to
6653 // perform the vslideups at a smaller LMUL, limited to MF2.
6654 if (Op.getNumOperands() > 2 &&
6655 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6656 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6658 size_t HalfNumOps = Op.getNumOperands() / 2;
6659 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6660 Op->ops().take_front(HalfNumOps));
6661 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6662 Op->ops().drop_front(HalfNumOps));
6663 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6664 }
6665
6666 unsigned NumOpElts =
6667 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6668 SDValue Vec = DAG.getUNDEF(VT);
6669 for (const auto &OpIdx : enumerate(Op->ops())) {
6670 SDValue SubVec = OpIdx.value();
6671 // Don't insert undef subvectors.
6672 if (SubVec.isUndef())
6673 continue;
6674 Vec =
6675 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6676 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6677 }
6678 return Vec;
6679 }
6680 case ISD::LOAD:
6681 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6682 return V;
6683 if (Op.getValueType().isFixedLengthVector())
6684 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6685 return Op;
6686 case ISD::STORE:
6687 if (auto V = expandUnalignedRVVStore(Op, DAG))
6688 return V;
6689 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6690 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6691 return Op;
6692 case ISD::MLOAD:
6693 case ISD::VP_LOAD:
6694 return lowerMaskedLoad(Op, DAG);
6695 case ISD::MSTORE:
6696 case ISD::VP_STORE:
6697 return lowerMaskedStore(Op, DAG);
6698 case ISD::SELECT_CC: {
6699 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6700 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6701 // into separate SETCC+SELECT just like LegalizeDAG.
6702 SDValue Tmp1 = Op.getOperand(0);
6703 SDValue Tmp2 = Op.getOperand(1);
6704 SDValue True = Op.getOperand(2);
6705 SDValue False = Op.getOperand(3);
6706 EVT VT = Op.getValueType();
6707 SDValue CC = Op.getOperand(4);
6708 EVT CmpVT = Tmp1.getValueType();
6709 EVT CCVT =
6710 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6711 SDLoc DL(Op);
6712 SDValue Cond =
6713 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6714 return DAG.getSelect(DL, VT, Cond, True, False);
6715 }
6716 case ISD::SETCC: {
6717 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6718 if (OpVT.isScalarInteger()) {
6719 MVT VT = Op.getSimpleValueType();
6720 SDValue LHS = Op.getOperand(0);
6721 SDValue RHS = Op.getOperand(1);
6722 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6723 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6724 "Unexpected CondCode");
6725
6726 SDLoc DL(Op);
6727
6728 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6729 // convert this to the equivalent of (set(u)ge X, C+1) by using
6730 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6731 // in a register.
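// For example, (setgt X, 5) is !(X <s 6), which lowers to
// (xori (slti X, 6), 1); the immediate 6 fits directly in the slti.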
6732 if (isa<ConstantSDNode>(RHS)) {
6733 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6734 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6735 // If this is an unsigned compare and the constant is -1, incrementing
6736 // the constant would change behavior. The result should be false.
6737 if (CCVal == ISD::SETUGT && Imm == -1)
6738 return DAG.getConstant(0, DL, VT);
6739 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6740 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6741 SDValue SetCC = DAG.getSetCC(
6742 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6743 return DAG.getLogicalNOT(DL, SetCC, VT);
6744 }
6745 }
6746
6747 // Not a constant we could handle, swap the operands and condition code to
6748 // SETLT/SETULT.
6749 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6750 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6751 }
6752
6753 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6754 (Subtarget.hasVInstructionsF16Minimal() &&
6755 !Subtarget.hasVInstructionsF16()))
6756 return SplitVectorOp(Op, DAG);
6757
6758 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6759 }
6760 case ISD::ADD:
6761 case ISD::SUB:
6762 case ISD::MUL:
6763 case ISD::MULHS:
6764 case ISD::MULHU:
6765 case ISD::AND:
6766 case ISD::OR:
6767 case ISD::XOR:
6768 case ISD::SDIV:
6769 case ISD::SREM:
6770 case ISD::UDIV:
6771 case ISD::UREM:
6772 case ISD::BSWAP:
6773 case ISD::CTPOP:
6774 return lowerToScalableOp(Op, DAG);
6775 case ISD::SHL:
6776 case ISD::SRA:
6777 case ISD::SRL:
6778 if (Op.getSimpleValueType().isFixedLengthVector())
6779 return lowerToScalableOp(Op, DAG);
6780 // This can be called for an i32 shift amount that needs to be promoted.
6781 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6782 "Unexpected custom legalisation");
6783 return SDValue();
6784 case ISD::FADD:
6785 case ISD::FSUB:
6786 case ISD::FMUL:
6787 case ISD::FDIV:
6788 case ISD::FNEG:
6789 case ISD::FABS:
6790 case ISD::FSQRT:
6791 case ISD::FMA:
6792 case ISD::FMINNUM:
6793 case ISD::FMAXNUM:
6794 if (Op.getValueType() == MVT::nxv32f16 &&
6795 (Subtarget.hasVInstructionsF16Minimal() &&
6796 !Subtarget.hasVInstructionsF16()))
6797 return SplitVectorOp(Op, DAG);
6798 [[fallthrough]];
6799 case ISD::AVGFLOORU:
6800 case ISD::AVGCEILU:
6801 case ISD::SMIN:
6802 case ISD::SMAX:
6803 case ISD::UMIN:
6804 case ISD::UMAX:
6805 return lowerToScalableOp(Op, DAG);
6806 case ISD::UADDSAT:
6807 case ISD::USUBSAT:
6808 if (!Op.getValueType().isVector())
6809 return lowerUADDSAT_USUBSAT(Op, DAG);
6810 return lowerToScalableOp(Op, DAG);
6811 case ISD::SADDSAT:
6812 case ISD::SSUBSAT:
6813 if (!Op.getValueType().isVector())
6814 return lowerSADDSAT_SSUBSAT(Op, DAG);
6815 return lowerToScalableOp(Op, DAG);
6816 case ISD::ABDS:
6817 case ISD::ABDU: {
6818 SDLoc dl(Op);
6819 EVT VT = Op->getValueType(0);
6820 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6821 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6822 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6823
6824 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6825 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
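// For example, abds(3, -5) = smax(3, -5) - smin(3, -5) = 3 - (-5) = 8, and
// abdu(3, 5) = umax(3, 5) - umin(3, 5) = 5 - 3 = 2.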
6826 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6827 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6828 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6829 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6830 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6831 }
6832 case ISD::ABS:
6833 case ISD::VP_ABS:
6834 return lowerABS(Op, DAG);
6835 case ISD::CTLZ:
6836 case ISD::CTLZ_ZERO_UNDEF:
6837 case ISD::CTTZ:
6838 case ISD::CTTZ_ZERO_UNDEF:
6839 if (Subtarget.hasStdExtZvbb())
6840 return lowerToScalableOp(Op, DAG);
6841 assert(Op.getOpcode() != ISD::CTTZ);
6842 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6843 case ISD::VSELECT:
6844 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6845 case ISD::FCOPYSIGN:
6846 if (Op.getValueType() == MVT::nxv32f16 &&
6847 (Subtarget.hasVInstructionsF16Minimal() &&
6848 !Subtarget.hasVInstructionsF16()))
6849 return SplitVectorOp(Op, DAG);
6850 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6851 case ISD::STRICT_FADD:
6852 case ISD::STRICT_FSUB:
6853 case ISD::STRICT_FMUL:
6854 case ISD::STRICT_FDIV:
6855 case ISD::STRICT_FSQRT:
6856 case ISD::STRICT_FMA:
6857 if (Op.getValueType() == MVT::nxv32f16 &&
6858 (Subtarget.hasVInstructionsF16Minimal() &&
6859 !Subtarget.hasVInstructionsF16()))
6860 return SplitStrictFPVectorOp(Op, DAG);
6861 return lowerToScalableOp(Op, DAG);
6862 case ISD::STRICT_FSETCC:
6863 case ISD::STRICT_FSETCCS:
6864 return lowerVectorStrictFSetcc(Op, DAG);
6865 case ISD::STRICT_FCEIL:
6866 case ISD::STRICT_FRINT:
6867 case ISD::STRICT_FFLOOR:
6868 case ISD::STRICT_FTRUNC:
6869 case ISD::STRICT_FNEARBYINT:
6870 case ISD::STRICT_FROUND:
6871 case ISD::STRICT_FROUNDEVEN:
6872 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6873 case ISD::MGATHER:
6874 case ISD::VP_GATHER:
6875 return lowerMaskedGather(Op, DAG);
6876 case ISD::MSCATTER:
6877 case ISD::VP_SCATTER:
6878 return lowerMaskedScatter(Op, DAG);
6879 case ISD::GET_ROUNDING:
6880 return lowerGET_ROUNDING(Op, DAG);
6881 case ISD::SET_ROUNDING:
6882 return lowerSET_ROUNDING(Op, DAG);
6883 case ISD::EH_DWARF_CFA:
6884 return lowerEH_DWARF_CFA(Op, DAG);
6885 case ISD::VP_SELECT:
6886 case ISD::VP_MERGE:
6887 case ISD::VP_ADD:
6888 case ISD::VP_SUB:
6889 case ISD::VP_MUL:
6890 case ISD::VP_SDIV:
6891 case ISD::VP_UDIV:
6892 case ISD::VP_SREM:
6893 case ISD::VP_UREM:
6894 case ISD::VP_UADDSAT:
6895 case ISD::VP_USUBSAT:
6896 case ISD::VP_SADDSAT:
6897 case ISD::VP_SSUBSAT:
6898 case ISD::VP_LRINT:
6899 case ISD::VP_LLRINT:
6900 return lowerVPOp(Op, DAG);
6901 case ISD::VP_AND:
6902 case ISD::VP_OR:
6903 case ISD::VP_XOR:
6904 return lowerLogicVPOp(Op, DAG);
6905 case ISD::VP_FADD:
6906 case ISD::VP_FSUB:
6907 case ISD::VP_FMUL:
6908 case ISD::VP_FDIV:
6909 case ISD::VP_FNEG:
6910 case ISD::VP_FABS:
6911 case ISD::VP_SQRT:
6912 case ISD::VP_FMA:
6913 case ISD::VP_FMINNUM:
6914 case ISD::VP_FMAXNUM:
6915 case ISD::VP_FCOPYSIGN:
6916 if (Op.getValueType() == MVT::nxv32f16 &&
6917 (Subtarget.hasVInstructionsF16Minimal() &&
6918 !Subtarget.hasVInstructionsF16()))
6919 return SplitVPOp(Op, DAG);
6920 [[fallthrough]];
6921 case ISD::VP_ASHR:
6922 case ISD::VP_LSHR:
6923 case ISD::VP_SHL:
6924 return lowerVPOp(Op, DAG);
6925 case ISD::VP_IS_FPCLASS:
6926 return LowerIS_FPCLASS(Op, DAG);
6927 case ISD::VP_SIGN_EXTEND:
6928 case ISD::VP_ZERO_EXTEND:
6929 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6930 return lowerVPExtMaskOp(Op, DAG);
6931 return lowerVPOp(Op, DAG);
6932 case ISD::VP_TRUNCATE:
6933 return lowerVectorTruncLike(Op, DAG);
6934 case ISD::VP_FP_EXTEND:
6935 case ISD::VP_FP_ROUND:
6936 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6937 case ISD::VP_SINT_TO_FP:
6938 case ISD::VP_UINT_TO_FP:
6939 if (Op.getValueType().isVector() &&
6940 Op.getValueType().getScalarType() == MVT::f16 &&
6941 (Subtarget.hasVInstructionsF16Minimal() &&
6942 !Subtarget.hasVInstructionsF16())) {
6943 if (Op.getValueType() == MVT::nxv32f16)
6944 return SplitVPOp(Op, DAG);
6945 // int -> f32
6946 SDLoc DL(Op);
6947 MVT NVT =
6948 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6949 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6950 // f32 -> f16
6951 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6952 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6953 }
6954 [[fallthrough]];
6955 case ISD::VP_FP_TO_SINT:
6956 case ISD::VP_FP_TO_UINT:
6957 if (SDValue Op1 = Op.getOperand(0);
6958 Op1.getValueType().isVector() &&
6959 Op1.getValueType().getScalarType() == MVT::f16 &&
6960 (Subtarget.hasVInstructionsF16Minimal() &&
6961 !Subtarget.hasVInstructionsF16())) {
6962 if (Op1.getValueType() == MVT::nxv32f16)
6963 return SplitVPOp(Op, DAG);
6964 // f16 -> f32
6965 SDLoc DL(Op);
6966 MVT NVT = MVT::getVectorVT(MVT::f32,
6967 Op1.getValueType().getVectorElementCount());
6968 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6969 // f32 -> int
6970 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6971 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6972 }
6973 return lowerVPFPIntConvOp(Op, DAG);
6974 case ISD::VP_SETCC:
6975 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6976 (Subtarget.hasVInstructionsF16Minimal() &&
6977 !Subtarget.hasVInstructionsF16()))
6978 return SplitVPOp(Op, DAG);
6979 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6980 return lowerVPSetCCMaskOp(Op, DAG);
6981 [[fallthrough]];
6982 case ISD::VP_SMIN:
6983 case ISD::VP_SMAX:
6984 case ISD::VP_UMIN:
6985 case ISD::VP_UMAX:
6986 case ISD::VP_BITREVERSE:
6987 case ISD::VP_BSWAP:
6988 return lowerVPOp(Op, DAG);
6989 case ISD::VP_CTLZ:
6990 case ISD::VP_CTLZ_ZERO_UNDEF:
6991 if (Subtarget.hasStdExtZvbb())
6992 return lowerVPOp(Op, DAG);
6993 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6994 case ISD::VP_CTTZ:
6995 case ISD::VP_CTTZ_ZERO_UNDEF:
6996 if (Subtarget.hasStdExtZvbb())
6997 return lowerVPOp(Op, DAG);
6998 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6999 case ISD::VP_CTPOP:
7000 return lowerVPOp(Op, DAG);
7001 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7002 return lowerVPStridedLoad(Op, DAG);
7003 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7004 return lowerVPStridedStore(Op, DAG);
7005 case ISD::VP_FCEIL:
7006 case ISD::VP_FFLOOR:
7007 case ISD::VP_FRINT:
7008 case ISD::VP_FNEARBYINT:
7009 case ISD::VP_FROUND:
7010 case ISD::VP_FROUNDEVEN:
7011 case ISD::VP_FROUNDTOZERO:
7012 if (Op.getValueType() == MVT::nxv32f16 &&
7013 (Subtarget.hasVInstructionsF16Minimal() &&
7014 !Subtarget.hasVInstructionsF16()))
7015 return SplitVPOp(Op, DAG);
7016 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7017 case ISD::VP_FMAXIMUM:
7018 case ISD::VP_FMINIMUM:
7019 if (Op.getValueType() == MVT::nxv32f16 &&
7020 (Subtarget.hasVInstructionsF16Minimal() &&
7021 !Subtarget.hasVInstructionsF16()))
7022 return SplitVPOp(Op, DAG);
7023 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7024 case ISD::EXPERIMENTAL_VP_SPLICE:
7025 return lowerVPSpliceExperimental(Op, DAG);
7026 case ISD::EXPERIMENTAL_VP_REVERSE:
7027 return lowerVPReverseExperimental(Op, DAG);
7028 }
7029}
7030
7031 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7032 SelectionDAG &DAG, unsigned Flags) {
7033 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7034}
7035
7036 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7037 SelectionDAG &DAG, unsigned Flags) {
7038 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7039 Flags);
7040}
7041
7042 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7043 SelectionDAG &DAG, unsigned Flags) {
7044 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7045 N->getOffset(), Flags);
7046}
7047
7048 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7049 SelectionDAG &DAG, unsigned Flags) {
7050 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7051}
7052
7053template <class NodeTy>
7054SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7055 bool IsLocal, bool IsExternWeak) const {
7056 SDLoc DL(N);
7057 EVT Ty = getPointerTy(DAG.getDataLayout());
7058
7059 // When HWASAN is used and tagging of global variables is enabled
7060 // they should be accessed via the GOT, since the tagged address of a global
7061 // is incompatible with existing code models. This also applies to non-pic
7062 // mode.
7063 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7064 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7065 if (IsLocal && !Subtarget.allowTaggedGlobals())
7066 // Use PC-relative addressing to access the symbol. This generates the
7067 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7068 // %pcrel_lo(auipc)).
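// In assembly this corresponds to, for example:
//   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
//                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)
// (the register and label names here are illustrative).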
7069 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7070
7071 // Use PC-relative addressing to access the GOT for this symbol, then load
7072 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7073 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7074 SDValue Load =
7075 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7076 MachineFunction &MF = DAG.getMachineFunction();
7077 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7078 MachinePointerInfo::getGOT(MF),
7079 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7080 MachineMemOperand::MOInvariant,
7081 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7082 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7083 return Load;
7084 }
7085
7086 switch (getTargetMachine().getCodeModel()) {
7087 default:
7088 report_fatal_error("Unsupported code model for lowering");
7089 case CodeModel::Small: {
7090 // Generate a sequence for accessing addresses within the first 2 GiB of
7091 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
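// For example, for a symbol `sym` this materializes as:
//   lui  a0, %hi(sym)
//   addi a0, a0, %lo(sym)
// (the destination register shown is illustrative).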
7092 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7093 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7094 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7095 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7096 }
7097 case CodeModel::Medium: {
7098 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7099 if (IsExternWeak) {
7100 // An extern weak symbol may be undefined, i.e. have value 0, which may
7101 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7102 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7103 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7104 SDValue Load =
7105 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7106 MachineFunction &MF = DAG.getMachineFunction();
7107 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7108 MachinePointerInfo::getGOT(MF),
7109 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7110 MachineMemOperand::MOInvariant,
7111 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7112 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7113 return Load;
7114 }
7115
7116 // Generate a sequence for accessing addresses within any 2GiB range within
7117 // the address space. This generates the pattern (PseudoLLA sym), which
7118 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7119 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7120 }
7121 }
7122}
7123
7124SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7125 SelectionDAG &DAG) const {
7126 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7127 assert(N->getOffset() == 0 && "unexpected offset in global node");
7128 const GlobalValue *GV = N->getGlobal();
7129 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7130}
7131
7132SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7133 SelectionDAG &DAG) const {
7134 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7135
7136 return getAddr(N, DAG);
7137}
7138
7139SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7140 SelectionDAG &DAG) const {
7141 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7142
7143 return getAddr(N, DAG);
7144}
7145
7146SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7147 SelectionDAG &DAG) const {
7148 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7149
7150 return getAddr(N, DAG);
7151}
7152
7153SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7154 SelectionDAG &DAG,
7155 bool UseGOT) const {
7156 SDLoc DL(N);
7157 EVT Ty = getPointerTy(DAG.getDataLayout());
7158 const GlobalValue *GV = N->getGlobal();
7159 MVT XLenVT = Subtarget.getXLenVT();
7160
7161 if (UseGOT) {
7162 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7163 // load the address from the GOT and add the thread pointer. This generates
7164 // the pattern (PseudoLA_TLS_IE sym), which expands to
7165 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7166 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7167 SDValue Load =
7168 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7169 MachineFunction &MF = DAG.getMachineFunction();
7170 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7171 MachinePointerInfo::getGOT(MF),
7172 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7173 MachineMemOperand::MOInvariant,
7174 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7175 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7176
7177 // Add the thread pointer.
7178 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7179 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7180 }
7181
7182 // Generate a sequence for accessing the address relative to the thread
7183 // pointer, with the appropriate adjustment for the thread pointer offset.
7184 // This generates the pattern
7185 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
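// In assembly this is the canonical local-exec sequence:
//   lui  a0, %tprel_hi(sym)
//   add  a0, a0, tp, %tprel_add(sym)
//   addi a0, a0, %tprel_lo(sym)
// (the register name is illustrative).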
7186 SDValue AddrHi =
7187 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7188 SDValue AddrAdd =
7189 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7190 SDValue AddrLo =
7191 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7192
7193 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7194 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7195 SDValue MNAdd =
7196 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7197 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7198}
7199
7200SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7201 SelectionDAG &DAG) const {
7202 SDLoc DL(N);
7203 EVT Ty = getPointerTy(DAG.getDataLayout());
7204 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7205 const GlobalValue *GV = N->getGlobal();
7206
7207 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7208 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7209 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7210 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7211 SDValue Load =
7212 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7213
7214 // Prepare argument list to generate call.
7215 ArgListTy Args;
7216 ArgListEntry Entry;
7217 Entry.Node = Load;
7218 Entry.Ty = CallTy;
7219 Args.push_back(Entry);
7220
7221 // Setup call to __tls_get_addr.
7222 TargetLowering::CallLoweringInfo CLI(DAG);
7223 CLI.setDebugLoc(DL)
7224 .setChain(DAG.getEntryNode())
7225 .setLibCallee(CallingConv::C, CallTy,
7226 DAG.getExternalSymbol("__tls_get_addr", Ty),
7227 std::move(Args));
7228
7229 return LowerCallTo(CLI).first;
7230}
7231
7232SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7233 SelectionDAG &DAG) const {
7234 SDLoc DL(N);
7235 EVT Ty = getPointerTy(DAG.getDataLayout());
7236 const GlobalValue *GV = N->getGlobal();
7237
7238 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7239 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7240 //
7241 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7242 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7243 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7244 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7245 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7246 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7247}
7248
7249SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7250 SelectionDAG &DAG) const {
7251 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7252 assert(N->getOffset() == 0 && "unexpected offset in global node");
7253
7254 if (DAG.getTarget().useEmulatedTLS())
7255 return LowerToTLSEmulatedModel(N, DAG);
7256
7258 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7259 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7260 CallingConv::GHC)
7261 report_fatal_error("In GHC calling convention TLS is not supported");
7262
7263 SDValue Addr;
7264 switch (Model) {
7265 case TLSModel::LocalExec:
7266 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7267 break;
7268 case TLSModel::InitialExec:
7269 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7270 break;
7271 case TLSModel::LocalDynamic:
7272 case TLSModel::GeneralDynamic:
7273 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7274 : getDynamicTLSAddr(N, DAG);
7275 break;
7276 }
7277
7278 return Addr;
7279}
7280
7281// Return true if Val is equal to (setcc LHS, RHS, CC).
7282// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7283// Otherwise, return std::nullopt.
7284static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7285 ISD::CondCode CC, SDValue Val) {
7286 assert(Val->getOpcode() == ISD::SETCC);
7287 SDValue LHS2 = Val.getOperand(0);
7288 SDValue RHS2 = Val.getOperand(1);
7289 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7290
7291 if (LHS == LHS2 && RHS == RHS2) {
7292 if (CC == CC2)
7293 return true;
7294 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7295 return false;
7296 } else if (LHS == RHS2 && RHS == LHS2) {
7297 CC2 = ISD::getSetCCSwappedOperands(CC2);
7298 if (CC == CC2)
7299 return true;
7300 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7301 return false;
7302 }
7303
7304 return std::nullopt;
7305}
7306
7307 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7308 const RISCVSubtarget &Subtarget) {
7309 SDValue CondV = N->getOperand(0);
7310 SDValue TrueV = N->getOperand(1);
7311 SDValue FalseV = N->getOperand(2);
7312 MVT VT = N->getSimpleValueType(0);
7313 SDLoc DL(N);
7314
7315 if (!Subtarget.hasConditionalMoveFusion()) {
7316 // (select c, -1, y) -> -c | y
7317 if (isAllOnesConstant(TrueV)) {
7318 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7319 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7320 }
7321 // (select c, y, -1) -> (c-1) | y
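// With c in {0, 1}: c == 1 gives (1-1) | y == y, and c == 0 gives
// (0-1) | y == -1, matching the select in both cases.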
7322 if (isAllOnesConstant(FalseV)) {
7323 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7324 DAG.getAllOnesConstant(DL, VT));
7325 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7326 }
7327
7328 // (select c, 0, y) -> (c-1) & y
7329 if (isNullConstant(TrueV)) {
7330 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7331 DAG.getAllOnesConstant(DL, VT));
7332 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7333 }
7334 // (select c, y, 0) -> -c & y
7335 if (isNullConstant(FalseV)) {
7336 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7337 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7338 }
7339 }
7340
7341 // select c, ~x, x --> xor -c, x
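// With c in {0, 1}: c == 1 gives xor(-1, x) == ~x, and c == 0 gives
// xor(0, x) == x, so the identity holds.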
7342 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7343 const APInt &TrueVal = TrueV->getAsAPIntVal();
7344 const APInt &FalseVal = FalseV->getAsAPIntVal();
7345 if (~TrueVal == FalseVal) {
7346 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7347 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7348 }
7349 }
7350
7351 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7352 // when both truev and falsev are also setcc.
7353 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7354 FalseV.getOpcode() == ISD::SETCC) {
7355 SDValue LHS = CondV.getOperand(0);
7356 SDValue RHS = CondV.getOperand(1);
7357 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7358
7359 // (select x, x, y) -> x | y
7360 // (select !x, x, y) -> x & y
7361 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7362 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7363 DAG.getFreeze(FalseV));
7364 }
7365 // (select x, y, x) -> x & y
7366 // (select !x, y, x) -> x | y
7367 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7368 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7369 DAG.getFreeze(TrueV), FalseV);
7370 }
7371 }
7372
7373 return SDValue();
7374}
7375
7376// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7377// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7378// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7379// being `0` or `-1`. In such cases we can replace `select` with `and`.
7380// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7381// than `c0`?
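// For example, `(add (select cond, x, -1), 1)` becomes
// `(select cond, (add x, 1), 0)`: binOp(c0, c1) = -1 + 1 = 0, which meets
// the profitability condition above.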
7382static SDValue
7383 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7384 const RISCVSubtarget &Subtarget) {
7385 if (Subtarget.hasShortForwardBranchOpt())
7386 return SDValue();
7387
7388 unsigned SelOpNo = 0;
7389 SDValue Sel = BO->getOperand(0);
7390 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7391 SelOpNo = 1;
7392 Sel = BO->getOperand(1);
7393 }
7394
7395 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7396 return SDValue();
7397
7398 unsigned ConstSelOpNo = 1;
7399 unsigned OtherSelOpNo = 2;
7400 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7401 ConstSelOpNo = 2;
7402 OtherSelOpNo = 1;
7403 }
7404 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7405 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7406 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7407 return SDValue();
7408
7409 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7410 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7411 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7412 return SDValue();
7413
7414 SDLoc DL(Sel);
7415 EVT VT = BO->getValueType(0);
7416
7417 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7418 if (SelOpNo == 1)
7419 std::swap(NewConstOps[0], NewConstOps[1]);
7420
7421 SDValue NewConstOp =
7422 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7423 if (!NewConstOp)
7424 return SDValue();
7425
7426 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7427 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7428 return SDValue();
7429
7430 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7431 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7432 if (SelOpNo == 1)
7433 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7434 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7435
7436 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7437 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7438 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7439}
7440
7441SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7442 SDValue CondV = Op.getOperand(0);
7443 SDValue TrueV = Op.getOperand(1);
7444 SDValue FalseV = Op.getOperand(2);
7445 SDLoc DL(Op);
7446 MVT VT = Op.getSimpleValueType();
7447 MVT XLenVT = Subtarget.getXLenVT();
7448
7449 // Lower vector SELECTs to VSELECTs by splatting the condition.
7450 if (VT.isVector()) {
7451 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7452 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7453 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7454 }
7455
7456 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7457 // nodes to implement the SELECT. Performing the lowering here allows for
7458 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7459 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7460 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7461 VT.isScalarInteger()) {
7462 // (select c, t, 0) -> (czero_eqz t, c)
7463 if (isNullConstant(FalseV))
7464 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7465 // (select c, 0, f) -> (czero_nez f, c)
7466 if (isNullConstant(TrueV))
7467 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7468
7469 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7470 if (TrueV.getOpcode() == ISD::AND &&
7471 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7472 return DAG.getNode(
7473 ISD::OR, DL, VT, TrueV,
7474 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7475 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7476 if (FalseV.getOpcode() == ISD::AND &&
7477 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7478 return DAG.getNode(
7479 ISD::OR, DL, VT, FalseV,
7480 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7481
7482 // Try some other optimizations before falling back to generic lowering.
7483 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7484 return V;
7485
7486 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7487 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
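// For example, (select c, 5, 3) can become (add (czero_nez -2, c), 5):
// c != 0 yields 0 + 5 = 5, and c == 0 yields -2 + 5 = 3.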
7488 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7489 const APInt &TrueVal = TrueV->getAsAPIntVal();
7490 const APInt &FalseVal = FalseV->getAsAPIntVal();
7491 const int TrueValCost = RISCVMatInt::getIntMatCost(
7492 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7493 const int FalseValCost = RISCVMatInt::getIntMatCost(
7494 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7495 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7496 SDValue LHSVal = DAG.getConstant(
7497 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7498 SDValue RHSVal =
7499 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7500 SDValue CMOV =
7501 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7502 DL, VT, LHSVal, CondV);
7503 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7504 }
7505
7506 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7507 // Unless we have the short forward branch optimization.
7508 if (!Subtarget.hasConditionalMoveFusion())
7509 return DAG.getNode(
7510 ISD::OR, DL, VT,
7511 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7512 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7513 }
7514
7515 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7516 return V;
7517
7518 if (Op.hasOneUse()) {
7519 unsigned UseOpc = Op->use_begin()->getOpcode();
7520 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7521 SDNode *BinOp = *Op->use_begin();
7522 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7523 DAG, Subtarget)) {
7524 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7525 return lowerSELECT(NewSel, DAG);
7526 }
7527 }
7528 }
7529
7530 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7531 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7532 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7533 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7534 if (FPTV && FPFV) {
7535 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7536 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7537 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7538 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7539 DAG.getConstant(1, DL, XLenVT));
7540 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7541 }
7542 }
7543
7544 // If the condition is not an integer SETCC which operates on XLenVT, we need
7545 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7546 // (select condv, truev, falsev)
7547 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7548 if (CondV.getOpcode() != ISD::SETCC ||
7549 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7550 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7551 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7552
7553 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7554
7555 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7556 }
7557
7558 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7559 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7560 // advantage of the integer compare+branch instructions. i.e.:
7561 // (select (setcc lhs, rhs, cc), truev, falsev)
7562 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7563 SDValue LHS = CondV.getOperand(0);
7564 SDValue RHS = CondV.getOperand(1);
7565 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7566
7567 // Special case for a select of 2 constants that have a difference of 1.
7568 // Normally this is done by DAGCombine, but if the select is introduced by
7569 // type legalization or op legalization, we miss it. Restricting to SETLT
7570 // case for now because that is what signed saturating add/sub need.
7571 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7572 // but we would probably want to swap the true/false values if the condition
7573 // is SETGE/SETLE to avoid an XORI.
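// For example, (select (setlt a, b), 4, 3) becomes (add (setlt a, b), 3):
// the setcc produces 0 or 1, giving 3 or 4 respectively.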
7574 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7575 CCVal == ISD::SETLT) {
7576 const APInt &TrueVal = TrueV->getAsAPIntVal();
7577 const APInt &FalseVal = FalseV->getAsAPIntVal();
7578 if (TrueVal - 1 == FalseVal)
7579 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7580 if (TrueVal + 1 == FalseVal)
7581 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7582 }
7583
7584 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7585 // 1 < x ? x : 1 -> 0 < x ? x : 1
7586 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7587 RHS == TrueV && LHS == FalseV) {
7588 LHS = DAG.getConstant(0, DL, VT);
7589 // 0 <u x is the same as x != 0.
7590 if (CCVal == ISD::SETULT) {
7591 std::swap(LHS, RHS);
7592 CCVal = ISD::SETNE;
7593 }
7594 }
7595
7596 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7597 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7598 RHS == FalseV) {
7599 RHS = DAG.getConstant(0, DL, VT);
7600 }
7601
7602 SDValue TargetCC = DAG.getCondCode(CCVal);
7603
7604 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7605 // (select (setcc lhs, rhs, CC), constant, falsev)
7606 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7607 std::swap(TrueV, FalseV);
7608 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7609 }
7610
7611 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7612 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7613}
7614
7615SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7616 SDValue CondV = Op.getOperand(1);
7617 SDLoc DL(Op);
7618 MVT XLenVT = Subtarget.getXLenVT();
7619
7620 if (CondV.getOpcode() == ISD::SETCC &&
7621 CondV.getOperand(0).getValueType() == XLenVT) {
7622 SDValue LHS = CondV.getOperand(0);
7623 SDValue RHS = CondV.getOperand(1);
7624 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7625
7626 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7627
7628 SDValue TargetCC = DAG.getCondCode(CCVal);
7629 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7630 LHS, RHS, TargetCC, Op.getOperand(2));
7631 }
7632
7633 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7634 CondV, DAG.getConstant(0, DL, XLenVT),
7635 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7636}
7637
7638SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7639 MachineFunction &MF = DAG.getMachineFunction();
7640 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7641
7642 SDLoc DL(Op);
7643 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7644 getPointerTy(MF.getDataLayout()));
7645
7646 // vastart just stores the address of the VarArgsFrameIndex slot into the
7647 // memory location argument.
7648 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7649 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7650 MachinePointerInfo(SV));
7651}
7652
7653SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7654 SelectionDAG &DAG) const {
7655 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7656 MachineFunction &MF = DAG.getMachineFunction();
7657 MachineFrameInfo &MFI = MF.getFrameInfo();
7658 MFI.setFrameAddressIsTaken(true);
7659 Register FrameReg = RI.getFrameRegister(MF);
7660 int XLenInBytes = Subtarget.getXLen() / 8;
7661
7662 EVT VT = Op.getValueType();
7663 SDLoc DL(Op);
7664 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7665 unsigned Depth = Op.getConstantOperandVal(0);
7666 while (Depth--) {
7667 int Offset = -(XLenInBytes * 2);
7668 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7669 DAG.getIntPtrConstant(Offset, DL));
7670 FrameAddr =
7671 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7672 }
7673 return FrameAddr;
7674}
7675
7676SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7677 SelectionDAG &DAG) const {
7678 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7679 MachineFunction &MF = DAG.getMachineFunction();
7680 MachineFrameInfo &MFI = MF.getFrameInfo();
7681 MFI.setReturnAddressIsTaken(true);
7682 MVT XLenVT = Subtarget.getXLenVT();
7683 int XLenInBytes = Subtarget.getXLen() / 8;
7684
7685 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7686 return SDValue();
7687
7688 EVT VT = Op.getValueType();
7689 SDLoc DL(Op);
7690 unsigned Depth = Op.getConstantOperandVal(0);
7691 if (Depth) {
7692 int Off = -XLenInBytes;
7693 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7694 SDValue Offset = DAG.getConstant(Off, DL, VT);
7695 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7696 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7697 MachinePointerInfo());
7698 }
7699
7700 // Return the value of the return address register, marking it an implicit
7701 // live-in.
7702 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7703 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7704}
7705
7706SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7707 SelectionDAG &DAG) const {
7708 SDLoc DL(Op);
7709 SDValue Lo = Op.getOperand(0);
7710 SDValue Hi = Op.getOperand(1);
7711 SDValue Shamt = Op.getOperand(2);
7712 EVT VT = Lo.getValueType();
7713
7714 // if Shamt-XLEN < 0: // Shamt < XLEN
7715 // Lo = Lo << Shamt
7716 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7717 // else:
7718 // Lo = 0
7719 // Hi = Lo << (Shamt-XLEN)
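// For example, with XLEN=32 and Shamt=8: Lo = Lo << 8 and
// Hi = (Hi << 8) | (Lo >>u 24); with Shamt=40: Lo = 0 and
// Hi = (original Lo) << 8.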
7720
7721 SDValue Zero = DAG.getConstant(0, DL, VT);
7722 SDValue One = DAG.getConstant(1, DL, VT);
7723 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7724 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7725 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7726 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7727
7728 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7729 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7730 SDValue ShiftRightLo =
7731 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7732 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7733 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7734 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7735
7736 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7737
7738 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7739 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7740
7741 SDValue Parts[2] = {Lo, Hi};
7742 return DAG.getMergeValues(Parts, DL);
7743}
7744
7745SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7746 bool IsSRA) const {
7747 SDLoc DL(Op);
7748 SDValue Lo = Op.getOperand(0);
7749 SDValue Hi = Op.getOperand(1);
7750 SDValue Shamt = Op.getOperand(2);
7751 EVT VT = Lo.getValueType();
7752
7753 // SRA expansion:
7754 // if Shamt-XLEN < 0: // Shamt < XLEN
7755 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7756 // Hi = Hi >>s Shamt
7757 // else:
7758 // Lo = Hi >>s (Shamt-XLEN);
7759 // Hi = Hi >>s (XLEN-1)
7760 //
7761 // SRL expansion:
7762 // if Shamt-XLEN < 0: // Shamt < XLEN
7763 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7764 // Hi = Hi >>u Shamt
7765 // else:
7766 // Lo = Hi >>u (Shamt-XLEN);
7767 // Hi = 0;
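// For example, with XLEN=32 and Shamt=8 (SRL): Lo = (Lo >>u 8) | (Hi << 24)
// and Hi = Hi >>u 8; with Shamt=40: Lo = Hi >>u 8 and Hi = 0.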
7768
7769 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7770
7771 SDValue Zero = DAG.getConstant(0, DL, VT);
7772 SDValue One = DAG.getConstant(1, DL, VT);
7773 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7774 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7775 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7776 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7777
7778 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7779 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7780 SDValue ShiftLeftHi =
7781 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7782 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7783 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7784 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7785 SDValue HiFalse =
7786 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7787
7788 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7789
7790 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7791 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7792
7793 SDValue Parts[2] = {Lo, Hi};
7794 return DAG.getMergeValues(Parts, DL);
7795}
7796
7797// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7798// legal equivalently-sized i8 type, so we can use that as a go-between.
7799SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7800 SelectionDAG &DAG) const {
7801 SDLoc DL(Op);
7802 MVT VT = Op.getSimpleValueType();
7803 SDValue SplatVal = Op.getOperand(0);
7804 // All-zeros or all-ones splats are handled specially.
7805 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7806 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7807 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7808 }
7809 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7810 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7811 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7812 }
7813 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7814 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7815 DAG.getConstant(1, DL, SplatVal.getValueType()));
7816 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7817 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7818 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7819}
7820
7821// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7822// illegal (currently only vXi64 RV32).
7823// FIXME: We could also catch non-constant sign-extended i32 values and lower
7824// them to VMV_V_X_VL.
7825SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7826 SelectionDAG &DAG) const {
7827 SDLoc DL(Op);
7828 MVT VecVT = Op.getSimpleValueType();
7829 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7830 "Unexpected SPLAT_VECTOR_PARTS lowering");
7831
7832 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7833 SDValue Lo = Op.getOperand(0);
7834 SDValue Hi = Op.getOperand(1);
7835
7836 MVT ContainerVT = VecVT;
7837 if (VecVT.isFixedLengthVector())
7838 ContainerVT = getContainerForFixedLengthVector(VecVT);
7839
7840 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7841
7842 SDValue Res =
7843 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7844
7845 if (VecVT.isFixedLengthVector())
7846 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7847
7848 return Res;
7849}
7850
7851// Custom-lower extensions from mask vectors by using a vselect either with 1
7852// for zero/any-extension or -1 for sign-extension:
7853// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7854// Note that any-extension is lowered identically to zero-extension.
7855SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7856 int64_t ExtTrueVal) const {
7857 SDLoc DL(Op);
7858 MVT VecVT = Op.getSimpleValueType();
7859 SDValue Src = Op.getOperand(0);
7860 // Only custom-lower extensions from mask types
7861 assert(Src.getValueType().isVector() &&
7862 Src.getValueType().getVectorElementType() == MVT::i1);
7863
7864 if (VecVT.isScalableVector()) {
7865 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7866 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7867 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7868 }
7869
7870 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7871 MVT I1ContainerVT =
7872 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7873
7874 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7875
7876 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7877
7878 MVT XLenVT = Subtarget.getXLenVT();
7879 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7880 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7881
7882 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7883 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7884 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7885 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7886 SDValue Select =
7887 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7888 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7889
7890 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7891}
7892
7893SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7894 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7895 MVT ExtVT = Op.getSimpleValueType();
7896 // Only custom-lower extensions from fixed-length vector types.
7897 if (!ExtVT.isFixedLengthVector())
7898 return Op;
7899 MVT VT = Op.getOperand(0).getSimpleValueType();
7900 // Grab the canonical container type for the extended type. Infer the smaller
7901 // type from that to ensure the same number of vector elements, as we know
7902 // the LMUL will be sufficient to hold the smaller type.
7903 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7904 // Get the extended container type manually to ensure the same number of
7905 // vector elements between source and dest.
7906 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7907 ContainerExtVT.getVectorElementCount());
7908
7909 SDValue Op1 =
7910 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7911
7912 SDLoc DL(Op);
7913 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7914
7915 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7916
7917 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7918}
7919
7920// Custom-lower truncations from vectors to mask vectors by using a mask and a
7921// setcc operation:
7922// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
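// For example, an i8 element with value 6 (0b110) gives (6 & 1) = 0 -> false,
// while 3 (0b011) gives (3 & 1) = 1 -> true, i.e. truncation keeps the LSB.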
7923SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7924 SelectionDAG &DAG) const {
7925 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7926 SDLoc DL(Op);
7927 EVT MaskVT = Op.getValueType();
7928 // Only expect to custom-lower truncations to mask types
7929 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7930 "Unexpected type for vector mask lowering");
7931 SDValue Src = Op.getOperand(0);
7932 MVT VecVT = Src.getSimpleValueType();
7933 SDValue Mask, VL;
7934 if (IsVPTrunc) {
7935 Mask = Op.getOperand(1);
7936 VL = Op.getOperand(2);
7937 }
7938 // If this is a fixed vector, we need to convert it to a scalable vector.
7939 MVT ContainerVT = VecVT;
7940
7941 if (VecVT.isFixedLengthVector()) {
7942 ContainerVT = getContainerForFixedLengthVector(VecVT);
7943 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7944 if (IsVPTrunc) {
7945 MVT MaskContainerVT =
7946 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7947 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7948 }
7949 }
7950
7951 if (!IsVPTrunc) {
7952 std::tie(Mask, VL) =
7953 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7954 }
7955
7956 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7957 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7958
7959 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7960 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7961 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7962 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7963
7964 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7965 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7966 DAG.getUNDEF(ContainerVT), Mask, VL);
7967 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7968 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7969 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7970 if (MaskVT.isFixedLengthVector())
7971 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7972 return Trunc;
7973}
7974
7975SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7976 SelectionDAG &DAG) const {
7977 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7978 SDLoc DL(Op);
7979
7980 MVT VT = Op.getSimpleValueType();
7981 // Only custom-lower vector truncates
7982 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7983
7984 // Truncates to mask types are handled differently
7985 if (VT.getVectorElementType() == MVT::i1)
7986 return lowerVectorMaskTruncLike(Op, DAG);
7987
7988 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7989 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7990 // truncate by one power of two at a time.
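// For example, truncating from i64 elements to i8 elements emits three
// TRUNCATE_VECTOR_VL nodes: i64 -> i32 -> i16 -> i8.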
7991 MVT DstEltVT = VT.getVectorElementType();
7992
7993 SDValue Src = Op.getOperand(0);
7994 MVT SrcVT = Src.getSimpleValueType();
7995 MVT SrcEltVT = SrcVT.getVectorElementType();
7996
7997 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7998 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7999 "Unexpected vector truncate lowering");
8000
8001 MVT ContainerVT = SrcVT;
8002 SDValue Mask, VL;
8003 if (IsVPTrunc) {
8004 Mask = Op.getOperand(1);
8005 VL = Op.getOperand(2);
8006 }
8007 if (SrcVT.isFixedLengthVector()) {
8008 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8009 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8010 if (IsVPTrunc) {
8011 MVT MaskVT = getMaskTypeFor(ContainerVT);
8012 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8013 }
8014 }
8015
8016 SDValue Result = Src;
8017 if (!IsVPTrunc) {
8018 std::tie(Mask, VL) =
8019 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8020 }
8021
8022 LLVMContext &Context = *DAG.getContext();
8023 const ElementCount Count = ContainerVT.getVectorElementCount();
8024 do {
8025 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8026 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8027 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8028 Mask, VL);
8029 } while (SrcEltVT != DstEltVT);
8030
8031 if (SrcVT.isFixedLengthVector())
8032 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8033
8034 return Result;
8035}
8036
8037SDValue
8038RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8039 SelectionDAG &DAG) const {
8040 SDLoc DL(Op);
8041 SDValue Chain = Op.getOperand(0);
8042 SDValue Src = Op.getOperand(1);
8043 MVT VT = Op.getSimpleValueType();
8044 MVT SrcVT = Src.getSimpleValueType();
8045 MVT ContainerVT = VT;
8046 if (VT.isFixedLengthVector()) {
8047 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8048 ContainerVT =
8049 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8050 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8051 }
8052
8053 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8054
8055 // RVV can only widen/truncate fp to types double/half the size of the source.
8056 if ((VT.getVectorElementType() == MVT::f64 &&
8057 SrcVT.getVectorElementType() == MVT::f16) ||
8058 (VT.getVectorElementType() == MVT::f16 &&
8059 SrcVT.getVectorElementType() == MVT::f64)) {
8060 // For double rounding, the intermediate rounding should be round-to-odd.
8061 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8062 ? RISCVISD::STRICT_FP_EXTEND_VL
8063 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8064 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8065 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8066 Chain, Src, Mask, VL);
8067 Chain = Src.getValue(1);
8068 }
8069
8070 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8071 ? RISCVISD::STRICT_FP_EXTEND_VL
8072 : RISCVISD::STRICT_FP_ROUND_VL;
8073 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8074 Chain, Src, Mask, VL);
8075 if (VT.isFixedLengthVector()) {
8076 // StrictFP operations have two result values. Their lowered result should
8077 // have the same result count.
8078 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8079 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8080 }
8081 return Res;
8082}
8083
8084SDValue
8085RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 bool IsVP =
8088 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8089 bool IsExtend =
8090 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8091 // RVV can only truncate fp to types half the size of the source. We
8092 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8093 // conversion instruction.
8094 SDLoc DL(Op);
8095 MVT VT = Op.getSimpleValueType();
8096
8097 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8098
8099 SDValue Src = Op.getOperand(0);
8100 MVT SrcVT = Src.getSimpleValueType();
8101
8102 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8103 SrcVT.getVectorElementType() != MVT::f16);
8104 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8105 SrcVT.getVectorElementType() != MVT::f64);
8106
8107 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8108
8109 // Prepare any fixed-length vector operands.
8110 MVT ContainerVT = VT;
8111 SDValue Mask, VL;
8112 if (IsVP) {
8113 Mask = Op.getOperand(1);
8114 VL = Op.getOperand(2);
8115 }
8116 if (VT.isFixedLengthVector()) {
8117 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8118 ContainerVT =
8119 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8120 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8121 if (IsVP) {
8122 MVT MaskVT = getMaskTypeFor(ContainerVT);
8123 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8124 }
8125 }
8126
8127 if (!IsVP)
8128 std::tie(Mask, VL) =
8129 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8130
8131 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8132
8133 if (IsDirectConv) {
8134 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8135 if (VT.isFixedLengthVector())
8136 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8137 return Src;
8138 }
8139
8140 unsigned InterConvOpc =
8141 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8142
8143 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8144 SDValue IntermediateConv =
8145 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8146 SDValue Result =
8147 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8148 if (VT.isFixedLengthVector())
8149 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8150 return Result;
8151}
8152
8153// Given a scalable vector type and an index into it, returns the type for the
8154// smallest subvector that the index fits in. This can be used to reduce LMUL
8155// for operations like vslidedown.
8156//
8157// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8158static std::optional<MVT>
8159getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8160 const RISCVSubtarget &Subtarget) {
8161 assert(VecVT.isScalableVector());
8162 const unsigned EltSize = VecVT.getScalarSizeInBits();
8163 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8164 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8165 MVT SmallerVT;
8166 if (MaxIdx < MinVLMAX)
8167 SmallerVT = getLMUL1VT(VecVT);
8168 else if (MaxIdx < MinVLMAX * 2)
8169 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8170 else if (MaxIdx < MinVLMAX * 4)
8171 SmallerVT = getLMUL1VT(VecVT)
8172 .getDoubleNumVectorElementsVT()
8173 .getDoubleNumVectorElementsVT();
8174 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8175 return std::nullopt;
8176 return SmallerVT;
8177}
8178
8179// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8180// first position of a vector, and that vector is slid up to the insert index.
8181// By limiting the active vector length to index+1 and merging with the
8182// original vector (with an undisturbed tail policy for elements >= VL), we
8183// achieve the desired result of leaving all elements untouched except the one
8184// at VL-1, which is replaced with the desired value.
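// For example, inserting into index 2 of {a, b, c, d} builds a vector with
// the new value at element 0, then slides it up by 2 with VL=3 and a
// tail-undisturbed policy, producing {a, b, new, d}.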
8185SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8186 SelectionDAG &DAG) const {
8187 SDLoc DL(Op);
8188 MVT VecVT = Op.getSimpleValueType();
8189 SDValue Vec = Op.getOperand(0);
8190 SDValue Val = Op.getOperand(1);
8191 SDValue Idx = Op.getOperand(2);
8192
8193 if (VecVT.getVectorElementType() == MVT::i1) {
8194 // FIXME: For now we just promote to an i8 vector and insert into that,
8195 // but this is probably not optimal.
8196 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8197 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8198 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8199 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8200 }
8201
8202 MVT ContainerVT = VecVT;
8203 // If the operand is a fixed-length vector, convert to a scalable one.
8204 if (VecVT.isFixedLengthVector()) {
8205 ContainerVT = getContainerForFixedLengthVector(VecVT);
8206 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8207 }
8208
8209 // If we know the index we're going to insert at, we can shrink Vec so that
8210 // we're performing the scalar inserts and slideup on a smaller LMUL.
8211 MVT OrigContainerVT = ContainerVT;
8212 SDValue OrigVec = Vec;
8213 SDValue AlignedIdx;
8214 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8215 const unsigned OrigIdx = IdxC->getZExtValue();
8216 // Do we know an upper bound on LMUL?
8217 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8218 DL, DAG, Subtarget)) {
8219 ContainerVT = *ShrunkVT;
8220 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8221 }
8222
8223 // If we're compiling for an exact VLEN value, we can always perform
8224 // the insert in m1 as we can determine the register corresponding to
8225 // the index in the register group.
8226 const MVT M1VT = getLMUL1VT(ContainerVT);
8227 if (auto VLEN = Subtarget.getRealVLen();
8228 VLEN && ContainerVT.bitsGT(M1VT)) {
8229 EVT ElemVT = VecVT.getVectorElementType();
8230 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8231 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8232 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8233 unsigned ExtractIdx =
8234 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8235 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8236 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8237 ContainerVT = M1VT;
8238 }
8239
8240 if (AlignedIdx)
8241 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8242 AlignedIdx);
8243 }
8244
8245 MVT XLenVT = Subtarget.getXLenVT();
8246
8247 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8248 // Even i64-element vectors on RV32 can be lowered without scalar
8249 // legalization if the most-significant 32 bits of the value are not affected
8250 // by the sign-extension of the lower 32 bits.
8251 // TODO: We could also catch sign extensions of a 32-bit value.
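// Editorial example: on RV32, inserting the i64 constant 0xFFFFFFFF80000000
// only needs the 32-bit value 0x80000000, because sign-extending that low
// half reproduces the full 64-bit constant; 0x0000000080000000 does not have
// this property and still needs the two-register path below.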
8252 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8253 const auto *CVal = cast<ConstantSDNode>(Val);
8254 if (isInt<32>(CVal->getSExtValue())) {
8255 IsLegalInsert = true;
8256 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8257 }
8258 }
8259
8260 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8261
8262 SDValue ValInVec;
8263
8264 if (IsLegalInsert) {
8265 unsigned Opc =
8266 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8267 if (isNullConstant(Idx)) {
8268 if (!VecVT.isFloatingPoint())
8269 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8270 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8271
8272 if (AlignedIdx)
8273 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8274 Vec, AlignedIdx);
8275 if (!VecVT.isFixedLengthVector())
8276 return Vec;
8277 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8278 }
8279 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8280 } else {
8281 // On RV32, i64-element vectors must be specially handled to place the
8282 // value at element 0, by using two vslide1down instructions in sequence on
8283 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8284 // this.
8285 SDValue ValLo, ValHi;
8286 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8287 MVT I32ContainerVT =
8288 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8289 SDValue I32Mask =
8290 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8291 // Limit the active VL to two.
8292 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8293 // If the Idx is 0 we can insert directly into the vector.
8294 if (isNullConstant(Idx)) {
8295 // First slide in the lo value, then the hi value above it. We use slide1down
8296 // to avoid the register group overlap constraint of vslide1up.
8297 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8298 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8299 // If the source vector is undef don't pass along the tail elements from
8300 // the previous slide1down.
8301 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8302 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8303 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8304 // Bitcast back to the right container type.
8305 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8306
8307 if (AlignedIdx)
8308 ValInVec =
8309 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8310 ValInVec, AlignedIdx);
8311 if (!VecVT.isFixedLengthVector())
8312 return ValInVec;
8313 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8314 }
8315
8316 // First slide in the lo value, then the hi value above it. We use slide1down
8317 // to avoid the register group overlap constraint of vslide1up.
8318 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8319 DAG.getUNDEF(I32ContainerVT),
8320 DAG.getUNDEF(I32ContainerVT), ValLo,
8321 I32Mask, InsertI64VL);
8322 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8323 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8324 I32Mask, InsertI64VL);
8325 // Bitcast back to the right container type.
8326 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8327 }
8328
8329 // Now that the value is in a vector, slide it into position.
8330 SDValue InsertVL =
8331 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8332
8333 // Use tail agnostic policy if Idx is the last index of Vec.
8334 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8335 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8336 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8337 Policy = RISCVII::TAIL_AGNOSTIC;
8338 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8339 Idx, Mask, InsertVL, Policy);
8340
8341 if (AlignedIdx)
8342 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8343 Slideup, AlignedIdx);
8344 if (!VecVT.isFixedLengthVector())
8345 return Slideup;
8346 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8347}
8348
8349// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8350// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8351// types this is done using VMV_X_S to allow us to glean information about the
8352// sign bits of the result.
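// Rough sketch for extracting element a0 of a v4i32 held in v8 (editorial
// illustration; the exact sequence varies with index knowledge and VLEN):
//   vsetivli      zero, 1, e32, m1, ta, ma  ; only one element is needed
//   vslidedown.vx v8, v8, a0                ; element a0 becomes element 0
//   vmv.x.s       a0, v8                    ; move (and sign-extend) to a GPR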
8353SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8354 SelectionDAG &DAG) const {
8355 SDLoc DL(Op);
8356 SDValue Idx = Op.getOperand(1);
8357 SDValue Vec = Op.getOperand(0);
8358 EVT EltVT = Op.getValueType();
8359 MVT VecVT = Vec.getSimpleValueType();
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 if (VecVT.getVectorElementType() == MVT::i1) {
8363 // Use vfirst.m to extract the first bit.
8364 if (isNullConstant(Idx)) {
8365 MVT ContainerVT = VecVT;
8366 if (VecVT.isFixedLengthVector()) {
8367 ContainerVT = getContainerForFixedLengthVector(VecVT);
8368 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8369 }
8370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8371 SDValue Vfirst =
8372 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8373 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8374 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8375 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8376 }
8377 if (VecVT.isFixedLengthVector()) {
8378 unsigned NumElts = VecVT.getVectorNumElements();
8379 if (NumElts >= 8) {
8380 MVT WideEltVT;
8381 unsigned WidenVecLen;
8382 SDValue ExtractElementIdx;
8383 SDValue ExtractBitIdx;
8384 unsigned MaxEEW = Subtarget.getELen();
8385 MVT LargestEltVT = MVT::getIntegerVT(
8386 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8387 if (NumElts <= LargestEltVT.getSizeInBits()) {
8388 assert(isPowerOf2_32(NumElts) &&
8389 "the number of elements should be power of 2");
8390 WideEltVT = MVT::getIntegerVT(NumElts);
8391 WidenVecLen = 1;
8392 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8393 ExtractBitIdx = Idx;
8394 } else {
8395 WideEltVT = LargestEltVT;
8396 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8397 // extract element index = index / element width
8398 ExtractElementIdx = DAG.getNode(
8399 ISD::SRL, DL, XLenVT, Idx,
8400 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8401 // mask bit index = index % element width
8402 ExtractBitIdx = DAG.getNode(
8403 ISD::AND, DL, XLenVT, Idx,
8404 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8405 }
8406 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8407 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8408 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8409 Vec, ExtractElementIdx);
8410 // Extract the bit from GPR.
8411 SDValue ShiftRight =
8412 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8413 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8414 DAG.getConstant(1, DL, XLenVT));
8415 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8416 }
8417 }
8418 // Otherwise, promote to an i8 vector and extract from that.
8419 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8420 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8421 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8422 }
8423
8424 // If this is a fixed vector, we need to convert it to a scalable vector.
8425 MVT ContainerVT = VecVT;
8426 if (VecVT.isFixedLengthVector()) {
8427 ContainerVT = getContainerForFixedLengthVector(VecVT);
8428 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8429 }
8430
8431 // If we're compiling for an exact VLEN value and we have a known
8432 // constant index, we can always perform the extract in m1 (or
8433 // smaller) as we can determine the register corresponding to
8434 // the index in the register group.
8435 const auto VLen = Subtarget.getRealVLen();
8436 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8437 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8438 MVT M1VT = getLMUL1VT(ContainerVT);
8439 unsigned OrigIdx = IdxC->getZExtValue();
8440 EVT ElemVT = VecVT.getVectorElementType();
8441 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8442 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8443 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8444 unsigned ExtractIdx =
8445 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8446 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8447 DAG.getVectorIdxConstant(ExtractIdx, DL));
8448 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8449 ContainerVT = M1VT;
8450 }
8451
8452 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8453 // contains our index.
8454 std::optional<uint64_t> MaxIdx;
8455 if (VecVT.isFixedLengthVector())
8456 MaxIdx = VecVT.getVectorNumElements() - 1;
8457 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8458 MaxIdx = IdxC->getZExtValue();
8459 if (MaxIdx) {
8460 if (auto SmallerVT =
8461 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8462 ContainerVT = *SmallerVT;
8463 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8464 DAG.getConstant(0, DL, XLenVT));
8465 }
8466 }
8467
8468 // If after narrowing, the required slide is still greater than LMUL2,
8469 // fall back to generic expansion and go through the stack. This is done
8470 // for a subtle reason: extracting *all* elements out of a vector is
8471 // widely expected to be linear in vector size, but because vslidedown
8472 // is linear in LMUL, performing N extracts using vslidedown becomes
8473 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8474 // seems to have the same problem (the store is linear in LMUL), but the
8475 // generic expansion *memoizes* the store, and thus for many extracts of
8476 // the same vector we end up with one store and a bunch of loads.
8477 // TODO: We don't have the same code for insert_vector_elt because we
8478 // have BUILD_VECTOR and handle the degenerate case there. Should we
8479 // consider adding an inverse BUILD_VECTOR node?
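// Rough numbers for intuition (editorial sketch, assuming VLEN=128 and a
// v32i32 source, i.e. LMUL=8): 32 vslidedown-based extracts each touch 8
// vector registers (~256 register reads/writes total), while the memoized
// stack expansion stores those 8 registers once and then issues 32 scalar
// loads.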
8480 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8481 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8482 return SDValue();
8483
8484 // If the index is 0, the vector is already in the right position.
8485 if (!isNullConstant(Idx)) {
8486 // Use a VL of 1 to avoid processing more elements than we need.
8487 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8488 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8489 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8490 }
8491
8492 if (!EltVT.isInteger()) {
8493 // Floating-point extracts are handled in TableGen.
8494 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8495 DAG.getVectorIdxConstant(0, DL));
8496 }
8497
8498 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8499 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8500}
8501
8502// Some RVV intrinsics may claim that they want an integer operand to be
8503// promoted or expanded.
8504 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8505 const RISCVSubtarget &Subtarget) {
8506 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8507 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8508 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8509 "Unexpected opcode");
8510
8511 if (!Subtarget.hasVInstructions())
8512 return SDValue();
8513
8514 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8515 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8516 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8517
8518 SDLoc DL(Op);
8519
8520 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8521 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8522 if (!II || !II->hasScalarOperand())
8523 return SDValue();
8524
8525 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8526 assert(SplatOp < Op.getNumOperands());
8527
8528 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8529 SDValue &ScalarOp = Operands[SplatOp];
8530 MVT OpVT = ScalarOp.getSimpleValueType();
8531 MVT XLenVT = Subtarget.getXLenVT();
8532
8533 // If this isn't a scalar, or its type is XLenVT we're done.
8534 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8535 return SDValue();
8536
8537 // Simplest case is that the operand needs to be promoted to XLenVT.
8538 if (OpVT.bitsLT(XLenVT)) {
8539 // If the operand is a constant, sign extend to increase our chances
8540 // of being able to use a .vi instruction. ANY_EXTEND would become
8541 // a zero extend and the simm5 check in isel would fail.
8542 // FIXME: Should we ignore the upper bits in isel instead?
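// Editorial example: for an i16 constant -1, SIGN_EXTEND to XLenVT keeps the
// value -1, which still satisfies the simm5 immediate check; ANY_EXTEND could
// legalize it to 0xffff and the .vi form would then be rejected.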
8543 unsigned ExtOpc =
8544 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8545 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8546 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8547 }
8548
8549 // Use the previous operand to get the vXi64 VT. The result might be a mask
8550 // VT for compares. Using the previous operand assumes that the previous
8551 // operand will never have a smaller element size than a scalar operand and
8552 // that a widening operation never uses SEW=64.
8553 // NOTE: If this fails the below assert, we can probably just find the
8554 // element count from any operand or result and use it to construct the VT.
8555 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8556 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8557
8558 // The more complex case is when the scalar is larger than XLenVT.
8559 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8560 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8561
8562 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8563 // instruction to sign-extend since SEW>XLEN.
8564 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8565 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8566 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8567 }
8568
8569 switch (IntNo) {
8570 case Intrinsic::riscv_vslide1up:
8571 case Intrinsic::riscv_vslide1down:
8572 case Intrinsic::riscv_vslide1up_mask:
8573 case Intrinsic::riscv_vslide1down_mask: {
8574 // We need to special case these when the scalar is larger than XLen.
8575 unsigned NumOps = Op.getNumOperands();
8576 bool IsMasked = NumOps == 7;
8577
8578 // Convert the vector source to the equivalent nxvXi32 vector.
8579 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8580 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8581 SDValue ScalarLo, ScalarHi;
8582 std::tie(ScalarLo, ScalarHi) =
8583 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8584
8585 // Double the VL since we halved SEW.
8586 SDValue AVL = getVLOperand(Op);
8587 SDValue I32VL;
8588
8589 // Optimize for constant AVL
8590 if (isa<ConstantSDNode>(AVL)) {
8591 const auto [MinVLMAX, MaxVLMAX] =
8592 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8593
8594 uint64_t AVLInt = AVL->getAsZExtVal();
8595 if (AVLInt <= MinVLMAX) {
8596 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8597 } else if (AVLInt >= 2 * MaxVLMAX) {
8598 // Just set vl to VLMAX in this situation
8599 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8600 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8601 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8602 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8603 SDValue SETVLMAX = DAG.getTargetConstant(
8604 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8605 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8606 LMUL);
8607 } else {
8608 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8609 // is related to the hardware implementation.
8610 // So let the following code handle it.
8611 }
8612 }
8613 if (!I32VL) {
8614 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8615 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8616 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8617 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8618 SDValue SETVL =
8619 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8620 // Use a vsetvli instruction to get the actually-used length, which is
8621 // related to the hardware implementation.
8622 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8623 SEW, LMUL);
8624 I32VL =
8625 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8626 }
8627
8628 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8629
8630 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8631 // instructions.
8632 SDValue Passthru;
8633 if (IsMasked)
8634 Passthru = DAG.getUNDEF(I32VT);
8635 else
8636 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8637
8638 if (IntNo == Intrinsic::riscv_vslide1up ||
8639 IntNo == Intrinsic::riscv_vslide1up_mask) {
8640 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8641 ScalarHi, I32Mask, I32VL);
8642 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8643 ScalarLo, I32Mask, I32VL);
8644 } else {
8645 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8646 ScalarLo, I32Mask, I32VL);
8647 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8648 ScalarHi, I32Mask, I32VL);
8649 }
8650
8651 // Convert back to nxvXi64.
8652 Vec = DAG.getBitcast(VT, Vec);
8653
8654 if (!IsMasked)
8655 return Vec;
8656 // Apply mask after the operation.
8657 SDValue Mask = Operands[NumOps - 3];
8658 SDValue MaskedOff = Operands[1];
8659 // Assume Policy operand is the last operand.
8660 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8661 // We don't need to select maskedoff if it's undef.
8662 if (MaskedOff.isUndef())
8663 return Vec;
8664 // TAMU
8665 if (Policy == RISCVII::TAIL_AGNOSTIC)
8666 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8667 DAG.getUNDEF(VT), AVL);
8668 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8669 // It's fine because vmerge does not care about mask policy.
8670 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8671 MaskedOff, AVL);
8672 }
8673 }
8674
8675 // We need to convert the scalar to a splat vector.
8676 SDValue VL = getVLOperand(Op);
8677 assert(VL.getValueType() == XLenVT);
8678 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8679 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8680}
8681
8682// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8683// scalable vector llvm.get.vector.length for now.
8684//
8685// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8686// (vscale * VF). The vscale and VF are independent of element width. We use
8687// SEW=8 for the vsetvli because it is the only element width that supports all
8688 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8689 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8690// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8691// SEW and LMUL are better for the surrounding vector instructions.
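// Worked example (editorial, using RVVBitsPerBlock = 64): for a request with
// VF = 4, LMul1VF = 64 / 8 = 8, so VF is fractional with LMulVal = 2, i.e.
// LMUL = mf2. A "vsetvli ..., e8, mf2" then yields
// VLMax = (VLEN/8)/2 = vscale * 4 = vscale * VF, as required.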
8692 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8693 const RISCVSubtarget &Subtarget) {
8694 MVT XLenVT = Subtarget.getXLenVT();
8695
8696 // The smallest LMUL is only valid for the smallest element width.
8697 const unsigned ElementWidth = 8;
8698
8699 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8700 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8701 // We don't support VF==1 with ELEN==32.
8702 [[maybe_unused]] unsigned MinVF =
8703 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8704
8705 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8706 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8707 "Unexpected VF");
8708
8709 bool Fractional = VF < LMul1VF;
8710 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8711 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8712 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8713
8714 SDLoc DL(N);
8715
8716 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8717 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8718
8719 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8720
8721 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8722 SDValue Res =
8723 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8724 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8725}
8726
8727 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8728 const RISCVSubtarget &Subtarget) {
8729 SDValue Op0 = N->getOperand(1);
8730 MVT OpVT = Op0.getSimpleValueType();
8731 MVT ContainerVT = OpVT;
8732 if (OpVT.isFixedLengthVector()) {
8733 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8734 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8735 }
8736 MVT XLenVT = Subtarget.getXLenVT();
8737 SDLoc DL(N);
8738 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8739 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8740 if (isOneConstant(N->getOperand(2)))
8741 return Res;
8742
8743 // Convert -1 to VL.
8744 SDValue Setcc =
8745 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8746 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8747 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8748}
8749
8750static inline void promoteVCIXScalar(const SDValue &Op,
8751 SmallVectorImpl<SDValue> &Operands,
8752 SelectionDAG &DAG) {
8753 const RISCVSubtarget &Subtarget =
8754 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8755
8756 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8757 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8758 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8759 SDLoc DL(Op);
8760
8761 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8762 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8763 if (!II || !II->hasScalarOperand())
8764 return;
8765
8766 unsigned SplatOp = II->ScalarOperand + 1;
8767 assert(SplatOp < Op.getNumOperands());
8768
8769 SDValue &ScalarOp = Operands[SplatOp];
8770 MVT OpVT = ScalarOp.getSimpleValueType();
8771 MVT XLenVT = Subtarget.getXLenVT();
8772
8773 // The code below is partially copied from lowerVectorIntrinsicScalars.
8774 // If this isn't a scalar, or its type is XLenVT we're done.
8775 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8776 return;
8777
8778 // Manually emit promote operation for scalar operation.
8779 if (OpVT.bitsLT(XLenVT)) {
8780 unsigned ExtOpc =
8781 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8782 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8783 }
8784
8785 return;
8786}
8787
8788static void processVCIXOperands(SDValue &OrigOp,
8789 SmallVectorImpl<SDValue> &Operands,
8790 SelectionDAG &DAG) {
8791 promoteVCIXScalar(OrigOp, Operands, DAG);
8792 const RISCVSubtarget &Subtarget =
8793 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8794 for (SDValue &V : Operands) {
8795 EVT ValType = V.getValueType();
8796 if (ValType.isVector() && ValType.isFloatingPoint()) {
8797 MVT InterimIVT =
8798 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8799 ValType.getVectorElementCount());
8800 V = DAG.getBitcast(InterimIVT, V);
8801 }
8802 if (ValType.isFixedLengthVector()) {
8803 MVT OpContainerVT = getContainerForFixedLengthVector(
8804 DAG, V.getSimpleValueType(), Subtarget);
8805 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8806 }
8807 }
8808}
8809
8810// LMUL * VLEN should be greater than or equal to EGS * SEW
8811static inline bool isValidEGW(int EGS, EVT VT,
8812 const RISCVSubtarget &Subtarget) {
8813 return (Subtarget.getRealMinVLen() *
8814 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8815 EGS * VT.getScalarSizeInBits();
8816}
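// Editorial example of the check above, assuming Zvl128b (RealMinVLen = 128)
// and RVVBitsPerBlock = 64: for a nxv4i32 operand (known-min 128 bits, i.e.
// LMUL2) and EGS = 4, LMUL * VLEN = 2 * 128 = 256 >= EGS * SEW = 4 * 32 = 128,
// so the element group fits.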
8817
8818SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8819 SelectionDAG &DAG) const {
8820 unsigned IntNo = Op.getConstantOperandVal(0);
8821 SDLoc DL(Op);
8822 MVT XLenVT = Subtarget.getXLenVT();
8823
8824 switch (IntNo) {
8825 default:
8826 break; // Don't custom lower most intrinsics.
8827 case Intrinsic::thread_pointer: {
8828 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8829 return DAG.getRegister(RISCV::X4, PtrVT);
8830 }
8831 case Intrinsic::riscv_orc_b:
8832 case Intrinsic::riscv_brev8:
8833 case Intrinsic::riscv_sha256sig0:
8834 case Intrinsic::riscv_sha256sig1:
8835 case Intrinsic::riscv_sha256sum0:
8836 case Intrinsic::riscv_sha256sum1:
8837 case Intrinsic::riscv_sm3p0:
8838 case Intrinsic::riscv_sm3p1: {
8839 unsigned Opc;
8840 switch (IntNo) {
8841 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8842 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8843 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8844 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8845 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8846 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8847 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8848 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8849 }
8850
8851 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8852 SDValue NewOp =
8853 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8854 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8855 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8856 }
8857
8858 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8859 }
8860 case Intrinsic::riscv_sm4ks:
8861 case Intrinsic::riscv_sm4ed: {
8862 unsigned Opc =
8863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8864
8865 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8866 SDValue NewOp0 =
8867 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8868 SDValue NewOp1 =
8869 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8870 SDValue Res =
8871 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8873 }
8874
8875 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8876 Op.getOperand(3));
8877 }
8878 case Intrinsic::riscv_zip:
8879 case Intrinsic::riscv_unzip: {
8880 unsigned Opc =
8881 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8882 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8883 }
8884 case Intrinsic::riscv_mopr: {
8885 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8886 SDValue NewOp =
8887 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8888 SDValue Res = DAG.getNode(
8889 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8890 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8891 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8892 }
8893 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8894 Op.getOperand(2));
8895 }
8896
8897 case Intrinsic::riscv_moprr: {
8898 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8899 SDValue NewOp0 =
8900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8901 SDValue NewOp1 =
8902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8903 SDValue Res = DAG.getNode(
8904 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8905 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8906 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8907 }
8908 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8909 Op.getOperand(2), Op.getOperand(3));
8910 }
8911 case Intrinsic::riscv_clmul:
8912 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8913 SDValue NewOp0 =
8914 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8915 SDValue NewOp1 =
8916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8917 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8918 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8919 }
8920 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8921 Op.getOperand(2));
8922 case Intrinsic::riscv_clmulh:
8923 case Intrinsic::riscv_clmulr: {
8924 unsigned Opc =
8925 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8926 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8927 SDValue NewOp0 =
8928 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8929 SDValue NewOp1 =
8930 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8931 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8932 DAG.getConstant(32, DL, MVT::i64));
8933 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8934 DAG.getConstant(32, DL, MVT::i64));
8935 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8936 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8937 DAG.getConstant(32, DL, MVT::i64));
8938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8939 }
8940
8941 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8942 }
8943 case Intrinsic::experimental_get_vector_length:
8944 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8945 case Intrinsic::experimental_cttz_elts:
8946 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8947 case Intrinsic::riscv_vmv_x_s: {
8948 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8949 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8950 }
8951 case Intrinsic::riscv_vfmv_f_s:
8952 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8953 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
8954 case Intrinsic::riscv_vmv_v_x:
8955 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8956 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8957 Subtarget);
8958 case Intrinsic::riscv_vfmv_v_f:
8959 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8960 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8961 case Intrinsic::riscv_vmv_s_x: {
8962 SDValue Scalar = Op.getOperand(2);
8963
8964 if (Scalar.getValueType().bitsLE(XLenVT)) {
8965 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8966 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8967 Op.getOperand(1), Scalar, Op.getOperand(3));
8968 }
8969
8970 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8971
8972 // This is an i64 value that lives in two scalar registers. We have to
8973 // insert this in a convoluted way. First we build vXi64 splat containing
8974 // the two values that we assemble using some bit math. Next we'll use
8975 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8976 // to merge element 0 from our splat into the source vector.
8977 // FIXME: This is probably not the best way to do this, but it is
8978 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8979 // point.
8980 // sw lo, (a0)
8981 // sw hi, 4(a0)
8982 // vlse vX, (a0)
8983 //
8984 // vid.v vVid
8985 // vmseq.vx mMask, vVid, 0
8986 // vmerge.vvm vDest, vSrc, vVal, mMask
8987 MVT VT = Op.getSimpleValueType();
8988 SDValue Vec = Op.getOperand(1);
8989 SDValue VL = getVLOperand(Op);
8990
8991 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8992 if (Op.getOperand(1).isUndef())
8993 return SplattedVal;
8994 SDValue SplattedIdx =
8995 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8996 DAG.getConstant(0, DL, MVT::i32), VL);
8997
8998 MVT MaskVT = getMaskTypeFor(VT);
8999 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9000 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9001 SDValue SelectCond =
9002 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9003 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9004 DAG.getUNDEF(MaskVT), Mask, VL});
9005 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9006 Vec, DAG.getUNDEF(VT), VL);
9007 }
9008 case Intrinsic::riscv_vfmv_s_f:
9009 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9010 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9011 // EGS * EEW >= 128 bits
9012 case Intrinsic::riscv_vaesdf_vv:
9013 case Intrinsic::riscv_vaesdf_vs:
9014 case Intrinsic::riscv_vaesdm_vv:
9015 case Intrinsic::riscv_vaesdm_vs:
9016 case Intrinsic::riscv_vaesef_vv:
9017 case Intrinsic::riscv_vaesef_vs:
9018 case Intrinsic::riscv_vaesem_vv:
9019 case Intrinsic::riscv_vaesem_vs:
9020 case Intrinsic::riscv_vaeskf1:
9021 case Intrinsic::riscv_vaeskf2:
9022 case Intrinsic::riscv_vaesz_vs:
9023 case Intrinsic::riscv_vsm4k:
9024 case Intrinsic::riscv_vsm4r_vv:
9025 case Intrinsic::riscv_vsm4r_vs: {
9026 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9027 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9028 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9029 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9030 return Op;
9031 }
9032 // EGS * EEW >= 256 bits
9033 case Intrinsic::riscv_vsm3c:
9034 case Intrinsic::riscv_vsm3me: {
9035 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9036 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9037 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9038 return Op;
9039 }
9040 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9041 case Intrinsic::riscv_vsha2ch:
9042 case Intrinsic::riscv_vsha2cl:
9043 case Intrinsic::riscv_vsha2ms: {
9044 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9045 !Subtarget.hasStdExtZvknhb())
9046 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9047 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9048 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9049 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9050 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9051 return Op;
9052 }
9053 case Intrinsic::riscv_sf_vc_v_x:
9054 case Intrinsic::riscv_sf_vc_v_i:
9055 case Intrinsic::riscv_sf_vc_v_xv:
9056 case Intrinsic::riscv_sf_vc_v_iv:
9057 case Intrinsic::riscv_sf_vc_v_vv:
9058 case Intrinsic::riscv_sf_vc_v_fv:
9059 case Intrinsic::riscv_sf_vc_v_xvv:
9060 case Intrinsic::riscv_sf_vc_v_ivv:
9061 case Intrinsic::riscv_sf_vc_v_vvv:
9062 case Intrinsic::riscv_sf_vc_v_fvv:
9063 case Intrinsic::riscv_sf_vc_v_xvw:
9064 case Intrinsic::riscv_sf_vc_v_ivw:
9065 case Intrinsic::riscv_sf_vc_v_vvw:
9066 case Intrinsic::riscv_sf_vc_v_fvw: {
9067 MVT VT = Op.getSimpleValueType();
9068
9069 SmallVector<SDValue> Operands{Op->op_values()};
9070 processVCIXOperands(Op, Operands, DAG);
9071
9072 MVT RetVT = VT;
9073 if (VT.isFixedLengthVector())
9074 RetVT = getContainerForFixedLengthVector(RetVT);
9075 else if (VT.isFloatingPoint())
9076 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9077 VT.getVectorElementCount());
9078
9079 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9080
9081 if (VT.isFixedLengthVector())
9082 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9083 else if (VT.isFloatingPoint())
9084 NewNode = DAG.getBitcast(VT, NewNode);
9085
9086 if (Op == NewNode)
9087 break;
9088
9089 return NewNode;
9090 }
9091 }
9092
9093 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9094}
9095
9096 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9097 unsigned Type) {
9098 SDLoc DL(Op);
9099 SmallVector<SDValue> Operands{Op->op_values()};
9100 Operands.erase(Operands.begin() + 1);
9101
9102 const RISCVSubtarget &Subtarget =
9103 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9104 MVT VT = Op.getSimpleValueType();
9105 MVT RetVT = VT;
9106 MVT FloatVT = VT;
9107
9108 if (VT.isFloatingPoint()) {
9109 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9110 VT.getVectorElementCount());
9111 FloatVT = RetVT;
9112 }
9113 if (VT.isFixedLengthVector())
9114 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9115 Subtarget);
9116
9117 processVCIXOperands(Op, Operands, DAG);
9118
9119 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9120 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9121 SDValue Chain = NewNode.getValue(1);
9122
9123 if (VT.isFixedLengthVector())
9124 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9125 if (VT.isFloatingPoint())
9126 NewNode = DAG.getBitcast(VT, NewNode);
9127
9128 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9129
9130 return NewNode;
9131}
9132
9133 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9134 unsigned Type) {
9135 SmallVector<SDValue> Operands{Op->op_values()};
9136 Operands.erase(Operands.begin() + 1);
9137 processVCIXOperands(Op, Operands, DAG);
9138
9139 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9140}
9141
9142SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9143 SelectionDAG &DAG) const {
9144 unsigned IntNo = Op.getConstantOperandVal(1);
9145 switch (IntNo) {
9146 default:
9147 break;
9148 case Intrinsic::riscv_masked_strided_load: {
9149 SDLoc DL(Op);
9150 MVT XLenVT = Subtarget.getXLenVT();
9151
9152 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9153 // the selection of the masked intrinsics doesn't do this for us.
9154 SDValue Mask = Op.getOperand(5);
9155 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9156
9157 MVT VT = Op->getSimpleValueType(0);
9158 MVT ContainerVT = VT;
9159 if (VT.isFixedLengthVector())
9160 ContainerVT = getContainerForFixedLengthVector(VT);
9161
9162 SDValue PassThru = Op.getOperand(2);
9163 if (!IsUnmasked) {
9164 MVT MaskVT = getMaskTypeFor(ContainerVT);
9165 if (VT.isFixedLengthVector()) {
9166 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9167 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9168 }
9169 }
9170
9171 auto *Load = cast<MemIntrinsicSDNode>(Op);
9172 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9173 SDValue Ptr = Op.getOperand(3);
9174 SDValue Stride = Op.getOperand(4);
9175 SDValue Result, Chain;
9176
9177 // TODO: We restrict this to unmasked loads currently in consideration of
9178 // the complexity of handling all-false masks.
9179 MVT ScalarVT = ContainerVT.getVectorElementType();
9180 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9181 SDValue ScalarLoad =
9182 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9183 ScalarVT, Load->getMemOperand());
9184 Chain = ScalarLoad.getValue(1);
9185 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9186 Subtarget);
9187 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9188 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9189 Load->getMemOperand());
9190 Chain = ScalarLoad.getValue(1);
9191 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9192 } else {
9193 SDValue IntID = DAG.getTargetConstant(
9194 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9195 XLenVT);
9196
9197 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9198 if (IsUnmasked)
9199 Ops.push_back(DAG.getUNDEF(ContainerVT));
9200 else
9201 Ops.push_back(PassThru);
9202 Ops.push_back(Ptr);
9203 Ops.push_back(Stride);
9204 if (!IsUnmasked)
9205 Ops.push_back(Mask);
9206 Ops.push_back(VL);
9207 if (!IsUnmasked) {
9208 SDValue Policy =
9209 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9210 Ops.push_back(Policy);
9211 }
9212
9213 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9214 Result =
9215 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9216 Load->getMemoryVT(), Load->getMemOperand());
9217 Chain = Result.getValue(1);
9218 }
9219 if (VT.isFixedLengthVector())
9220 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9221 return DAG.getMergeValues({Result, Chain}, DL);
9222 }
9223 case Intrinsic::riscv_seg2_load:
9224 case Intrinsic::riscv_seg3_load:
9225 case Intrinsic::riscv_seg4_load:
9226 case Intrinsic::riscv_seg5_load:
9227 case Intrinsic::riscv_seg6_load:
9228 case Intrinsic::riscv_seg7_load:
9229 case Intrinsic::riscv_seg8_load: {
9230 SDLoc DL(Op);
9231 static const Intrinsic::ID VlsegInts[7] = {
9232 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9233 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9234 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9235 Intrinsic::riscv_vlseg8};
9236 unsigned NF = Op->getNumValues() - 1;
9237 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9238 MVT XLenVT = Subtarget.getXLenVT();
9239 MVT VT = Op->getSimpleValueType(0);
9240 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9241
9242 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9243 Subtarget);
9244 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9245 auto *Load = cast<MemIntrinsicSDNode>(Op);
9246 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9247 ContainerVTs.push_back(MVT::Other);
9248 SDVTList VTs = DAG.getVTList(ContainerVTs);
9249 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9250 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9251 Ops.push_back(Op.getOperand(2));
9252 Ops.push_back(VL);
9253 SDValue Result =
9254 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9255 Load->getMemoryVT(), Load->getMemOperand());
9256 SmallVector<SDValue, 9> Results;
9257 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9258 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9259 DAG, Subtarget));
9260 Results.push_back(Result.getValue(NF));
9261 return DAG.getMergeValues(Results, DL);
9262 }
9263 case Intrinsic::riscv_sf_vc_v_x_se:
9264 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9265 case Intrinsic::riscv_sf_vc_v_i_se:
9266 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9267 case Intrinsic::riscv_sf_vc_v_xv_se:
9268 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9269 case Intrinsic::riscv_sf_vc_v_iv_se:
9270 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9271 case Intrinsic::riscv_sf_vc_v_vv_se:
9272 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9273 case Intrinsic::riscv_sf_vc_v_fv_se:
9274 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9275 case Intrinsic::riscv_sf_vc_v_xvv_se:
9276 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9277 case Intrinsic::riscv_sf_vc_v_ivv_se:
9278 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9279 case Intrinsic::riscv_sf_vc_v_vvv_se:
9280 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9281 case Intrinsic::riscv_sf_vc_v_fvv_se:
9282 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9283 case Intrinsic::riscv_sf_vc_v_xvw_se:
9284 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9285 case Intrinsic::riscv_sf_vc_v_ivw_se:
9286 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9287 case Intrinsic::riscv_sf_vc_v_vvw_se:
9288 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9289 case Intrinsic::riscv_sf_vc_v_fvw_se:
9290 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9291 }
9292
9293 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9294}
9295
9296SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9297 SelectionDAG &DAG) const {
9298 unsigned IntNo = Op.getConstantOperandVal(1);
9299 switch (IntNo) {
9300 default:
9301 break;
9302 case Intrinsic::riscv_masked_strided_store: {
9303 SDLoc DL(Op);
9304 MVT XLenVT = Subtarget.getXLenVT();
9305
9306 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9307 // the selection of the masked intrinsics doesn't do this for us.
9308 SDValue Mask = Op.getOperand(5);
9309 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9310
9311 SDValue Val = Op.getOperand(2);
9312 MVT VT = Val.getSimpleValueType();
9313 MVT ContainerVT = VT;
9314 if (VT.isFixedLengthVector()) {
9315 ContainerVT = getContainerForFixedLengthVector(VT);
9316 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9317 }
9318 if (!IsUnmasked) {
9319 MVT MaskVT = getMaskTypeFor(ContainerVT);
9320 if (VT.isFixedLengthVector())
9321 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9322 }
9323
9324 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9325
9326 SDValue IntID = DAG.getTargetConstant(
9327 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9328 XLenVT);
9329
9330 auto *Store = cast<MemIntrinsicSDNode>(Op);
9331 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9332 Ops.push_back(Val);
9333 Ops.push_back(Op.getOperand(3)); // Ptr
9334 Ops.push_back(Op.getOperand(4)); // Stride
9335 if (!IsUnmasked)
9336 Ops.push_back(Mask);
9337 Ops.push_back(VL);
9338
9339 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9340 Ops, Store->getMemoryVT(),
9341 Store->getMemOperand());
9342 }
9343 case Intrinsic::riscv_seg2_store:
9344 case Intrinsic::riscv_seg3_store:
9345 case Intrinsic::riscv_seg4_store:
9346 case Intrinsic::riscv_seg5_store:
9347 case Intrinsic::riscv_seg6_store:
9348 case Intrinsic::riscv_seg7_store:
9349 case Intrinsic::riscv_seg8_store: {
9350 SDLoc DL(Op);
9351 static const Intrinsic::ID VssegInts[] = {
9352 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9353 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9354 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9355 Intrinsic::riscv_vsseg8};
9356 // Operands are (chain, int_id, vec*, ptr, vl)
9357 unsigned NF = Op->getNumOperands() - 4;
9358 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9359 MVT XLenVT = Subtarget.getXLenVT();
9360 MVT VT = Op->getOperand(2).getSimpleValueType();
9361 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9362
9363 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9364 Subtarget);
9365 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9366 SDValue Ptr = Op->getOperand(NF + 2);
9367
9368 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9369 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9370 for (unsigned i = 0; i < NF; i++)
9371 Ops.push_back(convertToScalableVector(
9372 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9373 Ops.append({Ptr, VL});
9374
9375 return DAG.getMemIntrinsicNode(
9376 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9377 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9378 }
9379 case Intrinsic::riscv_sf_vc_xv_se:
9380 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9381 case Intrinsic::riscv_sf_vc_iv_se:
9382 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9383 case Intrinsic::riscv_sf_vc_vv_se:
9384 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9385 case Intrinsic::riscv_sf_vc_fv_se:
9386 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9387 case Intrinsic::riscv_sf_vc_xvv_se:
9388 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9389 case Intrinsic::riscv_sf_vc_ivv_se:
9390 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9391 case Intrinsic::riscv_sf_vc_vvv_se:
9392 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9393 case Intrinsic::riscv_sf_vc_fvv_se:
9394 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9395 case Intrinsic::riscv_sf_vc_xvw_se:
9396 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9397 case Intrinsic::riscv_sf_vc_ivw_se:
9398 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9399 case Intrinsic::riscv_sf_vc_vvw_se:
9400 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9401 case Intrinsic::riscv_sf_vc_fvw_se:
9402 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9403 }
9404
9405 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9406}
9407
9408static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9409 switch (ISDOpcode) {
9410 default:
9411 llvm_unreachable("Unhandled reduction");
9412 case ISD::VP_REDUCE_ADD:
9413 case ISD::VECREDUCE_ADD:
9414 return RISCVISD::VECREDUCE_ADD_VL;
9415 case ISD::VP_REDUCE_UMAX:
9416 case ISD::VECREDUCE_UMAX:
9417 return RISCVISD::VECREDUCE_UMAX_VL;
9418 case ISD::VP_REDUCE_SMAX:
9419 case ISD::VECREDUCE_SMAX:
9420 return RISCVISD::VECREDUCE_SMAX_VL;
9421 case ISD::VP_REDUCE_UMIN:
9422 case ISD::VECREDUCE_UMIN:
9423 return RISCVISD::VECREDUCE_UMIN_VL;
9424 case ISD::VP_REDUCE_SMIN:
9425 case ISD::VECREDUCE_SMIN:
9426 return RISCVISD::VECREDUCE_SMIN_VL;
9427 case ISD::VP_REDUCE_AND:
9428 case ISD::VECREDUCE_AND:
9429 return RISCVISD::VECREDUCE_AND_VL;
9430 case ISD::VP_REDUCE_OR:
9431 case ISD::VECREDUCE_OR:
9432 return RISCVISD::VECREDUCE_OR_VL;
9433 case ISD::VP_REDUCE_XOR:
9434 case ISD::VECREDUCE_XOR:
9435 return RISCVISD::VECREDUCE_XOR_VL;
9436 case ISD::VP_REDUCE_FADD:
9437 return RISCVISD::VECREDUCE_FADD_VL;
9438 case ISD::VP_REDUCE_SEQ_FADD:
9439 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9440 case ISD::VP_REDUCE_FMAX:
9441 return RISCVISD::VECREDUCE_FMAX_VL;
9442 case ISD::VP_REDUCE_FMIN:
9443 return RISCVISD::VECREDUCE_FMIN_VL;
9444 }
9445
9446}
9447
9448SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9449 SelectionDAG &DAG,
9450 bool IsVP) const {
9451 SDLoc DL(Op);
9452 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9453 MVT VecVT = Vec.getSimpleValueType();
9454 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9455 Op.getOpcode() == ISD::VECREDUCE_OR ||
9456 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9457 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9458 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9459 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9460 "Unexpected reduction lowering");
9461
9462 MVT XLenVT = Subtarget.getXLenVT();
9463
9464 MVT ContainerVT = VecVT;
9465 if (VecVT.isFixedLengthVector()) {
9466 ContainerVT = getContainerForFixedLengthVector(VecVT);
9467 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9468 }
9469
9470 SDValue Mask, VL;
9471 if (IsVP) {
9472 Mask = Op.getOperand(2);
9473 VL = Op.getOperand(3);
9474 } else {
9475 std::tie(Mask, VL) =
9476 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9477 }
9478
9479 unsigned BaseOpc;
9480 ISD::CondCode CC;
9481 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9482
9483 switch (Op.getOpcode()) {
9484 default:
9485 llvm_unreachable("Unhandled reduction");
9486 case ISD::VECREDUCE_AND:
9487 case ISD::VP_REDUCE_AND: {
9488 // vcpop ~x == 0
9489 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9490 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9491 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9492 CC = ISD::SETEQ;
9493 BaseOpc = ISD::AND;
9494 break;
9495 }
9496 case ISD::VECREDUCE_OR:
9497 case ISD::VP_REDUCE_OR:
9498 // vcpop x != 0
9499 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9500 CC = ISD::SETNE;
9501 BaseOpc = ISD::OR;
9502 break;
9503 case ISD::VECREDUCE_XOR:
9504 case ISD::VP_REDUCE_XOR: {
9505 // ((vcpop x) & 1) != 0
9506 SDValue One = DAG.getConstant(1, DL, XLenVT);
9507 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9508 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9509 CC = ISD::SETNE;
9510 BaseOpc = ISD::XOR;
9511 break;
9512 }
9513 }
9514
9515 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9516 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9517
9518 if (!IsVP)
9519 return SetCC;
9520
9521 // Now include the start value in the operation.
9522 // Note that we must return the start value when no elements are operated
9523 // upon. The vcpop instructions we've emitted in each case above will return
9524 // 0 for an inactive vector, and so we've already received the neutral value:
9525 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9526 // can simply include the start value.
9527 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9528}
9529
9530static bool isNonZeroAVL(SDValue AVL) {
9531 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9532 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9533 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9534 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9535}
9536
9537/// Helper to lower a reduction sequence of the form:
9538/// scalar = reduce_op vec, scalar_start
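/// A rough sketch of what this produces for an i32 vector add reduction
/// (editorial illustration; the actual vsetvli/LMUL choices depend on the
/// input type and AVL):
///   vsetivli   zero, 1, e32, m1, ta, ma
///   vmv.s.x    v9, a0              ; scalar_start into element 0
///   vsetvli    zero, a1, e32, m2, ta, ma
///   vredsum.vs v9, v8, v9          ; v9[0] = v9[0] + sum(v8[0..vl-1])
///   vmv.x.s    a0, v9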
9539static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9540 SDValue StartValue, SDValue Vec, SDValue Mask,
9541 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9542 const RISCVSubtarget &Subtarget) {
9543 const MVT VecVT = Vec.getSimpleValueType();
9544 const MVT M1VT = getLMUL1VT(VecVT);
9545 const MVT XLenVT = Subtarget.getXLenVT();
9546 const bool NonZeroAVL = isNonZeroAVL(VL);
9547
9548 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9549 // or the original VT if fractional.
9550 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9551 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9552 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9553 // be the result of the reduction operation.
9554 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9555 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9556 DAG, Subtarget);
9557 if (M1VT != InnerVT)
9558 InitialValue =
9559 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9560 InitialValue, DAG.getVectorIdxConstant(0, DL));
9561 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9562 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9563 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9564 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9565 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9566 DAG.getVectorIdxConstant(0, DL));
9567}
9568
9569SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9570 SelectionDAG &DAG) const {
9571 SDLoc DL(Op);
9572 SDValue Vec = Op.getOperand(0);
9573 EVT VecEVT = Vec.getValueType();
9574
9575 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9576
9577 // Due to ordering in legalize types we may have a vector type that needs to
9578 // be split. Do that manually so we can get down to a legal type.
9579 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9580 TargetLowering::TypeSplitVector) {
9581 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9582 VecEVT = Lo.getValueType();
9583 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9584 }
9585
9586 // TODO: The type may need to be widened rather than split. Or widened before
9587 // it can be split.
9588 if (!isTypeLegal(VecEVT))
9589 return SDValue();
9590
9591 MVT VecVT = VecEVT.getSimpleVT();
9592 MVT VecEltVT = VecVT.getVectorElementType();
9593 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9594
9595 MVT ContainerVT = VecVT;
9596 if (VecVT.isFixedLengthVector()) {
9597 ContainerVT = getContainerForFixedLengthVector(VecVT);
9598 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9599 }
9600
9601 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9602
9603 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9604 switch (BaseOpc) {
9605 case ISD::AND:
9606 case ISD::OR:
9607 case ISD::UMAX:
9608 case ISD::UMIN:
9609 case ISD::SMAX:
9610 case ISD::SMIN:
9611 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9612 DAG.getVectorIdxConstant(0, DL));
9613 }
9614 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9615 Mask, VL, DL, DAG, Subtarget);
9616}
9617
9618// Given a reduction op, this function returns the matching reduction opcode,
9619// the vector SDValue and the scalar SDValue required to lower this to a
9620// RISCVISD node.
9621static std::tuple<unsigned, SDValue, SDValue>
9623 const RISCVSubtarget &Subtarget) {
9624 SDLoc DL(Op);
9625 auto Flags = Op->getFlags();
9626 unsigned Opcode = Op.getOpcode();
9627 switch (Opcode) {
9628 default:
9629 llvm_unreachable("Unhandled reduction");
9630 case ISD::VECREDUCE_FADD: {
9631 // Use positive zero if we can. It is cheaper to materialize.
9632 SDValue Zero =
9633 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9634 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9635 }
9636 case ISD::VECREDUCE_SEQ_FADD:
9637 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9638 Op.getOperand(0));
9639 case ISD::VECREDUCE_FMINIMUM:
9640 case ISD::VECREDUCE_FMAXIMUM:
9641 case ISD::VECREDUCE_FMIN:
9642 case ISD::VECREDUCE_FMAX: {
9643 SDValue Front =
9644 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9645 DAG.getVectorIdxConstant(0, DL));
9646 unsigned RVVOpc =
9647 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9648 ? RISCVISD::VECREDUCE_FMIN_VL
9649 : RISCVISD::VECREDUCE_FMAX_VL;
9650 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9651 }
9652 }
9653}
9654
9655SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9656 SelectionDAG &DAG) const {
9657 SDLoc DL(Op);
9658 MVT VecEltVT = Op.getSimpleValueType();
9659
9660 unsigned RVVOpcode;
9661 SDValue VectorVal, ScalarVal;
9662 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9663 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9664 MVT VecVT = VectorVal.getSimpleValueType();
9665
9666 MVT ContainerVT = VecVT;
9667 if (VecVT.isFixedLengthVector()) {
9668 ContainerVT = getContainerForFixedLengthVector(VecVT);
9669 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9670 }
9671
9672 MVT ResVT = Op.getSimpleValueType();
9673 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9674 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9675 VL, DL, DAG, Subtarget);
9676 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9677 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9678 return Res;
9679
9680 if (Op->getFlags().hasNoNaNs())
9681 return Res;
9682
9683 // Force output to NaN if any element is Nan.
9684 SDValue IsNan =
9685 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9686 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9687 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9688 MVT XLenVT = Subtarget.getXLenVT();
9689 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9690 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9691 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9692 return DAG.getSelect(
9693 DL, ResVT, NoNaNs, Res,
9694 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9695 ResVT));
9696}
9697
9698SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9699 SelectionDAG &DAG) const {
9700 SDLoc DL(Op);
9701 SDValue Vec = Op.getOperand(1);
9702 EVT VecEVT = Vec.getValueType();
9703
9704 // TODO: The type may need to be widened rather than split. Or widened before
9705 // it can be split.
9706 if (!isTypeLegal(VecEVT))
9707 return SDValue();
9708
9709 MVT VecVT = VecEVT.getSimpleVT();
9710 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9711
9712 if (VecVT.isFixedLengthVector()) {
9713 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9714 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9715 }
9716
9717 SDValue VL = Op.getOperand(3);
9718 SDValue Mask = Op.getOperand(2);
9719 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9720 Vec, Mask, VL, DL, DAG, Subtarget);
9721}
9722
9723SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9724 SelectionDAG &DAG) const {
9725 SDValue Vec = Op.getOperand(0);
9726 SDValue SubVec = Op.getOperand(1);
9727 MVT VecVT = Vec.getSimpleValueType();
9728 MVT SubVecVT = SubVec.getSimpleValueType();
9729
9730 SDLoc DL(Op);
9731 MVT XLenVT = Subtarget.getXLenVT();
9732 unsigned OrigIdx = Op.getConstantOperandVal(2);
9733 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9734
9735 // We don't have the ability to slide mask vectors up indexed by their i1
9736 // elements; the smallest we can do is i8. Often we are able to bitcast to
9737 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9738 // into a scalable one, we might not necessarily have enough scalable
9739 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
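  // For example (illustrative): inserting a v8i1 subvector at index 8 of an
  // nxv16i1 vector can instead be done as inserting a v1i8 subvector at
  // index 1 of an nxv2i8 vector.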
9740 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9741 (OrigIdx != 0 || !Vec.isUndef())) {
9742 if (VecVT.getVectorMinNumElements() >= 8 &&
9743 SubVecVT.getVectorMinNumElements() >= 8) {
9744 assert(OrigIdx % 8 == 0 && "Invalid index");
9745 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9746 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9747 "Unexpected mask vector lowering");
9748 OrigIdx /= 8;
9749 SubVecVT =
9750 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9751 SubVecVT.isScalableVector());
9752 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9753 VecVT.isScalableVector());
9754 Vec = DAG.getBitcast(VecVT, Vec);
9755 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9756 } else {
9757 // We can't slide this mask vector up indexed by its i1 elements.
9758 // This poses a problem when we wish to insert a scalable vector which
9759 // can't be re-expressed as a larger type. Just choose the slow path and
9760 // extend to a larger type, then truncate back down.
9761 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9762 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9763 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9764 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9765 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9766 Op.getOperand(2));
9767 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9768 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9769 }
9770 }
9771
9772   // If the subvector is a fixed-length type, we cannot use subregister
9773 // manipulation to simplify the codegen; we don't know which register of a
9774 // LMUL group contains the specific subvector as we only know the minimum
9775 // register size. Therefore we must slide the vector group up the full
9776 // amount.
9777 if (SubVecVT.isFixedLengthVector()) {
9778 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9779 return Op;
9780 MVT ContainerVT = VecVT;
9781 if (VecVT.isFixedLengthVector()) {
9782 ContainerVT = getContainerForFixedLengthVector(VecVT);
9783 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9784 }
9785
9786 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9787 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9788 DAG.getUNDEF(ContainerVT), SubVec,
9789 DAG.getVectorIdxConstant(0, DL));
9790 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9791 return DAG.getBitcast(Op.getValueType(), SubVec);
9792 }
9793
9794 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9795 DAG.getUNDEF(ContainerVT), SubVec,
9796 DAG.getVectorIdxConstant(0, DL));
9797 SDValue Mask =
9798 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9799 // Set the vector length to only the number of elements we care about. Note
9800 // that for slideup this includes the offset.
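    // For example (illustrative): inserting a 4-element subvector at index 2
    // slides it up by 2 with VL = 6, so elements 0..1 of Vec are untouched
    // and elements 2..5 receive the subvector.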
9801 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9802 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9803
9804 // Use tail agnostic policy if we're inserting over Vec's tail.
9805     unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9806     if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9807 Policy = RISCVII::TAIL_AGNOSTIC;
9808
9809 // If we're inserting into the lowest elements, use a tail undisturbed
9810 // vmv.v.v.
9811 if (OrigIdx == 0) {
9812 SubVec =
9813 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9814 } else {
9815 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9816 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9817 SlideupAmt, Mask, VL, Policy);
9818 }
9819
9820 if (VecVT.isFixedLengthVector())
9821 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9822 return DAG.getBitcast(Op.getValueType(), SubVec);
9823 }
9824
9825 unsigned SubRegIdx, RemIdx;
9826 std::tie(SubRegIdx, RemIdx) =
9827       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9828           VecVT, SubVecVT, OrigIdx, TRI);
9829
9830 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9831 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9832 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9833 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9834
9835 // 1. If the Idx has been completely eliminated and this subvector's size is
9836 // a vector register or a multiple thereof, or the surrounding elements are
9837 // undef, then this is a subvector insert which naturally aligns to a vector
9838 // register. These can easily be handled using subregister manipulation.
9839 // 2. If the subvector is smaller than a vector register, then the insertion
9840 // must preserve the undisturbed elements of the register. We do this by
9841 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9842 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9843 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9844 // LMUL=1 type back into the larger vector (resolving to another subregister
9845 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9846 // to avoid allocating a large register group to hold our subvector.
9847 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9848 return Op;
9849
9850   // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9851   // OFFSET<=i<VL set to the "subvector", and VL<=i<VLMAX set to the tail policy
9852 // (in our case undisturbed). This means we can set up a subvector insertion
9853 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9854 // size of the subvector.
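  // For instance (illustrative): to insert a 2-element LMUL=1 subvector at
  // offset 3, we slide it up by 3 with VL = 5; elements 0..2 of the
  // destination are left untouched and elements 3..4 receive the subvector.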
9855 MVT InterSubVT = VecVT;
9856 SDValue AlignedExtract = Vec;
9857 unsigned AlignedIdx = OrigIdx - RemIdx;
9858 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9859 InterSubVT = getLMUL1VT(VecVT);
9860 // Extract a subvector equal to the nearest full vector register type. This
9861 // should resolve to a EXTRACT_SUBREG instruction.
9862 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9863 DAG.getVectorIdxConstant(AlignedIdx, DL));
9864 }
9865
9866 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9867 DAG.getUNDEF(InterSubVT), SubVec,
9868 DAG.getVectorIdxConstant(0, DL));
9869
9870 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9871
9872 ElementCount EndIndex =
9873       ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9874   VL = computeVLMax(SubVecVT, DL, DAG);
9875
9876 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9877   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9878   if (EndIndex == InterSubVT.getVectorElementCount())
9879 Policy = RISCVII::TAIL_AGNOSTIC;
9880
9881 // If we're inserting into the lowest elements, use a tail undisturbed
9882 // vmv.v.v.
9883 if (RemIdx == 0) {
9884 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9885 SubVec, VL);
9886 } else {
9887 SDValue SlideupAmt =
9888 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9889
9890 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9891 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9892
9893 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9894 SlideupAmt, Mask, VL, Policy);
9895 }
9896
9897 // If required, insert this subvector back into the correct vector register.
9898 // This should resolve to an INSERT_SUBREG instruction.
9899 if (VecVT.bitsGT(InterSubVT))
9900 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9901 DAG.getVectorIdxConstant(AlignedIdx, DL));
9902
9903 // We might have bitcast from a mask type: cast back to the original type if
9904 // required.
9905 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9906}
9907
9908SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9909 SelectionDAG &DAG) const {
9910 SDValue Vec = Op.getOperand(0);
9911 MVT SubVecVT = Op.getSimpleValueType();
9912 MVT VecVT = Vec.getSimpleValueType();
9913
9914 SDLoc DL(Op);
9915 MVT XLenVT = Subtarget.getXLenVT();
9916 unsigned OrigIdx = Op.getConstantOperandVal(1);
9917 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9918
9919 // We don't have the ability to slide mask vectors down indexed by their i1
9920 // elements; the smallest we can do is i8. Often we are able to bitcast to
9921 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9922 // from a scalable one, we might not necessarily have enough scalable
9923 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9924 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9925 if (VecVT.getVectorMinNumElements() >= 8 &&
9926 SubVecVT.getVectorMinNumElements() >= 8) {
9927 assert(OrigIdx % 8 == 0 && "Invalid index");
9928 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9929 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9930 "Unexpected mask vector lowering");
9931 OrigIdx /= 8;
9932 SubVecVT =
9933 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9934 SubVecVT.isScalableVector());
9935 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9936 VecVT.isScalableVector());
9937 Vec = DAG.getBitcast(VecVT, Vec);
9938 } else {
9939 // We can't slide this mask vector down, indexed by its i1 elements.
9940 // This poses a problem when we wish to extract a scalable vector which
9941 // can't be re-expressed as a larger type. Just choose the slow path and
9942 // extend to a larger type, then truncate back down.
9943       // TODO: We could probably improve this when extracting a fixed-length
9944       // vector from a fixed-length vector, where we can extract as i8 and shift
9945       // the correct element right to reach the desired subvector.
9946 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9947 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9948 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9949 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9950 Op.getOperand(1));
9951 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9952 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9953 }
9954 }
9955
9956 // With an index of 0 this is a cast-like subvector, which can be performed
9957 // with subregister operations.
9958 if (OrigIdx == 0)
9959 return Op;
9960
9961 const auto VLen = Subtarget.getRealVLen();
9962
9963   // If the subvector is a fixed-length type and we don't know VLEN
9964 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9965 // don't know which register of a LMUL group contains the specific subvector
9966 // as we only know the minimum register size. Therefore we must slide the
9967 // vector group down the full amount.
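  // For example (illustrative): extracting a v4i32 subvector starting at
  // element 6 slides the source group down by 6 with VL = 4 and then takes
  // the low 4 elements of the result.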
9968 if (SubVecVT.isFixedLengthVector() && !VLen) {
9969 MVT ContainerVT = VecVT;
9970 if (VecVT.isFixedLengthVector()) {
9971 ContainerVT = getContainerForFixedLengthVector(VecVT);
9972 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9973 }
9974
9975 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9976 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9977 if (auto ShrunkVT =
9978 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9979 ContainerVT = *ShrunkVT;
9980 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9981 DAG.getVectorIdxConstant(0, DL));
9982 }
9983
9984 SDValue Mask =
9985 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9986 // Set the vector length to only the number of elements we care about. This
9987 // avoids sliding down elements we're going to discard straight away.
9988 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9989 Subtarget);
9990 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9991 SDValue Slidedown =
9992 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9993 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9994 // Now we can use a cast-like subvector extract to get the result.
9995 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9996 DAG.getVectorIdxConstant(0, DL));
9997 return DAG.getBitcast(Op.getValueType(), Slidedown);
9998 }
9999
10000 if (VecVT.isFixedLengthVector()) {
10001 VecVT = getContainerForFixedLengthVector(VecVT);
10002 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10003 }
10004
10005 MVT ContainerSubVecVT = SubVecVT;
10006 if (SubVecVT.isFixedLengthVector())
10007 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10008
10009 unsigned SubRegIdx;
10010 ElementCount RemIdx;
10011 // extract_subvector scales the index by vscale if the subvector is scalable,
10012 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10013 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
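  // For example (illustrative): with VLEN=128, vscale is 2, so a fixed-length
  // index of 5 is passed to the decomposition as 5/2 = 2, and the leftover
  // element (5 % 2) plus the scaled-up scalable remainder is folded back into
  // RemIdx as a fixed element count.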
10014 if (SubVecVT.isFixedLengthVector()) {
10015 assert(VLen);
10016 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10017 auto Decompose =
10018         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10019             VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10020 SubRegIdx = Decompose.first;
10021 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10022 (OrigIdx % Vscale));
10023 } else {
10024 auto Decompose =
10025         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10026             VecVT, ContainerSubVecVT, OrigIdx, TRI);
10027 SubRegIdx = Decompose.first;
10028 RemIdx = ElementCount::getScalable(Decompose.second);
10029 }
10030
10031 // If the Idx has been completely eliminated then this is a subvector extract
10032 // which naturally aligns to a vector register. These can easily be handled
10033 // using subregister manipulation.
10034 if (RemIdx.isZero()) {
10035 if (SubVecVT.isFixedLengthVector()) {
10036 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10037 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10038 }
10039 return Op;
10040 }
10041
10042 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10043 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10044 // divide exactly.
10045 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10046 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10047
10048 // If the vector type is an LMUL-group type, extract a subvector equal to the
10049 // nearest full vector register type.
10050 MVT InterSubVT = VecVT;
10051 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10052 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10053 // we should have successfully decomposed the extract into a subregister.
10054 assert(SubRegIdx != RISCV::NoSubRegister);
10055 InterSubVT = getLMUL1VT(VecVT);
10056 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10057 }
10058
10059 // Slide this vector register down by the desired number of elements in order
10060 // to place the desired subvector starting at element 0.
10061 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10062 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10063 if (SubVecVT.isFixedLengthVector())
10064 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10065 Subtarget);
10066 SDValue Slidedown =
10067 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10068 Vec, SlidedownAmt, Mask, VL);
10069
10070 // Now the vector is in the right position, extract our final subvector. This
10071 // should resolve to a COPY.
10072 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10073 DAG.getVectorIdxConstant(0, DL));
10074
10075 // We might have bitcast from a mask type: cast back to the original type if
10076 // required.
10077 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10078}
10079
10080// Widen a vector's operands to i8, then truncate its results back to the
10081// original type, typically i1. All operand and result types must be the same.
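// For example (illustrative): an operation on nxv4i1 has its operands
// zero-extended to nxv4i8, is re-issued on the wider type, and each result is
// converted back to nxv4i1 with a setcc against zero.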
10082 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10083                                   SelectionDAG &DAG) {
10084 MVT VT = N.getSimpleValueType();
10085 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10086   SmallVector<SDValue, 4> WideOps;
10087   for (SDValue Op : N->ops()) {
10088 assert(Op.getSimpleValueType() == VT &&
10089 "Operands and result must be same type");
10090 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10091 }
10092
10093 unsigned NumVals = N->getNumValues();
10094
10095   SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10096       NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10097 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10098 SmallVector<SDValue, 4> TruncVals;
10099 for (unsigned I = 0; I < NumVals; I++) {
10100 TruncVals.push_back(
10101 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10102 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10103 }
10104
10105 if (TruncVals.size() > 1)
10106 return DAG.getMergeValues(TruncVals, DL);
10107 return TruncVals.front();
10108}
10109
10110SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10111 SelectionDAG &DAG) const {
10112 SDLoc DL(Op);
10113 MVT VecVT = Op.getSimpleValueType();
10114
10115 assert(VecVT.isScalableVector() &&
10116 "vector_interleave on non-scalable vector!");
10117
10118 // 1 bit element vectors need to be widened to e8
10119 if (VecVT.getVectorElementType() == MVT::i1)
10120 return widenVectorOpsToi8(Op, DL, DAG);
10121
10122 // If the VT is LMUL=8, we need to split and reassemble.
10123 if (VecVT.getSizeInBits().getKnownMinValue() ==
10124 (8 * RISCV::RVVBitsPerBlock)) {
10125 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10126 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10127 EVT SplitVT = Op0Lo.getValueType();
10128
10129     SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10130                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10131     SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10132                                 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10133
10134 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10135 ResLo.getValue(0), ResHi.getValue(0));
10136 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10137 ResHi.getValue(1));
10138 return DAG.getMergeValues({Even, Odd}, DL);
10139 }
10140
10141 // Concatenate the two vectors as one vector to deinterleave
10142 MVT ConcatVT =
10143       MVT::getVectorVT(VecVT.getVectorElementType(),
10144                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10145   SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10146 Op.getOperand(0), Op.getOperand(1));
10147
10148   // We want to operate on all lanes, so get the default mask and VL for it.
10149 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10150 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10151
10152 // We can deinterleave through vnsrl.wi if the element type is smaller than
10153 // ELEN
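  // (Illustrative: each adjacent pair of narrow elements is viewed as one
  // element of twice the width; a narrowing shift right by 0 keeps the even
  // (low) halves and a shift by SEW keeps the odd (high) halves.)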
10154 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10155 SDValue Even =
10156 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10157 SDValue Odd =
10158 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10159 return DAG.getMergeValues({Even, Odd}, DL);
10160 }
10161
10162 // For the indices, use the same SEW to avoid an extra vsetvli
10163 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10164 // Create a vector of even indices {0, 2, 4, ...}
10165 SDValue EvenIdx =
10166 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10167 // Create a vector of odd indices {1, 3, 5, ... }
10168 SDValue OddIdx =
10169 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10170
10171 // Gather the even and odd elements into two separate vectors
10172 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10173 Concat, EvenIdx, Passthru, Mask, VL);
10174 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10175 Concat, OddIdx, Passthru, Mask, VL);
10176
10177 // Extract the result half of the gather for even and odd
10178 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10179 DAG.getVectorIdxConstant(0, DL));
10180 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10181 DAG.getVectorIdxConstant(0, DL));
10182
10183 return DAG.getMergeValues({Even, Odd}, DL);
10184}
10185
10186SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10187 SelectionDAG &DAG) const {
10188 SDLoc DL(Op);
10189 MVT VecVT = Op.getSimpleValueType();
10190
10191 assert(VecVT.isScalableVector() &&
10192 "vector_interleave on non-scalable vector!");
10193
10194 // i1 vectors need to be widened to i8
10195 if (VecVT.getVectorElementType() == MVT::i1)
10196 return widenVectorOpsToi8(Op, DL, DAG);
10197
10198 MVT XLenVT = Subtarget.getXLenVT();
10199 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10200
10201 // If the VT is LMUL=8, we need to split and reassemble.
10202 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10203 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10204 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10205 EVT SplitVT = Op0Lo.getValueType();
10206
10207     SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10208                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10209     SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10210                                 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10211
10212 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10213 ResLo.getValue(0), ResLo.getValue(1));
10214 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10215 ResHi.getValue(0), ResHi.getValue(1));
10216 return DAG.getMergeValues({Lo, Hi}, DL);
10217 }
10218
10219 SDValue Interleaved;
10220
10221 // If the element type is smaller than ELEN, then we can interleave with
10222 // vwaddu.vv and vwmaccu.vx
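  // (Illustrative: the wide result is zext(Even) + 2^SEW * zext(Odd), formed
  // as vwaddu.vv(Even, Odd) followed by vwmaccu.vx with the scalar 2^SEW-1
  // and Odd.)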
10223 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10224 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10225 DAG, Subtarget);
10226 } else {
10227     // Otherwise, fall back to using vrgatherei16.vv.
10228 MVT ConcatVT =
10229         MVT::getVectorVT(VecVT.getVectorElementType(),
10230                          VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10231     SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10232 Op.getOperand(0), Op.getOperand(1));
10233
10234 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10235
10236 // 0 1 2 3 4 5 6 7 ...
10237 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10238
10239 // 1 1 1 1 1 1 1 1 ...
10240 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10241
10242 // 1 0 1 0 1 0 1 0 ...
10243 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10244 OddMask = DAG.getSetCC(
10245 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10246 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10247         ISD::CondCode::SETNE);
10248 
10249 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10250
10251 // Build up the index vector for interleaving the concatenated vector
10252 // 0 0 1 1 2 2 3 3 ...
10253 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10254 // 0 n 1 n+1 2 n+2 3 n+3 ...
10255 Idx =
10256 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10257
10258 // Then perform the interleave
10259 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10260 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10261 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10262 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10263 }
10264
10265 // Extract the two halves from the interleaved result
10266 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10267 DAG.getVectorIdxConstant(0, DL));
10268 SDValue Hi = DAG.getNode(
10269 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10270       DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10271 
10272 return DAG.getMergeValues({Lo, Hi}, DL);
10273}
10274
10275// Lower step_vector to the vid instruction. Any non-identity step value must
10276 // be accounted for by manual expansion.
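// For example (illustrative): a step of 4 lowers to vid.v followed by a shift
// left by 2, while a non-power-of-two step such as 3 multiplies the vid.v
// result by a splat of 3.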
10277SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10278 SelectionDAG &DAG) const {
10279 SDLoc DL(Op);
10280 MVT VT = Op.getSimpleValueType();
10281 assert(VT.isScalableVector() && "Expected scalable vector");
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10284 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10285 uint64_t StepValImm = Op.getConstantOperandVal(0);
10286 if (StepValImm != 1) {
10287 if (isPowerOf2_64(StepValImm)) {
10288 SDValue StepVal =
10289 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10290 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10291 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10292 } else {
10293 SDValue StepVal = lowerScalarSplat(
10294 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10295 VL, VT, DL, DAG, Subtarget);
10296 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10297 }
10298 }
10299 return StepVec;
10300}
10301
10302// Implement vector_reverse using vrgather.vv with indices determined by
10303// subtracting the id of each element from (VLMAX-1). This will convert
10304// the indices like so:
10305// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10306// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
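// For example (illustrative): with VLMAX = 8 the gather indices are
// 7, 6, ..., 0, computed as splat(VLMAX-1) - vid.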
10307SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10308 SelectionDAG &DAG) const {
10309 SDLoc DL(Op);
10310 MVT VecVT = Op.getSimpleValueType();
10311 if (VecVT.getVectorElementType() == MVT::i1) {
10312 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10313 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10314 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10315 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10316 }
10317 unsigned EltSize = VecVT.getScalarSizeInBits();
10318 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10319 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10320 unsigned MaxVLMAX =
10321 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10322
10323 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10324 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10325
10326 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10327 // to use vrgatherei16.vv.
10328 // TODO: It's also possible to use vrgatherei16.vv for other types to
10329 // decrease register width for the index calculation.
10330 if (MaxVLMAX > 256 && EltSize == 8) {
10331     // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10332 // Reverse each half, then reassemble them in reverse order.
10333     // NOTE: It's also possible that after splitting, VLMAX no longer
10334 // requires vrgatherei16.vv.
10335 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10336 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10337 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10338 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10339 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10340 // Reassemble the low and high pieces reversed.
10341 // FIXME: This is a CONCAT_VECTORS.
10342 SDValue Res =
10343 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10344 DAG.getVectorIdxConstant(0, DL));
10345 return DAG.getNode(
10346 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10347 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10348 }
10349
10350 // Just promote the int type to i16 which will double the LMUL.
10351 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10352 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10353 }
10354
10355 MVT XLenVT = Subtarget.getXLenVT();
10356 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10357
10358 // Calculate VLMAX-1 for the desired SEW.
10359 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10360 computeVLMax(VecVT, DL, DAG),
10361 DAG.getConstant(1, DL, XLenVT));
10362
10363 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10364 bool IsRV32E64 =
10365 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10366 SDValue SplatVL;
10367 if (!IsRV32E64)
10368 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10369 else
10370 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10371 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10372
10373 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10374 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10375 DAG.getUNDEF(IntVT), Mask, VL);
10376
10377 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10378 DAG.getUNDEF(VecVT), Mask, VL);
10379}
10380
10381SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 SDLoc DL(Op);
10384 SDValue V1 = Op.getOperand(0);
10385 SDValue V2 = Op.getOperand(1);
10386 MVT XLenVT = Subtarget.getXLenVT();
10387 MVT VecVT = Op.getSimpleValueType();
10388
10389 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10390
10391 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10392 SDValue DownOffset, UpOffset;
10393 if (ImmValue >= 0) {
10394 // The operand is a TargetConstant, we need to rebuild it as a regular
10395 // constant.
10396 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10397 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10398 } else {
10399 // The operand is a TargetConstant, we need to rebuild it as a regular
10400 // constant rather than negating the original operand.
10401 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10402 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10403 }
10404
10405 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10406
10407 SDValue SlideDown =
10408 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10409 DownOffset, TrueMask, UpOffset);
10410 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10411 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10412                      RISCVII::TAIL_AGNOSTIC);
10413 }
10414
10415SDValue
10416RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10417 SelectionDAG &DAG) const {
10418 SDLoc DL(Op);
10419 auto *Load = cast<LoadSDNode>(Op);
10420
10421   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10422                                         Load->getMemoryVT(),
10423 *Load->getMemOperand()) &&
10424 "Expecting a correctly-aligned load");
10425
10426 MVT VT = Op.getSimpleValueType();
10427 MVT XLenVT = Subtarget.getXLenVT();
10428 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10429
10430 // If we know the exact VLEN and our fixed length vector completely fills
10431 // the container, use a whole register load instead.
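  // For example (illustrative): if VLEN is known to be exactly 128, a v4i32
  // load fills an LMUL=1 register exactly (MinVLMAX == MaxVLMAX == 4), so an
  // ordinary whole-register load suffices and no VL needs to be set up.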
10432 const auto [MinVLMAX, MaxVLMAX] =
10433 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10434 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10435 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10436 MachineMemOperand *MMO = Load->getMemOperand();
10437 SDValue NewLoad =
10438 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10439 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10440 MMO->getAAInfo(), MMO->getRanges());
10441 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10442 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10443 }
10444
10445 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10446
10447 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10448 SDValue IntID = DAG.getTargetConstant(
10449 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10450 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10451 if (!IsMaskOp)
10452 Ops.push_back(DAG.getUNDEF(ContainerVT));
10453 Ops.push_back(Load->getBasePtr());
10454 Ops.push_back(VL);
10455 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10456 SDValue NewLoad =
10457       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10458                               Load->getMemoryVT(), Load->getMemOperand());
10459
10460 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10461 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10462}
10463
10464SDValue
10465RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10466 SelectionDAG &DAG) const {
10467 SDLoc DL(Op);
10468 auto *Store = cast<StoreSDNode>(Op);
10469
10470   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10471                                         Store->getMemoryVT(),
10472 *Store->getMemOperand()) &&
10473 "Expecting a correctly-aligned store");
10474
10475 SDValue StoreVal = Store->getValue();
10476 MVT VT = StoreVal.getSimpleValueType();
10477 MVT XLenVT = Subtarget.getXLenVT();
10478
10479   // If the size is less than a byte, we need to pad with zeros to make a byte.
10480 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10481 VT = MVT::v8i1;
10482 StoreVal =
10483 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10484 StoreVal, DAG.getVectorIdxConstant(0, DL));
10485 }
10486
10487 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10488
10489 SDValue NewValue =
10490 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10491
10492
10493 // If we know the exact VLEN and our fixed length vector completely fills
10494 // the container, use a whole register store instead.
10495 const auto [MinVLMAX, MaxVLMAX] =
10496 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10497 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10498 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10499 MachineMemOperand *MMO = Store->getMemOperand();
10500 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10501 MMO->getPointerInfo(), MMO->getBaseAlign(),
10502 MMO->getFlags(), MMO->getAAInfo());
10503 }
10504
10505 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10506 Subtarget);
10507
10508 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10509 SDValue IntID = DAG.getTargetConstant(
10510 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10511 return DAG.getMemIntrinsicNode(
10512 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10513 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10514 Store->getMemoryVT(), Store->getMemOperand());
10515}
10516
10517SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10518 SelectionDAG &DAG) const {
10519 SDLoc DL(Op);
10520 MVT VT = Op.getSimpleValueType();
10521
10522 const auto *MemSD = cast<MemSDNode>(Op);
10523 EVT MemVT = MemSD->getMemoryVT();
10524 MachineMemOperand *MMO = MemSD->getMemOperand();
10525 SDValue Chain = MemSD->getChain();
10526 SDValue BasePtr = MemSD->getBasePtr();
10527
10528 SDValue Mask, PassThru, VL;
10529 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10530 Mask = VPLoad->getMask();
10531 PassThru = DAG.getUNDEF(VT);
10532 VL = VPLoad->getVectorLength();
10533 } else {
10534 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10535 Mask = MLoad->getMask();
10536 PassThru = MLoad->getPassThru();
10537 }
10538
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10540
10541 MVT XLenVT = Subtarget.getXLenVT();
10542
10543 MVT ContainerVT = VT;
10544 if (VT.isFixedLengthVector()) {
10545 ContainerVT = getContainerForFixedLengthVector(VT);
10546 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10547 if (!IsUnmasked) {
10548 MVT MaskVT = getMaskTypeFor(ContainerVT);
10549 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10550 }
10551 }
10552
10553 if (!VL)
10554 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10555
10556 unsigned IntID =
10557 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10558 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10559 if (IsUnmasked)
10560 Ops.push_back(DAG.getUNDEF(ContainerVT));
10561 else
10562 Ops.push_back(PassThru);
10563 Ops.push_back(BasePtr);
10564 if (!IsUnmasked)
10565 Ops.push_back(Mask);
10566 Ops.push_back(VL);
10567 if (!IsUnmasked)
10568     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10569 
10570 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10571
10572 SDValue Result =
10573 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10574 Chain = Result.getValue(1);
10575
10576 if (VT.isFixedLengthVector())
10577 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10578
10579 return DAG.getMergeValues({Result, Chain}, DL);
10580}
10581
10582SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10583 SelectionDAG &DAG) const {
10584 SDLoc DL(Op);
10585
10586 const auto *MemSD = cast<MemSDNode>(Op);
10587 EVT MemVT = MemSD->getMemoryVT();
10588 MachineMemOperand *MMO = MemSD->getMemOperand();
10589 SDValue Chain = MemSD->getChain();
10590 SDValue BasePtr = MemSD->getBasePtr();
10591 SDValue Val, Mask, VL;
10592
10593 bool IsCompressingStore = false;
10594 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10595 Val = VPStore->getValue();
10596 Mask = VPStore->getMask();
10597 VL = VPStore->getVectorLength();
10598 } else {
10599 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10600 Val = MStore->getValue();
10601 Mask = MStore->getMask();
10602 IsCompressingStore = MStore->isCompressingStore();
10603 }
10604
10605 bool IsUnmasked =
10606 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10607
10608 MVT VT = Val.getSimpleValueType();
10609 MVT XLenVT = Subtarget.getXLenVT();
10610
10611 MVT ContainerVT = VT;
10612 if (VT.isFixedLengthVector()) {
10613 ContainerVT = getContainerForFixedLengthVector(VT);
10614
10615 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10616 if (!IsUnmasked || IsCompressingStore) {
10617 MVT MaskVT = getMaskTypeFor(ContainerVT);
10618 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10619 }
10620 }
10621
10622 if (!VL)
10623 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10624
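  // A compressing store first packs the active elements to the front with
  // vcompress; vcpop on the mask then gives the number of packed elements,
  // which becomes the new VL for the store.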
10625 if (IsCompressingStore) {
10626 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10628 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10629 VL =
10630 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10631 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10632 }
10633
10634 unsigned IntID =
10635 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10636 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10637 Ops.push_back(Val);
10638 Ops.push_back(BasePtr);
10639 if (!IsUnmasked)
10640 Ops.push_back(Mask);
10641 Ops.push_back(VL);
10642
10643   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10644                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10645}
10646
10647SDValue
10648RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10649 SelectionDAG &DAG) const {
10650 MVT InVT = Op.getOperand(0).getSimpleValueType();
10651 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10652
10653 MVT VT = Op.getSimpleValueType();
10654
10655 SDValue Op1 =
10656 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10657 SDValue Op2 =
10658 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10659
10660 SDLoc DL(Op);
10661 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10662 DAG, Subtarget);
10663 MVT MaskVT = getMaskTypeFor(ContainerVT);
10664
10665 SDValue Cmp =
10666 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10667 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10668
10669 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10670}
10671
10672SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10673 SelectionDAG &DAG) const {
10674 unsigned Opc = Op.getOpcode();
10675 SDLoc DL(Op);
10676 SDValue Chain = Op.getOperand(0);
10677 SDValue Op1 = Op.getOperand(1);
10678 SDValue Op2 = Op.getOperand(2);
10679 SDValue CC = Op.getOperand(3);
10680 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10681 MVT VT = Op.getSimpleValueType();
10682 MVT InVT = Op1.getSimpleValueType();
10683
10684   // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10685   // condition codes.
10686 if (Opc == ISD::STRICT_FSETCCS) {
10687 // Expand strict_fsetccs(x, oeq) to
10688 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10689 SDVTList VTList = Op->getVTList();
10690 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10691 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10692 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10693 Op2, OLECCVal);
10694 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10695 Op1, OLECCVal);
10696 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10697 Tmp1.getValue(1), Tmp2.getValue(1));
10698 // Tmp1 and Tmp2 might be the same node.
10699 if (Tmp1 != Tmp2)
10700 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10701 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10702 }
10703
10704 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10705 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10706 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10707 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10708 Op2, OEQCCVal);
10709 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10710 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10711 }
10712 }
10713
10714 MVT ContainerInVT = InVT;
10715 if (InVT.isFixedLengthVector()) {
10716 ContainerInVT = getContainerForFixedLengthVector(InVT);
10717 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10718 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10719 }
10720 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10721
10722 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10723
10724 SDValue Res;
10725 if (Opc == ISD::STRICT_FSETCC &&
10726 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10727 CCVal == ISD::SETOLE)) {
10728     // VMFLT/VMFLE/VMFGT/VMFGE raise exceptions for qNaN. Generate a mask that
10729     // is only active when both input elements are ordered.
10730 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10731 SDValue OrderMask1 = DAG.getNode(
10732 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10733 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10734 True, VL});
10735 SDValue OrderMask2 = DAG.getNode(
10736 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10737 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10738 True, VL});
10739 Mask =
10740 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10741 // Use Mask as the merge operand to let the result be 0 if either of the
10742 // inputs is unordered.
10743     Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10744                       DAG.getVTList(MaskVT, MVT::Other),
10745 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10746 } else {
10747 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10748                                                 : RISCVISD::STRICT_FSETCCS_VL;
10749     Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10750 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10751 }
10752
10753 if (VT.isFixedLengthVector()) {
10754 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10755 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10756 }
10757 return Res;
10758}
10759
10760// Lower vector ABS to smax(X, sub(0, X)).
10761SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10762 SDLoc DL(Op);
10763 MVT VT = Op.getSimpleValueType();
10764 SDValue X = Op.getOperand(0);
10765
10766 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10767 "Unexpected type for ISD::ABS");
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10773 }
10774
10775 SDValue Mask, VL;
10776 if (Op->getOpcode() == ISD::VP_ABS) {
10777 Mask = Op->getOperand(1);
10778 if (VT.isFixedLengthVector())
10779 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10780 Subtarget);
10781 VL = Op->getOperand(2);
10782 } else
10783 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10784
10785 SDValue SplatZero = DAG.getNode(
10786 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10787 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10788 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10789 DAG.getUNDEF(ContainerVT), Mask, VL);
10790 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10791 DAG.getUNDEF(ContainerVT), Mask, VL);
10792
10793 if (VT.isFixedLengthVector())
10794 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10795 return Max;
10796}
10797
10798SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10799 SDValue Op, SelectionDAG &DAG) const {
10800 SDLoc DL(Op);
10801 MVT VT = Op.getSimpleValueType();
10802 SDValue Mag = Op.getOperand(0);
10803 SDValue Sign = Op.getOperand(1);
10804 assert(Mag.getValueType() == Sign.getValueType() &&
10805 "Can only handle COPYSIGN with matching types.");
10806
10807 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10808 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10809 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10810
10811 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10812
10813 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10814 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10815
10816 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10817}
10818
10819SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10820 SDValue Op, SelectionDAG &DAG) const {
10821 MVT VT = Op.getSimpleValueType();
10822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10823
10824 MVT I1ContainerVT =
10825 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10826
10827 SDValue CC =
10828 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10829 SDValue Op1 =
10830 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10831 SDValue Op2 =
10832 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10833
10834 SDLoc DL(Op);
10835 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10836
10837 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10838 Op2, DAG.getUNDEF(ContainerVT), VL);
10839
10840 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10841}
10842
10843SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10844 SelectionDAG &DAG) const {
10845 unsigned NewOpc = getRISCVVLOp(Op);
10846 bool HasMergeOp = hasMergeOp(NewOpc);
10847 bool HasMask = hasMaskOp(NewOpc);
10848
10849 MVT VT = Op.getSimpleValueType();
10850 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10851
10853   SmallVector<SDValue, 6> Ops;
10854   for (const SDValue &V : Op->op_values()) {
10854 for (const SDValue &V : Op->op_values()) {
10855 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10856
10857 // Pass through non-vector operands.
10858 if (!V.getValueType().isVector()) {
10859 Ops.push_back(V);
10860 continue;
10861 }
10862
10863 // "cast" fixed length vector to a scalable vector.
10864 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10865 "Only fixed length vectors are supported!");
10866 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10867 }
10868
10869 SDLoc DL(Op);
10870 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10871 if (HasMergeOp)
10872 Ops.push_back(DAG.getUNDEF(ContainerVT));
10873 if (HasMask)
10874 Ops.push_back(Mask);
10875 Ops.push_back(VL);
10876
10877 // StrictFP operations have two result values. Their lowered result should
10878 // have same result count.
10879 if (Op->isStrictFPOpcode()) {
10880 SDValue ScalableRes =
10881 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10882 Op->getFlags());
10883 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10884 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10885 }
10886
10887 SDValue ScalableRes =
10888 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10889 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10890}
10891
10892// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10893// * Operands of each node are assumed to be in the same order.
10894// * The EVL operand is promoted from i32 to i64 on RV64.
10895// * Fixed-length vectors are converted to their scalable-vector container
10896// types.
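// For example (illustrative): ISD::VP_ADD on a fixed-length vector is
// re-issued as RISCVISD::ADD_VL on its scalable container type, with an undef
// merge operand inserted ahead of the mask.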
10897SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10898 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10899 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10900
10901 SDLoc DL(Op);
10902 MVT VT = Op.getSimpleValueType();
10903   SmallVector<SDValue, 16> Ops;
10904 
10905 MVT ContainerVT = VT;
10906 if (VT.isFixedLengthVector())
10907 ContainerVT = getContainerForFixedLengthVector(VT);
10908
10909 for (const auto &OpIdx : enumerate(Op->ops())) {
10910 SDValue V = OpIdx.value();
10911 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10912     // Add a dummy merge value before the mask, or, if there isn't a mask,
10913     // before the EVL.
10914 if (HasMergeOp) {
10915 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10916 if (MaskIdx) {
10917 if (*MaskIdx == OpIdx.index())
10918 Ops.push_back(DAG.getUNDEF(ContainerVT));
10919 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10920 OpIdx.index()) {
10921 if (Op.getOpcode() == ISD::VP_MERGE) {
10922 // For VP_MERGE, copy the false operand instead of an undef value.
10923 Ops.push_back(Ops.back());
10924 } else {
10925 assert(Op.getOpcode() == ISD::VP_SELECT);
10926 // For VP_SELECT, add an undef value.
10927 Ops.push_back(DAG.getUNDEF(ContainerVT));
10928 }
10929 }
10930 }
10931 // Pass through operands which aren't fixed-length vectors.
10932 if (!V.getValueType().isFixedLengthVector()) {
10933 Ops.push_back(V);
10934 continue;
10935 }
10936 // "cast" fixed length vector to a scalable vector.
10937 MVT OpVT = V.getSimpleValueType();
10938 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10939 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10940 "Only fixed length vectors are supported!");
10941 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10942 }
10943
10944 if (!VT.isFixedLengthVector())
10945 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10946
10947 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10948
10949 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10950}
10951
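// Lower vp.zext/vp.sext of an i1 vector by merging a splat of 1 (zero-extend)
// or -1 (sign-extend) over a splat of 0, using the i1 source vector as the
// mask of the VMERGE_VL; the VP mask operand itself is dropped.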
10952SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10953 SelectionDAG &DAG) const {
10954 SDLoc DL(Op);
10955 MVT VT = Op.getSimpleValueType();
10956
10957 SDValue Src = Op.getOperand(0);
10958 // NOTE: Mask is dropped.
10959 SDValue VL = Op.getOperand(2);
10960
10961 MVT ContainerVT = VT;
10962 if (VT.isFixedLengthVector()) {
10963 ContainerVT = getContainerForFixedLengthVector(VT);
10964 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10965 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10966 }
10967
10968 MVT XLenVT = Subtarget.getXLenVT();
10969 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10970 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10971 DAG.getUNDEF(ContainerVT), Zero, VL);
10972
10973 SDValue SplatValue = DAG.getConstant(
10974 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10975 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10976 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10977
10978 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10979 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10980 if (!VT.isFixedLengthVector())
10981 return Result;
10982 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10983}
10984
10985SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10986 SelectionDAG &DAG) const {
10987 SDLoc DL(Op);
10988 MVT VT = Op.getSimpleValueType();
10989
10990 SDValue Op1 = Op.getOperand(0);
10991 SDValue Op2 = Op.getOperand(1);
10992 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10993 // NOTE: Mask is dropped.
10994 SDValue VL = Op.getOperand(4);
10995
10996 MVT ContainerVT = VT;
10997 if (VT.isFixedLengthVector()) {
10998 ContainerVT = getContainerForFixedLengthVector(VT);
10999 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11000 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11001 }
11002 
11003   SDValue Result;
11004   SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11005
11006 switch (Condition) {
11007 default:
11008 break;
11009 // X != Y --> (X^Y)
11010 case ISD::SETNE:
11011 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11012 break;
11013 // X == Y --> ~(X^Y)
11014 case ISD::SETEQ: {
11015 SDValue Temp =
11016 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11017 Result =
11018 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11019 break;
11020 }
11021 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11022 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11023 case ISD::SETGT:
11024 case ISD::SETULT: {
11025 SDValue Temp =
11026 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11027 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11028 break;
11029 }
11030 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11031 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11032 case ISD::SETLT:
11033 case ISD::SETUGT: {
11034 SDValue Temp =
11035 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11036 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11037 break;
11038 }
11039 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11040 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11041 case ISD::SETGE:
11042 case ISD::SETULE: {
11043 SDValue Temp =
11044 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11045 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11046 break;
11047 }
11048 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11049 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11050 case ISD::SETLE:
11051 case ISD::SETUGE: {
11052 SDValue Temp =
11053 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11054 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11055 break;
11056 }
11057 }
11058
11059 if (!VT.isFixedLengthVector())
11060 return Result;
11061 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11062}
11063
11064// Lower Floating-Point/Integer Type-Convert VP SDNodes
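// For example (illustrative): a vp.sitofp from i8 elements to f64 elements
// first sign-extends the source to i32 (half the destination width) and then
// converts, while a vp.fptosi from f64 to i8 converts to i32 first and then
// truncates in halving steps.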
11065SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11066 SelectionDAG &DAG) const {
11067 SDLoc DL(Op);
11068
11069 SDValue Src = Op.getOperand(0);
11070 SDValue Mask = Op.getOperand(1);
11071 SDValue VL = Op.getOperand(2);
11072 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11073
11074 MVT DstVT = Op.getSimpleValueType();
11075 MVT SrcVT = Src.getSimpleValueType();
11076 if (DstVT.isFixedLengthVector()) {
11077 DstVT = getContainerForFixedLengthVector(DstVT);
11078 SrcVT = getContainerForFixedLengthVector(SrcVT);
11079 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11080 MVT MaskVT = getMaskTypeFor(DstVT);
11081 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11082 }
11083
11084 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11085 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11086 
11087   SDValue Result;
11088 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11089 if (SrcVT.isInteger()) {
11090 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11091
11092 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11093                                     ? RISCVISD::VSEXT_VL
11094                                     : RISCVISD::VZEXT_VL;
11095 
11096 // Do we need to do any pre-widening before converting?
11097 if (SrcEltSize == 1) {
11098 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11099 MVT XLenVT = Subtarget.getXLenVT();
11100 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11101 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11102 DAG.getUNDEF(IntVT), Zero, VL);
11103 SDValue One = DAG.getConstant(
11104 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11105 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11106 DAG.getUNDEF(IntVT), One, VL);
11107 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11108 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11109 } else if (DstEltSize > (2 * SrcEltSize)) {
11110 // Widen before converting.
11111 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11112 DstVT.getVectorElementCount());
11113 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11114 }
11115
11116 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11117 } else {
11118 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11119 "Wrong input/output vector types");
11120
11121 // Convert f16 to f32 then convert f32 to i64.
11122 if (DstEltSize > (2 * SrcEltSize)) {
11123 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11124 MVT InterimFVT =
11125 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11126 Src =
11127 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11128 }
11129
11130 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11131 }
11132 } else { // Narrowing + Conversion
11133 if (SrcVT.isInteger()) {
11134 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11135 // First do a narrowing convert to an FP type half the size, then round
11136 // the FP type to a small FP type if needed.
11137
11138 MVT InterimFVT = DstVT;
11139 if (SrcEltSize > (2 * DstEltSize)) {
11140 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11141 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11142 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11143 }
11144
11145 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11146
11147 if (InterimFVT != DstVT) {
11148 Src = Result;
11149 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11150 }
11151 } else {
11152 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11153 "Wrong input/output vector types");
11154 // First do a narrowing conversion to an integer half the size, then
11155 // truncate if needed.
11156
11157 if (DstEltSize == 1) {
11158 // First convert to the same size integer, then convert to mask using
11159 // setcc.
11160 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11161 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11162 DstVT.getVectorElementCount());
11163 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11164
11165 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11166 // otherwise the conversion was undefined.
11167 MVT XLenVT = Subtarget.getXLenVT();
11168 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11169 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11170 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11171 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11172 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11173 DAG.getUNDEF(DstVT), Mask, VL});
11174 } else {
11175 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11176 DstVT.getVectorElementCount());
11177
11178 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11179
11180 while (InterimIVT != DstVT) {
11181 SrcEltSize /= 2;
11182 Src = Result;
11183 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11184 DstVT.getVectorElementCount());
11185 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11186 Src, Mask, VL);
11187 }
11188 }
11189 }
11190 }
11191
11192 MVT VT = Op.getSimpleValueType();
11193 if (!VT.isFixedLengthVector())
11194 return Result;
11195 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11196}
11197
11198SDValue
11199RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11200 SelectionDAG &DAG) const {
11201 SDLoc DL(Op);
11202
11203 SDValue Op1 = Op.getOperand(0);
11204 SDValue Op2 = Op.getOperand(1);
11205 SDValue Offset = Op.getOperand(2);
11206 SDValue Mask = Op.getOperand(3);
11207 SDValue EVL1 = Op.getOperand(4);
11208 SDValue EVL2 = Op.getOperand(5);
11209
11210 const MVT XLenVT = Subtarget.getXLenVT();
11211 MVT VT = Op.getSimpleValueType();
11212 MVT ContainerVT = VT;
11213 if (VT.isFixedLengthVector()) {
11214 ContainerVT = getContainerForFixedLengthVector(VT);
11215 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11216 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11217 MVT MaskVT = getMaskTypeFor(ContainerVT);
11218 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11219 }
11220
11221 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11222 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11223
11224 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11225 if (IsMaskVector) {
11226 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11227
11228 // Expand input operands
11229 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11230 DAG.getUNDEF(ContainerVT),
11231 DAG.getConstant(1, DL, XLenVT), EVL1);
11232 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11233 DAG.getUNDEF(ContainerVT),
11234 DAG.getConstant(0, DL, XLenVT), EVL1);
11235 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11236 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11237
11238 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11239 DAG.getUNDEF(ContainerVT),
11240 DAG.getConstant(1, DL, XLenVT), EVL2);
11241 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11242 DAG.getUNDEF(ContainerVT),
11243 DAG.getConstant(0, DL, XLenVT), EVL2);
11244 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11245 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11246 }
11247
11248 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11249 SDValue DownOffset, UpOffset;
11250 if (ImmValue >= 0) {
11251 // The operand is a TargetConstant, we need to rebuild it as a regular
11252 // constant.
11253 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11254 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11255 } else {
11256 // The operand is a TargetConstant, we need to rebuild it as a regular
11257 // constant rather than negating the original operand.
11258 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11259 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11260 }
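// Illustrative example: with EVL1 = 8 and Offset = 2, DownOffset = 2 and
// UpOffset = 6, so the slidedown leaves Op1[2..7] in lanes 0..5 and the
// slideup then places Op2's elements starting at lane 6.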
11261
11262 SDValue SlideDown =
11263 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11264 Op1, DownOffset, Mask, UpOffset);
11265 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11266 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11267
11268 if (IsMaskVector) {
11269 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11270 Result = DAG.getNode(
11271 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11272 {Result, DAG.getConstant(0, DL, ContainerVT),
11273 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11274 Mask, EVL2});
11275 }
11276
11277 if (!VT.isFixedLengthVector())
11278 return Result;
11279 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11280}
11281
11282SDValue
11283RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11284 SelectionDAG &DAG) const {
11285 SDLoc DL(Op);
11286 MVT VT = Op.getSimpleValueType();
11287 MVT XLenVT = Subtarget.getXLenVT();
11288
11289 SDValue Op1 = Op.getOperand(0);
11290 SDValue Mask = Op.getOperand(1);
11291 SDValue EVL = Op.getOperand(2);
11292
11293 MVT ContainerVT = VT;
11294 if (VT.isFixedLengthVector()) {
11295 ContainerVT = getContainerForFixedLengthVector(VT);
11296 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11297 MVT MaskVT = getMaskTypeFor(ContainerVT);
11298 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11299 }
11300
11301 MVT GatherVT = ContainerVT;
11302 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11303 // Check if we are working with mask vectors
11304 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11305 if (IsMaskVector) {
11306 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11307
11308 // Expand input operand
11309 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11310 DAG.getUNDEF(IndicesVT),
11311 DAG.getConstant(1, DL, XLenVT), EVL);
11312 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11313 DAG.getUNDEF(IndicesVT),
11314 DAG.getConstant(0, DL, XLenVT), EVL);
11315 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11316 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11317 }
11318
11319 unsigned EltSize = GatherVT.getScalarSizeInBits();
11320 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11321 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11322 unsigned MaxVLMAX =
11323 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11324
11325 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11326 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11327 // to use vrgatherei16.vv.
11328 // TODO: It's also possible to use vrgatherei16.vv for other types to
11329 // decrease register width for the index calculation.
11330 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11331 if (MaxVLMAX > 256 && EltSize == 8) {
11332 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11333 // Split the vector in half and reverse each half using a full register
11334 // reverse.
11335 // Swap the halves and concatenate them.
11336 // Slide the concatenated result by (VLMax - VL).
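// For example, with VLMAX = 16 and EVL = 10, the concatenated full-register
// reverse holds v[15]..v[0]; sliding it down by 16 - 10 = 6 leaves
// v[9]..v[0] in lanes 0..9, i.e. the reverse of the first EVL elements.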
11337 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11338 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11339 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11340
11341 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11342 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11343
11344 // Reassemble the low and high pieces reversed.
11345 // NOTE: this Result is unmasked (because we do not need masks for
11346 // shuffles). If in the future this has to change, we can use a SELECT_VL
11347 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11348 SDValue Result =
11349 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11350
11351 // Slide off any elements from past EVL that were reversed into the low
11352 // elements.
11353 unsigned MinElts = GatherVT.getVectorMinNumElements();
11354 SDValue VLMax =
11355 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11356 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11357
11358 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11359 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11360
11361 if (IsMaskVector) {
11362 // Truncate Result back to a mask vector
11363 Result =
11364 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11365 {Result, DAG.getConstant(0, DL, GatherVT),
11366 DAG.getCondCode(ISD::SETNE),
11367 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11368 }
11369
11370 if (!VT.isFixedLengthVector())
11371 return Result;
11372 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11373 }
11374
11375 // Just promote the int type to i16 which will double the LMUL.
11376 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11377 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11378 }
11379
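// General path: build the index vector (EVL-1) - vid, e.g. EVL = 4 yields
// <3,2,1,0>, and let vrgather pull the source elements out in reverse order.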
11380 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11381 SDValue VecLen =
11382 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11383 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11384 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11385 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11386 DAG.getUNDEF(IndicesVT), Mask, EVL);
11387 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11388 DAG.getUNDEF(GatherVT), Mask, EVL);
11389
11390 if (IsMaskVector) {
11391 // Truncate Result back to a mask vector
11392 Result = DAG.getNode(
11393 RISCVISD::SETCC_VL, DL, ContainerVT,
11394 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11395 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11396 }
11397
11398 if (!VT.isFixedLengthVector())
11399 return Result;
11400 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11401}
11402
11403SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 MVT VT = Op.getSimpleValueType();
11406 if (VT.getVectorElementType() != MVT::i1)
11407 return lowerVPOp(Op, DAG);
11408
11409 // It is safe to drop mask parameter as masked-off elements are undef.
11410 SDValue Op1 = Op->getOperand(0);
11411 SDValue Op2 = Op->getOperand(1);
11412 SDValue VL = Op->getOperand(3);
11413
11414 MVT ContainerVT = VT;
11415 const bool IsFixed = VT.isFixedLengthVector();
11416 if (IsFixed) {
11417 ContainerVT = getContainerForFixedLengthVector(VT);
11418 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11419 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11420 }
11421
11422 SDLoc DL(Op);
11423 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11424 if (!IsFixed)
11425 return Val;
11426 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 MVT XLenVT = Subtarget.getXLenVT();
11433 MVT VT = Op.getSimpleValueType();
11434 MVT ContainerVT = VT;
11435 if (VT.isFixedLengthVector())
11436 ContainerVT = getContainerForFixedLengthVector(VT);
11437
11438 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11439
11440 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11441 // Check if the mask is known to be all ones
11442 SDValue Mask = VPNode->getMask();
11443 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11444
11445 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11446 : Intrinsic::riscv_vlse_mask,
11447 DL, XLenVT);
11448 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11449 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11450 VPNode->getStride()};
11451 if (!IsUnmasked) {
11452 if (VT.isFixedLengthVector()) {
11453 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11454 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11455 }
11456 Ops.push_back(Mask);
11457 }
11458 Ops.push_back(VPNode->getVectorLength());
11459 if (!IsUnmasked) {
11460 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11461 Ops.push_back(Policy);
11462 }
11463
11464 SDValue Result =
11465 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11466 VPNode->getMemoryVT(), VPNode->getMemOperand());
11467 SDValue Chain = Result.getValue(1);
11468
11469 if (VT.isFixedLengthVector())
11470 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11471
11472 return DAG.getMergeValues({Result, Chain}, DL);
11473}
11474
11475SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 SDLoc DL(Op);
11478 MVT XLenVT = Subtarget.getXLenVT();
11479
11480 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11481 SDValue StoreVal = VPNode->getValue();
11482 MVT VT = StoreVal.getSimpleValueType();
11483 MVT ContainerVT = VT;
11484 if (VT.isFixedLengthVector()) {
11485 ContainerVT = getContainerForFixedLengthVector(VT);
11486 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11487 }
11488
11489 // Check if the mask is known to be all ones
11490 SDValue Mask = VPNode->getMask();
11491 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11492
11493 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11494 : Intrinsic::riscv_vsse_mask,
11495 DL, XLenVT);
11496 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11497 VPNode->getBasePtr(), VPNode->getStride()};
11498 if (!IsUnmasked) {
11499 if (VT.isFixedLengthVector()) {
11500 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11501 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11502 }
11503 Ops.push_back(Mask);
11504 }
11505 Ops.push_back(VPNode->getVectorLength());
11506
11507 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11508 Ops, VPNode->getMemoryVT(),
11509 VPNode->getMemOperand());
11510}
11511
11512// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11513 // matched to an RVV indexed load. The RVV indexed load instructions only
11514// support the "unsigned unscaled" addressing mode; indices are implicitly
11515// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11516// signed or scaled indexing is extended to the XLEN value type and scaled
11517// accordingly.
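// For example, indices addressing i32 elements are interpreted as raw byte
// offsets, so element k must be addressed by 4*k; this lowering only converts
// the index vector (truncating indices wider than XLEN) so vluxei can consume
// it in that form.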
11518SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11519 SelectionDAG &DAG) const {
11520 SDLoc DL(Op);
11521 MVT VT = Op.getSimpleValueType();
11522
11523 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11524 EVT MemVT = MemSD->getMemoryVT();
11525 MachineMemOperand *MMO = MemSD->getMemOperand();
11526 SDValue Chain = MemSD->getChain();
11527 SDValue BasePtr = MemSD->getBasePtr();
11528
11529 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11530 SDValue Index, Mask, PassThru, VL;
11531
11532 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11533 Index = VPGN->getIndex();
11534 Mask = VPGN->getMask();
11535 PassThru = DAG.getUNDEF(VT);
11536 VL = VPGN->getVectorLength();
11537 // VP doesn't support extending loads.
11538 LoadExtType = ISD::NON_EXTLOAD;
11539 } else {
11540 // Else it must be a MGATHER.
11541 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11542 Index = MGN->getIndex();
11543 Mask = MGN->getMask();
11544 PassThru = MGN->getPassThru();
11545 LoadExtType = MGN->getExtensionType();
11546 }
11547
11548 MVT IndexVT = Index.getSimpleValueType();
11549 MVT XLenVT = Subtarget.getXLenVT();
11550
11551 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11552 "Unexpected VTs!");
11553 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11554 // Targets have to explicitly opt-in for extending vector loads.
11555 assert(LoadExtType == ISD::NON_EXTLOAD &&
11556 "Unexpected extending MGATHER/VP_GATHER");
11557
11558 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11559 // the selection of the masked intrinsics doesn't do this for us.
11560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11561
11562 MVT ContainerVT = VT;
11563 if (VT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VT);
11565 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11566 ContainerVT.getVectorElementCount());
11567
11568 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11569
11570 if (!IsUnmasked) {
11571 MVT MaskVT = getMaskTypeFor(ContainerVT);
11572 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11573 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11574 }
11575 }
11576
11577 if (!VL)
11578 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11579
11580 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11581 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11582 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11583 }
11584
11585 unsigned IntID =
11586 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11587 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11588 if (IsUnmasked)
11589 Ops.push_back(DAG.getUNDEF(ContainerVT));
11590 else
11591 Ops.push_back(PassThru);
11592 Ops.push_back(BasePtr);
11593 Ops.push_back(Index);
11594 if (!IsUnmasked)
11595 Ops.push_back(Mask);
11596 Ops.push_back(VL);
11597 if (!IsUnmasked)
11598 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11599
11600 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11601 SDValue Result =
11602 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11603 Chain = Result.getValue(1);
11604
11605 if (VT.isFixedLengthVector())
11606 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11607
11608 return DAG.getMergeValues({Result, Chain}, DL);
11609}
11610
11611// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11612 // matched to an RVV indexed store. The RVV indexed store instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
11617SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11621 EVT MemVT = MemSD->getMemoryVT();
11622 MachineMemOperand *MMO = MemSD->getMemOperand();
11623 SDValue Chain = MemSD->getChain();
11624 SDValue BasePtr = MemSD->getBasePtr();
11625
11626 [[maybe_unused]] bool IsTruncatingStore = false;
11627 SDValue Index, Mask, Val, VL;
11628
11629 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11630 Index = VPSN->getIndex();
11631 Mask = VPSN->getMask();
11632 Val = VPSN->getValue();
11633 VL = VPSN->getVectorLength();
11634 // VP doesn't support truncating stores.
11635 IsTruncatingStore = false;
11636 } else {
11637 // Else it must be a MSCATTER.
11638 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11639 Index = MSN->getIndex();
11640 Mask = MSN->getMask();
11641 Val = MSN->getValue();
11642 IsTruncatingStore = MSN->isTruncatingStore();
11643 }
11644
11645 MVT VT = Val.getSimpleValueType();
11646 MVT IndexVT = Index.getSimpleValueType();
11647 MVT XLenVT = Subtarget.getXLenVT();
11648
11649 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11650 "Unexpected VTs!");
11651 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11652 // Targets have to explicitly opt-in for extending vector loads and
11653 // truncating vector stores.
11654 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11655
11656 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11657 // the selection of the masked intrinsics doesn't do this for us.
11658 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11664 ContainerVT.getVectorElementCount());
11665
11666 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11667 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(ContainerVT);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 }
11673 }
11674
11675 if (!VL)
11676 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11677
11678 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11679 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11680 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11681 }
11682
11683 unsigned IntID =
11684 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11685 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11686 Ops.push_back(Val);
11687 Ops.push_back(BasePtr);
11688 Ops.push_back(Index);
11689 if (!IsUnmasked)
11690 Ops.push_back(Mask);
11691 Ops.push_back(VL);
11692
11693 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11694 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11695}
11696
11697SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11698 SelectionDAG &DAG) const {
11699 const MVT XLenVT = Subtarget.getXLenVT();
11700 SDLoc DL(Op);
11701 SDValue Chain = Op->getOperand(0);
11702 SDValue SysRegNo = DAG.getTargetConstant(
11703 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11704 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11705 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11706
11707 // Encoding used for rounding mode in RISC-V differs from that used in
11708 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11709 // table, which consists of a sequence of 4-bit fields, each representing
11710 // corresponding FLT_ROUNDS mode.
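// For example, if FRM holds RTZ (1), the shift below selects the 4-bit field
// at bits [7:4] of the table, which holds the FLT_ROUNDS encoding of
// "toward zero".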
11711 static const int Table =
11712 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11713 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11714 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11715 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11716 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11717
11718 SDValue Shift =
11719 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11720 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11721 DAG.getConstant(Table, DL, XLenVT), Shift);
11722 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11723 DAG.getConstant(7, DL, XLenVT));
11724
11725 return DAG.getMergeValues({Masked, Chain}, DL);
11726}
11727
11728SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11729 SelectionDAG &DAG) const {
11730 const MVT XLenVT = Subtarget.getXLenVT();
11731 SDLoc DL(Op);
11732 SDValue Chain = Op->getOperand(0);
11733 SDValue RMValue = Op->getOperand(1);
11734 SDValue SysRegNo = DAG.getTargetConstant(
11735 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11736
11737 // Encoding used for rounding mode in RISC-V differs from that used in
11738 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11739 // a table, which consists of a sequence of 4-bit fields, each representing
11740 // corresponding RISC-V mode.
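// For example, an FLT_ROUNDS value of 1 (nearest-ties-to-even) selects the
// 4-bit field at bits [7:4] of the table, which holds the RISC-V RNE encoding.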
11741 static const unsigned Table =
11742 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11743 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11744 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11745 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11746 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11747
11748 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11749
11750 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11751 DAG.getConstant(2, DL, XLenVT));
11752 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11753 DAG.getConstant(Table, DL, XLenVT), Shift);
11754 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11755 DAG.getConstant(0x7, DL, XLenVT));
11756 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11757 RMValue);
11758}
11759
11760SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11761 SelectionDAG &DAG) const {
11762 MachineFunction &MF = DAG.getMachineFunction();
11763
11764 bool isRISCV64 = Subtarget.is64Bit();
11765 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11766
11767 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11768 return DAG.getFrameIndex(FI, PtrVT);
11769}
11770
11771// Returns the opcode of the target-specific SDNode that implements the 32-bit
11772// form of the given Opcode.
11773static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11774 switch (Opcode) {
11775 default:
11776 llvm_unreachable("Unexpected opcode");
11777 case ISD::SHL:
11778 return RISCVISD::SLLW;
11779 case ISD::SRA:
11780 return RISCVISD::SRAW;
11781 case ISD::SRL:
11782 return RISCVISD::SRLW;
11783 case ISD::SDIV:
11784 return RISCVISD::DIVW;
11785 case ISD::UDIV:
11786 return RISCVISD::DIVUW;
11787 case ISD::UREM:
11788 return RISCVISD::REMUW;
11789 case ISD::ROTL:
11790 return RISCVISD::ROLW;
11791 case ISD::ROTR:
11792 return RISCVISD::RORW;
11793 }
11794}
11795
11796// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11797// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11798// otherwise be promoted to i64, making it difficult to select the
11799 // SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
11800 // was originally of type i8/i16/i32 is lost.
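// For example, (i32 (srl X, Y)) on RV64 becomes
// (i32 (trunc (RISCVISD::SRLW (any_ext X), (any_ext Y)))).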
11801 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11802 unsigned ExtOpc = ISD::ANY_EXTEND) {
11803 SDLoc DL(N);
11804 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11805 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11806 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11807 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11808 // ReplaceNodeResults requires we maintain the same type for the return value.
11809 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11810}
11811
11812 // Converts the given 32-bit operation to an i64 operation with sign-extension
11813 // semantics, reducing the number of sign-extension instructions required.
11814 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11815 SDLoc DL(N);
11816 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11817 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11818 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11819 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11820 DAG.getValueType(MVT::i32));
11821 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11822}
11823
11824 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11825 SmallVectorImpl<SDValue> &Results,
11826 SelectionDAG &DAG) const {
11827 SDLoc DL(N);
11828 switch (N->getOpcode()) {
11829 default:
11830 llvm_unreachable("Don't know how to custom type legalize this operation!");
11831 case ISD::STRICT_FP_TO_SINT:
11832 case ISD::STRICT_FP_TO_UINT:
11833 case ISD::FP_TO_SINT:
11834 case ISD::FP_TO_UINT: {
11835 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11836 "Unexpected custom legalisation");
11837 bool IsStrict = N->isStrictFPOpcode();
11838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11839 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11840 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11841 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11842 TargetLowering::TypeSoftenFloat) {
11843 if (!isTypeLegal(Op0.getValueType()))
11844 return;
11845 if (IsStrict) {
11846 SDValue Chain = N->getOperand(0);
11847 // In the absence of Zfh, promote f16 to f32, then convert.
11848 if (Op0.getValueType() == MVT::f16 &&
11849 !Subtarget.hasStdExtZfhOrZhinx()) {
11850 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11851 {Chain, Op0});
11852 Chain = Op0.getValue(1);
11853 }
11854 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11855 : RISCVISD::STRICT_FCVT_WU_RV64;
11856 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11857 SDValue Res = DAG.getNode(
11858 Opc, DL, VTs, Chain, Op0,
11859 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11860 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11861 Results.push_back(Res.getValue(1));
11862 return;
11863 }
11864 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11865 // convert.
11866 if ((Op0.getValueType() == MVT::f16 &&
11867 !Subtarget.hasStdExtZfhOrZhinx()) ||
11868 Op0.getValueType() == MVT::bf16)
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11872 SDValue Res =
11873 DAG.getNode(Opc, DL, MVT::i64, Op0,
11874 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11876 return;
11877 }
11878 // If the FP type needs to be softened, emit a library call using the 'si'
11879 // version. If we left it to default legalization we'd end up with 'di'. If
11880 // the FP type doesn't need to be softened just let generic type
11881 // legalization promote the result type.
11882 RTLIB::Libcall LC;
11883 if (IsSigned)
11884 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11885 else
11886 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11887 MakeLibCallOptions CallOptions;
11888 EVT OpVT = Op0.getValueType();
11889 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11890 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11891 SDValue Result;
11892 std::tie(Result, Chain) =
11893 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11894 Results.push_back(Result);
11895 if (IsStrict)
11896 Results.push_back(Chain);
11897 break;
11898 }
11899 case ISD::LROUND: {
11900 SDValue Op0 = N->getOperand(0);
11901 EVT Op0VT = Op0.getValueType();
11902 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11903 TargetLowering::TypeSoftenFloat) {
11904 if (!isTypeLegal(Op0VT))
11905 return;
11906
11907 // In the absence of Zfh, promote f16 to f32, then convert.
11908 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11909 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11910
11911 SDValue Res =
11912 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11913 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11915 return;
11916 }
11917 // If the FP type needs to be softened, emit a library call to lround. We'll
11918 // need to truncate the result. We assume any value that doesn't fit in i32
11919 // is allowed to return an unspecified value.
11920 RTLIB::Libcall LC =
11921 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11922 MakeLibCallOptions CallOptions;
11923 EVT OpVT = Op0.getValueType();
11924 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11925 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11926 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11927 Results.push_back(Result);
11928 break;
11929 }
11930 case ISD::READCYCLECOUNTER:
11931 case ISD::READSTEADYCOUNTER: {
11932 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11933 "has custom type legalization on riscv32");
11934
11935 SDValue LoCounter, HiCounter;
11936 MVT XLenVT = Subtarget.getXLenVT();
11937 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11938 LoCounter = DAG.getTargetConstant(
11939 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11940 HiCounter = DAG.getTargetConstant(
11941 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11942 } else {
11943 LoCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11945 HiCounter = DAG.getTargetConstant(
11946 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11947 }
11948 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11949 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11950 N->getOperand(0), LoCounter, HiCounter);
11951
11952 Results.push_back(
11953 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11954 Results.push_back(RCW.getValue(2));
11955 break;
11956 }
11957 case ISD::LOAD: {
11958 if (!ISD::isNON_EXTLoad(N))
11959 return;
11960
11961 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11962 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11963 LoadSDNode *Ld = cast<LoadSDNode>(N);
11964
11965 SDLoc dl(N);
11966 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11967 Ld->getBasePtr(), Ld->getMemoryVT(),
11968 Ld->getMemOperand());
11969 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11970 Results.push_back(Res.getValue(1));
11971 return;
11972 }
11973 case ISD::MUL: {
11974 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11975 unsigned XLen = Subtarget.getXLen();
11976 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11977 if (Size > XLen) {
11978 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11979 SDValue LHS = N->getOperand(0);
11980 SDValue RHS = N->getOperand(1);
11981 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11982
11983 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11984 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11985 // We need exactly one side to be unsigned.
11986 if (LHSIsU == RHSIsU)
11987 return;
11988
11989 auto MakeMULPair = [&](SDValue S, SDValue U) {
11990 MVT XLenVT = Subtarget.getXLenVT();
11991 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11992 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11993 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11994 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11995 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11996 };
11997
11998 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11999 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12000
12001 // The other operand should be signed, but still prefer MULH when
12002 // possible.
12003 if (RHSIsU && LHSIsS && !RHSIsS)
12004 Results.push_back(MakeMULPair(LHS, RHS));
12005 else if (LHSIsU && RHSIsS && !LHSIsS)
12006 Results.push_back(MakeMULPair(RHS, LHS));
12007
12008 return;
12009 }
12010 [[fallthrough]];
12011 }
12012 case ISD::ADD:
12013 case ISD::SUB:
12014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12015 "Unexpected custom legalisation");
12016 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12017 break;
12018 case ISD::SHL:
12019 case ISD::SRA:
12020 case ISD::SRL:
12021 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12022 "Unexpected custom legalisation");
12023 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12024 // If we can use a BSET instruction, allow default promotion to apply.
12025 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12026 isOneConstant(N->getOperand(0)))
12027 break;
12028 Results.push_back(customLegalizeToWOp(N, DAG));
12029 break;
12030 }
12031
12032 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12033 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12034 // shift amount.
12035 if (N->getOpcode() == ISD::SHL) {
12036 SDLoc DL(N);
12037 SDValue NewOp0 =
12038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12039 SDValue NewOp1 =
12040 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12041 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12042 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12043 DAG.getValueType(MVT::i32));
12044 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12045 }
12046
12047 break;
12048 case ISD::ROTL:
12049 case ISD::ROTR:
12050 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12051 "Unexpected custom legalisation");
12052 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12053 Subtarget.hasVendorXTHeadBb()) &&
12054 "Unexpected custom legalization");
12055 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12056 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12057 return;
12058 Results.push_back(customLegalizeToWOp(N, DAG));
12059 break;
12060 case ISD::CTTZ:
12061 case ISD::CTTZ_ZERO_UNDEF:
12062 case ISD::CTLZ:
12063 case ISD::CTLZ_ZERO_UNDEF: {
12064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12065 "Unexpected custom legalisation");
12066
12067 SDValue NewOp0 =
12068 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12069 bool IsCTZ =
12070 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12071 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12072 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12074 return;
12075 }
12076 case ISD::SDIV:
12077 case ISD::UDIV:
12078 case ISD::UREM: {
12079 MVT VT = N->getSimpleValueType(0);
12080 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12081 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12082 "Unexpected custom legalisation");
12083 // Don't promote division/remainder by constant since we should expand those
12084 // to multiply by magic constant.
12085 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12086 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12087 !isIntDivCheap(N->getValueType(0), Attr))
12088 return;
12089
12090 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12091 // the upper 32 bits. For other types we need to sign or zero extend
12092 // based on the opcode.
12093 unsigned ExtOpc = ISD::ANY_EXTEND;
12094 if (VT != MVT::i32)
12095 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12096 : ISD::ZERO_EXTEND;
12097
12098 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12099 break;
12100 }
12101 case ISD::SADDO: {
12102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12103 "Unexpected custom legalisation");
12104
12105 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12106 // use the default legalization.
12107 if (!isa<ConstantSDNode>(N->getOperand(1)))
12108 return;
12109
12110 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12111 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12112 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12113 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12114 DAG.getValueType(MVT::i32));
12115
12116 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12117
12118 // For an addition, the result should be less than one of the operands (LHS)
12119 // if and only if the other operand (RHS) is negative, otherwise there will
12120 // be overflow.
12121 // For a subtraction, the result should be less than one of the operands
12122 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12123 // otherwise there will be overflow.
12124 EVT OType = N->getValueType(1);
12125 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12126 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12127
12128 SDValue Overflow =
12129 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12130 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12131 Results.push_back(Overflow);
12132 return;
12133 }
12134 case ISD::UADDO:
12135 case ISD::USUBO: {
12136 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12137 "Unexpected custom legalisation");
12138 bool IsAdd = N->getOpcode() == ISD::UADDO;
12139 // Create an ADDW or SUBW.
12140 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12141 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12142 SDValue Res =
12143 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12144 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12145 DAG.getValueType(MVT::i32));
12146
12147 SDValue Overflow;
12148 if (IsAdd && isOneConstant(RHS)) {
12149 // Special case uaddo X, 1 overflowed if the addition result is 0.
12150 // The general case (X + C) < C is not necessarily beneficial. Although we
12151 // reduce the live range of X, we may introduce the materialization of
12152 // constant C, especially when the setcc result is used by a branch. We have
12153 // no compare-with-constant-and-branch instruction.
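// For example, (uaddo X, 1) overflows exactly when the 32-bit sum wraps to 0,
// i.e. when X was 0xffffffff.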
12154 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12155 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12156 } else if (IsAdd && isAllOnesConstant(RHS)) {
12157 // Special case uaddo X, -1 overflowed if X != 0.
12158 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12159 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12160 } else {
12161 // Sign extend the LHS and perform an unsigned compare with the ADDW
12162 // result. Since the inputs are sign extended from i32, this is equivalent
12163 // to comparing the lower 32 bits.
12164 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12165 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12166 IsAdd ? ISD::SETULT : ISD::SETUGT);
12167 }
12168
12169 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12170 Results.push_back(Overflow);
12171 return;
12172 }
12173 case ISD::UADDSAT:
12174 case ISD::USUBSAT: {
12175 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12176 "Unexpected custom legalisation");
12177 if (Subtarget.hasStdExtZbb()) {
12178 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12179 // sign extend allows overflow of the lower 32 bits to be detected on
12180 // the promoted size.
12181 SDValue LHS =
12182 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12183 SDValue RHS =
12184 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12185 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12186 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12187 return;
12188 }
12189
12190 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12191 // promotion for UADDO/USUBO.
12192 Results.push_back(expandAddSubSat(N, DAG));
12193 return;
12194 }
12195 case ISD::SADDSAT:
12196 case ISD::SSUBSAT: {
12197 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12198 "Unexpected custom legalisation");
12199 Results.push_back(expandAddSubSat(N, DAG));
12200 return;
12201 }
12202 case ISD::ABS: {
12203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12204 "Unexpected custom legalisation");
12205
12206 if (Subtarget.hasStdExtZbb()) {
12207 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12208 // This allows us to remember that the result is sign extended. Expanding
12209 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12210 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12211 N->getOperand(0));
12212 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12213 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12214 return;
12215 }
12216
12217 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12218 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12219
12220 // Freeze the source so we can increase its use count.
12221 Src = DAG.getFreeze(Src);
12222
12223 // Copy sign bit to all bits using the sraiw pattern.
12224 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12225 DAG.getValueType(MVT::i32));
12226 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12227 DAG.getConstant(31, DL, MVT::i64));
12228
12229 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12230 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12231
12232 // NOTE: The result is only required to be anyextended, but sext is
12233 // consistent with type legalization of sub.
12234 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12235 DAG.getValueType(MVT::i32));
12236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12237 return;
12238 }
12239 case ISD::BITCAST: {
12240 EVT VT = N->getValueType(0);
12241 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12242 SDValue Op0 = N->getOperand(0);
12243 EVT Op0VT = Op0.getValueType();
12244 MVT XLenVT = Subtarget.getXLenVT();
12245 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12246 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12247 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12248 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12249 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12250 Subtarget.hasStdExtZfbfmin()) {
12251 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12252 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12253 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12254 Subtarget.hasStdExtFOrZfinx()) {
12255 SDValue FPConv =
12256 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12257 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12258 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12259 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12260 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12261 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12262 NewReg.getValue(0), NewReg.getValue(1));
12263 Results.push_back(RetReg);
12264 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12265 isTypeLegal(Op0VT)) {
12266 // Custom-legalize bitcasts from fixed-length vector types to illegal
12267 // scalar types in order to improve codegen. Bitcast the vector to a
12268 // one-element vector type whose element type is the same as the result
12269 // type, and extract the first element.
12270 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12271 if (isTypeLegal(BVT)) {
12272 SDValue BVec = DAG.getBitcast(BVT, Op0);
12273 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12274 DAG.getVectorIdxConstant(0, DL)));
12275 }
12276 }
12277 break;
12278 }
12279 case RISCVISD::BREV8: {
12280 MVT VT = N->getSimpleValueType(0);
12281 MVT XLenVT = Subtarget.getXLenVT();
12282 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12283 "Unexpected custom legalisation");
12284 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12285 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12286 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12287 // ReplaceNodeResults requires we maintain the same type for the return
12288 // value.
12289 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12290 break;
12291 }
12292 case ISD::EXTRACT_VECTOR_ELT: {
12293 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12294 // type is illegal (currently only vXi64 RV32).
12295 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12296 // transferred to the destination register. We issue two of these from the
12297 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12298 // first element.
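// For example, extracting element 2 of a v4i64 on RV32 slides the vector down
// by 2, reads the low 32 bits with vmv.x.s, shifts the element right by 32,
// reads the high 32 bits the same way, and pairs the two halves into an i64.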
12299 SDValue Vec = N->getOperand(0);
12300 SDValue Idx = N->getOperand(1);
12301
12302 // The vector type hasn't been legalized yet so we can't issue target
12303 // specific nodes if it needs legalization.
12304 // FIXME: We would manually legalize if it's important.
12305 if (!isTypeLegal(Vec.getValueType()))
12306 return;
12307
12308 MVT VecVT = Vec.getSimpleValueType();
12309
12310 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12311 VecVT.getVectorElementType() == MVT::i64 &&
12312 "Unexpected EXTRACT_VECTOR_ELT legalization");
12313
12314 // If this is a fixed vector, we need to convert it to a scalable vector.
12315 MVT ContainerVT = VecVT;
12316 if (VecVT.isFixedLengthVector()) {
12317 ContainerVT = getContainerForFixedLengthVector(VecVT);
12318 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12319 }
12320
12321 MVT XLenVT = Subtarget.getXLenVT();
12322
12323 // Use a VL of 1 to avoid processing more elements than we need.
12324 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12325
12326 // Unless the index is known to be 0, we must slide the vector down to get
12327 // the desired element into index 0.
12328 if (!isNullConstant(Idx)) {
12329 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12330 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12331 }
12332
12333 // Extract the lower XLEN bits of the correct vector element.
12334 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12335
12336 // To extract the upper XLEN bits of the vector element, shift the first
12337 // element right by 32 bits and re-extract the lower XLEN bits.
12338 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12339 DAG.getUNDEF(ContainerVT),
12340 DAG.getConstant(32, DL, XLenVT), VL);
12341 SDValue LShr32 =
12342 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12343 DAG.getUNDEF(ContainerVT), Mask, VL);
12344
12345 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12346
12347 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12348 break;
12349 }
12350 case ISD::INTRINSIC_WO_CHAIN: {
12351 unsigned IntNo = N->getConstantOperandVal(0);
12352 switch (IntNo) {
12353 default:
12354 llvm_unreachable(
12355 "Don't know how to custom type legalize this intrinsic!");
12356 case Intrinsic::experimental_get_vector_length: {
12357 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12358 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12359 return;
12360 }
12361 case Intrinsic::experimental_cttz_elts: {
12362 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12363 Results.push_back(
12364 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12365 return;
12366 }
12367 case Intrinsic::riscv_orc_b:
12368 case Intrinsic::riscv_brev8:
12369 case Intrinsic::riscv_sha256sig0:
12370 case Intrinsic::riscv_sha256sig1:
12371 case Intrinsic::riscv_sha256sum0:
12372 case Intrinsic::riscv_sha256sum1:
12373 case Intrinsic::riscv_sm3p0:
12374 case Intrinsic::riscv_sm3p1: {
12375 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12376 return;
12377 unsigned Opc;
12378 switch (IntNo) {
12379 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12380 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12381 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12382 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12383 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12384 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12385 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12386 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12387 }
12388
12389 SDValue NewOp =
12390 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12392 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12393 return;
12394 }
12395 case Intrinsic::riscv_sm4ks:
12396 case Intrinsic::riscv_sm4ed: {
12397 unsigned Opc =
12398 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12399 SDValue NewOp0 =
12400 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12401 SDValue NewOp1 =
12402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12403 SDValue Res =
12404 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12405 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12406 return;
12407 }
12408 case Intrinsic::riscv_mopr: {
12409 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12410 return;
12411 SDValue NewOp =
12412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12413 SDValue Res = DAG.getNode(
12414 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12415 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12416 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12417 return;
12418 }
12419 case Intrinsic::riscv_moprr: {
12420 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12421 return;
12422 SDValue NewOp0 =
12423 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12424 SDValue NewOp1 =
12425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12426 SDValue Res = DAG.getNode(
12427 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12428 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12429 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12430 return;
12431 }
12432 case Intrinsic::riscv_clmul: {
12433 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12434 return;
12435
12436 SDValue NewOp0 =
12437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12438 SDValue NewOp1 =
12439 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12440 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12441 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12442 return;
12443 }
12444 case Intrinsic::riscv_clmulh:
12445 case Intrinsic::riscv_clmulr: {
12446 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12447 return;
12448
12449 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12450 // to the full 128-bit clmul result of multiplying two xlen values.
12451 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12452 // upper 32 bits.
12453 //
12454 // The alternative is to mask the inputs to 32 bits and use clmul, but
12455 // that requires two shifts to mask each input without zext.w.
12456 // FIXME: If the inputs are known zero extended or could be freely
12457 // zero extended, the mask form would be better.
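// Illustratively, the 32-bit clmulh of a and b equals bits [63:32] of
// clmulh64(a << 32, b << 32): shifting both inputs left by 32 places the full
// 64-bit carry-less product in the upper half of the 128-bit result.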
12458 SDValue NewOp0 =
12459 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12460 SDValue NewOp1 =
12461 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12462 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12463 DAG.getConstant(32, DL, MVT::i64));
12464 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12465 DAG.getConstant(32, DL, MVT::i64));
12466 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12467 : RISCVISD::CLMULR;
12468 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12469 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12470 DAG.getConstant(32, DL, MVT::i64));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12472 return;
12473 }
12474 case Intrinsic::riscv_vmv_x_s: {
12475 EVT VT = N->getValueType(0);
12476 MVT XLenVT = Subtarget.getXLenVT();
12477 if (VT.bitsLT(XLenVT)) {
12478 // Simple case just extract using vmv.x.s and truncate.
12479 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12480 Subtarget.getXLenVT(), N->getOperand(1));
12481 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12482 return;
12483 }
12484
12485 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12486 "Unexpected custom legalization");
12487
12488 // We need to do the move in two steps.
12489 SDValue Vec = N->getOperand(1);
12490 MVT VecVT = Vec.getSimpleValueType();
12491
12492 // First extract the lower XLEN bits of the element.
12493 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12494
12495 // To extract the upper XLEN bits of the vector element, shift the first
12496 // element right by 32 bits and re-extract the lower XLEN bits.
12497 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12498
12499 SDValue ThirtyTwoV =
12500 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12501 DAG.getConstant(32, DL, XLenVT), VL);
12502 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12503 DAG.getUNDEF(VecVT), Mask, VL);
12504 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12505
12506 Results.push_back(
12507 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12508 break;
12509 }
12510 }
12511 break;
12512 }
12513 case ISD::VECREDUCE_ADD:
12514 case ISD::VECREDUCE_AND:
12515 case ISD::VECREDUCE_OR:
12516 case ISD::VECREDUCE_XOR:
12517 case ISD::VECREDUCE_SMAX:
12518 case ISD::VECREDUCE_UMAX:
12519 case ISD::VECREDUCE_SMIN:
12520 case ISD::VECREDUCE_UMIN:
12521 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12522 Results.push_back(V);
12523 break;
12524 case ISD::VP_REDUCE_ADD:
12525 case ISD::VP_REDUCE_AND:
12526 case ISD::VP_REDUCE_OR:
12527 case ISD::VP_REDUCE_XOR:
12528 case ISD::VP_REDUCE_SMAX:
12529 case ISD::VP_REDUCE_UMAX:
12530 case ISD::VP_REDUCE_SMIN:
12531 case ISD::VP_REDUCE_UMIN:
12532 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12533 Results.push_back(V);
12534 break;
12535 case ISD::GET_ROUNDING: {
12536 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12537 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12538 Results.push_back(Res.getValue(0));
12539 Results.push_back(Res.getValue(1));
12540 break;
12541 }
12542 }
12543}
12544
12545/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12546/// which corresponds to it.
12547static unsigned getVecReduceOpcode(unsigned Opc) {
12548 switch (Opc) {
12549 default:
12550 llvm_unreachable("Unhandled binary to transform reduction");
12551 case ISD::ADD:
12552 return ISD::VECREDUCE_ADD;
12553 case ISD::UMAX:
12554 return ISD::VECREDUCE_UMAX;
12555 case ISD::SMAX:
12556 return ISD::VECREDUCE_SMAX;
12557 case ISD::UMIN:
12558 return ISD::VECREDUCE_UMIN;
12559 case ISD::SMIN:
12560 return ISD::VECREDUCE_SMIN;
12561 case ISD::AND:
12562 return ISD::VECREDUCE_AND;
12563 case ISD::OR:
12564 return ISD::VECREDUCE_OR;
12565 case ISD::XOR:
12566 return ISD::VECREDUCE_XOR;
12567 case ISD::FADD:
12568 // Note: This is the associative form of the generic reduction opcode.
12569 return ISD::VECREDUCE_FADD;
12570 }
12571}
12572
12573/// Perform two related transforms whose purpose is to incrementally recognize
12574/// an explode_vector followed by scalar reduction as a vector reduction node.
12575/// This exists to recover from a deficiency in SLP which can't handle
12576/// forests with multiple roots sharing common nodes. In some cases, one
12577/// of the trees will be vectorized, and the other will remain (unprofitably)
12578/// scalarized.
12579static SDValue
12580combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12581 const RISCVSubtarget &Subtarget) {
12582
12583 // These transforms need to run before all integer types have been legalized
12584 // to i64 (so that the vector element type matches the add type), and while
12585 // it's safe to introduce odd sized vector types.
12586 if (DAG.NewNodesMustHaveLegalTypes)
12587 return SDValue();
12588
12589 // Without V, this transform isn't useful. We could form the (illegal)
12590 // operations and let them be scalarized again, but there's really no point.
12591 if (!Subtarget.hasVInstructions())
12592 return SDValue();
12593
12594 const SDLoc DL(N);
12595 const EVT VT = N->getValueType(0);
12596 const unsigned Opc = N->getOpcode();
12597
12598 // For FADD, we only handle the case with reassociation allowed. We
12599 // could handle strict reduction order, but at the moment, there's no
12600 // known reason to, and the complexity isn't worth it.
12601 // TODO: Handle fminnum and fmaxnum here
12602 if (!VT.isInteger() &&
12603 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12604 return SDValue();
12605
12606 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12607 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12608 "Inconsistent mappings");
12609 SDValue LHS = N->getOperand(0);
12610 SDValue RHS = N->getOperand(1);
12611
12612 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12613 return SDValue();
12614
12615 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12616 std::swap(LHS, RHS);
12617
12618 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12619 !isa<ConstantSDNode>(RHS.getOperand(1)))
12620 return SDValue();
12621
12622 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12623 SDValue SrcVec = RHS.getOperand(0);
12624 EVT SrcVecVT = SrcVec.getValueType();
12625 assert(SrcVecVT.getVectorElementType() == VT);
12626 if (SrcVecVT.isScalableVector())
12627 return SDValue();
12628
12629 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12630 return SDValue();
12631
12632 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12633 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12634 // root of our reduction tree. TODO: We could extend this to any two
12635 // adjacent aligned constant indices if desired.
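// Illustrative sketch: (add (extractelt <4 x i32> %v, 0), (extractelt %v, 1))
// becomes (vecreduce_add (extract_subvector <2 x i32> %v, 0)); the follow-on
// pattern below then grows that reduction by one extracted element at a time.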
12636 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12637 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12638 uint64_t LHSIdx =
12639 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12640 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12641 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12642 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12643 DAG.getVectorIdxConstant(0, DL));
12644 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12645 }
12646 }
12647
12648 // Match (binop (reduce (extract_subvector V, 0),
12649 // (extract_vector_elt V, sizeof(SubVec))))
12650 // into a reduction of one more element from the original vector V.
12651 if (LHS.getOpcode() != ReduceOpc)
12652 return SDValue();
12653
12654 SDValue ReduceVec = LHS.getOperand(0);
12655 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12656 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12657 isNullConstant(ReduceVec.getOperand(1)) &&
12658 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12659 // For illegal types (e.g. 3xi32), most will be combined again into a
12660 // wider (hopefully legal) type. If this is a terminal state, we are
12661 // relying on type legalization here to produce something reasonable
12662 // and this lowering quality could probably be improved. (TODO)
12663 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12664 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12665 DAG.getVectorIdxConstant(0, DL));
12666 auto Flags = ReduceVec->getFlags();
12667 Flags.intersectWith(N->getFlags());
12668 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12669 }
12670
12671 return SDValue();
12672}
12673
12674
12675// Try to fold (<bop> x, (reduction.<bop> vec, start))
12676static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12677 const RISCVSubtarget &Subtarget) {
12678 auto BinOpToRVVReduce = [](unsigned Opc) {
12679 switch (Opc) {
12680 default:
12681 llvm_unreachable("Unhandled binary to transform reduction");
12682 case ISD::ADD:
12683 return RISCVISD::VECREDUCE_ADD_VL;
12684 case ISD::UMAX:
12685 return RISCVISD::VECREDUCE_UMAX_VL;
12686 case ISD::SMAX:
12687 return RISCVISD::VECREDUCE_SMAX_VL;
12688 case ISD::UMIN:
12689 return RISCVISD::VECREDUCE_UMIN_VL;
12690 case ISD::SMIN:
12691 return RISCVISD::VECREDUCE_SMIN_VL;
12692 case ISD::AND:
12693 return RISCVISD::VECREDUCE_AND_VL;
12694 case ISD::OR:
12695 return RISCVISD::VECREDUCE_OR_VL;
12696 case ISD::XOR:
12697 return RISCVISD::VECREDUCE_XOR_VL;
12698 case ISD::FADD:
12699 return RISCVISD::VECREDUCE_FADD_VL;
12700 case ISD::FMAXNUM:
12701 return RISCVISD::VECREDUCE_FMAX_VL;
12702 case ISD::FMINNUM:
12703 return RISCVISD::VECREDUCE_FMIN_VL;
12704 }
12705 };
12706
12707 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12708 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12709 isNullConstant(V.getOperand(1)) &&
12710 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12711 };
12712
12713 unsigned Opc = N->getOpcode();
12714 unsigned ReduceIdx;
12715 if (IsReduction(N->getOperand(0), Opc))
12716 ReduceIdx = 0;
12717 else if (IsReduction(N->getOperand(1), Opc))
12718 ReduceIdx = 1;
12719 else
12720 return SDValue();
12721
12722 // Skip if FADD disallows reassociation but the combiner needs it.
12723 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12724 return SDValue();
12725
12726 SDValue Extract = N->getOperand(ReduceIdx);
12727 SDValue Reduce = Extract.getOperand(0);
12728 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12729 return SDValue();
12730
12731 SDValue ScalarV = Reduce.getOperand(2);
12732 EVT ScalarVT = ScalarV.getValueType();
12733 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12734 ScalarV.getOperand(0)->isUndef() &&
12735 isNullConstant(ScalarV.getOperand(2)))
12736 ScalarV = ScalarV.getOperand(1);
12737
12738 // Make sure that ScalarV is a splat with VL=1.
12739 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12740 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12741 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12742 return SDValue();
12743
12744 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12745 return SDValue();
12746
12747 // Check that the scalar of ScalarV is the neutral element.
12748 // TODO: Deal with value other than neutral element.
12749 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12750 0))
12751 return SDValue();
12752
12753 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12754 // FIXME: We might be able to improve this if operand 0 is undef.
12755 if (!isNonZeroAVL(Reduce.getOperand(5)))
12756 return SDValue();
12757
12758 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12759
12760 SDLoc DL(N);
12761 SDValue NewScalarV =
12762 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12763 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12764
12765 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12766 if (ScalarVT != ScalarV.getValueType())
12767 NewScalarV =
12768 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12769 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12770
12771 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12772 NewScalarV, Reduce.getOperand(3),
12773 Reduce.getOperand(4), Reduce.getOperand(5)};
12774 SDValue NewReduce =
12775 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12776 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12777 Extract.getOperand(1));
12778}
12779
12780// Optimize (add (shl x, c0), (shl y, c1)) ->
12781// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
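// Illustrative example with hypothetical constants: for c0 = 2 and c1 = 4,
// (add (shl x, 2), (shl y, 4)) rewrites to (shl (add (shl y, 2), x), 2),
// i.e. SLLI (SH2ADD y, x), 2, since (y << 2) + x is a single sh2add.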
12782static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12783 const RISCVSubtarget &Subtarget) {
12784 // Perform this optimization only in the zba extension.
12785 if (!Subtarget.hasStdExtZba())
12786 return SDValue();
12787
12788 // Skip for vector types and larger types.
12789 EVT VT = N->getValueType(0);
12790 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12791 return SDValue();
12792
12793 // The two operand nodes must be SHL and have no other use.
12794 SDValue N0 = N->getOperand(0);
12795 SDValue N1 = N->getOperand(1);
12796 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12797 !N0->hasOneUse() || !N1->hasOneUse())
12798 return SDValue();
12799
12800 // Check c0 and c1.
12801 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12802 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12803 if (!N0C || !N1C)
12804 return SDValue();
12805 int64_t C0 = N0C->getSExtValue();
12806 int64_t C1 = N1C->getSExtValue();
12807 if (C0 <= 0 || C1 <= 0)
12808 return SDValue();
12809
12810 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12811 int64_t Bits = std::min(C0, C1);
12812 int64_t Diff = std::abs(C0 - C1);
12813 if (Diff != 1 && Diff != 2 && Diff != 3)
12814 return SDValue();
12815
12816 // Build nodes.
12817 SDLoc DL(N);
12818 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12819 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12820 SDValue NA0 =
12821 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12822 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12823 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12824}
12825
12826// Combine a constant select operand into its use:
12827//
12828// (and (select cond, -1, c), x)
12829// -> (select cond, x, (and x, c)) [AllOnes=1]
12830// (or (select cond, 0, c), x)
12831// -> (select cond, x, (or x, c)) [AllOnes=0]
12832// (xor (select cond, 0, c), x)
12833// -> (select cond, x, (xor x, c)) [AllOnes=0]
12834// (add (select cond, 0, c), x)
12835// -> (select cond, x, (add x, c)) [AllOnes=0]
12836// (sub x, (select cond, 0, c))
12837// -> (select cond, x, (sub x, c)) [AllOnes=0]
12838static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12839 SelectionDAG &DAG, bool AllOnes,
12840 const RISCVSubtarget &Subtarget) {
12841 EVT VT = N->getValueType(0);
12842
12843 // Skip vectors.
12844 if (VT.isVector())
12845 return SDValue();
12846
12847 if (!Subtarget.hasConditionalMoveFusion()) {
12848 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12849 if ((!Subtarget.hasStdExtZicond() &&
12850 !Subtarget.hasVendorXVentanaCondOps()) ||
12851 N->getOpcode() != ISD::AND)
12852 return SDValue();
12853
12854 // Maybe harmful when the condition code has multiple uses.
12855 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12856 return SDValue();
12857
12858 // Maybe harmful when VT is wider than XLen.
12859 if (VT.getSizeInBits() > Subtarget.getXLen())
12860 return SDValue();
12861 }
12862
12863 if ((Slct.getOpcode() != ISD::SELECT &&
12864 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12865 !Slct.hasOneUse())
12866 return SDValue();
12867
12868 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12869 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12870 };
12871
12872 bool SwapSelectOps;
12873 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12874 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12875 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12876 SDValue NonConstantVal;
12877 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12878 SwapSelectOps = false;
12879 NonConstantVal = FalseVal;
12880 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12881 SwapSelectOps = true;
12882 NonConstantVal = TrueVal;
12883 } else
12884 return SDValue();
12885
12886 // Slct is now known to be the desired identity constant when CC is true.
12887 TrueVal = OtherOp;
12888 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12889 // Unless SwapSelectOps says the condition should be false.
12890 if (SwapSelectOps)
12891 std::swap(TrueVal, FalseVal);
12892
12893 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12894 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12895 {Slct.getOperand(0), Slct.getOperand(1),
12896 Slct.getOperand(2), TrueVal, FalseVal});
12897
12898 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12899 {Slct.getOperand(0), TrueVal, FalseVal});
12900}
12901
12902// Attempt combineSelectAndUse on each operand of a commutative operator N.
12903static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12904 bool AllOnes,
12905 const RISCVSubtarget &Subtarget) {
12906 SDValue N0 = N->getOperand(0);
12907 SDValue N1 = N->getOperand(1);
12908 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12909 return Result;
12910 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12911 return Result;
12912 return SDValue();
12913}
12914
12915// Transform (add (mul x, c0), c1) ->
12916// (add (mul (add x, c1/c0), c0), c1%c0).
12917// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12918// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12919// to an infinite loop in DAGCombine if transformed.
12920// Or transform (add (mul x, c0), c1) ->
12921// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12922// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12923// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12924// lead to an infinite loop in DAGCombine if transformed.
12925// Or transform (add (mul x, c0), c1) ->
12926// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12927// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12928// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12929// lead to an infinite loop in DAGCombine if transformed.
12930// Or transform (add (mul x, c0), c1) ->
12931// (mul (add x, c1/c0), c0).
12932// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
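// Illustrative example with hypothetical constants: for (add (mul x, 100), 4099),
// c1 = 4099 is not simm12, but c1/c0 = 40 and c1%c0 = 99 both are, and
// c0*(c1/c0) = 4000 is not simm12, so the first form applies and this becomes
// (add (mul (add x, 40), 100), 99), which still equals 100*x + 4099.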
12933static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12934 const RISCVSubtarget &Subtarget) {
12935 // Skip for vector types and larger types.
12936 EVT VT = N->getValueType(0);
12937 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12938 return SDValue();
12939 // The first operand node must be a MUL and has no other use.
12940 SDValue N0 = N->getOperand(0);
12941 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12942 return SDValue();
12943 // Check if c0 and c1 match above conditions.
12944 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12945 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12946 if (!N0C || !N1C)
12947 return SDValue();
12948 // If N0C has multiple uses it's possible one of the cases in
12949 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12950 // in an infinite loop.
12951 if (!N0C->hasOneUse())
12952 return SDValue();
12953 int64_t C0 = N0C->getSExtValue();
12954 int64_t C1 = N1C->getSExtValue();
12955 int64_t CA, CB;
12956 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12957 return SDValue();
12958 // Search for proper CA (non-zero) and CB that both are simm12.
12959 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12960 !isInt<12>(C0 * (C1 / C0))) {
12961 CA = C1 / C0;
12962 CB = C1 % C0;
12963 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12964 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12965 CA = C1 / C0 + 1;
12966 CB = C1 % C0 - C0;
12967 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12968 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12969 CA = C1 / C0 - 1;
12970 CB = C1 % C0 + C0;
12971 } else
12972 return SDValue();
12973 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12974 SDLoc DL(N);
12975 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12976 DAG.getConstant(CA, DL, VT));
12977 SDValue New1 =
12978 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12979 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12980}
12981
12982// add (zext, zext) -> zext (add (zext, zext))
12983// sub (zext, zext) -> sext (sub (zext, zext))
12984// mul (zext, zext) -> zext (mul (zext, zext))
12985// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12986// udiv (zext, zext) -> zext (udiv (zext, zext))
12987// srem (zext, zext) -> zext (srem (zext, zext))
12988// urem (zext, zext) -> zext (urem (zext, zext))
12989//
12990// where the sum of the extend widths match, and the range of the bin op
12991// fits inside the width of the narrower bin op. (For profitability on rvv, we
12992// use a power of two for both inner and outer extend.)
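// Illustrative sketch: (add (zext <N x i8> a to <N x i32>),
// (zext <N x i8> b to <N x i32>)) can instead zero extend each input only to
// <N x i16>, add there (the 9-bit sum still fits), and then extend the i16
// result to i32, halving the width of the vector add.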
12993static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12994
12995 EVT VT = N->getValueType(0);
12996 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12997 return SDValue();
12998
12999 SDValue N0 = N->getOperand(0);
13000 SDValue N1 = N->getOperand(1);
13001 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13002 return SDValue();
13003 if (!N0.hasOneUse() || !N1.hasOneUse())
13004 return SDValue();
13005
13006 SDValue Src0 = N0.getOperand(0);
13007 SDValue Src1 = N1.getOperand(0);
13008 EVT SrcVT = Src0.getValueType();
13009 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13010 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13011 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13012 return SDValue();
13013
13014 LLVMContext &C = *DAG.getContext();
13015 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13016 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13017
13018 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13019 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13020
13021 // Src0 and Src1 are zero extended, so they're always positive if signed.
13022 //
13023 // sub can produce a negative from two positive operands, so it needs sign
13024 // extended. Other nodes produce a positive from two positive operands, so
13025 // zero extend instead.
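// For illustration: with i8 inputs 0 and 255, the narrowed i16 sub yields
// -255, so only a sign extend of the i16 result reproduces the original wide
// subtraction; for the other ops the result stays non-negative.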
13026 unsigned OuterExtend =
13027 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13028
13029 return DAG.getNode(
13030 OuterExtend, SDLoc(N), VT,
13031 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13032}
13033
13034// Try to turn (add (xor bool, 1) -1) into (neg bool).
13035static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13036 SDValue N0 = N->getOperand(0);
13037 SDValue N1 = N->getOperand(1);
13038 EVT VT = N->getValueType(0);
13039 SDLoc DL(N);
13040
13041 // RHS should be -1.
13042 if (!isAllOnesConstant(N1))
13043 return SDValue();
13044
13045 // Look for (xor X, 1).
13046 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13047 return SDValue();
13048
13049 // First xor input should be 0 or 1.
13050 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13051 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13052 return SDValue();
13053
13054 // Emit a negate of the setcc.
13055 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13056 N0.getOperand(0));
13057}
13058
13059static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13060 const RISCVSubtarget &Subtarget) {
13061 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13062 return V;
13063 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13064 return V;
13065 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13066 return V;
13067 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13068 return V;
13069 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13070 return V;
13071 if (SDValue V = combineBinOpOfZExt(N, DAG))
13072 return V;
13073
13074 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13075 // (select lhs, rhs, cc, x, (add x, y))
13076 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13077}
13078
13079// Try to turn a sub boolean RHS and constant LHS into an addi.
13080static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13081 SDValue N0 = N->getOperand(0);
13082 SDValue N1 = N->getOperand(1);
13083 EVT VT = N->getValueType(0);
13084 SDLoc DL(N);
13085
13086 // Require a constant LHS.
13087 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13088 if (!N0C)
13089 return SDValue();
13090
13091 // All our optimizations involve subtracting 1 from the immediate and forming
13092 // an ADDI. Make sure the new immediate is valid for an ADDI.
13093 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13094 if (!ImmValMinus1.isSignedIntN(12))
13095 return SDValue();
13096
13097 SDValue NewLHS;
13098 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13099 // (sub constant, (setcc x, y, eq/neq)) ->
13100 // (add (setcc x, y, neq/eq), constant - 1)
13101 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13102 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13103 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13104 return SDValue();
13105 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13106 NewLHS =
13107 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13108 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13109 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13110 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13111 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13112 NewLHS = N1.getOperand(0);
13113 } else
13114 return SDValue();
13115
13116 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13117 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13118}
13119
13120static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13121 const RISCVSubtarget &Subtarget) {
13122 if (SDValue V = combineSubOfBoolean(N, DAG))
13123 return V;
13124
13125 EVT VT = N->getValueType(0);
13126 SDValue N0 = N->getOperand(0);
13127 SDValue N1 = N->getOperand(1);
13128 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13129 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13130 isNullConstant(N1.getOperand(1))) {
13131 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13132 if (CCVal == ISD::SETLT) {
13133 SDLoc DL(N);
13134 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13135 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13136 DAG.getConstant(ShAmt, DL, VT));
13137 }
13138 }
13139
13140 if (SDValue V = combineBinOpOfZExt(N, DAG))
13141 return V;
13142
13143 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13144 // (select lhs, rhs, cc, x, (sub x, y))
13145 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13146}
13147
13148// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13149// Legalizing setcc can introduce xors like this. Doing this transform reduces
13150// the number of xors and may allow the xor to fold into a branch condition.
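// Illustrative sketch: with 0/1 values a and b,
// (and (xor a, 1), (xor b, 1)) == (xor (or a, b), 1), i.e. !a & !b == !(a | b),
// replacing two xors with a single one.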
13151static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13152 SDValue N0 = N->getOperand(0);
13153 SDValue N1 = N->getOperand(1);
13154 bool IsAnd = N->getOpcode() == ISD::AND;
13155
13156 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13157 return SDValue();
13158
13159 if (!N0.hasOneUse() || !N1.hasOneUse())
13160 return SDValue();
13161
13162 SDValue N01 = N0.getOperand(1);
13163 SDValue N11 = N1.getOperand(1);
13164
13165 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13166 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13167 // operation is And, allow one of the Xors to use -1.
13168 if (isOneConstant(N01)) {
13169 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13170 return SDValue();
13171 } else if (isOneConstant(N11)) {
13172 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13173 if (!(IsAnd && isAllOnesConstant(N01)))
13174 return SDValue();
13175 } else
13176 return SDValue();
13177
13178 EVT VT = N->getValueType(0);
13179
13180 SDValue N00 = N0.getOperand(0);
13181 SDValue N10 = N1.getOperand(0);
13182
13183 // The LHS of the xors needs to be 0/1.
13184 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13185 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13186 return SDValue();
13187
13188 // Invert the opcode and insert a new xor.
13189 SDLoc DL(N);
13190 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13191 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13192 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13193}
13194
13195static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13196 const RISCVSubtarget &Subtarget) {
13197 SDValue N0 = N->getOperand(0);
13198 EVT VT = N->getValueType(0);
13199
13200 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13201 // extending X. This is safe since we only need the LSB after the shift and
13202 // shift amounts larger than 31 would produce poison. If we wait until
13203 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13204 // to use a BEXT instruction.
13205 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13206 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13207 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13208 SDLoc DL(N0);
13209 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13210 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13211 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13212 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13213 }
13214
13215 return SDValue();
13216}
13217
13218// Combines two comparison operations and a logic operation into one selection
13219// operation (min, max) and one logic operation. Returns the newly constructed
13220// node if the conditions for the optimization are satisfied.
13221static SDValue performANDCombine(SDNode *N,
13222 TargetLowering::DAGCombinerInfo &DCI,
13223 const RISCVSubtarget &Subtarget) {
13224 SelectionDAG &DAG = DCI.DAG;
13225
13226 SDValue N0 = N->getOperand(0);
13227 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13228 // extending X. This is safe since we only need the LSB after the shift and
13229 // shift amounts larger than 31 would produce poison. If we wait until
13230 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13231 // to use a BEXT instruction.
13232 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13233 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13234 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13235 N0.hasOneUse()) {
13236 SDLoc DL(N);
13237 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13238 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13239 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13240 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13241 DAG.getConstant(1, DL, MVT::i64));
13242 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13243 }
13244
13245 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13246 return V;
13247 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13248 return V;
13249
13250 if (DCI.isAfterLegalizeDAG())
13251 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13252 return V;
13253
13254 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13255 // (select lhs, rhs, cc, x, (and x, y))
13256 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13257}
13258
13259// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13260// FIXME: Generalize to other binary operators with same operand.
13261static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13262 SelectionDAG &DAG) {
13263 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13264
13265 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13266 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13267 !N0.hasOneUse() || !N1.hasOneUse())
13268 return SDValue();
13269
13270 // Should have the same condition.
13271 SDValue Cond = N0.getOperand(1);
13272 if (Cond != N1.getOperand(1))
13273 return SDValue();
13274
13275 SDValue TrueV = N0.getOperand(0);
13276 SDValue FalseV = N1.getOperand(0);
13277
13278 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13279 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13280 !isOneConstant(TrueV.getOperand(1)) ||
13281 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13282 return SDValue();
13283
13284 EVT VT = N->getValueType(0);
13285 SDLoc DL(N);
13286
13287 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13288 Cond);
13289 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13290 Cond);
13291 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13292 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13293}
13294
13295static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13296 const RISCVSubtarget &Subtarget) {
13297 SelectionDAG &DAG = DCI.DAG;
13298
13299 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13300 return V;
13301 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13302 return V;
13303
13304 if (DCI.isAfterLegalizeDAG())
13305 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13306 return V;
13307
13308 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13309 // We may be able to pull a common operation out of the true and false value.
13310 SDValue N0 = N->getOperand(0);
13311 SDValue N1 = N->getOperand(1);
13312 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13313 return V;
13314 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13315 return V;
13316
13317 // fold (or (select cond, 0, y), x) ->
13318 // (select cond, x, (or x, y))
13319 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13320}
13321
13322static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13323 const RISCVSubtarget &Subtarget) {
13324 SDValue N0 = N->getOperand(0);
13325 SDValue N1 = N->getOperand(1);
13326
13327 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13328 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13329 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13330 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13331 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13332 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13333 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13334 SDLoc DL(N);
13335 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13336 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13337 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13338 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13339 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13340 }
13341
13342 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13343 // NOTE: Assumes ROL being legal means ROLW is legal.
13344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13345 if (N0.getOpcode() == RISCVISD::SLLW &&
13346 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13347 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13348 SDLoc DL(N);
13349 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13350 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13351 }
13352
13353 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13354 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13355 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13356 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13357 if (ConstN00 && CC == ISD::SETLT) {
13358 EVT VT = N0.getValueType();
13359 SDLoc DL(N0);
13360 const APInt &Imm = ConstN00->getAPIntValue();
13361 if ((Imm + 1).isSignedIntN(12))
13362 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13363 DAG.getConstant(Imm + 1, DL, VT), CC);
13364 }
13365 }
13366
13367 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13368 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13369 // would have been promoted to i32, but the setcc would have i64 result.
13370 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13371 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13372 SDValue N00 = N0.getOperand(0);
13373 SDLoc DL(N);
13374 SDValue LHS = N00.getOperand(0);
13375 SDValue RHS = N00.getOperand(1);
13376 SDValue CC = N00.getOperand(2);
13377 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13378 LHS.getValueType());
13379 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13380 LHS, RHS, NotCC);
13381 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13382 }
13383
13384 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13385 return V;
13386 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13387 return V;
13388
13389 // fold (xor (select cond, 0, y), x) ->
13390 // (select cond, x, (xor x, y))
13391 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13392}
13393
13394// Try to expand a scalar multiply to a faster sequence.
13395static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13396 TargetLowering::DAGCombinerInfo &DCI,
13397 const RISCVSubtarget &Subtarget) {
13398
13399 EVT VT = N->getValueType(0);
13400
13401 // LI + MUL is usually smaller than the alternative sequence.
13402 if (DAG.getMachineFunction().getFunction().hasMinSize())
13403 return SDValue();
13404
13405 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13406 return SDValue();
13407
13408 if (VT != Subtarget.getXLenVT())
13409 return SDValue();
13410
13411 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13412 return SDValue();
13413
13414 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13415 if (!CNode)
13416 return SDValue();
13417 uint64_t MulAmt = CNode->getZExtValue();
13418
13419 for (uint64_t Divisor : {3, 5, 9}) {
13420 if (MulAmt % Divisor != 0)
13421 continue;
13422 uint64_t MulAmt2 = MulAmt / Divisor;
13423 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13424 // Matched in tablegen, avoid perturbing patterns.
13425 if (isPowerOf2_64(MulAmt2))
13426 return SDValue();
13427
13428 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13429 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13430 SDLoc DL(N);
13431 SDValue X = DAG.getFreeze(N->getOperand(0));
13432 SDValue Mul359 =
13433 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13434 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13435 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13436 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13437 Mul359);
13438 }
13439 }
13440
13441 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13442 // shXadd. First check if this is a sum of two powers of 2 because that's
13443 // easy. Then count how many zeros are up to the first bit.
13444 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13445 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13446 if (ScaleShift >= 1 && ScaleShift < 4) {
13447 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13448 SDLoc DL(N);
13449 SDValue X = DAG.getFreeze(N->getOperand(0));
13450 SDValue Shift1 =
13451 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13452 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13453 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13454 }
13455 }
13456
13457 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13458 // This is the two-instruction form; there are also three-instruction
13459 // variants we could implement. e.g.
13460 // (2^(1,2,3) * 3,5,9 + 1) << C2
13461 // 2^(C1>3) * 3,5,9 +/- 1
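// Illustrative example with a hypothetical constant: MulAmt = 11 gives
// C = 10, countr_zero(C) = 1 and C >> 1 == 5, so 11*x is emitted as
// (sh1add (sh2add x, x), x), i.e. ((x*5) << 1) + x.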
13462 for (uint64_t Divisor : {3, 5, 9}) {
13463 uint64_t C = MulAmt - 1;
13464 if (C <= Divisor)
13465 continue;
13466 unsigned TZ = llvm::countr_zero(C);
13467 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13468 SDLoc DL(N);
13469 SDValue X = DAG.getFreeze(N->getOperand(0));
13470 SDValue Mul359 =
13471 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13472 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13473 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13474 DAG.getConstant(TZ, DL, VT), X);
13475 }
13476 }
13477
13478 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13479 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13480 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13481 if (ScaleShift >= 1 && ScaleShift < 4) {
13482 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13483 SDLoc DL(N);
13484 SDValue X = DAG.getFreeze(N->getOperand(0));
13485 SDValue Shift1 =
13486 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13487 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13488 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13489 DAG.getConstant(ScaleShift, DL, VT), X));
13490 }
13491 }
13492
13493 return SDValue();
13494}
13495
13496
13497static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13498 TargetLowering::DAGCombinerInfo &DCI,
13499 const RISCVSubtarget &Subtarget) {
13500 EVT VT = N->getValueType(0);
13501 if (!VT.isVector())
13502 return expandMul(N, DAG, DCI, Subtarget);
13503
13504 SDLoc DL(N);
13505 SDValue N0 = N->getOperand(0);
13506 SDValue N1 = N->getOperand(1);
13507 SDValue MulOper;
13508 unsigned AddSubOpc;
13509
13510 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13511 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13512 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13513 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13514 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13515 AddSubOpc = V->getOpcode();
13516 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13517 SDValue Opnd = V->getOperand(1);
13518 MulOper = V->getOperand(0);
13519 if (AddSubOpc == ISD::SUB)
13520 std::swap(Opnd, MulOper);
13521 if (isOneOrOneSplat(Opnd))
13522 return true;
13523 }
13524 return false;
13525 };
13526
13527 if (IsAddSubWith1(N0)) {
13528 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13529 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13530 }
13531
13532 if (IsAddSubWith1(N1)) {
13533 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13534 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13535 }
13536
13537 if (SDValue V = combineBinOpOfZExt(N, DAG))
13538 return V;
13539
13540 return SDValue();
13541}
13542
13543/// According to the property that indexed load/store instructions zero-extend
13544/// their indices, try to narrow the type of index operand.
13545static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13546 if (isIndexTypeSigned(IndexType))
13547 return false;
13548
13549 if (!N->hasOneUse())
13550 return false;
13551
13552 EVT VT = N.getValueType();
13553 SDLoc DL(N);
13554
13555 // In general, what we're doing here is seeing if we can sink a truncate to
13556 // a smaller element type into the expression tree building our index.
13557 // TODO: We can generalize this and handle a bunch more cases if useful.
13558
13559 // Narrow a buildvector to the narrowest element type. This requires less
13560 // work and less register pressure at high LMUL, and creates smaller constants
13561 // which may be cheaper to materialize.
13562 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13563 KnownBits Known = DAG.computeKnownBits(N);
13564 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13565 LLVMContext &C = *DAG.getContext();
13566 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13567 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13568 N = DAG.getNode(ISD::TRUNCATE, DL,
13569 VT.changeVectorElementType(ResultVT), N);
13570 return true;
13571 }
13572 }
13573
13574 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
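// Illustrative sketch: for (shl (zext <N x i8> x to <N x i64>), 2) the shifted
// values need at most 8 + 2 = 10 bits, so the index can be rebuilt as
// (shl (zext x to <N x i16>), 2), a much narrower vector at high LMUL.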
13575 if (N.getOpcode() != ISD::SHL)
13576 return false;
13577
13578 SDValue N0 = N.getOperand(0);
13579 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13580 N0.getOpcode() != RISCVISD::VZEXT_VL)
13581 return false;
13582 if (!N0->hasOneUse())
13583 return false;
13584
13585 APInt ShAmt;
13586 SDValue N1 = N.getOperand(1);
13587 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13588 return false;
13589
13590 SDValue Src = N0.getOperand(0);
13591 EVT SrcVT = Src.getValueType();
13592 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13593 unsigned ShAmtV = ShAmt.getZExtValue();
13594 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13595 NewElen = std::max(NewElen, 8U);
13596
13597 // Skip if NewElen is not narrower than the original extended type.
13598 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13599 return false;
13600
13601 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13602 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13603
13604 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13605 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13606 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13607 return true;
13608}
13609
13610// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13611// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13612// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13613// can become a sext.w instead of a shift pair.
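// Illustrative example with a hypothetical constant: on RV64,
// (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the sign-extended constant
// is a single LUI and the sext_inreg can select to sext.w.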
13614static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13615 const RISCVSubtarget &Subtarget) {
13616 SDValue N0 = N->getOperand(0);
13617 SDValue N1 = N->getOperand(1);
13618 EVT VT = N->getValueType(0);
13619 EVT OpVT = N0.getValueType();
13620
13621 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13622 return SDValue();
13623
13624 // RHS needs to be a constant.
13625 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13626 if (!N1C)
13627 return SDValue();
13628
13629 // LHS needs to be (and X, 0xffffffff).
13630 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13631 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13632 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13633 return SDValue();
13634
13635 // Looking for an equality compare.
13636 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13637 if (!isIntEqualitySetCC(Cond))
13638 return SDValue();
13639
13640 // Don't do this if the sign bit is provably zero, it will be turned back into
13641 // an AND.
13642 APInt SignMask = APInt::getOneBitSet(64, 31);
13643 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13644 return SDValue();
13645
13646 const APInt &C1 = N1C->getAPIntValue();
13647
13648 SDLoc dl(N);
13649 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13650 // to be equal.
13651 if (C1.getActiveBits() > 32)
13652 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13653
13654 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13655 N0.getOperand(0), DAG.getValueType(MVT::i32));
13656 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13657 dl, OpVT), Cond);
13658}
13659
13660static SDValue
13661performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13662 const RISCVSubtarget &Subtarget) {
13663 SDValue Src = N->getOperand(0);
13664 EVT VT = N->getValueType(0);
13665
13666 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13667 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13668 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13669 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13670 Src.getOperand(0));
13671
13672 return SDValue();
13673}
13674
13675namespace {
13676// Forward declaration of the structure holding the necessary information to
13677// apply a combine.
13678struct CombineResult;
13679
13680enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13681/// Helper class for folding sign/zero extensions.
13682/// In particular, this class is used for the following combines:
13683/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13684/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13685/// mul | mul_vl -> vwmul(u) | vwmul_su
13686/// shl | shl_vl -> vwsll
13687/// fadd -> vfwadd | vfwadd_w
13688/// fsub -> vfwsub | vfwsub_w
13689/// fmul -> vfwmul
13690/// An object of this class represents an operand of the operation we want to
13691/// combine.
13692/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13693/// NodeExtensionHelper for `a` and one for `b`.
13694///
13695/// This class abstracts away how the extension is materialized and
13696/// how its number of users affect the combines.
13697///
13698/// In particular:
13699/// - VWADD_W is conceptually == add(op0, sext(op1))
13700/// - VWADDU_W == add(op0, zext(op1))
13701/// - VWSUB_W == sub(op0, sext(op1))
13702/// - VWSUBU_W == sub(op0, zext(op1))
13703/// - VFWADD_W == fadd(op0, fpext(op1))
13704/// - VFWSUB_W == fsub(op0, fpext(op1))
13705/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13706/// zext|sext(smaller_value).
13707struct NodeExtensionHelper {
13708 /// Records if this operand is like being zero extended.
13709 bool SupportsZExt;
13710 /// Records if this operand is like being sign extended.
13711 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13712 /// instance, a splat constant (e.g., 3), would support being both sign and
13713 /// zero extended.
13714 bool SupportsSExt;
13715 /// Records if this operand is like being floating-point extended.
13716 bool SupportsFPExt;
13717 /// This boolean captures whether we care if this operand would still be
13718 /// around after the folding happens.
13719 bool EnforceOneUse;
13720 /// Original value that this NodeExtensionHelper represents.
13721 SDValue OrigOperand;
13722
13723 /// Get the value feeding the extension or the value itself.
13724 /// E.g., for zext(a), this would return a.
13725 SDValue getSource() const {
13726 switch (OrigOperand.getOpcode()) {
13727 case ISD::ZERO_EXTEND:
13728 case ISD::SIGN_EXTEND:
13729 case RISCVISD::VSEXT_VL:
13730 case RISCVISD::VZEXT_VL:
13731 case RISCVISD::FP_EXTEND_VL:
13732 return OrigOperand.getOperand(0);
13733 default:
13734 return OrigOperand;
13735 }
13736 }
13737
13738 /// Check if this instance represents a splat.
13739 bool isSplat() const {
13740 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13741 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13742 }
13743
13744 /// Get the extended opcode.
13745 unsigned getExtOpc(ExtKind SupportsExt) const {
13746 switch (SupportsExt) {
13747 case ExtKind::SExt:
13748 return RISCVISD::VSEXT_VL;
13749 case ExtKind::ZExt:
13750 return RISCVISD::VZEXT_VL;
13751 case ExtKind::FPExt:
13752 return RISCVISD::FP_EXTEND_VL;
13753 }
13754 llvm_unreachable("Unknown ExtKind enum");
13755 }
13756
13757 /// Get or create a value that can feed \p Root with the given extension \p
13758 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
13759 /// operand. \see ::getSource().
13760 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13761 const RISCVSubtarget &Subtarget,
13762 std::optional<ExtKind> SupportsExt) const {
13763 if (!SupportsExt.has_value())
13764 return OrigOperand;
13765
13766 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13767
13768 SDValue Source = getSource();
13769 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13770 if (Source.getValueType() == NarrowVT)
13771 return Source;
13772
13773 unsigned ExtOpc = getExtOpc(*SupportsExt);
13774
13775 // If we need an extension, we should be changing the type.
13776 SDLoc DL(OrigOperand);
13777 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13778 switch (OrigOperand.getOpcode()) {
13779 case ISD::ZERO_EXTEND:
13780 case ISD::SIGN_EXTEND:
13781 case RISCVISD::VSEXT_VL:
13782 case RISCVISD::VZEXT_VL:
13783 case RISCVISD::FP_EXTEND_VL:
13784 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13785 case ISD::SPLAT_VECTOR:
13786 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13787 case RISCVISD::VMV_V_X_VL:
13788 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13789 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13790 default:
13791 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13792 // and that operand should already have the right NarrowVT so no
13793 // extension should be required at this point.
13794 llvm_unreachable("Unsupported opcode");
13795 }
13796 }
13797
13798 /// Helper function to get the narrow type for \p Root.
13799 /// The narrow type is the type of \p Root where we divided the size of each
13800 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13801 /// \pre Both the narrow type and the original type should be legal.
13802 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13803 MVT VT = Root->getSimpleValueType(0);
13804
13805 // Determine the narrow size.
13806 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13807
13808 MVT EltVT = SupportsExt == ExtKind::FPExt
13809 ? MVT::getFloatingPointVT(NarrowSize)
13810 : MVT::getIntegerVT(NarrowSize);
13811
13812 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13813 "Trying to extend something we can't represent");
13814 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13815 return NarrowVT;
13816 }
13817
13818 /// Get the opcode to materialize:
13819 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13820 static unsigned getSExtOpcode(unsigned Opcode) {
13821 switch (Opcode) {
13822 case ISD::ADD:
13823 case RISCVISD::ADD_VL:
13824 case RISCVISD::VWADD_W_VL:
13825 case RISCVISD::VWADDU_W_VL:
13826 case ISD::OR:
13827 return RISCVISD::VWADD_VL;
13828 case ISD::SUB:
13829 case RISCVISD::SUB_VL:
13830 case RISCVISD::VWSUB_W_VL:
13831 case RISCVISD::VWSUBU_W_VL:
13832 return RISCVISD::VWSUB_VL;
13833 case ISD::MUL:
13834 case RISCVISD::MUL_VL:
13835 return RISCVISD::VWMUL_VL;
13836 default:
13837 llvm_unreachable("Unexpected opcode");
13838 }
13839 }
13840
13841 /// Get the opcode to materialize:
13842 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13843 static unsigned getZExtOpcode(unsigned Opcode) {
13844 switch (Opcode) {
13845 case ISD::ADD:
13846 case RISCVISD::ADD_VL:
13847 case RISCVISD::VWADD_W_VL:
13848 case RISCVISD::VWADDU_W_VL:
13849 case ISD::OR:
13850 return RISCVISD::VWADDU_VL;
13851 case ISD::SUB:
13852 case RISCVISD::SUB_VL:
13853 case RISCVISD::VWSUB_W_VL:
13854 case RISCVISD::VWSUBU_W_VL:
13855 return RISCVISD::VWSUBU_VL;
13856 case ISD::MUL:
13857 case RISCVISD::MUL_VL:
13858 return RISCVISD::VWMULU_VL;
13859 case ISD::SHL:
13860 case RISCVISD::SHL_VL:
13861 return RISCVISD::VWSLL_VL;
13862 default:
13863 llvm_unreachable("Unexpected opcode");
13864 }
13865 }
13866
13867 /// Get the opcode to materialize:
13868 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13869 static unsigned getFPExtOpcode(unsigned Opcode) {
13870 switch (Opcode) {
13871 case RISCVISD::FADD_VL:
13872 case RISCVISD::VFWADD_W_VL:
13873 return RISCVISD::VFWADD_VL;
13874 case RISCVISD::FSUB_VL:
13875 case RISCVISD::VFWSUB_W_VL:
13876 return RISCVISD::VFWSUB_VL;
13877 case RISCVISD::FMUL_VL:
13878 return RISCVISD::VFWMUL_VL;
13879 default:
13880 llvm_unreachable("Unexpected opcode");
13881 }
13882 }
13883
13884 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13885 /// newOpcode(a, b).
13886 static unsigned getSUOpcode(unsigned Opcode) {
13887 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13888 "SU is only supported for MUL");
13889 return RISCVISD::VWMULSU_VL;
13890 }
13891
13892 /// Get the opcode to materialize
13893 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13894 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13895 switch (Opcode) {
13896 case ISD::ADD:
13897 case RISCVISD::ADD_VL:
13898 case ISD::OR:
13899 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13900 : RISCVISD::VWADDU_W_VL;
13901 case ISD::SUB:
13902 case RISCVISD::SUB_VL:
13903 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13904 : RISCVISD::VWSUBU_W_VL;
13905 case RISCVISD::FADD_VL:
13906 return RISCVISD::VFWADD_W_VL;
13907 case RISCVISD::FSUB_VL:
13908 return RISCVISD::VFWSUB_W_VL;
13909 default:
13910 llvm_unreachable("Unexpected opcode");
13911 }
13912 }
13913
13914 using CombineToTry = std::function<std::optional<CombineResult>(
13915 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13916 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13917 const RISCVSubtarget &)>;
13918
13919 /// Check if this node needs to be fully folded or extended for all users.
13920 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13921
13922 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13923 const RISCVSubtarget &Subtarget) {
13924 unsigned Opc = OrigOperand.getOpcode();
13925 MVT VT = OrigOperand.getSimpleValueType();
13926
13927 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13928 "Unexpected Opcode");
13929
13930 // The passthru must be undef for tail agnostic.
13931 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
13932 return;
13933
13934 // Get the scalar value.
13935 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
13936 : OrigOperand.getOperand(1);
13937
13938 // See if we have enough sign bits or zero bits in the scalar to use a
13939 // widening opcode by splatting to smaller element size.
13940 unsigned EltBits = VT.getScalarSizeInBits();
13941 unsigned ScalarBits = Op.getValueSizeInBits();
13942 // Make sure we're getting all element bits from the scalar register.
13943 // FIXME: Support implicit sign extension of vmv.v.x?
13944 if (ScalarBits < EltBits)
13945 return;
13946
13947 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13948 // If the narrow type cannot be expressed with a legal VMV,
13949 // this is not a valid candidate.
13950 if (NarrowSize < 8)
13951 return;
13952
13953 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13954 SupportsSExt = true;
13955
13956 if (DAG.MaskedValueIsZero(Op,
13957 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13958 SupportsZExt = true;
13959
13960 EnforceOneUse = false;
13961 }
13962
13963 /// Helper method to set the various fields of this struct based on the
13964 /// type of \p Root.
13965 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13966 const RISCVSubtarget &Subtarget) {
13967 SupportsZExt = false;
13968 SupportsSExt = false;
13969 SupportsFPExt = false;
13970 EnforceOneUse = true;
13971 unsigned Opc = OrigOperand.getOpcode();
13972 // For the nodes we handle below, we end up using their inputs directly: see
13973 // getSource(). However since they either don't have a passthru or we check
13974 // that their passthru is undef, we can safely ignore their mask and VL.
13975 switch (Opc) {
13976 case ISD::ZERO_EXTEND:
13977 case ISD::SIGN_EXTEND: {
13978 MVT VT = OrigOperand.getSimpleValueType();
13979 if (!VT.isVector())
13980 break;
13981
13982 SDValue NarrowElt = OrigOperand.getOperand(0);
13983 MVT NarrowVT = NarrowElt.getSimpleValueType();
13984 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
13985 if (NarrowVT.getVectorElementType() == MVT::i1)
13986 break;
13987
13988 SupportsZExt = Opc == ISD::ZERO_EXTEND;
13989 SupportsSExt = Opc == ISD::SIGN_EXTEND;
13990 break;
13991 }
13992 case RISCVISD::VZEXT_VL:
13993 SupportsZExt = true;
13994 break;
13995 case RISCVISD::VSEXT_VL:
13996 SupportsSExt = true;
13997 break;
13998 case RISCVISD::FP_EXTEND_VL:
13999 SupportsFPExt = true;
14000 break;
14001 case ISD::SPLAT_VECTOR:
14002 case RISCVISD::VMV_V_X_VL:
14003 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14004 break;
14005 default:
14006 break;
14007 }
14008 }
14009
14010 /// Check if \p Root supports any extension folding combines.
14011 static bool isSupportedRoot(const SDNode *Root,
14012 const RISCVSubtarget &Subtarget) {
14013 switch (Root->getOpcode()) {
14014 case ISD::ADD:
14015 case ISD::SUB:
14016 case ISD::MUL: {
14017 return Root->getValueType(0).isScalableVector();
14018 }
14019 case ISD::OR: {
14020 return Root->getValueType(0).isScalableVector() &&
14021 Root->getFlags().hasDisjoint();
14022 }
14023 // Vector Widening Integer Add/Sub/Mul Instructions
14024 case RISCVISD::ADD_VL:
14025 case RISCVISD::MUL_VL:
14026 case RISCVISD::VWADD_W_VL:
14027 case RISCVISD::VWADDU_W_VL:
14028 case RISCVISD::SUB_VL:
14029 case RISCVISD::VWSUB_W_VL:
14030 case RISCVISD::VWSUBU_W_VL:
14031 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14032 case RISCVISD::FADD_VL:
14033 case RISCVISD::FSUB_VL:
14034 case RISCVISD::FMUL_VL:
14035 case RISCVISD::VFWADD_W_VL:
14036 case RISCVISD::VFWSUB_W_VL:
14037 return true;
14038 case ISD::SHL:
14039 return Root->getValueType(0).isScalableVector() &&
14040 Subtarget.hasStdExtZvbb();
14041 case RISCVISD::SHL_VL:
14042 return Subtarget.hasStdExtZvbb();
14043 default:
14044 return false;
14045 }
14046 }
14047
14048 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14049 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14050 const RISCVSubtarget &Subtarget) {
14051 assert(isSupportedRoot(Root, Subtarget) &&
14052 "Trying to build a helper with an "
14053 "unsupported root");
14054 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14056 OrigOperand = Root->getOperand(OperandIdx);
14057
14058 unsigned Opc = Root->getOpcode();
14059 switch (Opc) {
14060 // We consider
14061 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14062 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14063 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14064 case RISCVISD::VWADD_W_VL:
14065 case RISCVISD::VWADDU_W_VL:
14066 case RISCVISD::VWSUB_W_VL:
14067 case RISCVISD::VWSUBU_W_VL:
14068 case RISCVISD::VFWADD_W_VL:
14069 case RISCVISD::VFWSUB_W_VL:
14070 if (OperandIdx == 1) {
14071 SupportsZExt =
14072 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14073 SupportsSExt =
14074 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14075 SupportsFPExt =
14076 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14077 // There's no existing extension here, so we don't have to worry about
14078 // making sure it gets removed.
14079 EnforceOneUse = false;
14080 break;
14081 }
14082 [[fallthrough]];
14083 default:
14084 fillUpExtensionSupport(Root, DAG, Subtarget);
14085 break;
14086 }
14087 }
14088
14089 /// Helper function to get the Mask and VL from \p Root.
14090 static std::pair<SDValue, SDValue>
14091 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14092 const RISCVSubtarget &Subtarget) {
14093 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14094 switch (Root->getOpcode()) {
14095 case ISD::ADD:
14096 case ISD::SUB:
14097 case ISD::MUL:
14098 case ISD::OR:
14099 case ISD::SHL: {
14100 SDLoc DL(Root);
14101 MVT VT = Root->getSimpleValueType(0);
14102 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14103 }
14104 default:
14105 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14106 }
14107 }
14108
14109 /// Helper function to check if \p N is commutative with respect to the
14110 /// foldings that are supported by this class.
14111 static bool isCommutative(const SDNode *N) {
14112 switch (N->getOpcode()) {
14113 case ISD::ADD:
14114 case ISD::MUL:
14115 case ISD::OR:
14116 case RISCVISD::ADD_VL:
14117 case RISCVISD::MUL_VL:
14118 case RISCVISD::VWADD_W_VL:
14119 case RISCVISD::VWADDU_W_VL:
14120 case RISCVISD::FADD_VL:
14121 case RISCVISD::FMUL_VL:
14122 case RISCVISD::VFWADD_W_VL:
14123 return true;
14124 case ISD::SUB:
14125 case RISCVISD::SUB_VL:
14126 case RISCVISD::VWSUB_W_VL:
14127 case RISCVISD::VWSUBU_W_VL:
14128 case RISCVISD::FSUB_VL:
14129 case RISCVISD::VFWSUB_W_VL:
14130 case ISD::SHL:
14131 case RISCVISD::SHL_VL:
14132 return false;
14133 default:
14134 llvm_unreachable("Unexpected opcode");
14135 }
14136 }
14137
14138 /// Get a list of combines to try for folding extensions in \p Root.
14139 /// Note that each returned CombineToTry function doesn't actually modify
14140 /// anything. Instead they produce an optional CombineResult that, if not
14141 /// None, needs to be materialized for the combine to be applied.
14142 /// \see CombineResult::materialize.
14143 /// If the related CombineToTry function returns std::nullopt, that means the
14144 /// combine didn't match.
14145 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14146};
14147
14148/// Helper structure that holds all the necessary information to materialize a
14149/// combine that does some extension folding.
14150struct CombineResult {
14151 /// Opcode to be generated when materializing the combine.
14152 unsigned TargetOpcode;
14153 // No value means no extension is needed.
14154 std::optional<ExtKind> LHSExt;
14155 std::optional<ExtKind> RHSExt;
14156 /// Root of the combine.
14157 SDNode *Root;
14158 /// LHS of the TargetOpcode.
14159 NodeExtensionHelper LHS;
14160 /// RHS of the TargetOpcode.
14161 NodeExtensionHelper RHS;
14162
14163 CombineResult(unsigned TargetOpcode, SDNode *Root,
14164 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14165 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14166 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14167 LHS(LHS), RHS(RHS) {}
14168
14169 /// Return a value that uses TargetOpcode and that can be used to replace
14170 /// Root.
14171 /// The actual replacement is *not* done in that method.
14172 SDValue materialize(SelectionDAG &DAG,
14173 const RISCVSubtarget &Subtarget) const {
14174 SDValue Mask, VL, Merge;
14175 std::tie(Mask, VL) =
14176 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14177 switch (Root->getOpcode()) {
14178 default:
14179 Merge = Root->getOperand(2);
14180 break;
14181 case ISD::ADD:
14182 case ISD::SUB:
14183 case ISD::MUL:
14184 case ISD::OR:
14185 case ISD::SHL:
14186 Merge = DAG.getUNDEF(Root->getValueType(0));
14187 break;
14188 }
14189 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14190 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14191 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14192 Merge, Mask, VL);
14193 }
14194};
14195
14196/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14197/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14198/// are zext) and LHS and RHS can be folded into Root.
14199/// AllowExtMask defines which form `ext` can take in this pattern.
14200///
14201/// \note If the pattern can match with both zext and sext, the returned
14202/// CombineResult will feature the zext result.
14203///
14204/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14205/// can be used to apply the pattern.
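/// For example, if \p Root is an ADD_VL whose operands are both sign extended
/// from half-width elements, the returned CombineResult materializes VWADD_VL,
/// i.e. a single vwadd.vv instead of two extends plus a vadd.vv.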
14206static std::optional<CombineResult>
14207canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14208 const NodeExtensionHelper &RHS,
14209 uint8_t AllowExtMask, SelectionDAG &DAG,
14210 const RISCVSubtarget &Subtarget) {
14211 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14212 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14213 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14214 /*RHSExt=*/{ExtKind::ZExt});
14215 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14216 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14217 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14218 /*RHSExt=*/{ExtKind::SExt});
14219 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14220 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14221 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14222 /*RHSExt=*/{ExtKind::FPExt});
14223 return std::nullopt;
14224}
14225
14226/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14227/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14228/// are zext) and LHS and RHS can be folded into Root.
14229///
14230/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14231/// can be used to apply the pattern.
14232static std::optional<CombineResult>
14233canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14234 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14235 const RISCVSubtarget &Subtarget) {
14236 return canFoldToVWWithSameExtensionImpl(
14237 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14238 Subtarget);
14239}
14240
14241/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14242///
14243/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14244/// can be used to apply the pattern.
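/// For example, (add_vl y, (vsext_vl x)) where y is already at the wide
/// element type can become VWADD_W_VL, i.e. a vwadd.wv that consumes x at the
/// narrow width directly.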
14245static std::optional<CombineResult>
14246canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14247 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14248 const RISCVSubtarget &Subtarget) {
14249 if (RHS.SupportsFPExt)
14250 return CombineResult(
14251 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14252 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14253
14254 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14255 // sext/zext?
14256 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14257 // purposes.
14258 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14259 return CombineResult(
14260 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14261 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14262 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14263 return CombineResult(
14264 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14265 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14266 return std::nullopt;
14267}
14268
14269/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14270///
14271/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14272/// can be used to apply the pattern.
14273static std::optional<CombineResult>
14274canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14275 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14276 const RISCVSubtarget &Subtarget) {
14277 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14278 Subtarget);
14279}
14280
14281/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14282///
14283/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14284/// can be used to apply the pattern.
14285static std::optional<CombineResult>
14286canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14287 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14288 const RISCVSubtarget &Subtarget) {
14289 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14290 Subtarget);
14291}
14292
14293/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14294///
14295/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14296/// can be used to apply the pattern.
14297static std::optional<CombineResult>
14298canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14299 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14300 const RISCVSubtarget &Subtarget) {
14301 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14302 Subtarget);
14303}
14304
14305/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14306///
14307/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14308/// can be used to apply the pattern.
14309static std::optional<CombineResult>
14310canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14311 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14312 const RISCVSubtarget &Subtarget) {
14313
14314 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14315 return std::nullopt;
14316 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14317 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14318 /*RHSExt=*/{ExtKind::ZExt});
14319}
14320
14321SmallVector<NodeExtensionHelper::CombineToTry>
14322NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14323 SmallVector<CombineToTry> Strategies;
14324 switch (Root->getOpcode()) {
14325 case ISD::ADD:
14326 case ISD::SUB:
14327 case ISD::OR:
14328 case RISCVISD::ADD_VL:
14329 case RISCVISD::SUB_VL:
14330 case RISCVISD::FADD_VL:
14331 case RISCVISD::FSUB_VL:
14332 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14333 Strategies.push_back(canFoldToVWWithSameExtension);
14334 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14335 Strategies.push_back(canFoldToVW_W);
14336 break;
14337 case RISCVISD::FMUL_VL:
14338 Strategies.push_back(canFoldToVWWithSameExtension);
14339 break;
14340 case ISD::MUL:
14341 case RISCVISD::MUL_VL:
14342 // mul -> vwmul(u)
14343 Strategies.push_back(canFoldToVWWithSameExtension);
14344 // mul -> vwmulsu
14345 Strategies.push_back(canFoldToVW_SU);
14346 break;
14347 case ISD::SHL:
14348 case RISCVISD::SHL_VL:
14349 // shl -> vwsll
14350 Strategies.push_back(canFoldToVWWithZEXT);
14351 break;
14352 case RISCVISD::VWADD_W_VL:
14353 case RISCVISD::VWSUB_W_VL:
14354 // vwadd_w|vwsub_w -> vwadd|vwsub
14355 Strategies.push_back(canFoldToVWWithSEXT);
14356 break;
14357 case RISCVISD::VWADDU_W_VL:
14358 case RISCVISD::VWSUBU_W_VL:
14359 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14360 Strategies.push_back(canFoldToVWWithZEXT);
14361 break;
14362 case RISCVISD::VFWADD_W_VL:
14363 case RISCVISD::VFWSUB_W_VL:
14364 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14365 Strategies.push_back(canFoldToVWWithFPEXT);
14366 break;
14367 default:
14368 llvm_unreachable("Unexpected opcode");
14369 }
14370 return Strategies;
14371}
14372} // End anonymous namespace.
14373
14374/// Combine a binary operation to its equivalent VW or VW_W form.
14375/// The supported combines are:
14376/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14377/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14378/// mul | mul_vl -> vwmul(u) | vwmul_su
14379/// shl | shl_vl -> vwsll
14380/// fadd_vl -> vfwadd | vfwadd_w
14381/// fsub_vl -> vfwsub | vfwsub_w
14382/// fmul_vl -> vfwmul
14383/// vwadd_w(u) -> vwadd(u)
14384/// vwsub_w(u) -> vwsub(u)
14385/// vfwadd_w -> vfwadd
14386/// vfwsub_w -> vfwsub
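/// For example, (add (sext v), (sext w)) on scalable vectors becomes VWADD_VL
/// operating on the narrow sources, so two vsext.vf2 plus a vadd.vv collapse
/// into a single vwadd.vv.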
14387static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14388 TargetLowering::DAGCombinerInfo &DCI,
14389 const RISCVSubtarget &Subtarget) {
14390 SelectionDAG &DAG = DCI.DAG;
14391 if (DCI.isBeforeLegalize())
14392 return SDValue();
14393
14394 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14395 return SDValue();
14396
14397 SmallVector<SDNode *> Worklist;
14398 SmallSet<SDNode *, 8> Inserted;
14399 Worklist.push_back(N);
14400 Inserted.insert(N);
14401 SmallVector<CombineResult> CombinesToApply;
14402
14403 while (!Worklist.empty()) {
14404 SDNode *Root = Worklist.pop_back_val();
14405 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14406 return SDValue();
14407
14408 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14409 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14410 auto AppendUsersIfNeeded = [&Worklist,
14411 &Inserted](const NodeExtensionHelper &Op) {
14412 if (Op.needToPromoteOtherUsers()) {
14413 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14414 if (Inserted.insert(TheUse).second)
14415 Worklist.push_back(TheUse);
14416 }
14417 }
14418 };
14419
14420 // Control the compile time by limiting the number of nodes we look at in
14421 // total.
14422 if (Inserted.size() > ExtensionMaxWebSize)
14423 return SDValue();
14424
14425 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14426 NodeExtensionHelper::getSupportedFoldings(N);
14427
14428 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14429 bool Matched = false;
14430 for (int Attempt = 0;
14431 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14432 ++Attempt) {
14433
14434 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14435 FoldingStrategies) {
14436 std::optional<CombineResult> Res =
14437 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14438 if (Res) {
14439 Matched = true;
14440 CombinesToApply.push_back(*Res);
14441 // All the inputs that are extended need to be folded, otherwise
14442 // we would be leaving the old input (since it may still be used),
14443 // and the new one.
14444 if (Res->LHSExt.has_value())
14445 AppendUsersIfNeeded(LHS);
14446 if (Res->RHSExt.has_value())
14447 AppendUsersIfNeeded(RHS);
14448 break;
14449 }
14450 }
14451 std::swap(LHS, RHS);
14452 }
14453 // Right now we take an all-or-nothing approach.
14454 if (!Matched)
14455 return SDValue();
14456 }
14457 // Store the value for the replacement of the input node separately.
14458 SDValue InputRootReplacement;
14459 // We do the RAUW after we materialize all the combines, because some replaced
14460 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14461 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14462 // yet-to-be-visited CombinesToApply roots.
14463 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14464 ValuesToReplace.reserve(CombinesToApply.size());
14465 for (CombineResult Res : CombinesToApply) {
14466 SDValue NewValue = Res.materialize(DAG, Subtarget);
14467 if (!InputRootReplacement) {
14468 assert(Res.Root == N &&
14469 "First element is expected to be the current node");
14470 InputRootReplacement = NewValue;
14471 } else {
14472 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14473 }
14474 }
14475 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14476 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14477 DCI.AddToWorklist(OldNewValues.second.getNode());
14478 }
14479 return InputRootReplacement;
14480}
14481
14482// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14483// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14484// y will be the Passthru and cond will be the Mask.
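// For example, (vwadd.wv y, (vselect cond, x, 0)) becomes a masked vwadd.wv
// with x as the narrow source, cond as the mask and y as the passthru: the
// masked-off lanes would only have added zero anyway.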
14485static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14486 unsigned Opc = N->getOpcode();
14487 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14488 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14489
14490 SDValue Y = N->getOperand(0);
14491 SDValue MergeOp = N->getOperand(1);
14492 unsigned MergeOpc = MergeOp.getOpcode();
14493
14494 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14495 return SDValue();
14496
14497 SDValue X = MergeOp->getOperand(1);
14498
14499 if (!MergeOp.hasOneUse())
14500 return SDValue();
14501
14502 // Passthru should be undef
14503 SDValue Passthru = N->getOperand(2);
14504 if (!Passthru.isUndef())
14505 return SDValue();
14506
14507 // Mask should be all ones
14508 SDValue Mask = N->getOperand(3);
14509 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14510 return SDValue();
14511
14512 // False value of MergeOp should be all zeros
14513 SDValue Z = MergeOp->getOperand(2);
14514
14515 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14516 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14517 Z = Z.getOperand(1);
14518
14519 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14520 return SDValue();
14521
14522 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14523 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14524 N->getFlags());
14525}
14526
14527static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14528 TargetLowering::DAGCombinerInfo &DCI,
14529 const RISCVSubtarget &Subtarget) {
14530 [[maybe_unused]] unsigned Opc = N->getOpcode();
14531 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14532 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14533
14534 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14535 return V;
14536
14537 return combineVWADDSUBWSelect(N, DCI.DAG);
14538}
14539
14540// Helper function for performMemPairCombine.
14541// Try to combine the memory loads/stores LSNode1 and LSNode2
14542// into a single memory pair operation.
14543static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14544 LSBaseSDNode *LSNode2, SDValue BasePtr,
14545 uint64_t Imm) {
14546 SmallPtrSet<const SDNode *, 32> Visited;
14547 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14548
14549 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14550 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14551 return SDValue();
14552
14553 MachineFunction &MF = DAG.getMachineFunction();
14554 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14555
14556 // The new operation has twice the width.
14557 MVT XLenVT = Subtarget.getXLenVT();
14558 EVT MemVT = LSNode1->getMemoryVT();
14559 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14560 MachineMemOperand *MMO = LSNode1->getMemOperand();
14561 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14562 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14563
14564 if (LSNode1->getOpcode() == ISD::LOAD) {
14565 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14566 unsigned Opcode;
14567 if (MemVT == MVT::i32)
14568 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14569 else
14570 Opcode = RISCVISD::TH_LDD;
14571
14572 SDValue Res = DAG.getMemIntrinsicNode(
14573 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14574 {LSNode1->getChain(), BasePtr,
14575 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14576 NewMemVT, NewMMO);
14577
14578 SDValue Node1 =
14579 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14580 SDValue Node2 =
14581 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14582
14583 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14584 return Node1;
14585 } else {
14586 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14587
14588 SDValue Res = DAG.getMemIntrinsicNode(
14589 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14590 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14591 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14592 NewMemVT, NewMMO);
14593
14594 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14595 return Res;
14596 }
14597}
14598
14599// Try to combine two adjacent loads/stores to a single pair instruction from
14600// the XTHeadMemPair vendor extension.
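// For example, two simple i32 loads from p and p+4 whose base offset fits the
// shifted 2-bit immediate checked below can be merged into one TH_LWD node,
// which XTHeadMemPair encodes as a single th.lwd writing both destinations.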
14601static SDValue performMemPairCombine(SDNode *N,
14602 TargetLowering::DAGCombinerInfo &DCI) {
14603 SelectionDAG &DAG = DCI.DAG;
14604 MachineFunction &MF = DAG.getMachineFunction();
14605 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14606
14607 // Target does not support load/store pair.
14608 if (!Subtarget.hasVendorXTHeadMemPair())
14609 return SDValue();
14610
14611 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14612 EVT MemVT = LSNode1->getMemoryVT();
14613 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14614
14615 // No volatile, indexed or atomic loads/stores.
14616 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14617 return SDValue();
14618
14619 // Function to get a base + constant representation from a memory value.
14620 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14621 if (Ptr->getOpcode() == ISD::ADD)
14622 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14623 return {Ptr->getOperand(0), C1->getZExtValue()};
14624 return {Ptr, 0};
14625 };
14626
14627 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14628
14629 SDValue Chain = N->getOperand(0);
14630 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14631 UI != UE; ++UI) {
14632 SDUse &Use = UI.getUse();
14633 if (Use.getUser() != N && Use.getResNo() == 0 &&
14634 Use.getUser()->getOpcode() == N->getOpcode()) {
14635 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14636
14637 // No volatile, indexed or atomic loads/stores.
14638 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14639 continue;
14640
14641 // Check if LSNode1 and LSNode2 have the same type and extension.
14642 if (LSNode1->getOpcode() == ISD::LOAD)
14643 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14644 cast<LoadSDNode>(LSNode1)->getExtensionType())
14645 continue;
14646
14647 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14648 continue;
14649
14650 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14651
14652 // Check if the base pointer is the same for both instructions.
14653 if (Base1 != Base2)
14654 continue;
14655
14656 // Check if the offsets match the XTHeadMemPair encoding constraints.
14657 bool Valid = false;
14658 if (MemVT == MVT::i32) {
14659 // Check for adjacent i32 values and a 2-bit index.
14660 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14661 Valid = true;
14662 } else if (MemVT == MVT::i64) {
14663 // Check for adjacent i64 values and a 2-bit index.
14664 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14665 Valid = true;
14666 }
14667
14668 if (!Valid)
14669 continue;
14670
14671 // Try to combine.
14672 if (SDValue Res =
14673 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14674 return Res;
14675 }
14676 }
14677
14678 return SDValue();
14679}
14680
14681// Fold
14682// (fp_to_int (froundeven X)) -> fcvt X, rne
14683// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14684// (fp_to_int (ffloor X)) -> fcvt X, rdn
14685// (fp_to_int (fceil X)) -> fcvt X, rup
14686// (fp_to_int (fround X)) -> fcvt X, rmm
14687// (fp_to_int (frint X)) -> fcvt X
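// For example, on RV64 with the D extension, (fp_to_sint (ffloor X:f64))
// becomes FCVT_X with the rdn rounding mode, i.e. a single fcvt.l.d with
// static rounding instead of a separate floor step.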
14688static SDValue performFP_TO_INTCombine(SDNode *N,
14689 TargetLowering::DAGCombinerInfo &DCI,
14690 const RISCVSubtarget &Subtarget) {
14691 SelectionDAG &DAG = DCI.DAG;
14692 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14693 MVT XLenVT = Subtarget.getXLenVT();
14694
14695 SDValue Src = N->getOperand(0);
14696
14697 // Don't do this for strict-fp Src.
14698 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14699 return SDValue();
14700
14701 // Ensure the FP type is legal.
14702 if (!TLI.isTypeLegal(Src.getValueType()))
14703 return SDValue();
14704
14705 // Don't do this for f16 with Zfhmin and not Zfh.
14706 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14707 return SDValue();
14708
14709 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14710 // If the result is invalid, we didn't find a foldable instruction.
14711 if (FRM == RISCVFPRndMode::Invalid)
14712 return SDValue();
14713
14714 SDLoc DL(N);
14715 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14716 EVT VT = N->getValueType(0);
14717
14718 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14719 MVT SrcVT = Src.getSimpleValueType();
14720 MVT SrcContainerVT = SrcVT;
14721 MVT ContainerVT = VT.getSimpleVT();
14722 SDValue XVal = Src.getOperand(0);
14723
14724 // For widening and narrowing conversions we just combine it into a
14725 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14726 // end up getting lowered to their appropriate pseudo instructions based on
14727 // their operand types
14728 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14729 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14730 return SDValue();
14731
14732 // Make fixed-length vectors scalable first
14733 if (SrcVT.isFixedLengthVector()) {
14734 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14735 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14736 ContainerVT =
14737 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14738 }
14739
14740 auto [Mask, VL] =
14741 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14742
14743 SDValue FpToInt;
14744 if (FRM == RISCVFPRndMode::RTZ) {
14745 // Use the dedicated trunc static rounding mode if we're truncating so we
14746 // don't need to generate calls to fsrmi/fsrm
14747 unsigned Opc =
14748 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14749 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14750 } else if (FRM == RISCVFPRndMode::DYN) {
14751 unsigned Opc =
14752 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14753 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14754 } else {
14755 unsigned Opc =
14756 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14757 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14758 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14759 }
14760
14761 // If converted from fixed-length to scalable, convert back
14762 if (VT.isFixedLengthVector())
14763 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14764
14765 return FpToInt;
14766 }
14767
14768 // Only handle XLen or i32 types. Other types narrower than XLen will
14769 // eventually be legalized to XLenVT.
14770 if (VT != MVT::i32 && VT != XLenVT)
14771 return SDValue();
14772
14773 unsigned Opc;
14774 if (VT == XLenVT)
14775 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14776 else
14777 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14778
14779 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14780 DAG.getTargetConstant(FRM, DL, XLenVT));
14781 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14782}
14783
14784// Fold
14785// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14786// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14787// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14788// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14789// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14790// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14791static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14792 TargetLowering::DAGCombinerInfo &DCI,
14793 const RISCVSubtarget &Subtarget) {
14794 SelectionDAG &DAG = DCI.DAG;
14795 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14796 MVT XLenVT = Subtarget.getXLenVT();
14797
14798 // Only handle XLen types. Other types narrower than XLen will eventually be
14799 // legalized to XLenVT.
14800 EVT DstVT = N->getValueType(0);
14801 if (DstVT != XLenVT)
14802 return SDValue();
14803
14804 SDValue Src = N->getOperand(0);
14805
14806 // Don't do this for strict-fp Src.
14807 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14808 return SDValue();
14809
14810 // Ensure the FP type is also legal.
14811 if (!TLI.isTypeLegal(Src.getValueType()))
14812 return SDValue();
14813
14814 // Don't do this for f16 with Zfhmin and not Zfh.
14815 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14816 return SDValue();
14817
14818 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14819
14820 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14821 if (FRM == RISCVFPRndMode::Invalid)
14822 return SDValue();
14823
14824 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14825
14826 unsigned Opc;
14827 if (SatVT == DstVT)
14828 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14829 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14830 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14831 else
14832 return SDValue();
14833 // FIXME: Support other SatVTs by clamping before or after the conversion.
14834
14835 Src = Src.getOperand(0);
14836
14837 SDLoc DL(N);
14838 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
14839 DAG.getTargetConstant(FRM, DL, XLenVT));
14840
14841 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14842 // extend.
14843 if (Opc == RISCVISD::FCVT_WU_RV64)
14844 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14845
14846 // RISC-V FP-to-int conversions saturate to the destination register size, but
14847 // don't produce 0 for nan.
14848 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14849 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14850}
14851
14852// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14853// smaller than XLenVT.
14854static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14855 const RISCVSubtarget &Subtarget) {
14856 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14857
14858 SDValue Src = N->getOperand(0);
14859 if (Src.getOpcode() != ISD::BSWAP)
14860 return SDValue();
14861
14862 EVT VT = N->getValueType(0);
14863 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14864 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14865 return SDValue();
14866
14867 SDLoc DL(N);
14868 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14869}
14870
14871// Convert from one FMA opcode to another based on whether we are negating the
14872// multiply result and/or the accumulator.
14873// NOTE: Only supports RVV operations with VL.
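// For example, negating the multiply result of VFMADD_VL (a*b + c) gives
// VFNMSUB_VL (-(a*b) + c); negating the accumulator as well gives
// VFNMADD_VL (-(a*b) - c).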
14874static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14875 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14876 if (NegMul) {
14877 // clang-format off
14878 switch (Opcode) {
14879 default: llvm_unreachable("Unexpected opcode");
14880 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14881 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14882 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14883 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14884 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14885 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14886 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14887 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14888 }
14889 // clang-format on
14890 }
14891
14892 // Negating the accumulator changes ADD<->SUB.
14893 if (NegAcc) {
14894 // clang-format off
14895 switch (Opcode) {
14896 default: llvm_unreachable("Unexpected opcode");
14897 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14898 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14899 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14900 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14901 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14902 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14903 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14904 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14905 }
14906 // clang-format on
14907 }
14908
14909 return Opcode;
14910}
14911
14912static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14913 // Fold FNEG_VL into FMA opcodes.
14914 // The first operand of strict-fp is chain.
14915 unsigned Offset = N->isTargetStrictFPOpcode();
14916 SDValue A = N->getOperand(0 + Offset);
14917 SDValue B = N->getOperand(1 + Offset);
14918 SDValue C = N->getOperand(2 + Offset);
14919 SDValue Mask = N->getOperand(3 + Offset);
14920 SDValue VL = N->getOperand(4 + Offset);
14921
14922 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14923 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
14924 V.getOperand(2) == VL) {
14925 // Return the negated input.
14926 V = V.getOperand(0);
14927 return true;
14928 }
14929
14930 return false;
14931 };
14932
14933 bool NegA = invertIfNegative(A);
14934 bool NegB = invertIfNegative(B);
14935 bool NegC = invertIfNegative(C);
14936
14937 // If no operands are negated, we're done.
14938 if (!NegA && !NegB && !NegC)
14939 return SDValue();
14940
14941 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
14942 if (N->isTargetStrictFPOpcode())
14943 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
14944 {N->getOperand(0), A, B, C, Mask, VL});
14945 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
14946 VL);
14947}
14948
14949static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14950 const RISCVSubtarget &Subtarget) {
14951 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14952 return V;
14953
14954 if (N->getValueType(0).isScalableVector() &&
14955 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14956 (Subtarget.hasVInstructionsF16Minimal() &&
14957 !Subtarget.hasVInstructionsF16())) {
14958 return SDValue();
14959 }
14960
14961 // FIXME: Ignore strict opcodes for now.
14962 if (N->isTargetStrictFPOpcode())
14963 return SDValue();
14964
14965 // Try to form widening FMA.
14966 SDValue Op0 = N->getOperand(0);
14967 SDValue Op1 = N->getOperand(1);
14968 SDValue Mask = N->getOperand(3);
14969 SDValue VL = N->getOperand(4);
14970
14971 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14972 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14973 return SDValue();
14974
14975 // TODO: Refactor to handle more complex cases similar to
14976 // combineBinOp_VLToVWBinOp_VL.
14977 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14978 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14979 return SDValue();
14980
14981 // Check the mask and VL are the same.
14982 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
14983 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
14984 return SDValue();
14985
14986 unsigned NewOpc;
14987 switch (N->getOpcode()) {
14988 default:
14989 llvm_unreachable("Unexpected opcode");
14990 case RISCVISD::VFMADD_VL:
14991 NewOpc = RISCVISD::VFWMADD_VL;
14992 break;
14993 case RISCVISD::VFNMSUB_VL:
14994 NewOpc = RISCVISD::VFWNMSUB_VL;
14995 break;
14996 case RISCVISD::VFNMADD_VL:
14997 NewOpc = RISCVISD::VFWNMADD_VL;
14998 break;
14999 case RISCVISD::VFMSUB_VL:
15000 NewOpc = RISCVISD::VFWMSUB_VL;
15001 break;
15002 }
15003
15004 Op0 = Op0.getOperand(0);
15005 Op1 = Op1.getOperand(0);
15006
15007 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15008 N->getOperand(2), Mask, VL);
15009}
15010
15011static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15012 const RISCVSubtarget &Subtarget) {
15013 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15014
15015 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15016 return SDValue();
15017
15018 if (!isa<ConstantSDNode>(N->getOperand(1)))
15019 return SDValue();
15020 uint64_t ShAmt = N->getConstantOperandVal(1);
15021 if (ShAmt > 32)
15022 return SDValue();
15023
15024 SDValue N0 = N->getOperand(0);
15025
15026 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15027 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15028 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
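// For example, (sra (sext_inreg (shl X, 3), i32), 5) becomes
// (sra (shl X, 35), 37), which selects to slli+srai (both compressible)
// instead of slliw+sraiw.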
15029 if (ShAmt < 32 &&
15030 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15031 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15032 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15033 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15034 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15035 if (LShAmt < 32) {
15036 SDLoc ShlDL(N0.getOperand(0));
15037 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15038 N0.getOperand(0).getOperand(0),
15039 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15040 SDLoc DL(N);
15041 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15042 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15043 }
15044 }
15045
15046 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15047 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15048 //
15049 // Also try these folds where an add or sub is in the middle.
15050 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15051 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
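// For example, (sra (shl X, 32), 29) becomes (shl (sext_inreg X, i32), 3);
// when the shift amount is exactly 32, only the sext_inreg remains.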
15052 SDValue Shl;
15053 ConstantSDNode *AddC = nullptr;
15054
15055 // We might have an ADD or SUB between the SRA and SHL.
15056 bool IsAdd = N0.getOpcode() == ISD::ADD;
15057 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15058 // Other operand needs to be a constant we can modify.
15059 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15060 if (!AddC)
15061 return SDValue();
15062
15063 // AddC needs to have at least 32 trailing zeros.
15064 if (AddC->getAPIntValue().countr_zero() < 32)
15065 return SDValue();
15066
15067 // All users should be a shift by constant less than or equal to 32. This
15068 // ensures we'll do this optimization for each of them to produce an
15069 // add/sub+sext_inreg they can all share.
15070 for (SDNode *U : N0->uses()) {
15071 if (U->getOpcode() != ISD::SRA ||
15072 !isa<ConstantSDNode>(U->getOperand(1)) ||
15073 U->getConstantOperandVal(1) > 32)
15074 return SDValue();
15075 }
15076
15077 Shl = N0.getOperand(IsAdd ? 0 : 1);
15078 } else {
15079 // Not an ADD or SUB.
15080 Shl = N0;
15081 }
15082
15083 // Look for a shift left by 32.
15084 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15085 Shl.getConstantOperandVal(1) != 32)
15086 return SDValue();
15087
15088 // If we didn't look through an add/sub, then the shl should have one use.
15089 // If we did look through an add/sub, the sext_inreg we create is free so
15090 // we're only creating 2 new instructions. It's enough to only remove the
15091 // original sra+add/sub.
15092 if (!AddC && !Shl.hasOneUse())
15093 return SDValue();
15094
15095 SDLoc DL(N);
15096 SDValue In = Shl.getOperand(0);
15097
15098 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15099 // constant.
15100 if (AddC) {
15101 SDValue ShiftedAddC =
15102 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15103 if (IsAdd)
15104 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15105 else
15106 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15107 }
15108
15109 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15110 DAG.getValueType(MVT::i32));
15111 if (ShAmt == 32)
15112 return SExt;
15113
15114 return DAG.getNode(
15115 ISD::SHL, DL, MVT::i64, SExt,
15116 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15117}
15118
15119// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
15120// the result is used as the condition of a br_cc or select_cc we can invert,
15121// inverting the setcc is free, and Z is 0/1. Caller will invert the
15122// br_cc/select_cc.
15123static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15124 bool IsAnd = Cond.getOpcode() == ISD::AND;
15125 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15126 return SDValue();
15127
15128 if (!Cond.hasOneUse())
15129 return SDValue();
15130
15131 SDValue Setcc = Cond.getOperand(0);
15132 SDValue Xor = Cond.getOperand(1);
15133 // Canonicalize setcc to LHS.
15134 if (Setcc.getOpcode() != ISD::SETCC)
15135 std::swap(Setcc, Xor);
15136 // LHS should be a setcc and RHS should be an xor.
15137 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15138 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15139 return SDValue();
15140
15141 // If the condition is an And, SimplifyDemandedBits may have changed
15142 // (xor Z, 1) to (not Z).
15143 SDValue Xor1 = Xor.getOperand(1);
15144 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15145 return SDValue();
15146
15147 EVT VT = Cond.getValueType();
15148 SDValue Xor0 = Xor.getOperand(0);
15149
15150 // The LHS of the xor needs to be 0/1.
15151 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15152 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15153 return SDValue();
15154
15155 // We can only invert integer setccs.
15156 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15157 if (!SetCCOpVT.isScalarInteger())
15158 return SDValue();
15159
15160 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15161 if (ISD::isIntEqualitySetCC(CCVal)) {
15162 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15163 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15164 Setcc.getOperand(1), CCVal);
15165 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15166 // Invert (setlt 0, X) by converting to (setlt X, 1).
15167 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15168 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15169 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15170 // Invert (setlt X, 1) by converting to (setlt 0, X).
15171 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15172 DAG.getConstant(0, SDLoc(Setcc), VT),
15173 Setcc.getOperand(0), CCVal);
15174 } else
15175 return SDValue();
15176
15177 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15178 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15179}
15180
15181// Perform common combines for BR_CC and SELECT_CC conditions.
15182static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15183 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15184 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15185
15186 // Since an arithmetic right shift always preserves the sign bit, the shift
15187 // can be omitted when comparing against zero.
15188 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15189 // setge (sra X, N), 0 -> setge X, 0
15190 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15191 LHS.getOpcode() == ISD::SRA) {
15192 LHS = LHS.getOperand(0);
15193 return true;
15194 }
15195
15196 if (!ISD::isIntEqualitySetCC(CCVal))
15197 return false;
15198
15199 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15200 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15201 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15202 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15203 // If we're looking for eq 0 instead of ne 0, we need to invert the
15204 // condition.
15205 bool Invert = CCVal == ISD::SETEQ;
15206 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15207 if (Invert)
15208 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15209
15210 RHS = LHS.getOperand(1);
15211 LHS = LHS.getOperand(0);
15212 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15213
15214 CC = DAG.getCondCode(CCVal);
15215 return true;
15216 }
15217
15218 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15219 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15220 RHS = LHS.getOperand(1);
15221 LHS = LHS.getOperand(0);
15222 return true;
15223 }
15224
15225 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15226 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15227 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15228 SDValue LHS0 = LHS.getOperand(0);
15229 if (LHS0.getOpcode() == ISD::AND &&
15230 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15231 uint64_t Mask = LHS0.getConstantOperandVal(1);
15232 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15233 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15234 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15235 CC = DAG.getCondCode(CCVal);
15236
15237 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15238 LHS = LHS0.getOperand(0);
15239 if (ShAmt != 0)
15240 LHS =
15241 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15242 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15243 return true;
15244 }
15245 }
15246 }
15247
15248 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15249 // This can occur when legalizing some floating point comparisons.
15250 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15251 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15252 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15253 CC = DAG.getCondCode(CCVal);
15254 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15255 return true;
15256 }
15257
15258 if (isNullConstant(RHS)) {
15259 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15260 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15261 CC = DAG.getCondCode(CCVal);
15262 LHS = NewCond;
15263 return true;
15264 }
15265 }
15266
15267 return false;
15268}
15269
15270// Fold
15271// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15272// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15273// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15274// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
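// The inner (select C, X, 0) is cheap when the target has a conditional-zero
// instruction (e.g. czero.eqz from Zicond), so the rewritten pattern is
// branchless.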
15275static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15276 SDValue TrueVal, SDValue FalseVal,
15277 bool Swapped) {
15278 bool Commutative = true;
15279 unsigned Opc = TrueVal.getOpcode();
15280 switch (Opc) {
15281 default:
15282 return SDValue();
15283 case ISD::SHL:
15284 case ISD::SRA:
15285 case ISD::SRL:
15286 case ISD::SUB:
15287 Commutative = false;
15288 break;
15289 case ISD::ADD:
15290 case ISD::OR:
15291 case ISD::XOR:
15292 break;
15293 }
15294
15295 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15296 return SDValue();
15297
15298 unsigned OpToFold;
15299 if (FalseVal == TrueVal.getOperand(0))
15300 OpToFold = 0;
15301 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15302 OpToFold = 1;
15303 else
15304 return SDValue();
15305
15306 EVT VT = N->getValueType(0);
15307 SDLoc DL(N);
15308 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15309 EVT OtherOpVT = OtherOp->getValueType(0);
15310 SDValue IdentityOperand =
15311 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15312 if (!Commutative)
15313 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15314 assert(IdentityOperand && "No identity operand!");
15315
15316 if (Swapped)
15317 std::swap(OtherOp, IdentityOperand);
15318 SDValue NewSel =
15319 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15320 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15321}
15322
15323// This tries to get rid of `select` and `icmp` that are being used to handle
15324// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
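// For example, (select (seteq X, 0), 0, (cttz X)) becomes
// (and (cttz X), BitWidth - 1); ISD::CTTZ is defined to return BitWidth for a
// zero input, so the mask yields 0 exactly in that case.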
15325static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15326 SDValue Cond = N->getOperand(0);
15327
15328 // This represents either CTTZ or CTLZ instruction.
15329 SDValue CountZeroes;
15330
15331 SDValue ValOnZero;
15332
15333 if (Cond.getOpcode() != ISD::SETCC)
15334 return SDValue();
15335
15336 if (!isNullConstant(Cond->getOperand(1)))
15337 return SDValue();
15338
15339 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15340 if (CCVal == ISD::CondCode::SETEQ) {
15341 CountZeroes = N->getOperand(2);
15342 ValOnZero = N->getOperand(1);
15343 } else if (CCVal == ISD::CondCode::SETNE) {
15344 CountZeroes = N->getOperand(1);
15345 ValOnZero = N->getOperand(2);
15346 } else {
15347 return SDValue();
15348 }
15349
15350 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15351 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15352 CountZeroes = CountZeroes.getOperand(0);
15353
15354 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15355 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15356 CountZeroes.getOpcode() != ISD::CTLZ &&
15357 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15358 return SDValue();
15359
15360 if (!isNullConstant(ValOnZero))
15361 return SDValue();
15362
15363 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15364 if (Cond->getOperand(0) != CountZeroesArgument)
15365 return SDValue();
15366
15367 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15368 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15369 CountZeroes.getValueType(), CountZeroesArgument);
15370 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15371 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15372 CountZeroes.getValueType(), CountZeroesArgument);
15373 }
15374
15375 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15376 SDValue BitWidthMinusOne =
15377 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15378
15379 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15380 CountZeroes, BitWidthMinusOne);
15381 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15382}
15383
15384static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15385 const RISCVSubtarget &Subtarget) {
15386 SDValue Cond = N->getOperand(0);
15387 SDValue True = N->getOperand(1);
15388 SDValue False = N->getOperand(2);
15389 SDLoc DL(N);
15390 EVT VT = N->getValueType(0);
15391 EVT CondVT = Cond.getValueType();
15392
15393 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15394 return SDValue();
15395
15396 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
15397 // BEXTI, where C is a power of 2.
15398 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15399 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15400 SDValue LHS = Cond.getOperand(0);
15401 SDValue RHS = Cond.getOperand(1);
15402 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15403 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15404 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15405 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15406 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15407 return DAG.getSelect(DL, VT,
15408 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15409 False, True);
15410 }
15411 }
15412 return SDValue();
15413}
15414
15415static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15416 const RISCVSubtarget &Subtarget) {
15417 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15418 return Folded;
15419
15420 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15421 return V;
15422
15423 if (Subtarget.hasConditionalMoveFusion())
15424 return SDValue();
15425
15426 SDValue TrueVal = N->getOperand(1);
15427 SDValue FalseVal = N->getOperand(2);
15428 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15429 return V;
15430 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15431}
15432
15433/// If we have a build_vector where each lane is binop X, C, where C
15434/// is a constant (but not necessarily the same constant on all lanes),
15435/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15436/// We assume that materializing a constant build vector will be no more
15437/// expensive than performing O(n) binops.
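/// For example, (build_vector (add x, 1), (add y, 2)) becomes
/// (add (build_vector x, y), (build_vector 1, 2)).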
15438static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15439 const RISCVSubtarget &Subtarget,
15440 const RISCVTargetLowering &TLI) {
15441 SDLoc DL(N);
15442 EVT VT = N->getValueType(0);
15443
15444 assert(!VT.isScalableVector() && "unexpected build vector");
15445
15446 if (VT.getVectorNumElements() == 1)
15447 return SDValue();
15448
15449 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15450 if (!TLI.isBinOp(Opcode))
15451 return SDValue();
15452
15453 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15454 return SDValue();
15455
15456 // This BUILD_VECTOR involves an implicit truncation, and sinking
15457 // truncates through binops is non-trivial.
15458 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15459 return SDValue();
15460
15461 SmallVector<SDValue> LHSOps;
15462 SmallVector<SDValue> RHSOps;
15463 for (SDValue Op : N->ops()) {
15464 if (Op.isUndef()) {
15465 // We can't form a divide or remainder from undef.
15466 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15467 return SDValue();
15468
15469 LHSOps.push_back(Op);
15470 RHSOps.push_back(Op);
15471 continue;
15472 }
15473
15474 // TODO: We can handle operations which have a neutral rhs value
15475 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15476 // of profit in a more explicit manner.
15477 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15478 return SDValue();
15479
15480 LHSOps.push_back(Op.getOperand(0));
15481 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15482 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15483 return SDValue();
15484 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15485 // have different LHS and RHS types.
15486 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15487 return SDValue();
15488
15489 RHSOps.push_back(Op.getOperand(1));
15490 }
15491
15492 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15493 DAG.getBuildVector(VT, DL, RHSOps));
15494}
15495
15496static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15497 const RISCVSubtarget &Subtarget,
15498 const RISCVTargetLowering &TLI) {
15499 SDValue InVec = N->getOperand(0);
15500 SDValue InVal = N->getOperand(1);
15501 SDValue EltNo = N->getOperand(2);
15502 SDLoc DL(N);
15503
15504 EVT VT = InVec.getValueType();
15505 if (VT.isScalableVector())
15506 return SDValue();
15507
15508 if (!InVec.hasOneUse())
15509 return SDValue();
15510
15511 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15512 // move the insert_vector_elts into the arms of the binop. Note that
15513 // the new RHS must be a constant.
15514 const unsigned InVecOpcode = InVec->getOpcode();
15515 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15516 InVal.hasOneUse()) {
15517 SDValue InVecLHS = InVec->getOperand(0);
15518 SDValue InVecRHS = InVec->getOperand(1);
15519 SDValue InValLHS = InVal->getOperand(0);
15520 SDValue InValRHS = InVal->getOperand(1);
15521
15522 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15523 return SDValue();
15524 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15525 return SDValue();
15526 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15527 // have different LHS and RHS types.
15528 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15529 return SDValue();
15530 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15531 InVecLHS, InValLHS, EltNo);
15532 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15533 InVecRHS, InValRHS, EltNo);
15534 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15535 }
15536
15537 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15538 // move the insert_vector_elt to the source operand of the concat_vector.
15539 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15540 return SDValue();
15541
15542 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15543 if (!IndexC)
15544 return SDValue();
15545 unsigned Elt = IndexC->getZExtValue();
15546
15547 EVT ConcatVT = InVec.getOperand(0).getValueType();
15548 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15549 return SDValue();
15550 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15551 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15552
15553 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15554 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15555 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15556 ConcatOp, InVal, NewIdx);
15557
15558 SmallVector<SDValue> ConcatOps;
15559 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15560 ConcatOps[ConcatOpIdx] = ConcatOp;
15561 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15562}
15563
15564// If we're concatenating a series of vector loads like
15565// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15566// Then we can turn this into a strided load by widening the vector elements
15567// vlse32 p, stride=n
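// If the pointers step downwards instead (p, p-n, p-2n, ...), GetPtrDiff
// below matches the (add NextPtr, Stride) form and the stride is negated
// before the strided load is built.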
15568static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15569 const RISCVSubtarget &Subtarget,
15570 const RISCVTargetLowering &TLI) {
15571 SDLoc DL(N);
15572 EVT VT = N->getValueType(0);
15573
15574 // Only perform this combine on legal MVTs.
15575 if (!TLI.isTypeLegal(VT))
15576 return SDValue();
15577
15578 // TODO: Potentially extend this to scalable vectors
15579 if (VT.isScalableVector())
15580 return SDValue();
15581
15582 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15583 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15584 !SDValue(BaseLd, 0).hasOneUse())
15585 return SDValue();
15586
15587 EVT BaseLdVT = BaseLd->getValueType(0);
15588
15589 // Go through the loads and check that they're strided
15590 SmallVector<LoadSDNode *> Lds;
15591 Lds.push_back(BaseLd);
15592 Align Align = BaseLd->getAlign();
15593 for (SDValue Op : N->ops().drop_front()) {
15594 auto *Ld = dyn_cast<LoadSDNode>(Op);
15595 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15596 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15597 Ld->getValueType(0) != BaseLdVT)
15598 return SDValue();
15599
15600 Lds.push_back(Ld);
15601
15602 // The common alignment is the most restrictive (smallest) of all the loads
15603 Align = std::min(Align, Ld->getAlign());
15604 }
15605
15606 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15607 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15608 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15609 // If the load ptrs can be decomposed into a common (Base + Index) with a
15610 // common constant stride, then return the constant stride.
15611 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15612 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15613 if (BIO1.equalBaseIndex(BIO2, DAG))
15614 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15615
15616 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15617 SDValue P1 = Ld1->getBasePtr();
15618 SDValue P2 = Ld2->getBasePtr();
15619 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15620 return {{P2.getOperand(1), false}};
15621 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15622 return {{P1.getOperand(1), true}};
15623
15624 return std::nullopt;
15625 };
15626
15627 // Get the distance between the first and second loads
15628 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15629 if (!BaseDiff)
15630 return SDValue();
15631
15632 // Check all the loads are the same distance apart
15633 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15634 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15635 return SDValue();
15636
15637 // TODO: At this point, we've successfully matched a generalized gather
15638 // load. Maybe we should emit that, and then move the specialized
15639 // matchers above and below into a DAG combine?
15640
15641 // Get the widened scalar type, e.g. v4i8 -> i64
15642 unsigned WideScalarBitWidth =
15643 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15644 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15645
15646 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15647 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15648 if (!TLI.isTypeLegal(WideVecVT))
15649 return SDValue();
15650
15651 // Check that the operation is legal
15652 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15653 return SDValue();
15654
15655 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15656 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15657 ? std::get<SDValue>(StrideVariant)
15658 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15659 Lds[0]->getOffset().getValueType());
15660 if (MustNegateStride)
15661 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15662
15663 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15664 SDValue IntID =
15665 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15666 Subtarget.getXLenVT());
15667
15668 SDValue AllOneMask =
15669 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15670 DAG.getConstant(1, DL, MVT::i1));
15671
15672 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15673 BaseLd->getBasePtr(), Stride, AllOneMask};
15674
15675 uint64_t MemSize;
15676 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15677 ConstStride && ConstStride->getSExtValue() >= 0)
15678 // total size = (elsize * n) + (stride - elsize) * (n-1)
15679 // = elsize + stride * (n-1)
15680 MemSize = WideScalarVT.getSizeInBits() +
15681 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15682 else
15683 // If Stride isn't constant, then we can't know how much it will load
15684 MemSize = MemoryLocation::UnknownSize;
15685
15686 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15687 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15688 Align);
15689
15690 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15691 Ops, WideVecVT, MMO);
15692 for (SDValue Ld : N->ops())
15693 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15694
15695 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15696}
15697
15698 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15699 const RISCVSubtarget &Subtarget) {
15700
15701 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15702
15703 if (N->getValueType(0).isFixedLengthVector())
15704 return SDValue();
15705
15706 SDValue Addend = N->getOperand(0);
15707 SDValue MulOp = N->getOperand(1);
15708
15709 if (N->getOpcode() == RISCVISD::ADD_VL) {
15710 SDValue AddMergeOp = N->getOperand(2);
15711 if (!AddMergeOp.isUndef())
15712 return SDValue();
15713 }
15714
15715 auto IsVWMulOpc = [](unsigned Opc) {
15716 switch (Opc) {
15717 case RISCVISD::VWMUL_VL:
15718 case RISCVISD::VWMULU_VL:
15719 case RISCVISD::VWMULSU_VL:
15720 return true;
15721 default:
15722 return false;
15723 }
15724 };
15725
15726 if (!IsVWMulOpc(MulOp.getOpcode()))
15727 std::swap(Addend, MulOp);
15728
15729 if (!IsVWMulOpc(MulOp.getOpcode()))
15730 return SDValue();
15731
15732 SDValue MulMergeOp = MulOp.getOperand(2);
15733
15734 if (!MulMergeOp.isUndef())
15735 return SDValue();
15736
15737 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15738 const RISCVSubtarget &Subtarget) {
15739 if (N->getOpcode() == ISD::ADD) {
15740 SDLoc DL(N);
15741 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15742 Subtarget);
15743 }
15744 return std::make_pair(N->getOperand(3), N->getOperand(4));
15745 }(N, DAG, Subtarget);
15746
15747 SDValue MulMask = MulOp.getOperand(3);
15748 SDValue MulVL = MulOp.getOperand(4);
15749
15750 if (AddMask != MulMask || AddVL != MulVL)
15751 return SDValue();
15752
15753 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15754 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15755 "Unexpected opcode after VWMACC_VL");
15756 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15757 "Unexpected opcode after VWMACC_VL!");
15758 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15759 "Unexpected opcode after VWMUL_VL!");
15760 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15761 "Unexpected opcode after VWMUL_VL!");
15762
15763 SDLoc DL(N);
15764 EVT VT = N->getValueType(0);
15765 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15766 AddVL};
15767 return DAG.getNode(Opc, DL, VT, Ops);
15768}
15769
15770 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15771 ISD::MemIndexType &IndexType,
15772 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15773 if (!DCI.isBeforeLegalize())
15774 return false;
15775
15776 SelectionDAG &DAG = DCI.DAG;
15777 const MVT XLenVT =
15778 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15779
15780 const EVT IndexVT = Index.getValueType();
15781
15782 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15783 // mode, so anything else must be manually legalized.
15784 if (!isIndexTypeSigned(IndexType))
15785 return false;
15786
15787 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15788 // Any index legalization should first promote to XLenVT, so we don't lose
15789 // bits when scaling. This may create an illegal index type so we let
15790 // LLVM's legalization take care of the splitting.
15791 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15792 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15793 IndexVT.changeVectorElementType(XLenVT), Index);
15794 }
15795 IndexType = ISD::UNSIGNED_SCALED;
15796 return true;
15797}
15798
15799/// Match the index vector of a scatter or gather node as the shuffle mask
15800/// which performs the rearrangement if possible. Will only match if
15801/// all lanes are touched, and thus replacing the scatter or gather with
15802/// a unit strided access and shuffle is legal.
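/// For example, an index vector of byte offsets <4, 0, 12, 8> over i32
/// elements touches lanes <1, 0, 3, 2>, so the gather can be replaced by a
/// unit-strided load followed by that shuffle.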
15803 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15804 SmallVector<int> &ShuffleMask) {
15805 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15806 return false;
15807 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15808 return false;
15809
15810 const unsigned ElementSize = VT.getScalarStoreSize();
15811 const unsigned NumElems = VT.getVectorNumElements();
15812
15813 // Create the shuffle mask and check all bits active
15814 assert(ShuffleMask.empty());
15815 BitVector ActiveLanes(NumElems);
15816 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15817 // TODO: We've found an active bit of UB, and could be
15818 // more aggressive here if desired.
15819 if (Index->getOperand(i)->isUndef())
15820 return false;
15821 uint64_t C = Index->getConstantOperandVal(i);
15822 if (C % ElementSize != 0)
15823 return false;
15824 C = C / ElementSize;
15825 if (C >= NumElems)
15826 return false;
15827 ShuffleMask.push_back(C);
15828 ActiveLanes.set(C);
15829 }
15830 return ActiveLanes.all();
15831}
15832
15833/// Match the index of a gather or scatter operation as an operation
15834/// with twice the element width and half the number of elements. This is
15835/// generally profitable (if legal) because these operations are linear
15836/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
15837/// come out ahead.
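/// For example, an i32 gather with byte offsets <0, 4, 16, 20> can instead
/// be done as an i64 gather with byte offsets <0, 16>.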
15838 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15839 Align BaseAlign, const RISCVSubtarget &ST) {
15840 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15841 return false;
15842 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15843 return false;
15844
15845 // Attempt a doubling. If we can use an element type 4x or 8x in
15846 // size, this will happen via multiple iterations of the transform.
15847 const unsigned NumElems = VT.getVectorNumElements();
15848 if (NumElems % 2 != 0)
15849 return false;
15850
15851 const unsigned ElementSize = VT.getScalarStoreSize();
15852 const unsigned WiderElementSize = ElementSize * 2;
15853 if (WiderElementSize > ST.getELen()/8)
15854 return false;
15855
15856 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15857 return false;
15858
15859 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15860 // TODO: We've found an active bit of UB, and could be
15861 // more aggressive here if desired.
15862 if (Index->getOperand(i)->isUndef())
15863 return false;
15864 // TODO: This offset check is too strict if we support fully
15865 // misaligned memory operations.
15866 uint64_t C = Index->getConstantOperandVal(i);
15867 if (i % 2 == 0) {
15868 if (C % WiderElementSize != 0)
15869 return false;
15870 continue;
15871 }
15872 uint64_t Last = Index->getConstantOperandVal(i-1);
15873 if (C != Last + ElementSize)
15874 return false;
15875 }
15876 return true;
15877}
15878
15879
15880 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15881 DAGCombinerInfo &DCI) const {
15882 SelectionDAG &DAG = DCI.DAG;
15883 const MVT XLenVT = Subtarget.getXLenVT();
15884 SDLoc DL(N);
15885
15886 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15887 // bits are demanded. N will be added to the Worklist if it was not deleted.
15888 // Caller should return SDValue(N, 0) if this returns true.
15889 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15890 SDValue Op = N->getOperand(OpNo);
15891 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15892 if (!SimplifyDemandedBits(Op, Mask, DCI))
15893 return false;
15894
15895 if (N->getOpcode() != ISD::DELETED_NODE)
15896 DCI.AddToWorklist(N);
15897 return true;
15898 };
15899
15900 switch (N->getOpcode()) {
15901 default:
15902 break;
15903 case RISCVISD::SplitF64: {
15904 SDValue Op0 = N->getOperand(0);
15905 // If the input to SplitF64 is just BuildPairF64 then the operation is
15906 // redundant. Instead, use BuildPairF64's operands directly.
15907 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15908 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15909
15910 if (Op0->isUndef()) {
15911 SDValue Lo = DAG.getUNDEF(MVT::i32);
15912 SDValue Hi = DAG.getUNDEF(MVT::i32);
15913 return DCI.CombineTo(N, Lo, Hi);
15914 }
15915
15916 // It's cheaper to materialise two 32-bit integers than to load a double
15917 // from the constant pool and transfer it to integer registers through the
15918 // stack.
15919 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
15920 APInt V = C->getValueAPF().bitcastToAPInt();
15921 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15922 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15923 return DCI.CombineTo(N, Lo, Hi);
15924 }
15925
15926 // This is a target-specific version of a DAGCombine performed in
15927 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15928 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15929 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15930 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15931 !Op0.getNode()->hasOneUse())
15932 break;
15933 SDValue NewSplitF64 =
15934 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15935 Op0.getOperand(0));
15936 SDValue Lo = NewSplitF64.getValue(0);
15937 SDValue Hi = NewSplitF64.getValue(1);
15938 APInt SignBit = APInt::getSignMask(32);
15939 if (Op0.getOpcode() == ISD::FNEG) {
15940 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15941 DAG.getConstant(SignBit, DL, MVT::i32));
15942 return DCI.CombineTo(N, Lo, NewHi);
15943 }
15944 assert(Op0.getOpcode() == ISD::FABS);
15945 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15946 DAG.getConstant(~SignBit, DL, MVT::i32));
15947 return DCI.CombineTo(N, Lo, NewHi);
15948 }
15949 case RISCVISD::SLLW:
15950 case RISCVISD::SRAW:
15951 case RISCVISD::SRLW:
15952 case RISCVISD::RORW:
15953 case RISCVISD::ROLW: {
15954 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15955 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15956 SimplifyDemandedLowBitsHelper(1, 5))
15957 return SDValue(N, 0);
15958
15959 break;
15960 }
15961 case RISCVISD::CLZW:
15962 case RISCVISD::CTZW: {
15963 // Only the lower 32 bits of the first operand are read
15964 if (SimplifyDemandedLowBitsHelper(0, 32))
15965 return SDValue(N, 0);
15966 break;
15967 }
15968 case RISCVISD::FMV_W_X_RV64: {
15969 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15970 // conversion is unnecessary and can be replaced with the
15971 // FMV_X_ANYEXTW_RV64 operand.
15972 SDValue Op0 = N->getOperand(0);
15973 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15974 return Op0.getOperand(0);
15975 break;
15976 }
15977 case RISCVISD::FMV_X_ANYEXTH:
15978 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15979 SDLoc DL(N);
15980 SDValue Op0 = N->getOperand(0);
15981 MVT VT = N->getSimpleValueType(0);
15982 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15983 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
15984 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
15985 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
15986 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
15987 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
15988 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
15989 assert(Op0.getOperand(0).getValueType() == VT &&
15990 "Unexpected value type!");
15991 return Op0.getOperand(0);
15992 }
15993
15994 // This is a target-specific version of a DAGCombine performed in
15995 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15996 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15997 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15998 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15999 !Op0.getNode()->hasOneUse())
16000 break;
16001 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16002 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16003 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16004 if (Op0.getOpcode() == ISD::FNEG)
16005 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16006 DAG.getConstant(SignBit, DL, VT));
16007
16008 assert(Op0.getOpcode() == ISD::FABS);
16009 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16010 DAG.getConstant(~SignBit, DL, VT));
16011 }
16012 case ISD::ABS: {
16013 EVT VT = N->getValueType(0);
16014 SDValue N0 = N->getOperand(0);
16015 // abs (sext) -> zext (abs)
16016 // abs (zext) -> zext (handled elsewhere)
16017 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16018 SDValue Src = N0.getOperand(0);
16019 SDLoc DL(N);
16020 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16021 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16022 }
16023 break;
16024 }
16025 case ISD::ADD: {
16026 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16027 return V;
16028 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16029 return V;
16030 return performADDCombine(N, DAG, Subtarget);
16031 }
16032 case ISD::SUB: {
16033 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16034 return V;
16035 return performSUBCombine(N, DAG, Subtarget);
16036 }
16037 case ISD::AND:
16038 return performANDCombine(N, DCI, Subtarget);
16039 case ISD::OR: {
16040 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16041 return V;
16042 return performORCombine(N, DCI, Subtarget);
16043 }
16044 case ISD::XOR:
16045 return performXORCombine(N, DAG, Subtarget);
16046 case ISD::MUL:
16047 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16048 return V;
16049 return performMULCombine(N, DAG, DCI, Subtarget);
16050 case ISD::SDIV:
16051 case ISD::UDIV:
16052 case ISD::SREM:
16053 case ISD::UREM:
16054 if (SDValue V = combineBinOpOfZExt(N, DAG))
16055 return V;
16056 break;
16057 case ISD::FADD:
16058 case ISD::UMAX:
16059 case ISD::UMIN:
16060 case ISD::SMAX:
16061 case ISD::SMIN:
16062 case ISD::FMAXNUM:
16063 case ISD::FMINNUM: {
16064 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16065 return V;
16066 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16067 return V;
16068 return SDValue();
16069 }
16070 case ISD::SETCC:
16071 return performSETCCCombine(N, DAG, Subtarget);
16072 case ISD::SIGN_EXTEND_INREG:
16073 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16074 case ISD::ZERO_EXTEND:
16075 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16076 // type legalization. This is safe because fp_to_uint produces poison if
16077 // it overflows.
16078 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16079 SDValue Src = N->getOperand(0);
16080 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16081 isTypeLegal(Src.getOperand(0).getValueType()))
16082 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16083 Src.getOperand(0));
16084 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16085 isTypeLegal(Src.getOperand(1).getValueType())) {
16086 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16087 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16088 Src.getOperand(0), Src.getOperand(1));
16089 DCI.CombineTo(N, Res);
16090 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16091 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16092 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16093 }
16094 }
16095 return SDValue();
16096 case RISCVISD::TRUNCATE_VECTOR_VL: {
16097 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16098 // This is beneficial when X and Y are both the same low-precision vector
16099 // value type. Since the truncate would be lowered into n levels of
16100 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
16101 // such a pattern would otherwise be expanded later into a series of
16102 // "vsetvli" and "vnsrl" instructions.
16103 auto IsTruncNode = [](SDValue V) {
16104 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16105 return false;
16106 SDValue VL = V.getOperand(2);
16107 auto *C = dyn_cast<ConstantSDNode>(VL);
16108 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16109 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16110 (isa<RegisterSDNode>(VL) &&
16111 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16112 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16113 IsVLMAXForVMSET;
16114 };
16115
16116 SDValue Op = N->getOperand(0);
16117
16118 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16119 // to distinguish such pattern.
16120 while (IsTruncNode(Op)) {
16121 if (!Op.hasOneUse())
16122 return SDValue();
16123 Op = Op.getOperand(0);
16124 }
16125
16126 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16127 SDValue N0 = Op.getOperand(0);
16128 SDValue N1 = Op.getOperand(1);
16129 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16130 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16131 SDValue N00 = N0.getOperand(0);
16132 SDValue N10 = N1.getOperand(0);
16133 if (N00.getValueType().isVector() &&
16134 N00.getValueType() == N10.getValueType() &&
16135 N->getValueType(0) == N10.getValueType()) {
16136 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16137 SDValue SMin = DAG.getNode(
16138 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16139 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16140 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16141 }
16142 }
16143 }
16144 break;
16145 }
16146 case ISD::TRUNCATE:
16147 return performTRUNCATECombine(N, DAG, Subtarget);
16148 case ISD::SELECT:
16149 return performSELECTCombine(N, DAG, Subtarget);
16150 case RISCVISD::CZERO_EQZ:
16151 case RISCVISD::CZERO_NEZ:
16152 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
16153 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
16154 if (N->getOperand(1).getOpcode() == ISD::XOR &&
16155 isOneConstant(N->getOperand(1).getOperand(1))) {
16156 SDValue Cond = N->getOperand(1).getOperand(0);
16157 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
16158 if (DAG.MaskedValueIsZero(Cond, Mask)) {
16159 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
16160 ? RISCVISD::CZERO_NEZ
16161 : RISCVISD::CZERO_EQZ;
16162 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
16163 N->getOperand(0), Cond);
16164 }
16165 }
16166 return SDValue();
16167
16168 case RISCVISD::SELECT_CC: {
16169 // Transform
16170 SDValue LHS = N->getOperand(0);
16171 SDValue RHS = N->getOperand(1);
16172 SDValue CC = N->getOperand(2);
16173 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16174 SDValue TrueV = N->getOperand(3);
16175 SDValue FalseV = N->getOperand(4);
16176 SDLoc DL(N);
16177 EVT VT = N->getValueType(0);
16178
16179 // If the True and False values are the same, we don't need a select_cc.
16180 if (TrueV == FalseV)
16181 return TrueV;
16182
16183 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16184 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
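// For example, (select (x < 0), 3, 1) can be lowered on RV64 to
//   srai t0, x, 63; andi t0, t0, 2; addi t0, t0, 1
// which yields 3 when x < 0 and 1 otherwise.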
16185 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16186 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16187 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16188 if (CCVal == ISD::CondCode::SETGE)
16189 std::swap(TrueV, FalseV);
16190
16191 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16192 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16193 // Only handle simm12; if the value is not in this range, it can be
16194 // considered as a register.
16195 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16196 isInt<12>(TrueSImm - FalseSImm)) {
16197 SDValue SRA =
16198 DAG.getNode(ISD::SRA, DL, VT, LHS,
16199 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16200 SDValue AND =
16201 DAG.getNode(ISD::AND, DL, VT, SRA,
16202 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16203 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16204 }
16205
16206 if (CCVal == ISD::CondCode::SETGE)
16207 std::swap(TrueV, FalseV);
16208 }
16209
16210 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16211 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16212 {LHS, RHS, CC, TrueV, FalseV});
16213
16214 if (!Subtarget.hasConditionalMoveFusion()) {
16215 // (select c, -1, y) -> -c | y
16216 if (isAllOnesConstant(TrueV)) {
16217 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16218 SDValue Neg = DAG.getNegative(C, DL, VT);
16219 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16220 }
16221 // (select c, y, -1) -> -!c | y
16222 if (isAllOnesConstant(FalseV)) {
16223 SDValue C =
16224 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16225 SDValue Neg = DAG.getNegative(C, DL, VT);
16226 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16227 }
16228
16229 // (select c, 0, y) -> -!c & y
16230 if (isNullConstant(TrueV)) {
16231 SDValue C =
16232 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16233 SDValue Neg = DAG.getNegative(C, DL, VT);
16234 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16235 }
16236 // (select c, y, 0) -> -c & y
16237 if (isNullConstant(FalseV)) {
16238 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16239 SDValue Neg = DAG.getNegative(C, DL, VT);
16240 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16241 }
16242 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16243 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16244 if (((isOneConstant(FalseV) && LHS == TrueV &&
16245 CCVal == ISD::CondCode::SETNE) ||
16246 (isOneConstant(TrueV) && LHS == FalseV &&
16247 CCVal == ISD::CondCode::SETEQ)) &&
16248 isNullConstant(RHS)) {
16249 // freeze it to be safe.
16250 LHS = DAG.getFreeze(LHS);
16251 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16252 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16253 }
16254 }
16255
16256 // If both true/false are an xor with 1, pull through the select.
16257 // This can occur after op legalization if both operands are setccs that
16258 // require an xor to invert.
16259 // FIXME: Generalize to other binary ops with identical operand?
16260 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16261 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16262 isOneConstant(TrueV.getOperand(1)) &&
16263 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16264 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16265 TrueV.getOperand(0), FalseV.getOperand(0));
16266 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16267 }
16268
16269 return SDValue();
16270 }
16271 case RISCVISD::BR_CC: {
16272 SDValue LHS = N->getOperand(1);
16273 SDValue RHS = N->getOperand(2);
16274 SDValue CC = N->getOperand(3);
16275 SDLoc DL(N);
16276
16277 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16278 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16279 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16280
16281 return SDValue();
16282 }
16283 case ISD::BITREVERSE:
16284 return performBITREVERSECombine(N, DAG, Subtarget);
16285 case ISD::FP_TO_SINT:
16286 case ISD::FP_TO_UINT:
16287 return performFP_TO_INTCombine(N, DCI, Subtarget);
16288 case ISD::FP_TO_SINT_SAT:
16289 case ISD::FP_TO_UINT_SAT:
16290 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16291 case ISD::FCOPYSIGN: {
16292 EVT VT = N->getValueType(0);
16293 if (!VT.isVector())
16294 break;
16295 // There is a form of VFSGNJ which injects the negated sign of its second
16296 // operand. Try and bubble any FNEG up after the extend/round to produce
16297 // this optimized pattern. Avoid modifying cases where FP_ROUND and
16298 // TRUNC=1.
16299 SDValue In2 = N->getOperand(1);
16300 // Avoid cases where the extend/round has multiple uses, as duplicating
16301 // those is typically more expensive than removing a fneg.
16302 if (!In2.hasOneUse())
16303 break;
16304 if (In2.getOpcode() != ISD::FP_EXTEND &&
16305 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16306 break;
16307 In2 = In2.getOperand(0);
16308 if (In2.getOpcode() != ISD::FNEG)
16309 break;
16310 SDLoc DL(N);
16311 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16312 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16313 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16314 }
16315 case ISD::MGATHER: {
16316 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16317 const EVT VT = N->getValueType(0);
16318 SDValue Index = MGN->getIndex();
16319 SDValue ScaleOp = MGN->getScale();
16320 ISD::MemIndexType IndexType = MGN->getIndexType();
16321 assert(!MGN->isIndexScaled() &&
16322 "Scaled gather/scatter should not be formed");
16323
16324 SDLoc DL(N);
16325 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16326 return DAG.getMaskedGather(
16327 N->getVTList(), MGN->getMemoryVT(), DL,
16328 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16329 MGN->getBasePtr(), Index, ScaleOp},
16330 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16331
16332 if (narrowIndex(Index, IndexType, DAG))
16333 return DAG.getMaskedGather(
16334 N->getVTList(), MGN->getMemoryVT(), DL,
16335 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16336 MGN->getBasePtr(), Index, ScaleOp},
16337 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16338
16339 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16340 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16341 // The sequence will be XLenVT, not the type of Index. Tell
16342 // isSimpleVIDSequence this so we avoid overflow.
16343 if (std::optional<VIDSequence> SimpleVID =
16344 isSimpleVIDSequence(Index, Subtarget.getXLen());
16345 SimpleVID && SimpleVID->StepDenominator == 1) {
16346 const int64_t StepNumerator = SimpleVID->StepNumerator;
16347 const int64_t Addend = SimpleVID->Addend;
16348
16349 // Note: We don't need to check alignment here since (by assumption
16350 // from the existence of the gather), our offsets must be sufficiently
16351 // aligned.
16352
16353 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16354 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16355 assert(IndexType == ISD::UNSIGNED_SCALED);
16356 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16357 DAG.getConstant(Addend, DL, PtrVT));
16358
16359 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16360 SDValue IntID =
16361 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16362 XLenVT);
16363 SDValue Ops[] =
16364 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16365 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16366 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16367 Ops, VT, MGN->getMemOperand());
16368 }
16369 }
16370
16371 SmallVector<int> ShuffleMask;
16372 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16373 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16374 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16375 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16376 MGN->getMask(), DAG.getUNDEF(VT),
16377 MGN->getMemoryVT(), MGN->getMemOperand(),
16378 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16379 SDValue Shuffle =
16380 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16381 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16382 }
16383
16384 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16385 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16386 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16387 SmallVector<SDValue> NewIndices;
16388 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16389 NewIndices.push_back(Index.getOperand(i));
16390 EVT IndexVT = Index.getValueType()
16391 .getHalfNumVectorElementsVT(*DAG.getContext());
16392 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16393
16394 unsigned ElementSize = VT.getScalarStoreSize();
16395 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16396 auto EltCnt = VT.getVectorElementCount();
16397 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16398 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16399 EltCnt.divideCoefficientBy(2));
16400 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16401 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16402 EltCnt.divideCoefficientBy(2));
16403 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16404
16405 SDValue Gather =
16406 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16407 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16408 Index, ScaleOp},
16409 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16410 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16411 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16412 }
16413 break;
16414 }
16415 case ISD::MSCATTER:{
16416 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16417 SDValue Index = MSN->getIndex();
16418 SDValue ScaleOp = MSN->getScale();
16419 ISD::MemIndexType IndexType = MSN->getIndexType();
16420 assert(!MSN->isIndexScaled() &&
16421 "Scaled gather/scatter should not be formed");
16422
16423 SDLoc DL(N);
16424 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16425 return DAG.getMaskedScatter(
16426 N->getVTList(), MSN->getMemoryVT(), DL,
16427 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16428 Index, ScaleOp},
16429 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16430
16431 if (narrowIndex(Index, IndexType, DAG))
16432 return DAG.getMaskedScatter(
16433 N->getVTList(), MSN->getMemoryVT(), DL,
16434 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16435 Index, ScaleOp},
16436 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16437
16438 EVT VT = MSN->getValue()->getValueType(0);
16439 SmallVector<int> ShuffleMask;
16440 if (!MSN->isTruncatingStore() &&
16441 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16442 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16443 DAG.getUNDEF(VT), ShuffleMask);
16444 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16445 DAG.getUNDEF(XLenVT), MSN->getMask(),
16446 MSN->getMemoryVT(), MSN->getMemOperand(),
16447 ISD::UNINDEXED, false);
16448 }
16449 break;
16450 }
16451 case ISD::VP_GATHER: {
16452 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16453 SDValue Index = VPGN->getIndex();
16454 SDValue ScaleOp = VPGN->getScale();
16455 ISD::MemIndexType IndexType = VPGN->getIndexType();
16456 assert(!VPGN->isIndexScaled() &&
16457 "Scaled gather/scatter should not be formed");
16458
16459 SDLoc DL(N);
16460 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16461 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16462 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16463 ScaleOp, VPGN->getMask(),
16464 VPGN->getVectorLength()},
16465 VPGN->getMemOperand(), IndexType);
16466
16467 if (narrowIndex(Index, IndexType, DAG))
16468 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16469 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16470 ScaleOp, VPGN->getMask(),
16471 VPGN->getVectorLength()},
16472 VPGN->getMemOperand(), IndexType);
16473
16474 break;
16475 }
16476 case ISD::VP_SCATTER: {
16477 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16478 SDValue Index = VPSN->getIndex();
16479 SDValue ScaleOp = VPSN->getScale();
16480 ISD::MemIndexType IndexType = VPSN->getIndexType();
16481 assert(!VPSN->isIndexScaled() &&
16482 "Scaled gather/scatter should not be formed");
16483
16484 SDLoc DL(N);
16485 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16486 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16487 {VPSN->getChain(), VPSN->getValue(),
16488 VPSN->getBasePtr(), Index, ScaleOp,
16489 VPSN->getMask(), VPSN->getVectorLength()},
16490 VPSN->getMemOperand(), IndexType);
16491
16492 if (narrowIndex(Index, IndexType, DAG))
16493 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16494 {VPSN->getChain(), VPSN->getValue(),
16495 VPSN->getBasePtr(), Index, ScaleOp,
16496 VPSN->getMask(), VPSN->getVectorLength()},
16497 VPSN->getMemOperand(), IndexType);
16498 break;
16499 }
16500 case RISCVISD::SHL_VL:
16501 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16502 return V;
16503 [[fallthrough]];
16504 case RISCVISD::SRA_VL:
16505 case RISCVISD::SRL_VL: {
16506 SDValue ShAmt = N->getOperand(1);
16507 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16508 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16509 SDLoc DL(N);
16510 SDValue VL = N->getOperand(4);
16511 EVT VT = N->getValueType(0);
16512 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16513 ShAmt.getOperand(1), VL);
16514 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16515 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16516 }
16517 break;
16518 }
16519 case ISD::SRA:
16520 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16521 return V;
16522 [[fallthrough]];
16523 case ISD::SRL:
16524 case ISD::SHL: {
16525 if (N->getOpcode() == ISD::SHL) {
16526 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16527 return V;
16528 }
16529 SDValue ShAmt = N->getOperand(1);
16530 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16531 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16532 SDLoc DL(N);
16533 EVT VT = N->getValueType(0);
16534 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16535 ShAmt.getOperand(1),
16536 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16537 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16538 }
16539 break;
16540 }
16541 case RISCVISD::ADD_VL:
16542 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16543 return V;
16544 return combineToVWMACC(N, DAG, Subtarget);
16545 case RISCVISD::VWADD_W_VL:
16546 case RISCVISD::VWADDU_W_VL:
16547 case RISCVISD::VWSUB_W_VL:
16548 case RISCVISD::VWSUBU_W_VL:
16549 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16550 case RISCVISD::SUB_VL:
16551 case RISCVISD::MUL_VL:
16552 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16553 case RISCVISD::VFMADD_VL:
16554 case RISCVISD::VFNMADD_VL:
16555 case RISCVISD::VFMSUB_VL:
16556 case RISCVISD::VFNMSUB_VL:
16557 case RISCVISD::STRICT_VFMADD_VL:
16558 case RISCVISD::STRICT_VFNMADD_VL:
16559 case RISCVISD::STRICT_VFMSUB_VL:
16560 case RISCVISD::STRICT_VFNMSUB_VL:
16561 return performVFMADD_VLCombine(N, DAG, Subtarget);
16562 case RISCVISD::FADD_VL:
16563 case RISCVISD::FSUB_VL:
16564 case RISCVISD::FMUL_VL:
16565 case RISCVISD::VFWADD_W_VL:
16566 case RISCVISD::VFWSUB_W_VL: {
16567 if (N->getValueType(0).isScalableVector() &&
16568 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16569 (Subtarget.hasVInstructionsF16Minimal() &&
16570 !Subtarget.hasVInstructionsF16()))
16571 return SDValue();
16572 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16573 }
16574 case ISD::LOAD:
16575 case ISD::STORE: {
16576 if (DCI.isAfterLegalizeDAG())
16577 if (SDValue V = performMemPairCombine(N, DCI))
16578 return V;
16579
16580 if (N->getOpcode() != ISD::STORE)
16581 break;
16582
16583 auto *Store = cast<StoreSDNode>(N);
16584 SDValue Chain = Store->getChain();
16585 EVT MemVT = Store->getMemoryVT();
16586 SDValue Val = Store->getValue();
16587 SDLoc DL(N);
16588
16589 bool IsScalarizable =
16590 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16591 Store->isSimple() &&
16592 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16593 isPowerOf2_64(MemVT.getSizeInBits()) &&
16594 MemVT.getSizeInBits() <= Subtarget.getXLen();
16595
16596 // If sufficiently aligned we can scalarize stores of constant vectors of
16597 // any power-of-two size up to XLen bits, provided that they aren't too
16598 // expensive to materialize.
16599 // vsetivli zero, 2, e8, m1, ta, ma
16600 // vmv.v.i v8, 4
16601 // vse8.v v8, (a0)
16602 // ->
16603 // li a1, 1028
16604 // sh a1, 0(a0)
16605 if (DCI.isBeforeLegalize() && IsScalarizable &&
16606 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16607 // Get the constant vector bits
16608 APInt NewC(Val.getValueSizeInBits(), 0);
16609 uint64_t EltSize = Val.getScalarValueSizeInBits();
16610 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16611 if (Val.getOperand(i).isUndef())
16612 continue;
16613 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16614 i * EltSize);
16615 }
16616 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16617
16618 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16619 true) <= 2 &&
16620 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16621 NewVT, *Store->getMemOperand())) {
16622 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16623 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16624 Store->getPointerInfo(), Store->getOriginalAlign(),
16625 Store->getMemOperand()->getFlags());
16626 }
16627 }
16628
16629 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16630 // vsetivli zero, 2, e16, m1, ta, ma
16631 // vle16.v v8, (a0)
16632 // vse16.v v8, (a1)
16633 if (auto *L = dyn_cast<LoadSDNode>(Val);
16634 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16635 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16636 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16637 L->getMemoryVT() == MemVT) {
16638 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16639 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16640 NewVT, *Store->getMemOperand()) &&
16641 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16642 NewVT, *L->getMemOperand())) {
16643 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16644 L->getPointerInfo(), L->getOriginalAlign(),
16645 L->getMemOperand()->getFlags());
16646 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16647 Store->getPointerInfo(), Store->getOriginalAlign(),
16648 Store->getMemOperand()->getFlags());
16649 }
16650 }
16651
16652 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16653 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16654 // any illegal types.
16655 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16656 (DCI.isAfterLegalizeDAG() &&
16657 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16658 isNullConstant(Val.getOperand(1)))) {
16659 SDValue Src = Val.getOperand(0);
16660 MVT VecVT = Src.getSimpleValueType();
16661 // VecVT should be scalable and memory VT should match the element type.
16662 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16663 MemVT == VecVT.getVectorElementType()) {
16664 SDLoc DL(N);
16665 MVT MaskVT = getMaskTypeFor(VecVT);
16666 return DAG.getStoreVP(
16667 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16668 DAG.getConstant(1, DL, MaskVT),
16669 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16670 Store->getMemOperand(), Store->getAddressingMode(),
16671 Store->isTruncatingStore(), /*IsCompress*/ false);
16672 }
16673 }
16674
16675 break;
16676 }
16677 case ISD::SPLAT_VECTOR: {
16678 EVT VT = N->getValueType(0);
16679 // Only perform this combine on legal MVT types.
16680 if (!isTypeLegal(VT))
16681 break;
16682 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16683 DAG, Subtarget))
16684 return Gather;
16685 break;
16686 }
16687 case ISD::BUILD_VECTOR:
16688 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16689 return V;
16690 break;
16691 case ISD::CONCAT_VECTORS:
16692 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16693 return V;
16694 break;
16695 case ISD::INSERT_VECTOR_ELT:
16696 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16697 return V;
16698 break;
16699 case RISCVISD::VFMV_V_F_VL: {
16700 const MVT VT = N->getSimpleValueType(0);
16701 SDValue Passthru = N->getOperand(0);
16702 SDValue Scalar = N->getOperand(1);
16703 SDValue VL = N->getOperand(2);
16704
16705 // If VL is 1, we can use vfmv.s.f.
16706 if (isOneConstant(VL))
16707 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16708 break;
16709 }
16710 case RISCVISD::VMV_V_X_VL: {
16711 const MVT VT = N->getSimpleValueType(0);
16712 SDValue Passthru = N->getOperand(0);
16713 SDValue Scalar = N->getOperand(1);
16714 SDValue VL = N->getOperand(2);
16715
16716 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16717 // scalar input.
16718 unsigned ScalarSize = Scalar.getValueSizeInBits();
16719 unsigned EltWidth = VT.getScalarSizeInBits();
16720 if (ScalarSize > EltWidth && Passthru.isUndef())
16721 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16722 return SDValue(N, 0);
16723
16724 // If VL is 1 and the scalar value won't benefit from immediate, we can
16725 // use vmv.s.x.
16726 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16727 if (isOneConstant(VL) &&
16728 (!Const || Const->isZero() ||
16729 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16730 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16731
16732 break;
16733 }
16734 case RISCVISD::VFMV_S_F_VL: {
16735 SDValue Src = N->getOperand(1);
16736 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16737 // into an undef vector.
16738 // TODO: Could use a vslide or vmv.v.v for non-undef.
16739 if (N->getOperand(0).isUndef() &&
16740 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16741 isNullConstant(Src.getOperand(1)) &&
16742 Src.getOperand(0).getValueType().isScalableVector()) {
16743 EVT VT = N->getValueType(0);
16744 EVT SrcVT = Src.getOperand(0).getValueType();
16745 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
16746 // Widths match, just return the original vector.
16747 if (SrcVT == VT)
16748 return Src.getOperand(0);
16749 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16750 }
16751 [[fallthrough]];
16752 }
16753 case RISCVISD::VMV_S_X_VL: {
16754 const MVT VT = N->getSimpleValueType(0);
16755 SDValue Passthru = N->getOperand(0);
16756 SDValue Scalar = N->getOperand(1);
16757 SDValue VL = N->getOperand(2);
16758
16759 // Use M1 or smaller to avoid over constraining register allocation
16760 const MVT M1VT = getLMUL1VT(VT);
16761 if (M1VT.bitsLT(VT)) {
16762 SDValue M1Passthru =
16763 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16764 DAG.getVectorIdxConstant(0, DL));
16765 SDValue Result =
16766 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16767 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16768 DAG.getVectorIdxConstant(0, DL));
16769 return Result;
16770 }
16771
16772 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16773 // higher would involve overly constraining the register allocator for
16774 // no purpose.
16775 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16776 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
16777 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
16778 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
16779
16780 break;
16781 }
16782 case RISCVISD::VMV_X_S: {
16783 SDValue Vec = N->getOperand(0);
16784 MVT VecVT = N->getOperand(0).getSimpleValueType();
16785 const MVT M1VT = getLMUL1VT(VecVT);
16786 if (M1VT.bitsLT(VecVT)) {
16787 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
16788 DAG.getVectorIdxConstant(0, DL));
16789 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16790 }
16791 break;
16792 }
16793 case ISD::INTRINSIC_VOID:
16794 case ISD::INTRINSIC_W_CHAIN:
16795 case ISD::INTRINSIC_WO_CHAIN: {
16796 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16797 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16798 switch (IntNo) {
16799 // By default we do not combine any intrinsic.
16800 default:
16801 return SDValue();
16802 case Intrinsic::riscv_masked_strided_load: {
16803 MVT VT = N->getSimpleValueType(0);
16804 auto *Load = cast<MemIntrinsicSDNode>(N);
16805 SDValue PassThru = N->getOperand(2);
16806 SDValue Base = N->getOperand(3);
16807 SDValue Stride = N->getOperand(4);
16808 SDValue Mask = N->getOperand(5);
16809
16810 // If the stride is equal to the element size in bytes, we can use
16811 // a masked.load.
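// e.g. a strided load of i32 elements with a 4 byte stride reads
// consecutive words, which is exactly a unit-stride (masked) load.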
16812 const unsigned ElementSize = VT.getScalarStoreSize();
16813 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16814 StrideC && StrideC->getZExtValue() == ElementSize)
16815 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16816 DAG.getUNDEF(XLenVT), Mask, PassThru,
16817 Load->getMemoryVT(), Load->getMemOperand(),
16818 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16819 return SDValue();
16820 }
16821 case Intrinsic::riscv_masked_strided_store: {
16822 auto *Store = cast<MemIntrinsicSDNode>(N);
16823 SDValue Value = N->getOperand(2);
16824 SDValue Base = N->getOperand(3);
16825 SDValue Stride = N->getOperand(4);
16826 SDValue Mask = N->getOperand(5);
16827
16828 // If the stride is equal to the element size in bytes, we can use
16829 // a masked.store.
16830 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16831 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16832 StrideC && StrideC->getZExtValue() == ElementSize)
16833 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16834 DAG.getUNDEF(XLenVT), Mask,
16835 Store->getMemoryVT(), Store->getMemOperand(),
16836 ISD::UNINDEXED, false);
16837 return SDValue();
16838 }
16839 case Intrinsic::riscv_vcpop:
16840 case Intrinsic::riscv_vcpop_mask:
16841 case Intrinsic::riscv_vfirst:
16842 case Intrinsic::riscv_vfirst_mask: {
16843 SDValue VL = N->getOperand(2);
16844 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16845 IntNo == Intrinsic::riscv_vfirst_mask)
16846 VL = N->getOperand(3);
16847 if (!isNullConstant(VL))
16848 return SDValue();
16849 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16850 SDLoc DL(N);
16851 EVT VT = N->getValueType(0);
16852 if (IntNo == Intrinsic::riscv_vfirst ||
16853 IntNo == Intrinsic::riscv_vfirst_mask)
16854 return DAG.getConstant(-1, DL, VT);
16855 return DAG.getConstant(0, DL, VT);
16856 }
16857 }
16858 }
16859 case ISD::BITCAST: {
16860 assert(Subtarget.useRVVForFixedLengthVectors());
16861 SDValue N0 = N->getOperand(0);
16862 EVT VT = N->getValueType(0);
16863 EVT SrcVT = N0.getValueType();
16864 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16865 // type, widen both sides to avoid a trip through memory.
16866 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16867 VT.isScalarInteger()) {
16868 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16869 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
16870 Ops[0] = N0;
16871 SDLoc DL(N);
16872 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16873 N0 = DAG.getBitcast(MVT::i8, N0);
16874 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
16875 }
16876
16877 return SDValue();
16878 }
16879 }
16880
16881 return SDValue();
16882}
16883
16884 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16885 EVT XVT, unsigned KeptBits) const {
16886 // For vectors, we don't have a preference.
16887 if (XVT.isVector())
16888 return false;
16889
16890 if (XVT != MVT::i32 && XVT != MVT::i64)
16891 return false;
16892
16893 // We can use sext.w for RV64 or an srai 31 on RV32.
16894 if (KeptBits == 32 || KeptBits == 64)
16895 return true;
16896
16897 // With Zbb we can use sext.h/sext.b.
16898 return Subtarget.hasStdExtZbb() &&
16899 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16900 KeptBits == 16);
16901}
16902
16903 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16904 const SDNode *N, CombineLevel Level) const {
16905 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16906 N->getOpcode() == ISD::SRL) &&
16907 "Expected shift op");
16908
16909 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16910 // materialised in fewer instructions than `(OP _, c1)`:
16911 //
16912 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16913 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
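// For example, with c1 = 2047 and c2 = 3, c1 fits an ADDI immediate but
// c1 << c2 = 16376 does not, so this transform is rejected below.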
16914 SDValue N0 = N->getOperand(0);
16915 EVT Ty = N0.getValueType();
16916 if (Ty.isScalarInteger() &&
16917 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16918 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16919 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16920 if (C1 && C2) {
16921 const APInt &C1Int = C1->getAPIntValue();
16922 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16923
16924 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16925 // and the combine should happen, to potentially allow further combines
16926 // later.
16927 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16928 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
16929 return true;
16930
16931 // We can materialise `c1` in an add immediate, so it's "free", and the
16932 // combine should be prevented.
16933 if (C1Int.getSignificantBits() <= 64 &&
16934 isLegalAddImmediate(C1Int.getSExtValue()))
16935 return false;
16936
16937 // Neither constant will fit into an immediate, so find materialisation
16938 // costs.
16939 int C1Cost =
16940 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16941 /*CompressionCost*/ true);
16942 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16943 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16944 /*CompressionCost*/ true);
16945
16946 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16947 // combine should be prevented.
16948 if (C1Cost < ShiftedC1Cost)
16949 return false;
16950 }
16951 }
16952 return true;
16953}
16954
16955 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16956 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16957 TargetLoweringOpt &TLO) const {
16958 // Delay this optimization as late as possible.
16959 if (!TLO.LegalOps)
16960 return false;
16961
16962 EVT VT = Op.getValueType();
16963 if (VT.isVector())
16964 return false;
16965
16966 unsigned Opcode = Op.getOpcode();
16967 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16968 return false;
16969
16970 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16971 if (!C)
16972 return false;
16973
16974 const APInt &Mask = C->getAPIntValue();
16975
16976 // Clear all non-demanded bits initially.
16977 APInt ShrunkMask = Mask & DemandedBits;
16978
16979 // Try to make a smaller immediate by setting undemanded bits.
16980
16981 APInt ExpandedMask = Mask | ~DemandedBits;
16982
16983 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
16984 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
16985 };
16986 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
16987 if (NewMask == Mask)
16988 return true;
16989 SDLoc DL(Op);
16990 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
16991 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
16992 Op.getOperand(0), NewC);
16993 return TLO.CombineTo(Op, NewOp);
16994 };
16995
16996 // If the shrunk mask fits in sign extended 12 bits, let the target
16997 // independent code apply it.
16998 if (ShrunkMask.isSignedIntN(12))
16999 return false;
17000
17001 // And has a few special cases for zext.
17002 if (Opcode == ISD::AND) {
17003 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17004 // otherwise use SLLI + SRLI.
17005 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17006 if (IsLegalMask(NewMask))
17007 return UseMask(NewMask);
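// e.g. if only the low 16 bits are demanded, (and X, 0x1ffff) is rewritten
// here as (and X, 0xffff), which can be selected as zext.h (or SLLI+SRLI).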
17008
17009 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17010 if (VT == MVT::i64) {
17011 APInt NewMask = APInt(64, 0xffffffff);
17012 if (IsLegalMask(NewMask))
17013 return UseMask(NewMask);
17014 }
17015 }
17016
17017 // For the remaining optimizations, we need to be able to make a negative
17018 // number through a combination of mask and undemanded bits.
17019 if (!ExpandedMask.isNegative())
17020 return false;
17021
17022 // Find the fewest number of bits needed to represent the negative number.
17023 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17024
17025 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17026 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17027 // If we can't create a simm12, we shouldn't change opaque constants.
17028 APInt NewMask = ShrunkMask;
17029 if (MinSignedBits <= 12)
17030 NewMask.setBitsFrom(11);
17031 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17032 NewMask.setBitsFrom(31);
17033 else
17034 return false;
17035
17036 // Check that our new mask is a subset of the demanded mask.
17037 assert(IsLegalMask(NewMask));
17038 return UseMask(NewMask);
17039}
17040
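// With ShAmt == 7 the stages below swap bits, bit pairs and nibbles within
// each byte, e.g. computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80
// (brev8) and computeGREVOrGORC(0x01, 7, /*IsGORC=*/true) == 0xff (orc.b).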
17041static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17042 static const uint64_t GREVMasks[] = {
17043 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17044 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17045
17046 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17047 unsigned Shift = 1 << Stage;
17048 if (ShAmt & Shift) {
17049 uint64_t Mask = GREVMasks[Stage];
17050 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17051 if (IsGORC)
17052 Res |= x;
17053 x = Res;
17054 }
17055 }
17056
17057 return x;
17058}
17059
17060 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17061 KnownBits &Known,
17062 const APInt &DemandedElts,
17063 const SelectionDAG &DAG,
17064 unsigned Depth) const {
17065 unsigned BitWidth = Known.getBitWidth();
17066 unsigned Opc = Op.getOpcode();
17067 assert((Opc >= ISD::BUILTIN_OP_END ||
17068 Opc == ISD::INTRINSIC_WO_CHAIN ||
17069 Opc == ISD::INTRINSIC_W_CHAIN ||
17070 Opc == ISD::INTRINSIC_VOID) &&
17071 "Should use MaskedValueIsZero if you don't know whether Op"
17072 " is a target node!");
17073
17074 Known.resetAll();
17075 switch (Opc) {
17076 default: break;
17077 case RISCVISD::SELECT_CC: {
17078 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17079 // If we don't know any bits, early out.
17080 if (Known.isUnknown())
17081 break;
17082 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17083
17084 // Only known if known in both the LHS and RHS.
17085 Known = Known.intersectWith(Known2);
17086 break;
17087 }
17088 case RISCVISD::CZERO_EQZ:
17089 case RISCVISD::CZERO_NEZ:
17090 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17091 // Result is either all zero or operand 0. We can propagate zeros, but not
17092 // ones.
17093 Known.One.clearAllBits();
17094 break;
17095 case RISCVISD::REMUW: {
17096 KnownBits Known2;
17097 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17098 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17099 // We only care about the lower 32 bits.
17100 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17101 // Restore the original width by sign extending.
17102 Known = Known.sext(BitWidth);
17103 break;
17104 }
17105 case RISCVISD::DIVUW: {
17106 KnownBits Known2;
17107 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17108 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17109 // We only care about the lower 32 bits.
17110 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17111 // Restore the original width by sign extending.
17112 Known = Known.sext(BitWidth);
17113 break;
17114 }
17115 case RISCVISD::SLLW: {
17116 KnownBits Known2;
17117 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17118 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17119 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17120 // Restore the original width by sign extending.
17121 Known = Known.sext(BitWidth);
17122 break;
17123 }
17124 case RISCVISD::CTZW: {
17125 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17126 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17127 unsigned LowBits = llvm::bit_width(PossibleTZ);
17128 Known.Zero.setBitsFrom(LowBits);
17129 break;
17130 }
17131 case RISCVISD::CLZW: {
17132 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17133 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17134 unsigned LowBits = llvm::bit_width(PossibleLZ);
17135 Known.Zero.setBitsFrom(LowBits);
17136 break;
17137 }
17138 case RISCVISD::BREV8:
17139 case RISCVISD::ORC_B: {
17140 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17141 // control value of 7 is equivalent to brev8 and orc.b.
17142 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17143 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17144 // To compute zeros, we need to invert the value and invert it back after.
17145 Known.Zero =
17146 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17147 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17148 break;
17149 }
17150 case RISCVISD::READ_VLENB: {
17151 // We can use the minimum and maximum VLEN values to bound VLENB. We
17152 // know VLEN must be a power of two.
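// e.g. if VLEN is known to be between 128 and 512, VLENB is between 16 and
// 64, so bits [3:0] and all bits above bit 6 are known to be zero.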
17153 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17154 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17155 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17156 Known.Zero.setLowBits(Log2_32(MinVLenB));
17157 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17158 if (MaxVLenB == MinVLenB)
17159 Known.One.setBit(Log2_32(MinVLenB));
17160 break;
17161 }
17162 case RISCVISD::FCLASS: {
17163 // fclass will only set one of the low 10 bits.
17164 Known.Zero.setBitsFrom(10);
17165 break;
17166 }
17167 case ISD::INTRINSIC_W_CHAIN:
17168 case ISD::INTRINSIC_WO_CHAIN: {
17169 unsigned IntNo =
17170 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17171 switch (IntNo) {
17172 default:
17173 // We can't do anything for most intrinsics.
17174 break;
17175 case Intrinsic::riscv_vsetvli:
17176 case Intrinsic::riscv_vsetvlimax: {
17177 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17178 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17179 RISCVII::VLMUL VLMUL =
17180 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17181 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17182 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17183 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17184 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
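// Illustrative example (values assumed, not taken from this file): with a
// maximum VLEN of 512 and SEW = 32, MaxVL starts at 16; a fractional LMUL of
// 1/2 halves it to 8, so after the clamping below only bits [0, 3] of the
// result can be nonzero.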
17185
17186 // The result of vsetvli must not be larger than AVL.
17187 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17188 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17189
17190 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17191 if (BitWidth > KnownZeroFirstBit)
17192 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17193 break;
17194 }
17195 }
17196 break;
17197 }
17198 }
17199}
17200
17201 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17202 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17203 unsigned Depth) const {
17204 switch (Op.getOpcode()) {
17205 default:
17206 break;
17207 case RISCVISD::SELECT_CC: {
17208 unsigned Tmp =
17209 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17210 if (Tmp == 1) return 1; // Early out.
17211 unsigned Tmp2 =
17212 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17213 return std::min(Tmp, Tmp2);
17214 }
17215 case RISCVISD::CZERO_EQZ:
17216 case RISCVISD::CZERO_NEZ:
17217 // Output is either all zero or operand 0. We can propagate sign bit count
17218 // from operand 0.
17219 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17220 case RISCVISD::ABSW: {
17221 // We expand this at isel to negw+max. The result will have 33 sign bits
17222 // if the input has at least 33 sign bits.
17223 unsigned Tmp =
17224 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17225 if (Tmp < 33) return 1;
17226 return 33;
17227 }
17228 case RISCVISD::SLLW:
17229 case RISCVISD::SRAW:
17230 case RISCVISD::SRLW:
17231 case RISCVISD::DIVW:
17232 case RISCVISD::DIVUW:
17233 case RISCVISD::REMUW:
17234 case RISCVISD::ROLW:
17235 case RISCVISD::RORW:
17236 case RISCVISD::FCVT_W_RV64:
17237 case RISCVISD::FCVT_WU_RV64:
17238 case RISCVISD::STRICT_FCVT_W_RV64:
17239 case RISCVISD::STRICT_FCVT_WU_RV64:
17240 // TODO: As the result is sign-extended, this is conservatively correct. A
17241 // more precise answer could be calculated for SRAW depending on known
17242 // bits in the shift amount.
17243 return 33;
17244 case RISCVISD::VMV_X_S: {
17245 // The number of sign bits of the scalar result is computed by obtaining the
17246 // element type of the input vector operand, subtracting its width from the
17247 // XLEN, and then adding one (sign bit within the element type). If the
17248 // element type is wider than XLen, the least-significant XLEN bits are
17249 // taken.
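// For example, on RV64 (XLEN = 64) extracting from a vector of i8 elements
// yields 64 - 8 + 1 = 57 known sign bits.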
17250 unsigned XLen = Subtarget.getXLen();
17251 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17252 if (EltBits <= XLen)
17253 return XLen - EltBits + 1;
17254 break;
17255 }
17256 case ISD::INTRINSIC_W_CHAIN: {
17257 unsigned IntNo = Op.getConstantOperandVal(1);
17258 switch (IntNo) {
17259 default:
17260 break;
17261 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17262 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17263 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17264 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17265 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17266 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17267 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17268 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17269 case Intrinsic::riscv_masked_cmpxchg_i64:
17270 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17271 // narrow atomic operation. These are implemented using atomic
17272 // operations at the minimum supported atomicrmw/cmpxchg width whose
17273 // result is then sign extended to XLEN. With +A, the minimum width is
17274 // 32 for both RV64 and RV32.
17275 assert(Subtarget.getXLen() == 64);
17277 assert(Subtarget.hasStdExtA());
17278 return 33;
17279 }
17280 break;
17281 }
17282 }
17283
17284 return 1;
17285}
17286
17287 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17288 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17289 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17290
17291 // TODO: Add more target nodes.
17292 switch (Op.getOpcode()) {
17293 case RISCVISD::SELECT_CC:
17294 // Integer select_cc cannot create poison.
17295 // TODO: What are the FP poison semantics?
17296 // TODO: This instruction blocks poison from the unselected operand, can
17297 // we do anything with that?
17298 return !Op.getValueType().isInteger();
17299 }
17300 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17301 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17302}
17303
17304const Constant *
17305 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17306 assert(Ld && "Unexpected null LoadSDNode");
17307 if (!ISD::isNormalLoad(Ld))
17308 return nullptr;
17309
17310 SDValue Ptr = Ld->getBasePtr();
17311
17312 // Only constant pools with no offset are supported.
17313 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17314 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17315 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17316 CNode->getOffset() != 0)
17317 return nullptr;
17318
17319 return CNode;
17320 };
17321
17322 // Simple case, LLA.
17323 if (Ptr.getOpcode() == RISCVISD::LLA) {
17324 auto *CNode = GetSupportedConstantPool(Ptr);
17325 if (!CNode || CNode->getTargetFlags() != 0)
17326 return nullptr;
17327
17328 return CNode->getConstVal();
17329 }
17330
17331 // Look for a HI and ADD_LO pair.
17332 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17333 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17334 return nullptr;
17335
17336 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17337 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17338
17339 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17340 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17341 return nullptr;
17342
17343 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17344 return nullptr;
17345
17346 return CNodeLo->getConstVal();
17347}
17348
17349 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17350 MachineBasicBlock *BB) {
17351 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17352
17353 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17354 // Should the count have wrapped while it was being read, we need to try
17355 // again.
17356 // For example:
17357 // ```
17358 // read:
17359 // csrrs x3, counterh # load high word of counter
17360 // csrrs x2, counter # load low word of counter
17361 // csrrs x4, counterh # load high word of counter
17362 // bne x3, x4, read # check if high word reads match, otherwise try again
17363 // ```
17364
17365 MachineFunction &MF = *BB->getParent();
17366 const BasicBlock *LLVMBB = BB->getBasicBlock();
17367 MachineFunction::iterator It = ++BB->getIterator();
17368
17369 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17370 MF.insert(It, LoopMBB);
17371
17372 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17373 MF.insert(It, DoneMBB);
17374
17375 // Transfer the remainder of BB and its successor edges to DoneMBB.
17376 DoneMBB->splice(DoneMBB->begin(), BB,
17377 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17378 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17379
17380 BB->addSuccessor(LoopMBB);
17381
17382 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17383 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17384 Register LoReg = MI.getOperand(0).getReg();
17385 Register HiReg = MI.getOperand(1).getReg();
17386 int64_t LoCounter = MI.getOperand(2).getImm();
17387 int64_t HiCounter = MI.getOperand(3).getImm();
17388 DebugLoc DL = MI.getDebugLoc();
17389
17390 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17391 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17392 .addImm(HiCounter)
17393 .addReg(RISCV::X0);
17394 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17395 .addImm(LoCounter)
17396 .addReg(RISCV::X0);
17397 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17398 .addImm(HiCounter)
17399 .addReg(RISCV::X0);
17400
17401 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17402 .addReg(HiReg)
17403 .addReg(ReadAgainReg)
17404 .addMBB(LoopMBB);
17405
17406 LoopMBB->addSuccessor(LoopMBB);
17407 LoopMBB->addSuccessor(DoneMBB);
17408
17409 MI.eraseFromParent();
17410
17411 return DoneMBB;
17412}
17413
17414 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17415                                              MachineBasicBlock *BB,
17416 const RISCVSubtarget &Subtarget) {
17417 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17418
17419 MachineFunction &MF = *BB->getParent();
17420 DebugLoc DL = MI.getDebugLoc();
17421 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17422 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17423 Register LoReg = MI.getOperand(0).getReg();
17424 Register HiReg = MI.getOperand(1).getReg();
17425 Register SrcReg = MI.getOperand(2).getReg();
17426
17427 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17428 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17429
17430 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17431 RI, Register());
17433 MachineMemOperand *MMOLo =
17437 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17438 .addFrameIndex(FI)
17439 .addImm(0)
17440 .addMemOperand(MMOLo);
17441 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17442 .addFrameIndex(FI)
17443 .addImm(4)
17444 .addMemOperand(MMOHi);
17445 MI.eraseFromParent(); // The pseudo instruction is gone now.
17446 return BB;
17447}
17448
17449 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17450                                                  MachineBasicBlock *BB,
17451 const RISCVSubtarget &Subtarget) {
17452 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17453 "Unexpected instruction");
17454
17455 MachineFunction &MF = *BB->getParent();
17456 DebugLoc DL = MI.getDebugLoc();
17457 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17458 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17459 Register DstReg = MI.getOperand(0).getReg();
17460 Register LoReg = MI.getOperand(1).getReg();
17461 Register HiReg = MI.getOperand(2).getReg();
17462
17463 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17464 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17465
17467 MachineMemOperand *MMOLo =
17471 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17472 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17473 .addFrameIndex(FI)
17474 .addImm(0)
17475 .addMemOperand(MMOLo);
17476 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17477 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17478 .addFrameIndex(FI)
17479 .addImm(4)
17480 .addMemOperand(MMOHi);
17481 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17482 MI.eraseFromParent(); // The pseudo instruction is gone now.
17483 return BB;
17484}
17485
17486 static bool isSelectPseudo(MachineInstr &MI) {
17487 switch (MI.getOpcode()) {
17488 default:
17489 return false;
17490 case RISCV::Select_GPR_Using_CC_GPR:
17491 case RISCV::Select_FPR16_Using_CC_GPR:
17492 case RISCV::Select_FPR16INX_Using_CC_GPR:
17493 case RISCV::Select_FPR32_Using_CC_GPR:
17494 case RISCV::Select_FPR32INX_Using_CC_GPR:
17495 case RISCV::Select_FPR64_Using_CC_GPR:
17496 case RISCV::Select_FPR64INX_Using_CC_GPR:
17497 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17498 return true;
17499 }
17500}
17501
17502 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17503 unsigned RelOpcode, unsigned EqOpcode,
17504 const RISCVSubtarget &Subtarget) {
17505 DebugLoc DL = MI.getDebugLoc();
17506 Register DstReg = MI.getOperand(0).getReg();
17507 Register Src1Reg = MI.getOperand(1).getReg();
17508 Register Src2Reg = MI.getOperand(2).getReg();
17509 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17510 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17511 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17512
17513 // Save the current FFLAGS.
17514 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17515
17516 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17517 .addReg(Src1Reg)
17518 .addReg(Src2Reg);
17519 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17520   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17521
17522 // Restore the FFLAGS.
17523 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17524 .addReg(SavedFFlags, RegState::Kill);
17525
17526 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17527 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17528 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17529 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17530 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17531   MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17532
17533 // Erase the pseudoinstruction.
17534 MI.eraseFromParent();
17535 return BB;
17536}
17537
17538static MachineBasicBlock *
17539 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17540 MachineBasicBlock *ThisMBB,
17541 const RISCVSubtarget &Subtarget) {
17542 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
17543 // Without this, custom-inserter would have generated:
17544 //
17545 // A
17546 // | \
17547 // | B
17548 // | /
17549 // C
17550 // | \
17551 // | D
17552 // | /
17553 // E
17554 //
17555 // A: X = ...; Y = ...
17556 // B: empty
17557 // C: Z = PHI [X, A], [Y, B]
17558 // D: empty
17559 // E: PHI [X, C], [Z, D]
17560 //
17561 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17562 //
17563 // A
17564 // | \
17565 // | C
17566 // | /|
17567 // |/ |
17568 // | |
17569 // | D
17570 // | /
17571 // E
17572 //
17573 // A: X = ...; Y = ...
17574 // D: empty
17575 // E: PHI [X, A], [X, C], [Y, D]
17576
17577 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17578 const DebugLoc &DL = First.getDebugLoc();
17579 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17580 MachineFunction *F = ThisMBB->getParent();
17581 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17582 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17583 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17584 MachineFunction::iterator It = ++ThisMBB->getIterator();
17585 F->insert(It, FirstMBB);
17586 F->insert(It, SecondMBB);
17587 F->insert(It, SinkMBB);
17588
17589 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17590 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17591 std::next(MachineBasicBlock::iterator(First)),
17592 ThisMBB->end());
17593 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17594
17595 // Fallthrough block for ThisMBB.
17596 ThisMBB->addSuccessor(FirstMBB);
17597 // Fallthrough block for FirstMBB.
17598 FirstMBB->addSuccessor(SecondMBB);
17599 ThisMBB->addSuccessor(SinkMBB);
17600 FirstMBB->addSuccessor(SinkMBB);
17601 // This is fallthrough.
17602 SecondMBB->addSuccessor(SinkMBB);
17603
17604 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17605 Register FLHS = First.getOperand(1).getReg();
17606 Register FRHS = First.getOperand(2).getReg();
17607 // Insert appropriate branch.
17608 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17609 .addReg(FLHS)
17610 .addReg(FRHS)
17611 .addMBB(SinkMBB);
17612
17613 Register SLHS = Second.getOperand(1).getReg();
17614 Register SRHS = Second.getOperand(2).getReg();
17615 Register Op1Reg4 = First.getOperand(4).getReg();
17616 Register Op1Reg5 = First.getOperand(5).getReg();
17617
17618 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17619 // Insert appropriate branch.
17620 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17621 .addReg(SLHS)
17622 .addReg(SRHS)
17623 .addMBB(SinkMBB);
17624
17625 Register DestReg = Second.getOperand(0).getReg();
17626 Register Op2Reg4 = Second.getOperand(4).getReg();
17627 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17628 .addReg(Op2Reg4)
17629 .addMBB(ThisMBB)
17630 .addReg(Op1Reg4)
17631 .addMBB(FirstMBB)
17632 .addReg(Op1Reg5)
17633 .addMBB(SecondMBB);
17634
17635 // Now remove the Select_FPRX_s.
17636 First.eraseFromParent();
17637 Second.eraseFromParent();
17638 return SinkMBB;
17639}
17640
17641 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17642                                            MachineBasicBlock *BB,
17643 const RISCVSubtarget &Subtarget) {
17644 // To "insert" Select_* instructions, we actually have to insert the triangle
17645 // control-flow pattern. The incoming instructions know the destination vreg
17646 // to set, the condition code register to branch on, the true/false values to
17647 // select between, and the condcode to use to select the appropriate branch.
17648 //
17649 // We produce the following control flow:
17650 // HeadMBB
17651 // | \
17652 // | IfFalseMBB
17653 // | /
17654 // TailMBB
17655 //
17656 // When we find a sequence of selects we attempt to optimize their emission
17657 // by sharing the control flow. Currently we only handle cases where we have
17658 // multiple selects with the exact same condition (same LHS, RHS and CC).
17659 // The selects may be interleaved with other instructions if the other
17660 // instructions meet some requirements we deem safe:
17661 // - They are not pseudo instructions.
17662 // - They are debug instructions, or otherwise:
17663 // - They do not have side-effects, do not access memory and their inputs do
17664 // not depend on the results of the select pseudo-instructions.
17665 // The TrueV/FalseV operands of the selects cannot depend on the result of
17666 // previous selects in the sequence.
17667 // These conditions could be further relaxed. See the X86 target for a
17668 // related approach and more information.
17669 //
17670 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17671 // is checked here and handled by a separate function -
17672 // EmitLoweredCascadedSelect.
17673 Register LHS = MI.getOperand(1).getReg();
17674 Register RHS = MI.getOperand(2).getReg();
17675 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17676
17677 SmallVector<MachineInstr *, 4> SelectDebugValues;
17678 SmallSet<Register, 4> SelectDests;
17679 SelectDests.insert(MI.getOperand(0).getReg());
17680
17681 MachineInstr *LastSelectPseudo = &MI;
17682 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17683 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17684 Next->getOpcode() == MI.getOpcode() &&
17685 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17686 Next->getOperand(5).isKill()) {
17687 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17688 }
17689
17690 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17691 SequenceMBBI != E; ++SequenceMBBI) {
17692 if (SequenceMBBI->isDebugInstr())
17693 continue;
17694 if (isSelectPseudo(*SequenceMBBI)) {
17695 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17696 SequenceMBBI->getOperand(2).getReg() != RHS ||
17697 SequenceMBBI->getOperand(3).getImm() != CC ||
17698 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17699 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17700 break;
17701 LastSelectPseudo = &*SequenceMBBI;
17702 SequenceMBBI->collectDebugValues(SelectDebugValues);
17703 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17704 continue;
17705 }
17706 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17707 SequenceMBBI->mayLoadOrStore() ||
17708 SequenceMBBI->usesCustomInsertionHook())
17709 break;
17710 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17711 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17712 }))
17713 break;
17714 }
17715
17716 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17717 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17718 DebugLoc DL = MI.getDebugLoc();
17719 MachineFunction::iterator I = ++BB->getIterator();
17720
17721 MachineBasicBlock *HeadMBB = BB;
17722 MachineFunction *F = BB->getParent();
17723 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17724 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17725
17726 F->insert(I, IfFalseMBB);
17727 F->insert(I, TailMBB);
17728
17729 // Transfer debug instructions associated with the selects to TailMBB.
17730 for (MachineInstr *DebugInstr : SelectDebugValues) {
17731 TailMBB->push_back(DebugInstr->removeFromParent());
17732 }
17733
17734 // Move all instructions after the sequence to TailMBB.
17735 TailMBB->splice(TailMBB->end(), HeadMBB,
17736 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17737 // Update machine-CFG edges by transferring all successors of the current
17738 // block to the new block which will contain the Phi nodes for the selects.
17739 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17740 // Set the successors for HeadMBB.
17741 HeadMBB->addSuccessor(IfFalseMBB);
17742 HeadMBB->addSuccessor(TailMBB);
17743
17744 // Insert appropriate branch.
17745 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17746 .addReg(LHS)
17747 .addReg(RHS)
17748 .addMBB(TailMBB);
17749
17750 // IfFalseMBB just falls through to TailMBB.
17751 IfFalseMBB->addSuccessor(TailMBB);
17752
17753 // Create PHIs for all of the select pseudo-instructions.
17754 auto SelectMBBI = MI.getIterator();
17755 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17756 auto InsertionPoint = TailMBB->begin();
17757 while (SelectMBBI != SelectEnd) {
17758 auto Next = std::next(SelectMBBI);
17759 if (isSelectPseudo(*SelectMBBI)) {
17760 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17761 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17762 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17763 .addReg(SelectMBBI->getOperand(4).getReg())
17764 .addMBB(HeadMBB)
17765 .addReg(SelectMBBI->getOperand(5).getReg())
17766 .addMBB(IfFalseMBB);
17767 SelectMBBI->eraseFromParent();
17768 }
17769 SelectMBBI = Next;
17770 }
17771
17772 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
17773 return TailMBB;
17774}
17775
17776 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17777                                                     MachineBasicBlock *BB,
17778 unsigned CVTXOpc) {
17779 DebugLoc DL = MI.getDebugLoc();
17780
17781 const TargetInstrInfo &TII = *MI.getMF()->getSubtarget().getInstrInfo();
17782
17783 MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
17784 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17785
17786 // Save the old value of FFLAGS.
17787 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17788
17789 assert(MI.getNumOperands() == 7);
17790
17791 // Emit a VFCVT_X_F
17792 const TargetRegisterInfo *TRI =
17793 MI.getMF()->getSubtarget().getRegisterInfo();
17794 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
17795 Register Tmp = MRI.createVirtualRegister(RC);
17796 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17797 .add(MI.getOperand(1))
17798 .add(MI.getOperand(2))
17799 .add(MI.getOperand(3))
17800 .add(MachineOperand::CreateImm(7)) // frm = DYN
17801 .add(MI.getOperand(4))
17802 .add(MI.getOperand(5))
17803 .add(MI.getOperand(6))
17804 .add(MachineOperand::CreateReg(RISCV::FRM,
17805 /*IsDef*/ false,
17806 /*IsImp*/ true));
17807
17808 // Emit a VFCVT_F_X
17809 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
17810 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
17811 // There is no E8 variant for VFCVT_F_X.
17812 assert(Log2SEW >= 4);
17813 // Since MI (VFROUND) isn't SEW specific, we cannot use a macro to make
17814 // handling of different (LMUL, SEW) pairs easier because we need to pull the
17815 // SEW immediate from MI, and that information is not available during macro
17816 // expansion.
17817 unsigned CVTFOpc;
17818 if (Log2SEW == 4) {
17819 switch (LMul) {
17820 case RISCVII::LMUL_1:
17821 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E16_MASK;
17822 break;
17823 case RISCVII::LMUL_2:
17824 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E16_MASK;
17825 break;
17826 case RISCVII::LMUL_4:
17827 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E16_MASK;
17828 break;
17829 case RISCVII::LMUL_8:
17830 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E16_MASK;
17831 break;
17832 case RISCVII::LMUL_F2:
17833 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E16_MASK;
17834 break;
17835 case RISCVII::LMUL_F4:
17836 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF4_E16_MASK;
17837 break;
17838 case RISCVII::LMUL_F8:
17839 case RISCVII::LMUL_RESERVED:
17840 llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
17841 }
17842 } else if (Log2SEW == 5) {
17843 switch (LMul) {
17844 case RISCVII::LMUL_1:
17845 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E32_MASK;
17846 break;
17847 case RISCVII::LMUL_2:
17848 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E32_MASK;
17849 break;
17850 case RISCVII::LMUL_4:
17851 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E32_MASK;
17852 break;
17853 case RISCVII::LMUL_8:
17854 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E32_MASK;
17855 break;
17856 case RISCVII::LMUL_F2:
17857 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E32_MASK;
17858 break;
17859 case RISCVII::LMUL_F4:
17860 case RISCVII::LMUL_F8:
17861 case RISCVII::LMUL_RESERVED:
17862 llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
17863 }
17864 } else if (Log2SEW == 6) {
17865 switch (LMul) {
17866 case RISCVII::LMUL_1:
17867 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E64_MASK;
17868 break;
17869 case RISCVII::LMUL_2:
17870 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E64_MASK;
17871 break;
17872 case RISCVII::LMUL_4:
17873 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E64_MASK;
17874 break;
17875 case RISCVII::LMUL_8:
17876 CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E64_MASK;
17877 break;
17878 case RISCVII::LMUL_F2:
17879 case RISCVII::LMUL_F4:
17880 case RISCVII::LMUL_F8:
17881 case RISCVII::LMUL_RESERVED:
17882 llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
17883 }
17884 } else {
17885 llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
17886 }
17887
17888 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17889 .add(MI.getOperand(0))
17890 .add(MI.getOperand(1))
17891 .addReg(Tmp)
17892 .add(MI.getOperand(3))
17893 .add(MachineOperand::CreateImm(7)) // frm = DYN
17894 .add(MI.getOperand(4))
17895 .add(MI.getOperand(5))
17896 .add(MI.getOperand(6))
17897 .add(MachineOperand::CreateReg(RISCV::FRM,
17898 /*IsDef*/ false,
17899 /*IsImp*/ true));
17900
17901 // Restore FFLAGS.
17902 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17903 .addReg(SavedFFLAGS, RegState::Kill);
17904
17905 // Erase the pseudoinstruction.
17906 MI.eraseFromParent();
17907 return BB;
17908}
17909
17910 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17911 const RISCVSubtarget &Subtarget) {
17912 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17913 const TargetRegisterClass *RC;
17914 switch (MI.getOpcode()) {
17915 default:
17916 llvm_unreachable("Unexpected opcode");
17917 case RISCV::PseudoFROUND_H:
17918 CmpOpc = RISCV::FLT_H;
17919 F2IOpc = RISCV::FCVT_W_H;
17920 I2FOpc = RISCV::FCVT_H_W;
17921 FSGNJOpc = RISCV::FSGNJ_H;
17922 FSGNJXOpc = RISCV::FSGNJX_H;
17923 RC = &RISCV::FPR16RegClass;
17924 break;
17925 case RISCV::PseudoFROUND_H_INX:
17926 CmpOpc = RISCV::FLT_H_INX;
17927 F2IOpc = RISCV::FCVT_W_H_INX;
17928 I2FOpc = RISCV::FCVT_H_W_INX;
17929 FSGNJOpc = RISCV::FSGNJ_H_INX;
17930 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17931 RC = &RISCV::GPRF16RegClass;
17932 break;
17933 case RISCV::PseudoFROUND_S:
17934 CmpOpc = RISCV::FLT_S;
17935 F2IOpc = RISCV::FCVT_W_S;
17936 I2FOpc = RISCV::FCVT_S_W;
17937 FSGNJOpc = RISCV::FSGNJ_S;
17938 FSGNJXOpc = RISCV::FSGNJX_S;
17939 RC = &RISCV::FPR32RegClass;
17940 break;
17941 case RISCV::PseudoFROUND_S_INX:
17942 CmpOpc = RISCV::FLT_S_INX;
17943 F2IOpc = RISCV::FCVT_W_S_INX;
17944 I2FOpc = RISCV::FCVT_S_W_INX;
17945 FSGNJOpc = RISCV::FSGNJ_S_INX;
17946 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17947 RC = &RISCV::GPRF32RegClass;
17948 break;
17949 case RISCV::PseudoFROUND_D:
17950 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17951 CmpOpc = RISCV::FLT_D;
17952 F2IOpc = RISCV::FCVT_L_D;
17953 I2FOpc = RISCV::FCVT_D_L;
17954 FSGNJOpc = RISCV::FSGNJ_D;
17955 FSGNJXOpc = RISCV::FSGNJX_D;
17956 RC = &RISCV::FPR64RegClass;
17957 break;
17958 case RISCV::PseudoFROUND_D_INX:
17959 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17960 CmpOpc = RISCV::FLT_D_INX;
17961 F2IOpc = RISCV::FCVT_L_D_INX;
17962 I2FOpc = RISCV::FCVT_D_L_INX;
17963 FSGNJOpc = RISCV::FSGNJ_D_INX;
17964 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17965 RC = &RISCV::GPRRegClass;
17966 break;
17967 }
17968
17969 const BasicBlock *BB = MBB->getBasicBlock();
17970 DebugLoc DL = MI.getDebugLoc();
17971 MachineFunction::iterator I = ++MBB->getIterator();
17972
17973 MachineFunction *F = MBB->getParent();
17974 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17975 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17976
17977 F->insert(I, CvtMBB);
17978 F->insert(I, DoneMBB);
17979 // Move all instructions after the sequence to DoneMBB.
17980 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
17981 MBB->end());
17982 // Update machine-CFG edges by transferring all successors of the current
17983 // block to the new block which will contain the Phi nodes for the selects.
17984 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
17985 // Set the successors for MBB.
17986 MBB->addSuccessor(CvtMBB);
17987 MBB->addSuccessor(DoneMBB);
17988
17989 Register DstReg = MI.getOperand(0).getReg();
17990 Register SrcReg = MI.getOperand(1).getReg();
17991 Register MaxReg = MI.getOperand(2).getReg();
17992 int64_t FRM = MI.getOperand(3).getImm();
17993
17994 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17995 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
17996
17997 Register FabsReg = MRI.createVirtualRegister(RC);
17998 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17999
18000 // Compare the FP value to the max value.
18001 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18002 auto MIB =
18003 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18004 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18005   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18006
18007 // Insert branch.
18008 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18009 .addReg(CmpReg)
18010 .addReg(RISCV::X0)
18011 .addMBB(DoneMBB);
18012
18013 CvtMBB->addSuccessor(DoneMBB);
18014
18015 // Convert to integer.
18016 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18017 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18018 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18019   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18020
18021 // Convert back to FP.
18022 Register I2FReg = MRI.createVirtualRegister(RC);
18023 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18024 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18025   MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18026
18027 // Restore the sign bit.
18028 Register CvtReg = MRI.createVirtualRegister(RC);
18029 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18030
18031 // Merge the results.
18032 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18033 .addReg(SrcReg)
18034 .addMBB(MBB)
18035 .addReg(CvtReg)
18036 .addMBB(CvtMBB);
18037
18038 MI.eraseFromParent();
18039 return DoneMBB;
18040}
18041
18042 MachineBasicBlock *
18043 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18044 MachineBasicBlock *BB) const {
18045 switch (MI.getOpcode()) {
18046 default:
18047 llvm_unreachable("Unexpected instr type to insert");
18048 case RISCV::ReadCounterWide:
18049 assert(!Subtarget.is64Bit() &&
18050 "ReadCounterWide is only to be used on riscv32");
18051 return emitReadCounterWidePseudo(MI, BB);
18052 case RISCV::Select_GPR_Using_CC_GPR:
18053 case RISCV::Select_FPR16_Using_CC_GPR:
18054 case RISCV::Select_FPR16INX_Using_CC_GPR:
18055 case RISCV::Select_FPR32_Using_CC_GPR:
18056 case RISCV::Select_FPR32INX_Using_CC_GPR:
18057 case RISCV::Select_FPR64_Using_CC_GPR:
18058 case RISCV::Select_FPR64INX_Using_CC_GPR:
18059 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18060 return emitSelectPseudo(MI, BB, Subtarget);
18061 case RISCV::BuildPairF64Pseudo:
18062 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18063 case RISCV::SplitF64Pseudo:
18064 return emitSplitF64Pseudo(MI, BB, Subtarget);
18065 case RISCV::PseudoQuietFLE_H:
18066 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18067 case RISCV::PseudoQuietFLE_H_INX:
18068 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18069 case RISCV::PseudoQuietFLT_H:
18070 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18071 case RISCV::PseudoQuietFLT_H_INX:
18072 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18073 case RISCV::PseudoQuietFLE_S:
18074 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18075 case RISCV::PseudoQuietFLE_S_INX:
18076 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18077 case RISCV::PseudoQuietFLT_S:
18078 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18079 case RISCV::PseudoQuietFLT_S_INX:
18080 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18081 case RISCV::PseudoQuietFLE_D:
18082 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18083 case RISCV::PseudoQuietFLE_D_INX:
18084 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18085 case RISCV::PseudoQuietFLE_D_IN32X:
18086 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18087 Subtarget);
18088 case RISCV::PseudoQuietFLT_D:
18089 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18090 case RISCV::PseudoQuietFLT_D_INX:
18091 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18092 case RISCV::PseudoQuietFLT_D_IN32X:
18093 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18094 Subtarget);
18095
18096 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18097 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18098 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18099 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18100 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18101 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18102 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18103 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18104 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18105 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18106 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18107 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18108 case RISCV::PseudoFROUND_H:
18109 case RISCV::PseudoFROUND_H_INX:
18110 case RISCV::PseudoFROUND_S:
18111 case RISCV::PseudoFROUND_S_INX:
18112 case RISCV::PseudoFROUND_D:
18113 case RISCV::PseudoFROUND_D_INX:
18114 case RISCV::PseudoFROUND_D_IN32X:
18115 return emitFROUND(MI, BB, Subtarget);
18116 case TargetOpcode::STATEPOINT:
18117 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18118 // while jal call instruction (where statepoint will be lowered at the end)
18119 // has implicit def. This def is early-clobber as it will be set at
18120 // the moment of the call and earlier than any use is read.
18121 // Add this implicit dead def here as a workaround.
18122 MI.addOperand(*MI.getMF(),
18123 MachineOperand::CreateReg(
18124 RISCV::X1, /*isDef*/ true,
18125 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18126 /*isUndef*/ false, /*isEarlyClobber*/ true));
18127 [[fallthrough]];
18128 case TargetOpcode::STACKMAP:
18129 case TargetOpcode::PATCHPOINT:
18130 if (!Subtarget.is64Bit())
18131 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18132 "supported on 64-bit targets");
18133 return emitPatchPoint(MI, BB);
18134 }
18135}
18136
18137 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18138 SDNode *Node) const {
18139 // Add FRM dependency to any instructions with dynamic rounding mode.
18140 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18141 if (Idx < 0) {
18142 // Vector pseudos have FRM index indicated by TSFlags.
18143 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18144 if (Idx < 0)
18145 return;
18146 }
18147 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18148 return;
18149 // If the instruction already reads FRM, don't add another read.
18150 if (MI.readsRegister(RISCV::FRM))
18151 return;
18152 MI.addOperand(
18153 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18154}
18155
18156// Calling Convention Implementation.
18157// The expectations for frontend ABI lowering vary from target to target.
18158// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18159// details, but this is a longer term goal. For now, we simply try to keep the
18160// role of the frontend as simple and well-defined as possible. The rules can
18161// be summarised as:
18162// * Never split up large scalar arguments. We handle them here.
18163// * If a hardfloat calling convention is being used, and the struct may be
18164// passed in a pair of registers (fp+fp, int+fp), and both registers are
18165// available, then pass as two separate arguments. If either the GPRs or FPRs
18166// are exhausted, then pass according to the rule below.
18167// * If a struct could never be passed in registers or directly in a stack
18168// slot (as it is larger than 2*XLEN and the floating point rules don't
18169// apply), then pass it using a pointer with the byval attribute.
18170// * If a struct is less than 2*XLEN, then coerce to either a two-element
18171// word-sized array or a 2*XLEN scalar (depending on alignment).
18172// * The frontend can determine whether a struct is returned by reference or
18173// not based on its size and fields. If it will be returned by reference, the
18174// frontend must modify the prototype so a pointer with the sret annotation is
18175// passed as the first argument. This is not necessary for large scalar
18176// returns.
18177// * Struct return values and varargs should be coerced to structs containing
18178// register-size fields in the same situations they would be for fixed
18179// arguments.
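// Illustrative sketch of the rules above (assumed example, not from this
// file): under the ilp32d hard-float ABI a struct { double d; int32_t i; }
// can go out as two separate arguments (one FPR, one GPR) while both register
// files still have space; if either is exhausted it is passed according to
// the rules below, and a struct larger than 2*XLEN that the floating point
// rules don't cover is passed byval through a pointer.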
18180
18181static const MCPhysReg ArgFPR16s[] = {
18182 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18183 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18184};
18185static const MCPhysReg ArgFPR32s[] = {
18186 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18187 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18188};
18189static const MCPhysReg ArgFPR64s[] = {
18190 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18191 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18192};
18193// This is an interim calling convention and it may be changed in the future.
18194static const MCPhysReg ArgVRs[] = {
18195 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18196 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18197 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18198static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18199 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18200 RISCV::V20M2, RISCV::V22M2};
18201static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18202 RISCV::V20M4};
18203static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18204
18205 ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
18206 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18207 // the ILP32E ABI.
18208 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18209 RISCV::X13, RISCV::X14, RISCV::X15,
18210 RISCV::X16, RISCV::X17};
18211 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18212 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18213 RISCV::X13, RISCV::X14, RISCV::X15};
18214
18215 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18216 return ArrayRef(ArgEGPRs);
18217
18218 return ArrayRef(ArgIGPRs);
18219}
18220
18221 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18222 // The GPRs used for passing arguments in FastCC. X5 and X6 might be used for
18223 // the save-restore libcall, so we don't use them.
18224 static const MCPhysReg FastCCIGPRs[] = {
18225 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18226 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18227 RISCV::X29, RISCV::X30, RISCV::X31};
18228
18229 // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
18230 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18231 RISCV::X13, RISCV::X14, RISCV::X15,
18232 RISCV::X7};
18233
18234 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18235 return ArrayRef(FastCCEGPRs);
18236
18237 return ArrayRef(FastCCIGPRs);
18238}
18239
18240// Pass a 2*XLEN argument that has been split into two XLEN values through
18241// registers or the stack as necessary.
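// For example, on RV32 an i64 argument split into two i32 halves is handled
// below as: both halves in GPRs when two are free, the low half in the last
// free GPR with the high half on the stack when only one is free, or both
// halves on the stack (respecting the original alignment) when none are free.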
18242static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18243 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18244 MVT ValVT2, MVT LocVT2,
18245 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18246 unsigned XLenInBytes = XLen / 8;
18247 const RISCVSubtarget &STI =
18250
18251 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18252 // At least one half can be passed via register.
18253 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18254 VA1.getLocVT(), CCValAssign::Full));
18255 } else {
18256 // Both halves must be passed on the stack, with proper alignment.
18257 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18258 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18259 Align StackAlign(XLenInBytes);
18260 if (!EABI || XLen != 32)
18261 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18262 State.addLoc(
18263 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18264 State.AllocateStack(XLenInBytes, StackAlign),
18265 VA1.getLocVT(), CCValAssign::Full));
18266 State.addLoc(CCValAssign::getMem(
18267 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18268 LocVT2, CCValAssign::Full));
18269 return false;
18270 }
18271
18272 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18273 // The second half can also be passed via register.
18274 State.addLoc(
18275 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18276 } else {
18277 // The second half is passed via the stack, without additional alignment.
18278 State.addLoc(CCValAssign::getMem(
18279 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18280 LocVT2, CCValAssign::Full));
18281 }
18282
18283 return false;
18284}
18285
18286// Implements the RISC-V calling convention. Returns true upon failure.
18287bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18288 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18289 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18290 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18291 RVVArgDispatcher &RVVDispatcher) {
18292 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18293 assert(XLen == 32 || XLen == 64);
18294 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18295
18296 // Static chain parameter must not be passed in normal argument registers,
18297 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18298 if (ArgFlags.isNest()) {
18299 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18300 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18301 return false;
18302 }
18303 }
18304
18305 // Any return value split into more than two values can't be returned
18306 // directly. Vectors are returned via the available vector registers.
18307 if (!LocVT.isVector() && IsRet && ValNo > 1)
18308 return true;
18309
18310 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18311 // variadic argument, or if no F16/F32 argument registers are available.
18312 bool UseGPRForF16_F32 = true;
18313 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18314 // variadic argument, or if no F64 argument registers are available.
18315 bool UseGPRForF64 = true;
18316
18317 switch (ABI) {
18318 default:
18319 llvm_unreachable("Unexpected ABI");
18320 case RISCVABI::ABI_ILP32:
18321 case RISCVABI::ABI_ILP32E:
18322 case RISCVABI::ABI_LP64:
18323 case RISCVABI::ABI_LP64E:
18324 break;
18325 case RISCVABI::ABI_ILP32F:
18326 case RISCVABI::ABI_LP64F:
18327 UseGPRForF16_F32 = !IsFixed;
18328 break;
18329 case RISCVABI::ABI_ILP32D:
18330 case RISCVABI::ABI_LP64D:
18331 UseGPRForF16_F32 = !IsFixed;
18332 UseGPRForF64 = !IsFixed;
18333 break;
18334 }
18335
18336 // FPR16, FPR32, and FPR64 alias each other.
18337 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18338 UseGPRForF16_F32 = true;
18339 UseGPRForF64 = true;
18340 }
18341
18342 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18343 // similar local variables rather than directly checking against the target
18344 // ABI.
18345
18346 if (UseGPRForF16_F32 &&
18347 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18348 LocVT = XLenVT;
18349 LocInfo = CCValAssign::BCvt;
18350 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18351 LocVT = MVT::i64;
18352 LocInfo = CCValAssign::BCvt;
18353 }
18354
18355 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
18356
18357 // If this is a variadic argument, the RISC-V calling convention requires
18358 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18359 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18360 // be used regardless of whether the original argument was split during
18361 // legalisation or not. The argument will not be passed by registers if the
18362 // original type is larger than 2*XLEN, so the register alignment rule does
18363 // not apply.
18364 // TODO: To be compatible with GCC's behaviors, we don't align registers
18365 // currently when using the ILP32E calling convention. This behavior may be
18366 // changed when RV32E/ILP32E is ratified.
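// For example (illustrative): on RV32, a variadic double that would otherwise
// start at a1 skips that 'odd' register and is passed in the aligned pair
// a2/a3 instead.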
18367 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18368 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18369 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18370 ABI != RISCVABI::ABI_ILP32E) {
18371 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18372 // Skip 'odd' register if necessary.
18373 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18374 State.AllocateReg(ArgGPRs);
18375 }
18376
18377 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18378 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18379 State.getPendingArgFlags();
18380
18381 assert(PendingLocs.size() == PendingArgFlags.size() &&
18382 "PendingLocs and PendingArgFlags out of sync");
18383
18384 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18385 // registers are exhausted.
18386 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18387 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18388 // Depending on available argument GPRS, f64 may be passed in a pair of
18389 // GPRs, split between a GPR and the stack, or passed completely on the
18390 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18391 // cases.
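// For example, if only a7 remains unallocated, the low half of the f64 is
// assigned to a7 and the high half is assigned a stack slot (the GPR + stack
// split case handled below).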
18392 Register Reg = State.AllocateReg(ArgGPRs);
18393 if (!Reg) {
18394 unsigned StackOffset = State.AllocateStack(8, Align(8));
18395 State.addLoc(
18396 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18397 return false;
18398 }
18399 LocVT = MVT::i32;
18400 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18401 Register HiReg = State.AllocateReg(ArgGPRs);
18402 if (HiReg) {
18403 State.addLoc(
18404 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18405 } else {
18406 unsigned StackOffset = State.AllocateStack(4, Align(4));
18407 State.addLoc(
18408 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18409 }
18410 return false;
18411 }
18412
18413 // Fixed-length vectors are located in the corresponding scalable-vector
18414 // container types.
18415 if (ValVT.isFixedLengthVector())
18416 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18417
18418 // Split arguments might be passed indirectly, so keep track of the pending
18419 // values. Split vectors are passed via a mix of registers and indirectly, so
18420 // treat them as we would any other argument.
18421 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18422 LocVT = XLenVT;
18423 LocInfo = CCValAssign::Indirect;
18424 PendingLocs.push_back(
18425 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18426 PendingArgFlags.push_back(ArgFlags);
18427 if (!ArgFlags.isSplitEnd()) {
18428 return false;
18429 }
18430 }
18431
18432 // If the split argument only had two elements, it should be passed directly
18433 // in registers or on the stack.
18434 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18435 PendingLocs.size() <= 2) {
18436 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18437 // Apply the normal calling convention rules to the first half of the
18438 // split argument.
18439 CCValAssign VA = PendingLocs[0];
18440 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18441 PendingLocs.clear();
18442 PendingArgFlags.clear();
18443 return CC_RISCVAssign2XLen(
18444 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18445 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18446 }
18447
18448 // Allocate to a register if possible, or else a stack slot.
18449 Register Reg;
18450 unsigned StoreSizeBytes = XLen / 8;
18451 Align StackAlign = Align(XLen / 8);
18452
18453 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18454 Reg = State.AllocateReg(ArgFPR16s);
18455 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18456 Reg = State.AllocateReg(ArgFPR32s);
18457 else if (ValVT == MVT::f64 && !UseGPRForF64)
18458 Reg = State.AllocateReg(ArgFPR64s);
18459 else if (ValVT.isVector()) {
18460 Reg = RVVDispatcher.getNextPhysReg();
18461 if (!Reg) {
18462 // For return values, the vector must be passed fully via registers or
18463 // via the stack.
18464 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18465 // but we're using all of them.
18466 if (IsRet)
18467 return true;
18468 // Try using a GPR to pass the address
18469 if ((Reg = State.AllocateReg(ArgGPRs))) {
18470 LocVT = XLenVT;
18471 LocInfo = CCValAssign::Indirect;
18472 } else if (ValVT.isScalableVector()) {
18473 LocVT = XLenVT;
18474 LocInfo = CCValAssign::Indirect;
18475 } else {
18476 // Pass fixed-length vectors on the stack.
18477 LocVT = ValVT;
18478 StoreSizeBytes = ValVT.getStoreSize();
18479 // Align vectors to their element sizes, being careful for vXi1
18480 // vectors.
18481 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18482 }
18483 }
18484 } else {
18485 Reg = State.AllocateReg(ArgGPRs);
18486 }
18487
18488 unsigned StackOffset =
18489 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18490
18491 // If we reach this point and PendingLocs is non-empty, we must be at the
18492 // end of a split argument that must be passed indirectly.
18493 if (!PendingLocs.empty()) {
18494 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18495 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18496
18497 for (auto &It : PendingLocs) {
18498 if (Reg)
18499 It.convertToReg(Reg);
18500 else
18501 It.convertToMem(StackOffset);
18502 State.addLoc(It);
18503 }
18504 PendingLocs.clear();
18505 PendingArgFlags.clear();
18506 return false;
18507 }
18508
18509 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18510 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18511 "Expected an XLenVT or vector types at this stage");
18512
18513 if (Reg) {
18514 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18515 return false;
18516 }
18517
18518 // When a scalar floating-point value is passed on the stack, no
18519 // bit-conversion is needed.
18520 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18521 assert(!ValVT.isVector());
18522 LocVT = ValVT;
18523 LocInfo = CCValAssign::Full;
18524 }
18525 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18526 return false;
18527}
18528
18529template <typename ArgTy>
18530static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18531 for (const auto &ArgIdx : enumerate(Args)) {
18532 MVT ArgVT = ArgIdx.value().VT;
18533 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18534 return ArgIdx.index();
18535 }
18536 return std::nullopt;
18537}
18538
18539void RISCVTargetLowering::analyzeInputArgs(
18540 MachineFunction &MF, CCState &CCInfo,
18541 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18542 RISCVCCAssignFn Fn) const {
18543 unsigned NumArgs = Ins.size();
18544 FunctionType *FType = MF.getFunction().getFunctionType();
18545
18546 RVVArgDispatcher Dispatcher;
18547 if (IsRet) {
18548 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18549 } else {
18550 SmallVector<Type *, 4> TypeList;
18551 for (const Argument &Arg : MF.getFunction().args())
18552 TypeList.push_back(Arg.getType());
18553 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18554 }
18555
18556 for (unsigned i = 0; i != NumArgs; ++i) {
18557 MVT ArgVT = Ins[i].VT;
18558 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18559
18560 Type *ArgTy = nullptr;
18561 if (IsRet)
18562 ArgTy = FType->getReturnType();
18563 else if (Ins[i].isOrigArg())
18564 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18565 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18565
18567 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18568 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18569 Dispatcher)) {
18570 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18571 << ArgVT << '\n');
18572 llvm_unreachable(nullptr);
18573 }
18574 }
18575}
18576
18577void RISCVTargetLowering::analyzeOutputArgs(
18578 MachineFunction &MF, CCState &CCInfo,
18579 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18580 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18581 unsigned NumArgs = Outs.size();
18582
18583 SmallVector<Type *, 4> TypeList;
18584 if (IsRet)
18585 TypeList.push_back(MF.getFunction().getReturnType());
18586 else if (CLI)
18587 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18588 TypeList.push_back(Arg.Ty);
18589 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18590
18591 for (unsigned i = 0; i != NumArgs; i++) {
18592 MVT ArgVT = Outs[i].VT;
18593 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18594 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18595 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18595
18597 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18598 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18599 Dispatcher)) {
18600 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18601 << ArgVT << "\n");
18602 llvm_unreachable(nullptr);
18603 }
18604 }
18605}
18606
18607// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18608// values.
18609 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18610 const CCValAssign &VA, const SDLoc &DL,
18611 const RISCVSubtarget &Subtarget) {
18612 switch (VA.getLocInfo()) {
18613 default:
18614 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18615 case CCValAssign::Full:
18616 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18617 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18618 break;
18619 case CCValAssign::BCvt:
18620 if (VA.getLocVT().isInteger() &&
18621 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18622 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18623 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18624 if (RV64LegalI32) {
18625 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18626 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18627 } else {
18628 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18629 }
18630 } else {
18631 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18632 }
18633 break;
18634 }
18635 return Val;
18636}
18637
18638// The caller is responsible for loading the full value if the argument is
18639// passed with CCValAssign::Indirect.
18641 const CCValAssign &VA, const SDLoc &DL,
18642 const ISD::InputArg &In,
18643 const RISCVTargetLowering &TLI) {
18644 MachineFunction &MF = DAG.getMachineFunction();
18645 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18646 EVT LocVT = VA.getLocVT();
18647 SDValue Val;
18648 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18649 Register VReg = RegInfo.createVirtualRegister(RC);
18650 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18651 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18652
18653 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18654 if (In.isOrigArg()) {
18655 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18656 if (OrigArg->getType()->isIntegerTy()) {
18657 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18658 // An input zero extended from i31 can also be considered sign extended.
18659 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18660 (BitWidth < 32 && In.Flags.isZExt())) {
18661 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18662 RVFI->addSExt32Register(VReg);
18663 }
18664 }
18665 }
18666
18667 if (VA.getLocInfo() == CCValAssign::Indirect)
18668 return Val;
18669
18670 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18671}
18672
18673 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18674 const CCValAssign &VA, const SDLoc &DL,
18675 const RISCVSubtarget &Subtarget) {
18676 EVT LocVT = VA.getLocVT();
18677
18678 switch (VA.getLocInfo()) {
18679 default:
18680 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18681 case CCValAssign::Full:
18682 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18683 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18684 break;
18685 case CCValAssign::BCvt:
18686 if (LocVT.isInteger() &&
18687 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18688 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18689 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18690 if (RV64LegalI32) {
18691 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18692 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18693 } else {
18694 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18695 }
18696 } else {
18697 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18698 }
18699 break;
18700 }
18701 return Val;
18702}
18703
18704// The caller is responsible for loading the full value if the argument is
18705// passed with CCValAssign::Indirect.
18706 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18707 const CCValAssign &VA, const SDLoc &DL) {
18708 MachineFunction &MF = DAG.getMachineFunction();
18709 MachineFrameInfo &MFI = MF.getFrameInfo();
18710 EVT LocVT = VA.getLocVT();
18711 EVT ValVT = VA.getValVT();
18713 if (ValVT.isScalableVector()) {
18714 // When the value is a scalable vector, we save the pointer which points to
18715 // the scalable vector value in the stack. The ValVT will be the pointer
18716 // type, instead of the scalable vector type.
18717 ValVT = LocVT;
18718 }
18719 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18720 /*IsImmutable=*/true);
18721 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18722 SDValue Val;
18723
18724 ISD::LoadExtType ExtType;
18725 switch (VA.getLocInfo()) {
18726 default:
18727 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18728 case CCValAssign::Full:
18729 case CCValAssign::Indirect:
18730 case CCValAssign::BCvt:
18731 ExtType = ISD::NON_EXTLOAD;
18732 break;
18733 }
18734 Val = DAG.getExtLoad(
18735 ExtType, DL, LocVT, Chain, FIN,
18737 return Val;
18738}
18739
18740static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18741 const CCValAssign &VA,
18742 const CCValAssign &HiVA,
18743 const SDLoc &DL) {
18744 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18745 "Unexpected VA");
18746 MachineFunction &MF = DAG.getMachineFunction();
18747 MachineFrameInfo &MFI = MF.getFrameInfo();
18748 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18749
18750 assert(VA.isRegLoc() && "Expected register VA assignment");
18751
18752 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18753 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18754 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18755 SDValue Hi;
18756 if (HiVA.isMemLoc()) {
18757 // Second half of f64 is passed on the stack.
18758 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18759 /*IsImmutable=*/true);
18760 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18761 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18763 } else {
18764 // Second half of f64 is passed in another GPR.
18765 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18766 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18767 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18768 }
18769 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18770}
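// Illustrative case: with the ilp32 ABI on an RV32 target that has the D
// extension, a double argument arrives with its low half in the GPR assigned
// above and its high half either in the next GPR or in a 4-byte stack slot;
// BuildPairF64 then reassembles the f64 value.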
18771
18772 // FastCC gives less than a 1% performance improvement on some particular
18773 // benchmarks, but in theory it may still benefit certain cases.
18774bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18775 unsigned ValNo, MVT ValVT, MVT LocVT,
18776 CCValAssign::LocInfo LocInfo,
18777 ISD::ArgFlagsTy ArgFlags, CCState &State,
18778 bool IsFixed, bool IsRet, Type *OrigTy,
18779 const RISCVTargetLowering &TLI,
18780 RVVArgDispatcher &RVVDispatcher) {
18781 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18782 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18783 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18784 return false;
18785 }
18786 }
18787
18788 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18789
18790 if (LocVT == MVT::f16 &&
18791 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18792 static const MCPhysReg FPR16List[] = {
18793 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18794 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18795 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18796 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18797 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18798 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18799 return false;
18800 }
18801 }
18802
18803 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18804 static const MCPhysReg FPR32List[] = {
18805 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18806 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18807 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18808 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18809 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18810 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18811 return false;
18812 }
18813 }
18814
18815 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18816 static const MCPhysReg FPR64List[] = {
18817 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18818 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18819 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18820 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18821 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18822 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18823 return false;
18824 }
18825 }
18826
18827 // Check if there is an available GPR before hitting the stack.
18828 if ((LocVT == MVT::f16 &&
18829 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18830 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18831 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18832 Subtarget.hasStdExtZdinx())) {
18833 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18834 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18835 return false;
18836 }
18837 }
18838
18839 if (LocVT == MVT::f16) {
18840 unsigned Offset2 = State.AllocateStack(2, Align(2));
18841 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
18842 return false;
18843 }
18844
18845 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18846 unsigned Offset4 = State.AllocateStack(4, Align(4));
18847 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
18848 return false;
18849 }
18850
18851 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18852 unsigned Offset5 = State.AllocateStack(8, Align(8));
18853 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
18854 return false;
18855 }
18856
18857 if (LocVT.isVector()) {
18858 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18859 if (AllocatedVReg) {
18860 // Fixed-length vectors are located in the corresponding scalable-vector
18861 // container types.
18862 if (ValVT.isFixedLengthVector())
18863 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18864 State.addLoc(
18865 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
18866 } else {
18867 // Try to pass the address via a "fast" GPR.
18868 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18869 LocInfo = CCValAssign::Indirect;
18870 LocVT = TLI.getSubtarget().getXLenVT();
18871 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
18872 } else if (ValVT.isFixedLengthVector()) {
18873 auto StackAlign =
18875 unsigned StackOffset =
18876 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18877 State.addLoc(
18878 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18879 } else {
18880 // Can't pass scalable vectors on the stack.
18881 return true;
18882 }
18883 }
18884
18885 return false;
18886 }
18887
18888 return true; // CC didn't match.
18889}
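// Rough allocation order used above for fastcc: integer values try the fastcc
// GPR set first; FP values try fa0-fa7, then the temporaries ft0-ft7 and
// ft8-ft11; Zfinx-style configurations fall back to GPRs; remaining scalars go
// to the stack, and vector values are handed to the RVVDispatcher.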
18890
18891bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18892 CCValAssign::LocInfo LocInfo,
18893 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18894 if (ArgFlags.isNest()) {
18895 report_fatal_error(
18896 "Attribute 'nest' is not supported in GHC calling convention");
18897 }
18898
18899 static const MCPhysReg GPRList[] = {
18900 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18901 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18902
18903 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18904 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18905 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18906 if (unsigned Reg = State.AllocateReg(GPRList)) {
18907 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18908 return false;
18909 }
18910 }
18911
18912 const RISCVSubtarget &Subtarget =
18913 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18914
18915 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18916 // Pass in STG registers: F1, ..., F6
18917 // fs0 ... fs5
18918 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18919 RISCV::F18_F, RISCV::F19_F,
18920 RISCV::F20_F, RISCV::F21_F};
18921 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18922 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18923 return false;
18924 }
18925 }
18926
18927 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18928 // Pass in STG registers: D1, ..., D6
18929 // fs6 ... fs11
18930 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18931 RISCV::F24_D, RISCV::F25_D,
18932 RISCV::F26_D, RISCV::F27_D};
18933 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18934 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18935 return false;
18936 }
18937 }
18938
18939 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18940 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18941 Subtarget.is64Bit())) {
18942 if (unsigned Reg = State.AllocateReg(GPRList)) {
18943 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18944 return false;
18945 }
18946 }
18947
18948 report_fatal_error("No registers left in GHC calling convention");
18949 return true;
18950}
18951
18952// Transform physical registers into virtual registers.
18953SDValue RISCVTargetLowering::LowerFormalArguments(
18954 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18955 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18956 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18957
18958 MachineFunction &MF = DAG.getMachineFunction();
18959
18960 switch (CallConv) {
18961 default:
18962 report_fatal_error("Unsupported calling convention");
18963 case CallingConv::C:
18964 case CallingConv::Fast:
18966 case CallingConv::GRAAL:
18968 break;
18969 case CallingConv::GHC:
18970 if (Subtarget.hasStdExtE())
18971 report_fatal_error("GHC calling convention is not supported on RVE!");
18972 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18973 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
18974 "(Zdinx/D) instruction set extensions");
18975 }
18976
18977 const Function &Func = MF.getFunction();
18978 if (Func.hasFnAttribute("interrupt")) {
18979 if (!Func.arg_empty())
18980 report_fatal_error(
18981 "Functions with the interrupt attribute cannot have arguments!");
18982
18983 StringRef Kind =
18984 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18985
18986 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18987 report_fatal_error(
18988 "Function interrupt attribute argument not supported!");
18989 }
18990
18991 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18992 MVT XLenVT = Subtarget.getXLenVT();
18993 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18994 // Used with varargs to accumulate store chains.
18995 std::vector<SDValue> OutChains;
18996
18997 // Assign locations to all of the incoming arguments.
18998 SmallVector<CCValAssign, 16> ArgLocs;
18999 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19000
19001 if (CallConv == CallingConv::GHC)
19002 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19003 else
19004 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19005 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19006 : RISCV::CC_RISCV);
19007
19008 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19009 CCValAssign &VA = ArgLocs[i];
19010 SDValue ArgValue;
19011 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19012 // case.
19013 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19014 assert(VA.needsCustom());
19015 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19016 } else if (VA.isRegLoc())
19017 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19018 else
19019 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19020
19021 if (VA.getLocInfo() == CCValAssign::Indirect) {
19022 // If the original argument was split and passed by reference (e.g. i128
19023 // on RV32), we need to load all parts of it here (using the same
19024 // address). Vectors may be partly split to registers and partly to the
19025 // stack, in which case the base address is partly offset and subsequent
19026 // stores are relative to that.
19027 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19029 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19030 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19031 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19032 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19033 CCValAssign &PartVA = ArgLocs[i + 1];
19034 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19035 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19036 if (PartVA.getValVT().isScalableVector())
19037 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19038 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19039 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19041 ++i;
19042 ++InsIdx;
19043 }
19044 continue;
19045 }
19046 InVals.push_back(ArgValue);
19047 }
19048
19049 if (any_of(ArgLocs,
19050 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19051 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19052
19053 if (IsVarArg) {
19054 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19055 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19056 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19057 MachineFrameInfo &MFI = MF.getFrameInfo();
19058 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19059 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19060
19061 // Size of the vararg save area. For now, the varargs save area is either
19062 // zero or large enough to hold a0-a7.
19063 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19064 int FI;
19065
19066 // If all registers are allocated, then all varargs must be passed on the
19067 // stack and we don't need to save any argregs.
19068 if (VarArgsSaveSize == 0) {
19069 int VaArgOffset = CCInfo.getStackSize();
19070 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19071 } else {
19072 int VaArgOffset = -VarArgsSaveSize;
19073 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19074
19075 // If an odd number of registers is being saved, create an extra stack slot
19076 // to ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19077 // offsets to even-numbered registers remain 2*XLEN-aligned.
19078 if (Idx % 2) {
19079 MFI.CreateFixedObject(
19080 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19081 VarArgsSaveSize += XLenInBytes;
19082 }
19083
19084 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19085
19086 // Copy the integer registers that may have been used for passing varargs
19087 // to the vararg save area.
19088 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19089 const Register Reg = RegInfo.createVirtualRegister(RC);
19090 RegInfo.addLiveIn(ArgRegs[I], Reg);
19091 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19092 SDValue Store = DAG.getStore(
19093 Chain, DL, ArgValue, FIN,
19094 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19095 OutChains.push_back(Store);
19096 FIN =
19097 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19098 }
19099 }
19100
19101 // Record the frame index of the first variable argument,
19102 // which is needed when lowering VASTART.
19103 RVFI->setVarArgsFrameIndex(FI);
19104 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19105 }
19106
19107 // All stores are grouped in one node to allow the matching between
19108 // the size of Ins and InVals. This only happens for vararg functions.
19109 if (!OutChains.empty()) {
19110 OutChains.push_back(Chain);
19111 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19112 }
19113
19114 return Chain;
19115}
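// Example of the vararg handling above: for a varargs function on RV64 with
// two named integer arguments, a2-a7 are roughly spilled into a 48-byte save
// area at negative fixed-object offsets so that va_arg can walk the
// register-passed and stack-passed variadic arguments contiguously.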
19116
19117/// isEligibleForTailCallOptimization - Check whether the call is eligible
19118/// for tail call optimization.
19119/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19120bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19121 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19122 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19123
19124 auto CalleeCC = CLI.CallConv;
19125 auto &Outs = CLI.Outs;
19126 auto &Caller = MF.getFunction();
19127 auto CallerCC = Caller.getCallingConv();
19128
19129 // Exception-handling functions need a special set of instructions to
19130 // indicate a return to the hardware. Tail-calling another function would
19131 // probably break this.
19132 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19133 // should be expanded as new function attributes are introduced.
19134 if (Caller.hasFnAttribute("interrupt"))
19135 return false;
19136
19137 // Do not tail call opt if the stack is used to pass parameters.
19138 if (CCInfo.getStackSize() != 0)
19139 return false;
19140
19141 // Do not tail call opt if any parameters need to be passed indirectly.
19142 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19143 // passed indirectly. So the address of the value will be passed in a
19144 // register, or if not available, then the address is put on the stack. In
19145 // order to pass indirectly, space on the stack often needs to be allocated
19146 // in order to store the value. In this case the CCInfo.getStackSize() != 0
19147 // check is not enough and we need to check if any CCValAssign in ArgLocs is
19148 // passed CCValAssign::Indirect.
19149 for (auto &VA : ArgLocs)
19150 if (VA.getLocInfo() == CCValAssign::Indirect)
19151 return false;
19152
19153 // Do not tail call opt if either caller or callee uses struct return
19154 // semantics.
19155 auto IsCallerStructRet = Caller.hasStructRetAttr();
19156 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19157 if (IsCallerStructRet || IsCalleeStructRet)
19158 return false;
19159
19160 // The callee has to preserve all registers the caller needs to preserve.
19161 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19162 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19163 if (CalleeCC != CallerCC) {
19164 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19165 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19166 return false;
19167 }
19168
19169 // Byval parameters hand the function a pointer directly into the stack area
19170 // we want to reuse during a tail call. Working around this *is* possible
19171 // but less efficient and uglier in LowerCall.
19172 for (auto &Arg : Outs)
19173 if (Arg.Flags.isByVal())
19174 return false;
19175
19176 return true;
19177}
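// In practice the checks above mean, for example, that a call passing an i128
// on RV32 (which is passed indirectly), a byval struct, or any argument on the
// stack is not tail-call optimized, and neither is a call made from a function
// carrying the "interrupt" attribute.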
19178
19179static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19180 return DAG.getDataLayout().getPrefTypeAlign(
19181 VT.getTypeForEVT(*DAG.getContext()));
19182}
19183
19184// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19185// and output parameter nodes.
19186SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19187 SmallVectorImpl<SDValue> &InVals) const {
19188 SelectionDAG &DAG = CLI.DAG;
19189 SDLoc &DL = CLI.DL;
19190 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19191 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19192 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19193 SDValue Chain = CLI.Chain;
19194 SDValue Callee = CLI.Callee;
19195 bool &IsTailCall = CLI.IsTailCall;
19196 CallingConv::ID CallConv = CLI.CallConv;
19197 bool IsVarArg = CLI.IsVarArg;
19198 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19199 MVT XLenVT = Subtarget.getXLenVT();
19200
19201 MachineFunction &MF = DAG.getMachineFunction();
19202
19203 // Analyze the operands of the call, assigning locations to each operand.
19204 SmallVector<CCValAssign, 16> ArgLocs;
19205 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19206
19207 if (CallConv == CallingConv::GHC) {
19208 if (Subtarget.hasStdExtE())
19209 report_fatal_error("GHC calling convention is not supported on RVE!");
19210 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19211 } else
19212 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19213 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19214 : RISCV::CC_RISCV);
19215
19216 // Check if it's really possible to do a tail call.
19217 if (IsTailCall)
19218 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19219
19220 if (IsTailCall)
19221 ++NumTailCalls;
19222 else if (CLI.CB && CLI.CB->isMustTailCall())
19223 report_fatal_error("failed to perform tail call elimination on a call "
19224 "site marked musttail");
19225
19226 // Get a count of how many bytes are to be pushed on the stack.
19227 unsigned NumBytes = ArgCCInfo.getStackSize();
19228
19229 // Create local copies for byval args
19230 SmallVector<SDValue, 8> ByValArgs;
19231 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19232 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19233 if (!Flags.isByVal())
19234 continue;
19235
19236 SDValue Arg = OutVals[i];
19237 unsigned Size = Flags.getByValSize();
19238 Align Alignment = Flags.getNonZeroByValAlign();
19239
19240 int FI =
19241 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19242 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19243 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19244
19245 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19246 /*IsVolatile=*/false,
19247 /*AlwaysInline=*/false, IsTailCall,
19249 ByValArgs.push_back(FIPtr);
19250 }
19251
19252 if (!IsTailCall)
19253 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19254
19255 // Copy argument values to their designated locations.
19256 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19257 SmallVector<SDValue, 8> MemOpChains;
19258 SDValue StackPtr;
19259 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19260 ++i, ++OutIdx) {
19261 CCValAssign &VA = ArgLocs[i];
19262 SDValue ArgValue = OutVals[OutIdx];
19263 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19264
19265 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19266 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19267 assert(VA.isRegLoc() && "Expected register VA assignment");
19268 assert(VA.needsCustom());
19269 SDValue SplitF64 = DAG.getNode(
19270 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19271 SDValue Lo = SplitF64.getValue(0);
19272 SDValue Hi = SplitF64.getValue(1);
19273
19274 Register RegLo = VA.getLocReg();
19275 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19276
19277 // Get the CCValAssign for the Hi part.
19278 CCValAssign &HiVA = ArgLocs[++i];
19279
19280 if (HiVA.isMemLoc()) {
19281 // Second half of f64 is passed on the stack.
19282 if (!StackPtr.getNode())
19283 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19285 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19286 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19287 // Emit the store.
19288 MemOpChains.push_back(
19289 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19290 } else {
19291 // Second half of f64 is passed in another GPR.
19292 Register RegHigh = HiVA.getLocReg();
19293 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19294 }
19295 continue;
19296 }
19297
19298 // Promote the value if needed.
19299 // For now, only handle fully promoted and indirect arguments.
19300 if (VA.getLocInfo() == CCValAssign::Indirect) {
19301 // Store the argument in a stack slot and pass its address.
19302 Align StackAlign =
19303 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19304 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19305 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19306 // If the original argument was split (e.g. i128), we need
19307 // to store the required parts of it here (and pass just one address).
19308 // Vectors may be partly split to registers and partly to the stack, in
19309 // which case the base address is partly offset and subsequent stores are
19310 // relative to that.
19311 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19312 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19313 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19314 // Calculate the total size to store. We don't have access to what we're
19315 // actually storing other than performing the loop and collecting the
19316 // info.
19318 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19319 SDValue PartValue = OutVals[OutIdx + 1];
19320 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19321 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19322 EVT PartVT = PartValue.getValueType();
19323 if (PartVT.isScalableVector())
19324 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19325 StoredSize += PartVT.getStoreSize();
19326 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19327 Parts.push_back(std::make_pair(PartValue, Offset));
19328 ++i;
19329 ++OutIdx;
19330 }
19331 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19332 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19333 MemOpChains.push_back(
19334 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19336 for (const auto &Part : Parts) {
19337 SDValue PartValue = Part.first;
19338 SDValue PartOffset = Part.second;
19340 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19341 MemOpChains.push_back(
19342 DAG.getStore(Chain, DL, PartValue, Address,
19344 }
19345 ArgValue = SpillSlot;
19346 } else {
19347 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19348 }
19349
19350 // Use local copy if it is a byval arg.
19351 if (Flags.isByVal())
19352 ArgValue = ByValArgs[j++];
19353
19354 if (VA.isRegLoc()) {
19355 // Queue up the argument copies and emit them at the end.
19356 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19357 } else {
19358 assert(VA.isMemLoc() && "Argument not register or memory");
19359 assert(!IsTailCall && "Tail call not allowed if stack is used "
19360 "for passing parameters");
19361
19362 // Work out the address of the stack slot.
19363 if (!StackPtr.getNode())
19364 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19366 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19368
19369 // Emit the store.
19370 MemOpChains.push_back(
19371 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19372 }
19373 }
19374
19375 // Join the stores, which are independent of one another.
19376 if (!MemOpChains.empty())
19377 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19378
19379 SDValue Glue;
19380
19381 // Build a sequence of copy-to-reg nodes, chained and glued together.
19382 for (auto &Reg : RegsToPass) {
19383 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19384 Glue = Chain.getValue(1);
19385 }
19386
19387 // Validate that none of the argument registers have been marked as
19388 // reserved; if so, report an error. Do the same for the return address if
19389 // this is not a tail call.
19390 validateCCReservedRegs(RegsToPass, MF);
19391 if (!IsTailCall &&
19394 MF.getFunction(),
19395 "Return address register required, but has been reserved."});
19396
19397 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19398 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19399 // split it and then direct call can be matched by PseudoCALL.
19400 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19401 const GlobalValue *GV = S->getGlobal();
19402 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19403 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19404 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19405 }
19406
19407 // The first call operand is the chain and the second is the target address.
19408 SmallVector<SDValue, 8> Ops;
19409 Ops.push_back(Chain);
19410 Ops.push_back(Callee);
19411
19412 // Add argument registers to the end of the list so that they are
19413 // known live into the call.
19414 for (auto &Reg : RegsToPass)
19415 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19416
19417 if (!IsTailCall) {
19418 // Add a register mask operand representing the call-preserved registers.
19419 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19420 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19421 assert(Mask && "Missing call preserved mask for calling convention");
19422 Ops.push_back(DAG.getRegisterMask(Mask));
19423 }
19424
19425 // Glue the call to the argument copies, if any.
19426 if (Glue.getNode())
19427 Ops.push_back(Glue);
19428
19429 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19430 "Unexpected CFI type for a direct call");
19431
19432 // Emit the call.
19433 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19434
19435 if (IsTailCall) {
19436 MF.getFrameInfo().setHasTailCall();
19437 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19438 if (CLI.CFIType)
19439 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19440 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19441 return Ret;
19442 }
19443
19444 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19445 if (CLI.CFIType)
19446 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19447 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19448 Glue = Chain.getValue(1);
19449
19450 // Mark the end of the call, which is glued to the call itself.
19451 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19452 Glue = Chain.getValue(1);
19453
19454 // Assign locations to each value returned by this call.
19455 SmallVector<CCValAssign, 16> RVLocs;
19456 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19457 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19458
19459 // Copy all of the result registers out of their specified physreg.
19460 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19461 auto &VA = RVLocs[i];
19462 // Copy the value out
19463 SDValue RetValue =
19464 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19465 // Glue the RetValue to the end of the call sequence
19466 Chain = RetValue.getValue(1);
19467 Glue = RetValue.getValue(2);
19468
19469 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19470 assert(VA.needsCustom());
19471 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19472 MVT::i32, Glue);
19473 Chain = RetValue2.getValue(1);
19474 Glue = RetValue2.getValue(2);
19475 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19476 RetValue2);
19477 }
19478
19479 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19480
19481 InVals.push_back(RetValue);
19482 }
19483
19484 return Chain;
19485}
19486
19487bool RISCVTargetLowering::CanLowerReturn(
19488 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19489 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19490 SmallVector<CCValAssign, 16> RVLocs;
19491 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19492
19493 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19494
19495 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19496 MVT VT = Outs[i].VT;
19497 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19498 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19499 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19500 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19501 nullptr, *this, Dispatcher))
19502 return false;
19503 }
19504 return true;
19505}
19506
19507SDValue
19508RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19509 bool IsVarArg,
19510 const SmallVectorImpl<ISD::OutputArg> &Outs,
19511 const SmallVectorImpl<SDValue> &OutVals,
19512 const SDLoc &DL, SelectionDAG &DAG) const {
19513 MachineFunction &MF = DAG.getMachineFunction();
19514 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19515
19516 // Stores the assignment of the return value to a location.
19517 SmallVector<CCValAssign, 16> RVLocs;
19518
19519 // Info about the registers and stack slot.
19520 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19521 *DAG.getContext());
19522
19523 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19524 nullptr, RISCV::CC_RISCV);
19525
19526 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19527 report_fatal_error("GHC functions return void only");
19528
19529 SDValue Glue;
19530 SmallVector<SDValue, 4> RetOps(1, Chain);
19531
19532 // Copy the result values into the output registers.
19533 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19534 SDValue Val = OutVals[OutIdx];
19535 CCValAssign &VA = RVLocs[i];
19536 assert(VA.isRegLoc() && "Can only return in registers!");
19537
19538 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19539 // Handle returning f64 on RV32D with a soft float ABI.
19540 assert(VA.isRegLoc() && "Expected return via registers");
19541 assert(VA.needsCustom());
19542 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19543 DAG.getVTList(MVT::i32, MVT::i32), Val);
19544 SDValue Lo = SplitF64.getValue(0);
19545 SDValue Hi = SplitF64.getValue(1);
19546 Register RegLo = VA.getLocReg();
19547 Register RegHi = RVLocs[++i].getLocReg();
19548
19549 if (STI.isRegisterReservedByUser(RegLo) ||
19550 STI.isRegisterReservedByUser(RegHi))
19552 MF.getFunction(),
19553 "Return value register required, but has been reserved."});
19554
19555 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19556 Glue = Chain.getValue(1);
19557 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19558 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19559 Glue = Chain.getValue(1);
19560 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19561 } else {
19562 // Handle a 'normal' return.
19563 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19564 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19565
19566 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19568 MF.getFunction(),
19569 "Return value register required, but has been reserved."});
19570
19571 // Guarantee that all emitted copies are stuck together.
19572 Glue = Chain.getValue(1);
19573 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19574 }
19575 }
19576
19577 RetOps[0] = Chain; // Update chain.
19578
19579 // Add the glue node if we have it.
19580 if (Glue.getNode()) {
19581 RetOps.push_back(Glue);
19582 }
19583
19584 if (any_of(RVLocs,
19585 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19586 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19587
19588 unsigned RetOpc = RISCVISD::RET_GLUE;
19589 // Interrupt service routines use different return instructions.
19590 const Function &Func = DAG.getMachineFunction().getFunction();
19591 if (Func.hasFnAttribute("interrupt")) {
19592 if (!Func.getReturnType()->isVoidTy())
19593 report_fatal_error(
19594 "Functions with the interrupt attribute must have void return type!");
19595
19597 StringRef Kind =
19598 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19599
19600 if (Kind == "supervisor")
19601 RetOpc = RISCVISD::SRET_GLUE;
19602 else
19603 RetOpc = RISCVISD::MRET_GLUE;
19604 }
19605
19606 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19607}
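// For instance, a function whose "interrupt" attribute is "supervisor" returns
// above via SRET_GLUE (sret), the other interrupt kinds return via MRET_GLUE
// (mret), and ordinary functions use RET_GLUE (ret).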
19608
19609void RISCVTargetLowering::validateCCReservedRegs(
19610 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19611 MachineFunction &MF) const {
19612 const Function &F = MF.getFunction();
19613 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19614
19615 if (llvm::any_of(Regs, [&STI](auto Reg) {
19616 return STI.isRegisterReservedByUser(Reg.first);
19617 }))
19618 F.getContext().diagnose(DiagnosticInfoUnsupported{
19619 F, "Argument register required, but has been reserved."});
19620}
19621
19622// Check if the result of the node is only used as a return value, as
19623// otherwise we can't perform a tail-call.
19624bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19625 if (N->getNumValues() != 1)
19626 return false;
19627 if (!N->hasNUsesOfValue(1, 0))
19628 return false;
19629
19630 SDNode *Copy = *N->use_begin();
19631
19632 if (Copy->getOpcode() == ISD::BITCAST) {
19633 return isUsedByReturnOnly(Copy, Chain);
19634 }
19635
19636 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19637 // with soft float ABIs.
19638 if (Copy->getOpcode() != ISD::CopyToReg) {
19639 return false;
19640 }
19641
19642 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19643 // isn't safe to perform a tail call.
19644 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19645 return false;
19646
19647 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19648 bool HasRet = false;
19649 for (SDNode *Node : Copy->uses()) {
19650 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19651 return false;
19652 HasRet = true;
19653 }
19654 if (!HasRet)
19655 return false;
19656
19657 Chain = Copy->getOperand(0);
19658 return true;
19659}
19660
19661bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19662 return CI->isTailCall();
19663}
19664
19665const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19666#define NODE_NAME_CASE(NODE) \
19667 case RISCVISD::NODE: \
19668 return "RISCVISD::" #NODE;
19669 // clang-format off
19670 switch ((RISCVISD::NodeType)Opcode) {
19671 case RISCVISD::FIRST_NUMBER:
19672 break;
19673 NODE_NAME_CASE(RET_GLUE)
19674 NODE_NAME_CASE(SRET_GLUE)
19675 NODE_NAME_CASE(MRET_GLUE)
19676 NODE_NAME_CASE(CALL)
19677 NODE_NAME_CASE(SELECT_CC)
19678 NODE_NAME_CASE(BR_CC)
19679 NODE_NAME_CASE(BuildPairF64)
19680 NODE_NAME_CASE(SplitF64)
19681 NODE_NAME_CASE(TAIL)
19682 NODE_NAME_CASE(ADD_LO)
19683 NODE_NAME_CASE(HI)
19684 NODE_NAME_CASE(LLA)
19685 NODE_NAME_CASE(ADD_TPREL)
19686 NODE_NAME_CASE(MULHSU)
19687 NODE_NAME_CASE(SHL_ADD)
19688 NODE_NAME_CASE(SLLW)
19689 NODE_NAME_CASE(SRAW)
19690 NODE_NAME_CASE(SRLW)
19691 NODE_NAME_CASE(DIVW)
19692 NODE_NAME_CASE(DIVUW)
19693 NODE_NAME_CASE(REMUW)
19694 NODE_NAME_CASE(ROLW)
19695 NODE_NAME_CASE(RORW)
19696 NODE_NAME_CASE(CLZW)
19697 NODE_NAME_CASE(CTZW)
19698 NODE_NAME_CASE(ABSW)
19699 NODE_NAME_CASE(FMV_H_X)
19700 NODE_NAME_CASE(FMV_X_ANYEXTH)
19701 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19702 NODE_NAME_CASE(FMV_W_X_RV64)
19703 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19704 NODE_NAME_CASE(FCVT_X)
19705 NODE_NAME_CASE(FCVT_XU)
19706 NODE_NAME_CASE(FCVT_W_RV64)
19707 NODE_NAME_CASE(FCVT_WU_RV64)
19708 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19709 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19710 NODE_NAME_CASE(FP_ROUND_BF16)
19711 NODE_NAME_CASE(FP_EXTEND_BF16)
19712 NODE_NAME_CASE(FROUND)
19713 NODE_NAME_CASE(FCLASS)
19714 NODE_NAME_CASE(FMAX)
19715 NODE_NAME_CASE(FMIN)
19716 NODE_NAME_CASE(READ_COUNTER_WIDE)
19717 NODE_NAME_CASE(BREV8)
19718 NODE_NAME_CASE(ORC_B)
19719 NODE_NAME_CASE(ZIP)
19720 NODE_NAME_CASE(UNZIP)
19721 NODE_NAME_CASE(CLMUL)
19722 NODE_NAME_CASE(CLMULH)
19723 NODE_NAME_CASE(CLMULR)
19724 NODE_NAME_CASE(MOPR)
19725 NODE_NAME_CASE(MOPRR)
19726 NODE_NAME_CASE(SHA256SIG0)
19727 NODE_NAME_CASE(SHA256SIG1)
19728 NODE_NAME_CASE(SHA256SUM0)
19729 NODE_NAME_CASE(SHA256SUM1)
19730 NODE_NAME_CASE(SM4KS)
19731 NODE_NAME_CASE(SM4ED)
19732 NODE_NAME_CASE(SM3P0)
19733 NODE_NAME_CASE(SM3P1)
19734 NODE_NAME_CASE(TH_LWD)
19735 NODE_NAME_CASE(TH_LWUD)
19736 NODE_NAME_CASE(TH_LDD)
19737 NODE_NAME_CASE(TH_SWD)
19738 NODE_NAME_CASE(TH_SDD)
19739 NODE_NAME_CASE(VMV_V_V_VL)
19740 NODE_NAME_CASE(VMV_V_X_VL)
19741 NODE_NAME_CASE(VFMV_V_F_VL)
19742 NODE_NAME_CASE(VMV_X_S)
19743 NODE_NAME_CASE(VMV_S_X_VL)
19744 NODE_NAME_CASE(VFMV_S_F_VL)
19745 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19746 NODE_NAME_CASE(READ_VLENB)
19747 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19748 NODE_NAME_CASE(VSLIDEUP_VL)
19749 NODE_NAME_CASE(VSLIDE1UP_VL)
19750 NODE_NAME_CASE(VSLIDEDOWN_VL)
19751 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19752 NODE_NAME_CASE(VFSLIDE1UP_VL)
19753 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19754 NODE_NAME_CASE(VID_VL)
19755 NODE_NAME_CASE(VFNCVT_ROD_VL)
19756 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19757 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19758 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19759 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19760 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19761 NODE_NAME_CASE(VECREDUCE_AND_VL)
19762 NODE_NAME_CASE(VECREDUCE_OR_VL)
19763 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19764 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19765 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19766 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19767 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19768 NODE_NAME_CASE(ADD_VL)
19769 NODE_NAME_CASE(AND_VL)
19770 NODE_NAME_CASE(MUL_VL)
19771 NODE_NAME_CASE(OR_VL)
19772 NODE_NAME_CASE(SDIV_VL)
19773 NODE_NAME_CASE(SHL_VL)
19774 NODE_NAME_CASE(SREM_VL)
19775 NODE_NAME_CASE(SRA_VL)
19776 NODE_NAME_CASE(SRL_VL)
19777 NODE_NAME_CASE(ROTL_VL)
19778 NODE_NAME_CASE(ROTR_VL)
19779 NODE_NAME_CASE(SUB_VL)
19780 NODE_NAME_CASE(UDIV_VL)
19781 NODE_NAME_CASE(UREM_VL)
19782 NODE_NAME_CASE(XOR_VL)
19783 NODE_NAME_CASE(AVGFLOORU_VL)
19784 NODE_NAME_CASE(AVGCEILU_VL)
19785 NODE_NAME_CASE(SADDSAT_VL)
19786 NODE_NAME_CASE(UADDSAT_VL)
19787 NODE_NAME_CASE(SSUBSAT_VL)
19788 NODE_NAME_CASE(USUBSAT_VL)
19789 NODE_NAME_CASE(FADD_VL)
19790 NODE_NAME_CASE(FSUB_VL)
19791 NODE_NAME_CASE(FMUL_VL)
19792 NODE_NAME_CASE(FDIV_VL)
19793 NODE_NAME_CASE(FNEG_VL)
19794 NODE_NAME_CASE(FABS_VL)
19795 NODE_NAME_CASE(FSQRT_VL)
19796 NODE_NAME_CASE(FCLASS_VL)
19797 NODE_NAME_CASE(VFMADD_VL)
19798 NODE_NAME_CASE(VFNMADD_VL)
19799 NODE_NAME_CASE(VFMSUB_VL)
19800 NODE_NAME_CASE(VFNMSUB_VL)
19801 NODE_NAME_CASE(VFWMADD_VL)
19802 NODE_NAME_CASE(VFWNMADD_VL)
19803 NODE_NAME_CASE(VFWMSUB_VL)
19804 NODE_NAME_CASE(VFWNMSUB_VL)
19805 NODE_NAME_CASE(FCOPYSIGN_VL)
19806 NODE_NAME_CASE(SMIN_VL)
19807 NODE_NAME_CASE(SMAX_VL)
19808 NODE_NAME_CASE(UMIN_VL)
19809 NODE_NAME_CASE(UMAX_VL)
19810 NODE_NAME_CASE(BITREVERSE_VL)
19811 NODE_NAME_CASE(BSWAP_VL)
19812 NODE_NAME_CASE(CTLZ_VL)
19813 NODE_NAME_CASE(CTTZ_VL)
19814 NODE_NAME_CASE(CTPOP_VL)
19815 NODE_NAME_CASE(VFMIN_VL)
19816 NODE_NAME_CASE(VFMAX_VL)
19817 NODE_NAME_CASE(MULHS_VL)
19818 NODE_NAME_CASE(MULHU_VL)
19819 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19820 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19821 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19822 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19823 NODE_NAME_CASE(VFCVT_X_F_VL)
19824 NODE_NAME_CASE(VFCVT_XU_F_VL)
19825 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19826 NODE_NAME_CASE(SINT_TO_FP_VL)
19827 NODE_NAME_CASE(UINT_TO_FP_VL)
19828 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19829 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19830 NODE_NAME_CASE(FP_EXTEND_VL)
19831 NODE_NAME_CASE(FP_ROUND_VL)
19832 NODE_NAME_CASE(STRICT_FADD_VL)
19833 NODE_NAME_CASE(STRICT_FSUB_VL)
19834 NODE_NAME_CASE(STRICT_FMUL_VL)
19835 NODE_NAME_CASE(STRICT_FDIV_VL)
19836 NODE_NAME_CASE(STRICT_FSQRT_VL)
19837 NODE_NAME_CASE(STRICT_VFMADD_VL)
19838 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19839 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19840 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19841 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19842 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19843 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19844 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19845 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19846 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19847 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19848 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19849 NODE_NAME_CASE(STRICT_FSETCC_VL)
19850 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19851 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19852 NODE_NAME_CASE(VWMUL_VL)
19853 NODE_NAME_CASE(VWMULU_VL)
19854 NODE_NAME_CASE(VWMULSU_VL)
19855 NODE_NAME_CASE(VWADD_VL)
19856 NODE_NAME_CASE(VWADDU_VL)
19857 NODE_NAME_CASE(VWSUB_VL)
19858 NODE_NAME_CASE(VWSUBU_VL)
19859 NODE_NAME_CASE(VWADD_W_VL)
19860 NODE_NAME_CASE(VWADDU_W_VL)
19861 NODE_NAME_CASE(VWSUB_W_VL)
19862 NODE_NAME_CASE(VWSUBU_W_VL)
19863 NODE_NAME_CASE(VWSLL_VL)
19864 NODE_NAME_CASE(VFWMUL_VL)
19865 NODE_NAME_CASE(VFWADD_VL)
19866 NODE_NAME_CASE(VFWSUB_VL)
19867 NODE_NAME_CASE(VFWADD_W_VL)
19868 NODE_NAME_CASE(VFWSUB_W_VL)
19869 NODE_NAME_CASE(VWMACC_VL)
19870 NODE_NAME_CASE(VWMACCU_VL)
19871 NODE_NAME_CASE(VWMACCSU_VL)
19872 NODE_NAME_CASE(VNSRL_VL)
19873 NODE_NAME_CASE(SETCC_VL)
19874 NODE_NAME_CASE(VMERGE_VL)
19875 NODE_NAME_CASE(VMAND_VL)
19876 NODE_NAME_CASE(VMOR_VL)
19877 NODE_NAME_CASE(VMXOR_VL)
19878 NODE_NAME_CASE(VMCLR_VL)
19879 NODE_NAME_CASE(VMSET_VL)
19880 NODE_NAME_CASE(VRGATHER_VX_VL)
19881 NODE_NAME_CASE(VRGATHER_VV_VL)
19882 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19883 NODE_NAME_CASE(VSEXT_VL)
19884 NODE_NAME_CASE(VZEXT_VL)
19885 NODE_NAME_CASE(VCPOP_VL)
19886 NODE_NAME_CASE(VFIRST_VL)
19887 NODE_NAME_CASE(READ_CSR)
19888 NODE_NAME_CASE(WRITE_CSR)
19889 NODE_NAME_CASE(SWAP_CSR)
19890 NODE_NAME_CASE(CZERO_EQZ)
19891 NODE_NAME_CASE(CZERO_NEZ)
19892 NODE_NAME_CASE(SF_VC_XV_SE)
19893 NODE_NAME_CASE(SF_VC_IV_SE)
19894 NODE_NAME_CASE(SF_VC_VV_SE)
19895 NODE_NAME_CASE(SF_VC_FV_SE)
19896 NODE_NAME_CASE(SF_VC_XVV_SE)
19897 NODE_NAME_CASE(SF_VC_IVV_SE)
19898 NODE_NAME_CASE(SF_VC_VVV_SE)
19899 NODE_NAME_CASE(SF_VC_FVV_SE)
19900 NODE_NAME_CASE(SF_VC_XVW_SE)
19901 NODE_NAME_CASE(SF_VC_IVW_SE)
19902 NODE_NAME_CASE(SF_VC_VVW_SE)
19903 NODE_NAME_CASE(SF_VC_FVW_SE)
19904 NODE_NAME_CASE(SF_VC_V_X_SE)
19905 NODE_NAME_CASE(SF_VC_V_I_SE)
19906 NODE_NAME_CASE(SF_VC_V_XV_SE)
19907 NODE_NAME_CASE(SF_VC_V_IV_SE)
19908 NODE_NAME_CASE(SF_VC_V_VV_SE)
19909 NODE_NAME_CASE(SF_VC_V_FV_SE)
19910 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19911 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19912 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19913 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19914 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19915 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19916 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19917 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19918 }
19919 // clang-format on
19920 return nullptr;
19921#undef NODE_NAME_CASE
19922}
19923
19924/// getConstraintType - Given a constraint letter, return the type of
19925/// constraint it is for this target.
19926RISCVTargetLowering::ConstraintType
19927RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
19928 if (Constraint.size() == 1) {
19929 switch (Constraint[0]) {
19930 default:
19931 break;
19932 case 'f':
19933 return C_RegisterClass;
19934 case 'I':
19935 case 'J':
19936 case 'K':
19937 return C_Immediate;
19938 case 'A':
19939 return C_Memory;
19940 case 's':
19941 case 'S': // A symbolic address
19942 return C_Other;
19943 }
19944 } else {
19945 if (Constraint == "vr" || Constraint == "vm")
19946 return C_RegisterClass;
19947 }
19948 return TargetLowering::getConstraintType(Constraint);
19949}
19950
19951std::pair<unsigned, const TargetRegisterClass *>
19952RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
19953 StringRef Constraint,
19954 MVT VT) const {
19955 // First, see if this is a constraint that directly corresponds to a RISC-V
19956 // register class.
19957 if (Constraint.size() == 1) {
19958 switch (Constraint[0]) {
19959 case 'r':
19960 // TODO: Support fixed vectors up to XLen for P extension?
19961 if (VT.isVector())
19962 break;
19963 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19964 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19965 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19966 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19967 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19968 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19969 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19970 case 'f':
19971 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19972 return std::make_pair(0U, &RISCV::FPR16RegClass);
19973 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19974 return std::make_pair(0U, &RISCV::FPR32RegClass);
19975 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19976 return std::make_pair(0U, &RISCV::FPR64RegClass);
19977 break;
19978 default:
19979 break;
19980 }
19981 } else if (Constraint == "vr") {
19982 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19983 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19984 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19985 return std::make_pair(0U, RC);
19986 }
19987 } else if (Constraint == "vm") {
19988 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19989 return std::make_pair(0U, &RISCV::VMV0RegClass);
19990 }
19991
19992 // Clang will correctly decode the usage of register name aliases into their
19993 // official names. However, other frontends like `rustc` do not. This allows
19994 // users of these frontends to use the ABI names for registers in LLVM-style
19995 // register constraints.
19996 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19997 .Case("{zero}", RISCV::X0)
19998 .Case("{ra}", RISCV::X1)
19999 .Case("{sp}", RISCV::X2)
20000 .Case("{gp}", RISCV::X3)
20001 .Case("{tp}", RISCV::X4)
20002 .Case("{t0}", RISCV::X5)
20003 .Case("{t1}", RISCV::X6)
20004 .Case("{t2}", RISCV::X7)
20005 .Cases("{s0}", "{fp}", RISCV::X8)
20006 .Case("{s1}", RISCV::X9)
20007 .Case("{a0}", RISCV::X10)
20008 .Case("{a1}", RISCV::X11)
20009 .Case("{a2}", RISCV::X12)
20010 .Case("{a3}", RISCV::X13)
20011 .Case("{a4}", RISCV::X14)
20012 .Case("{a5}", RISCV::X15)
20013 .Case("{a6}", RISCV::X16)
20014 .Case("{a7}", RISCV::X17)
20015 .Case("{s2}", RISCV::X18)
20016 .Case("{s3}", RISCV::X19)
20017 .Case("{s4}", RISCV::X20)
20018 .Case("{s5}", RISCV::X21)
20019 .Case("{s6}", RISCV::X22)
20020 .Case("{s7}", RISCV::X23)
20021 .Case("{s8}", RISCV::X24)
20022 .Case("{s9}", RISCV::X25)
20023 .Case("{s10}", RISCV::X26)
20024 .Case("{s11}", RISCV::X27)
20025 .Case("{t3}", RISCV::X28)
20026 .Case("{t4}", RISCV::X29)
20027 .Case("{t5}", RISCV::X30)
20028 .Case("{t6}", RISCV::X31)
20029 .Default(RISCV::NoRegister);
20030 if (XRegFromAlias != RISCV::NoRegister)
20031 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20032
20033 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20034 // TableGen record rather than the AsmName to choose registers for InlineAsm
20035 // constraints, and since we want to match those names to the widest floating
20036 // point register type available, manually select floating point registers here.
20037 //
20038 // The second case is the ABI name of the register, so that frontends can also
20039 // use the ABI names in register constraint lists.
20040 if (Subtarget.hasStdExtF()) {
20041 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20042 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20043 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20044 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20045 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20046 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20047 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20048 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20049 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20050 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20051 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20052 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20053 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20054 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20055 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20056 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20057 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20058 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20059 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20060 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20061 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20062 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20063 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20064 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20065 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20066 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20067 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20068 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20069 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20070 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20071 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20072 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20073 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20074 .Default(RISCV::NoRegister);
20075 if (FReg != RISCV::NoRegister) {
20076 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20077 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20078 unsigned RegNo = FReg - RISCV::F0_F;
20079 unsigned DReg = RISCV::F0_D + RegNo;
20080 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20081 }
20082 if (VT == MVT::f32 || VT == MVT::Other)
20083 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20084 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20085 unsigned RegNo = FReg - RISCV::F0_F;
20086 unsigned HReg = RISCV::F0_H + RegNo;
20087 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20088 }
20089 }
20090 }
20091
20092 if (Subtarget.hasVInstructions()) {
20093 Register VReg = StringSwitch<Register>(Constraint.lower())
20094 .Case("{v0}", RISCV::V0)
20095 .Case("{v1}", RISCV::V1)
20096 .Case("{v2}", RISCV::V2)
20097 .Case("{v3}", RISCV::V3)
20098 .Case("{v4}", RISCV::V4)
20099 .Case("{v5}", RISCV::V5)
20100 .Case("{v6}", RISCV::V6)
20101 .Case("{v7}", RISCV::V7)
20102 .Case("{v8}", RISCV::V8)
20103 .Case("{v9}", RISCV::V9)
20104 .Case("{v10}", RISCV::V10)
20105 .Case("{v11}", RISCV::V11)
20106 .Case("{v12}", RISCV::V12)
20107 .Case("{v13}", RISCV::V13)
20108 .Case("{v14}", RISCV::V14)
20109 .Case("{v15}", RISCV::V15)
20110 .Case("{v16}", RISCV::V16)
20111 .Case("{v17}", RISCV::V17)
20112 .Case("{v18}", RISCV::V18)
20113 .Case("{v19}", RISCV::V19)
20114 .Case("{v20}", RISCV::V20)
20115 .Case("{v21}", RISCV::V21)
20116 .Case("{v22}", RISCV::V22)
20117 .Case("{v23}", RISCV::V23)
20118 .Case("{v24}", RISCV::V24)
20119 .Case("{v25}", RISCV::V25)
20120 .Case("{v26}", RISCV::V26)
20121 .Case("{v27}", RISCV::V27)
20122 .Case("{v28}", RISCV::V28)
20123 .Case("{v29}", RISCV::V29)
20124 .Case("{v30}", RISCV::V30)
20125 .Case("{v31}", RISCV::V31)
20126 .Default(RISCV::NoRegister);
20127 if (VReg != RISCV::NoRegister) {
20128 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20129 return std::make_pair(VReg, &RISCV::VMRegClass);
20130 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20131 return std::make_pair(VReg, &RISCV::VRRegClass);
20132 for (const auto *RC :
20133 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20134 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20135 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20136 return std::make_pair(VReg, RC);
20137 }
20138 }
20139 }
20140 }
20141
20142 std::pair<Register, const TargetRegisterClass *> Res =
20143 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20144
20145 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20146 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20147 // Subtarget into account.
20148 if (Res.second == &RISCV::GPRF16RegClass ||
20149 Res.second == &RISCV::GPRF32RegClass ||
20150 Res.second == &RISCV::GPRPairRegClass)
20151 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20152
20153 return Res;
20154}
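// Example: an inline-asm constraint written as "{a0}" or "{zero}" resolves to
// the corresponding X register through the alias table above, while "{fa0}"
// (or "{f10}") roughly resolves to the widest FP class available: F10_D when D
// is present, otherwise F10_F, or F10_H for an f16 operand with Zfhmin.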
20155
20156InlineAsm::ConstraintCode
20157RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20158 // Currently only support length 1 constraints.
20159 if (ConstraintCode.size() == 1) {
20160 switch (ConstraintCode[0]) {
20161 case 'A':
20162 return InlineAsm::ConstraintCode::A;
20163 default:
20164 break;
20165 }
20166 }
20167
20168 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20169}
20170
20171void RISCVTargetLowering::LowerAsmOperandForConstraint(
20172 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20173 SelectionDAG &DAG) const {
20174 // Currently only support length 1 constraints.
20175 if (Constraint.size() == 1) {
20176 switch (Constraint[0]) {
20177 case 'I':
20178 // Validate & create a 12-bit signed immediate operand.
20179 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20180 uint64_t CVal = C->getSExtValue();
20181 if (isInt<12>(CVal))
20182 Ops.push_back(
20183 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20184 }
20185 return;
20186 case 'J':
20187 // Validate & create an integer zero operand.
20188 if (isNullConstant(Op))
20189 Ops.push_back(
20190 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20191 return;
20192 case 'K':
20193 // Validate & create a 5-bit unsigned immediate operand.
20194 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20195 uint64_t CVal = C->getZExtValue();
20196 if (isUInt<5>(CVal))
20197 Ops.push_back(
20198 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20199 }
20200 return;
20201 case 'S':
20203 return;
20204 default:
20205 break;
20206 }
20207 }
20208 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20209}
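// Roughly, for GNU inline asm this means a constraint like "I"(42) is accepted
// above because 42 fits in a signed 12-bit immediate, "K"(31) is accepted as a
// 5-bit unsigned immediate, and "J" only matches the constant zero.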
20210
20211Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20212 Instruction *Inst,
20213 AtomicOrdering Ord) const {
20214 if (Subtarget.hasStdExtZtso()) {
20215 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20216 return Builder.CreateFence(Ord);
20217 return nullptr;
20218 }
20219
20220 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20221 return Builder.CreateFence(Ord);
20222 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20223 return Builder.CreateFence(AtomicOrdering::Release);
20224 return nullptr;
20225}
20226
20227Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20228 Instruction *Inst,
20229 AtomicOrdering Ord) const {
20230 if (Subtarget.hasStdExtZtso()) {
20231 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20232 return Builder.CreateFence(Ord);
20233 return nullptr;
20234 }
20235
20236 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20237 return Builder.CreateFence(AtomicOrdering::Acquire);
20238 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20239 Ord == AtomicOrdering::SequentiallyConsistent)
20240 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20241 return nullptr;
20242}
20243
20244TargetLowering::AtomicExpansionKind
20245RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20246 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20247 // point operations can't be used in an lr/sc sequence without breaking the
20248 // forward-progress guarantee.
20249 if (AI->isFloatingPointOperation() ||
20250 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20251 AI->getOperation() == AtomicRMWInst::UDecWrap)
20252 return AtomicExpansionKind::CmpXChg;
20253
20254 // Don't expand forced atomics, we want to have __sync libcalls instead.
20255 if (Subtarget.hasForcedAtomics())
20256 return AtomicExpansionKind::None;
20257
20258 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20259 if (AI->getOperation() == AtomicRMWInst::Nand) {
20260 if (Subtarget.hasStdExtZacas() &&
20261 (Size >= 32 || Subtarget.hasStdExtZabha()))
20262 return AtomicExpansionKind::CmpXChg;
20263 if (Size < 32)
20264 return AtomicExpansionKind::MaskedIntrinsic;
20265 }
20266
20267 if (Size < 32 && !Subtarget.hasStdExtZabha())
20268 return AtomicExpansionKind::MaskedIntrinsic;
20269
20270 return AtomicExpansionKind::None;
20271}
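// Summary of the decisions above: Nand can use a CAS expansion when Zacas is
// available (plus Zabha for sub-word sizes); other 8- and 16-bit operations use
// the masked-intrinsic LR/SC path unless Zabha provides byte/halfword AMOs; and
// forced-atomics targets keep the __sync libcalls.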
20272
20273static Intrinsic::ID
20274getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20275 if (XLen == 32) {
20276 switch (BinOp) {
20277 default:
20278 llvm_unreachable("Unexpected AtomicRMW BinOp");
20279 case AtomicRMWInst::Xchg:
20280 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20281 case AtomicRMWInst::Add:
20282 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20283 case AtomicRMWInst::Sub:
20284 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20285 case AtomicRMWInst::Nand:
20286 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20287 case AtomicRMWInst::Max:
20288 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20289 case AtomicRMWInst::Min:
20290 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20291 case AtomicRMWInst::UMax:
20292 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20293 case AtomicRMWInst::UMin:
20294 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20295 }
20296 }
20297
20298 if (XLen == 64) {
20299 switch (BinOp) {
20300 default:
20301 llvm_unreachable("Unexpected AtomicRMW BinOp");
20302 case AtomicRMWInst::Xchg:
20303 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20304 case AtomicRMWInst::Add:
20305 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20306 case AtomicRMWInst::Sub:
20307 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20308 case AtomicRMWInst::Nand:
20309 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20310 case AtomicRMWInst::Max:
20311 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20312 case AtomicRMWInst::Min:
20313 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20314 case AtomicRMWInst::UMax:
20315 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20316 case AtomicRMWInst::UMin:
20317 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20318 }
20319 }
20320
20321 llvm_unreachable("Unexpected XLen\n");
20322}
20323
20324Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20325 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20326 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20327 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20328 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20329 // mask, as this produces better code than the LR/SC loop emitted by
20330 // int_riscv_masked_atomicrmw_xchg.
20331 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20332 isa<ConstantInt>(AI->getValOperand())) {
20333 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20334 if (CVal->isZero())
20335 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20336 Builder.CreateNot(Mask, "Inv_Mask"),
20337 AI->getAlign(), Ord);
20338 if (CVal->isMinusOne())
20339 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20340 AI->getAlign(), Ord);
20341 }
20342
20343 unsigned XLen = Subtarget.getXLen();
20344 Value *Ordering =
20345 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20346 Type *Tys[] = {AlignedAddr->getType()};
20347 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20348 AI->getModule(),
20349 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20350
20351 if (XLen == 64) {
20352 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20353 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20354 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20355 }
20356
20357 Value *Result;
20358
20359 // Must pass the shift amount needed to sign extend the loaded value prior
20360 // to performing a signed comparison for min/max. ShiftAmt is the number of
20361 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20362 // is the number of bits to left+right shift the value in order to
20363 // sign-extend.
20364 if (AI->getOperation() == AtomicRMWInst::Min ||
20365 AI->getOperation() == AtomicRMWInst::Max) {
20366 const DataLayout &DL = AI->getModule()->getDataLayout();
20367 unsigned ValWidth =
20368 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20369 Value *SextShamt =
20370 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20371 Result = Builder.CreateCall(LrwOpScwLoop,
20372 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20373 } else {
20374 Result =
20375 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20376 }
20377
20378 if (XLen == 64)
20379 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20380 return Result;
20381}
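// Illustrative sketch (not part of this file): the "left+right shift to
// sign-extend" trick described in the comment above, checked on a concrete
// case. For an i16 field held at bit offset 16 of the aligned 64-bit word
// (XLen = 64, ShiftAmt = 16, ValWidth = 16), SextShamt = 64 - 16 - 16 = 32:
// shifting left by 32 moves the field's sign bit to bit 63, and an arithmetic
// right shift by 32 puts the field back at bit 16 with its sign bit
// replicated above it, which is what the signed min/max comparison in the
// LR/SC loop needs. Names are invented for this sketch.
#include <cassert>
#include <cstdint>

inline int64_t signExtendFieldInPlace(uint64_t Word, unsigned ShiftAmt,
                                      unsigned ValWidth) {
  unsigned SextShamt = 64 - ShiftAmt - ValWidth; // XLen - ShiftAmt - ValWidth
  return (int64_t)(Word << SextShamt) >> SextShamt;
}

inline void sextShamtExample() {
  uint64_t Word = 0x8001ULL << 16; // i16 value 0x8001 (negative) at offset 16
  int64_t Ext = signExtendFieldInPlace(Word, /*ShiftAmt=*/16, /*ValWidth=*/16);
  assert((uint64_t)Ext == 0xFFFFFFFF80010000ULL);
  (void)Ext;
}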
20382
20383TargetLowering::AtomicExpansionKind
20384RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20385 AtomicCmpXchgInst *CI) const {
20386 // Don't expand forced atomics, we want to have __sync libcalls instead.
20387 if (Subtarget.hasForcedAtomics())
20388 return AtomicExpansionKind::None;
20389
20390 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20391 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20392 (Size == 8 || Size == 16))
20393 return AtomicExpansionKind::MaskedIntrinsic;
20394 return AtomicExpansionKind::None;
20395}
20396
20397Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20398 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20399 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20400 unsigned XLen = Subtarget.getXLen();
20401 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20402 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20403 if (XLen == 64) {
20404 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20405 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20406 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20407 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20408 }
20409 Type *Tys[] = {AlignedAddr->getType()};
20410 Function *MaskedCmpXchg =
20411 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20412 Value *Result = Builder.CreateCall(
20413 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20414 if (XLen == 64)
20415 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20416 return Result;
20417}
20418
20419bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20420 EVT DataVT) const {
20421 // We have indexed loads for all supported EEW types. Indices are always
20422 // zero extended.
20423 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20424 isTypeLegal(Extend.getValueType()) &&
20425 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20426 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20427}
20428
20429bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20430 EVT VT) const {
20431 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20432 return false;
20433
20434 switch (FPVT.getSimpleVT().SimpleTy) {
20435 case MVT::f16:
20436 return Subtarget.hasStdExtZfhmin();
20437 case MVT::f32:
20438 return Subtarget.hasStdExtF();
20439 case MVT::f64:
20440 return Subtarget.hasStdExtD();
20441 default:
20442 return false;
20443 }
20444}
20445
20446unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20447 // If we are using the small code model, we can reduce size of jump table
20448 // entry to 4 bytes.
20449 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20450 getTargetMachine().getCodeModel() == CodeModel::Small) {
20451 return MachineJumpTableInfo::EK_Custom32;
20452 }
20453 return TargetLowering::getJumpTableEncoding();
20454}
20455
20456const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20457 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20458 unsigned uid, MCContext &Ctx) const {
20459 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20460 getTargetMachine().getCodeModel() == CodeModel::Small);
20461 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20462}
20463
20464bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20465 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20466 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20467 // a power of two as well.
20468 // FIXME: This doesn't work for zve32, but that's already broken
20469 // elsewhere for the same reason.
20470 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20471 static_assert(RISCV::RVVBitsPerBlock == 64,
20472 "RVVBitsPerBlock changed, audit needed");
20473 return true;
20474}
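// Illustrative sketch (not part of this file): with RVVBitsPerBlock == 64 and
// VLEN constrained to powers of two that are at least 64, vscale is
// VLEN / 64 and therefore also a power of two, e.g. VLEN = 128 gives
// vscale = 2 and VLEN = 512 gives vscale = 8.
namespace vscale_sketch {
constexpr bool isPow2(unsigned X) { return X != 0 && (X & (X - 1)) == 0; }
static_assert(isPow2(128 / 64) && isPow2(512 / 64) && isPow2(65536 / 64),
              "vscale = VLEN / RVVBitsPerBlock stays a power of two");
} // namespace vscale_sketch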
20475
20476bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20477 SDValue &Offset,
20478 ISD::MemIndexedMode &AM,
20479 SelectionDAG &DAG) const {
20480 // Target does not support indexed loads.
20481 if (!Subtarget.hasVendorXTHeadMemIdx())
20482 return false;
20483
20484 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20485 return false;
20486
20487 Base = Op->getOperand(0);
20488 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20489 int64_t RHSC = RHS->getSExtValue();
20490 if (Op->getOpcode() == ISD::SUB)
20491 RHSC = -(uint64_t)RHSC;
20492
20493 // The constants that can be encoded in the THeadMemIdx instructions
20494 // are of the form (sign_extend(imm5) << imm2).
20495 bool isLegalIndexedOffset = false;
20496 for (unsigned i = 0; i < 4; i++)
20497 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20498 isLegalIndexedOffset = true;
20499 break;
20500 }
20501
20502 if (!isLegalIndexedOffset)
20503 return false;
20504
20505 Offset = Op->getOperand(1);
20506 return true;
20507 }
20508
20509 return false;
20510}
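// Illustrative sketch (not part of this file): the XTHeadMemIdx offset check
// above accepts exactly the constants expressible as sign_extend(imm5) << imm2,
// i.e. a 5-bit signed value scaled by 1, 2, 4 or 8. A standalone restatement
// with a few sample offsets; the function name is invented for this sketch.
#include <cassert>
#include <cstdint>

inline bool isLegalTHeadMemIdxOffset(int64_t Off) {
  for (unsigned Shift = 0; Shift < 4; ++Shift) {
    int64_t Scale = 1LL << Shift;
    if (Off % Scale == 0 && Off / Scale >= -16 && Off / Scale <= 15)
      return true;
  }
  return false;
}

inline void tHeadMemIdxOffsetExamples() {
  assert(isLegalTHeadMemIdxOffset(64));   //  8 << 3
  assert(isLegalTHeadMemIdxOffset(-48));  // -12 << 2 (or -6 << 3)
  assert(!isLegalTHeadMemIdxOffset(68));  // 17 << 2: 17 does not fit in imm5
  assert(!isLegalTHeadMemIdxOffset(256)); // 32 << 3: 32 does not fit in imm5
}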
20511
20512bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20513 SDValue &Offset,
20514 ISD::MemIndexedMode &AM,
20515 SelectionDAG &DAG) const {
20516 EVT VT;
20517 SDValue Ptr;
20518 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20519 VT = LD->getMemoryVT();
20520 Ptr = LD->getBasePtr();
20521 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20522 VT = ST->getMemoryVT();
20523 Ptr = ST->getBasePtr();
20524 } else
20525 return false;
20526
20527 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20528 return false;
20529
20530 AM = ISD::PRE_INC;
20531 return true;
20532}
20533
20534bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
20535 SDValue &Base,
20536 SDValue &Offset,
20537 ISD::MemIndexedMode &AM,
20538 SelectionDAG &DAG) const {
20539 EVT VT;
20540 SDValue Ptr;
20541 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20542 VT = LD->getMemoryVT();
20543 Ptr = LD->getBasePtr();
20544 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20545 VT = ST->getMemoryVT();
20546 Ptr = ST->getBasePtr();
20547 } else
20548 return false;
20549
20550 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20551 return false;
20552 // Post-indexing updates the base, so it's not a valid transform
20553 // if that's not the same as the load's pointer.
20554 if (Ptr != Base)
20555 return false;
20556
20557 AM = ISD::POST_INC;
20558 return true;
20559}
20560
20561bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20562 EVT VT) const {
20563 EVT SVT = VT.getScalarType();
20564
20565 if (!SVT.isSimple())
20566 return false;
20567
20568 switch (SVT.getSimpleVT().SimpleTy) {
20569 case MVT::f16:
20570 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20571 : Subtarget.hasStdExtZfhOrZhinx();
20572 case MVT::f32:
20573 return Subtarget.hasStdExtFOrZfinx();
20574 case MVT::f64:
20575 return Subtarget.hasStdExtDOrZdinx();
20576 default:
20577 break;
20578 }
20579
20580 return false;
20581}
20582
20583ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20584 // Zacas will use amocas.w which does not require extension.
20585 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20586}
20587
20588Register RISCVTargetLowering::getExceptionPointerRegister(
20589 const Constant *PersonalityFn) const {
20590 return RISCV::X10;
20591}
20592
20593Register RISCVTargetLowering::getExceptionSelectorRegister(
20594 const Constant *PersonalityFn) const {
20595 return RISCV::X11;
20596}
20597
20598bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20599 // Return false to suppress the unnecessary extensions if the LibCall
20600 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20601 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20602 Type.getSizeInBits() < Subtarget.getXLen()))
20603 return false;
20604
20605 return true;
20606}
20607
20608bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
20609 if (Subtarget.is64Bit() && Type == MVT::i32)
20610 return true;
20611
20612 return IsSigned;
20613}
20614
20615bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20616 SDValue C) const {
20617 // Check integral scalar types.
20618 const bool HasExtMOrZmmul =
20619 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20620 if (!VT.isScalarInteger())
20621 return false;
20622
20623 // Omit the optimization if the subtarget has the M extension and the data
20624 // size exceeds XLen.
20625 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20626 return false;
20627
20628 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20629 // Break the MUL to a SLLI and an ADD/SUB.
20630 const APInt &Imm = ConstNode->getAPIntValue();
20631 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20632 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20633 return true;
20634
20635 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20636 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20637 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20638 (Imm - 8).isPowerOf2()))
20639 return true;
20640
20641 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20642 // a pair of LUI/ADDI.
20643 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20644 ConstNode->hasOneUse()) {
20645 APInt ImmS = Imm.ashr(Imm.countr_zero());
20646 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20647 (1 - ImmS).isPowerOf2())
20648 return true;
20649 }
20650 }
20651
20652 return false;
20653}
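// Illustrative sketch (not part of this file): the multiply decompositions the
// hook above is willing to hand to the DAG combiner, written out as shift/add
// identities. The constants are picked to hit each case: 65 and 63 are one
// away from a power of two (SLLI plus ADD/SUB), 2050 = 2 + 2048 matches the
// Zba SH1ADD form, and 196608 = 3 << 16 is handled by stripping the trailing
// zeros first (two SLLIs plus a SUB).
#include <cassert>
#include <cstdint>

inline void mulDecompositionExamples(uint64_t X) {
  assert(X * 65 == (X << 6) + X);             // Imm - 1 is a power of two
  assert(X * 63 == (X << 6) - X);             // Imm + 1 is a power of two
  assert(X * 2050 == (X << 1) + (X << 11));   // SH1ADD x, (SLLI x, 11)
  assert(X * 196608 == ((X << 2) - X) << 16); // (x * 3) << 16
}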
20654
20655bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20656 SDValue ConstNode) const {
20657 // Let the DAGCombiner decide for vectors.
20658 EVT VT = AddNode.getValueType();
20659 if (VT.isVector())
20660 return true;
20661
20662 // Let the DAGCombiner decide for larger types.
20663 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20664 return true;
20665
20666 // It is worse if c1 is simm12 while c1*c2 is not.
20667 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20668 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20669 const APInt &C1 = C1Node->getAPIntValue();
20670 const APInt &C2 = C2Node->getAPIntValue();
20671 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20672 return false;
20673
20674 // Default to true and let the DAGCombiner decide.
20675 return true;
20676}
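// Illustrative sketch (not part of this file): the simm12 concern above on a
// concrete pair. Folding (x + 5) * 1000 into x * 1000 + 5000 trades a
// constant that fits an ADDI immediate (5) for one that does not (5000), so
// the hook reports the fold as unprofitable for c1 = 5, c2 = 1000.
namespace simm12_sketch {
constexpr bool isSImm12(long long V) { return V >= -2048 && V <= 2047; }
static_assert(isSImm12(5) && !isSImm12(5 * 1000),
              "c1 fits simm12 but c1 * c2 does not");
} // namespace simm12_sketch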
20677
20678bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20679 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20680 unsigned *Fast) const {
20681 if (!VT.isVector()) {
20682 if (Fast)
20683 *Fast = Subtarget.enableUnalignedScalarMem();
20684 return Subtarget.enableUnalignedScalarMem();
20685 }
20686
20687 // All vector implementations must support element alignment
20688 EVT ElemVT = VT.getVectorElementType();
20689 if (Alignment >= ElemVT.getStoreSize()) {
20690 if (Fast)
20691 *Fast = 1;
20692 return true;
20693 }
20694
20695 // Note: We lower an unmasked unaligned vector access to an equally sized
20696 // e8 element type access. Given this, we effectively support all unmasked
20697 // misaligned accesses. TODO: Work through the codegen implications of
20698 // allowing such accesses to be formed and of considering them fast.
20699 if (Fast)
20700 *Fast = Subtarget.enableUnalignedVectorMem();
20701 return Subtarget.enableUnalignedVectorMem();
20702}
20703
20704
20705EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20706 const AttributeList &FuncAttributes) const {
20707 if (!Subtarget.hasVInstructions())
20708 return MVT::Other;
20709
20710 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20711 return MVT::Other;
20712
20713 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20714 // has an expansion threshold, and we want the number of hardware memory
20715 // operations to correspond roughly to that threshold. LMUL>1 operations
20716 // are typically expanded linearly internally, and thus correspond to more
20717 // than one actual memory operation. Note that store merging and load
20718 // combining will typically form larger LMUL operations from the LMUL1
20719 // operations emitted here, and that's okay because combining isn't
20720 // introducing new memory operations; it's just merging existing ones.
20721 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20722 if (Op.size() < MinVLenInBytes)
20723 // TODO: Figure out short memops. For the moment, do the default thing
20724 // which ends up using scalar sequences.
20725 return MVT::Other;
20726
20727 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20728 // a large scalar constant and instead use vmv.v.x/i to do the
20729 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20730 // maximize the chance we can encode the size in the vsetvli.
20731 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20732 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20733
20734 // Do we have sufficient alignment for our preferred VT? If not, revert
20735 // to largest size allowed by our alignment criteria.
20736 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20737 Align RequiredAlign(PreferredVT.getStoreSize());
20738 if (Op.isFixedDstAlign())
20739 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20740 if (Op.isMemcpy())
20741 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20742 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20743 }
20744 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20745}
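// Illustrative sketch (not part of this file): the element-type policy above
// for one hypothetical configuration, assuming a minimum VLEN of 128 bits
// (LMUL1 = 16 bytes) and ELEN = 64 with sufficient alignment. A non-zero
// memset prefers 16 x i8 so the value can be broadcast with vmv.v.x, while a
// memcpy prefers 2 x i64 to minimize VL. Names are invented for this sketch.
#include <cassert>
#include <utility>

inline std::pair<unsigned, unsigned> // {element bytes, lane count}
sketchMemOpVT(unsigned MinVLenBytes, unsigned ElenBytes, bool IsNonZeroMemset) {
  unsigned EltBytes = IsNonZeroMemset ? 1 : ElenBytes;
  return {EltBytes, MinVLenBytes / EltBytes};
}

inline void memOpVTExamples() {
  assert((sketchMemOpVT(16, 8, /*IsNonZeroMemset=*/true) ==
          std::pair<unsigned, unsigned>(1, 16))); // v16i8
  assert((sketchMemOpVT(16, 8, /*IsNonZeroMemset=*/false) ==
          std::pair<unsigned, unsigned>(8, 2)));  // v2i64
}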
20746
20747bool RISCVTargetLowering::splitValueIntoRegisterParts(
20748 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20749 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20750 bool IsABIRegCopy = CC.has_value();
20751 EVT ValueVT = Val.getValueType();
20752 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20753 PartVT == MVT::f32) {
20754 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20755 // nan, and cast to f32.
20756 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20757 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20758 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20759 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20760 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20761 Parts[0] = Val;
20762 return true;
20763 }
20764
20765 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20766 LLVMContext &Context = *DAG.getContext();
20767 EVT ValueEltVT = ValueVT.getVectorElementType();
20768 EVT PartEltVT = PartVT.getVectorElementType();
20769 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20770 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20771 if (PartVTBitSize % ValueVTBitSize == 0) {
20772 assert(PartVTBitSize >= ValueVTBitSize);
20773 // If the element types are different, bitcast to the same element type of
20774 // PartVT first.
20775 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
20776 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
20777 // insert_subvector, and can then bitcast the result to
20778 // <vscale x 4 x i16>.
20779 if (ValueEltVT != PartEltVT) {
20780 if (PartVTBitSize > ValueVTBitSize) {
20781 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20782 assert(Count != 0 && "The number of element should not be zero.");
20783 EVT SameEltTypeVT =
20784 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20785 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20786 DAG.getUNDEF(SameEltTypeVT), Val,
20787 DAG.getVectorIdxConstant(0, DL));
20788 }
20789 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20790 } else {
20791 Val =
20792 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20793 Val, DAG.getVectorIdxConstant(0, DL));
20794 }
20795 Parts[0] = Val;
20796 return true;
20797 }
20798 }
20799 return false;
20800}
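// Illustrative sketch (not part of this file): why OR-ing in 0xFFFF0000 above
// produces a properly NaN-boxed f32. Any 32-bit pattern whose top 16 bits are
// all ones has every exponent bit (30..23) set and a non-zero mantissa, so it
// is a NaN no matter which f16/bf16 payload sits in the low half.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

inline void nanBoxExample(uint16_t HalfBits) {
  uint32_t Boxed = 0xFFFF0000u | HalfBits;
  float F;
  std::memcpy(&F, &Boxed, sizeof(F));
  assert(std::isnan(F));
}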
20801
20802SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20803 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20804 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20805 bool IsABIRegCopy = CC.has_value();
20806 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20807 PartVT == MVT::f32) {
20808 SDValue Val = Parts[0];
20809
20810 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20811 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20812 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20813 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20814 return Val;
20815 }
20816
20817 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20818 LLVMContext &Context = *DAG.getContext();
20819 SDValue Val = Parts[0];
20820 EVT ValueEltVT = ValueVT.getVectorElementType();
20821 EVT PartEltVT = PartVT.getVectorElementType();
20822 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20823 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20824 if (PartVTBitSize % ValueVTBitSize == 0) {
20825 assert(PartVTBitSize >= ValueVTBitSize);
20826 EVT SameEltTypeVT = ValueVT;
20827 // If the element types are different, convert it to the same element type
20828 // of PartVT.
20829 // For example, to copy a <vscale x 1 x i8> value out of
20830 // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
20831 // <vscale x 8 x i8>, and can then extract the <vscale x 1 x i8>
20832 // subvector.
20833 if (ValueEltVT != PartEltVT) {
20834 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20835 assert(Count != 0 && "The number of element should not be zero.");
20836 SameEltTypeVT =
20837 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20838 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
20839 }
20840 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
20841 DAG.getVectorIdxConstant(0, DL));
20842 return Val;
20843 }
20844 }
20845 return SDValue();
20846}
20847
20848bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20849 // When aggressively optimizing for code size, we prefer to use a div
20850 // instruction, as it is usually smaller than the alternative sequence.
20851 // TODO: Add vector division?
20852 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20853 return OptSize && !VT.isVector();
20854}
20855
20856bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
20857 // Scalarizing zero_ext and sign_ext can prevent them from being matched to
20858 // widening instructions in some situations.
20859 unsigned Opc = N->getOpcode();
20860 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20861 return false;
20862 return true;
20863}
20864
20865static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20866 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20867 Function *ThreadPointerFunc =
20868 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20869 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
20870 IRB.CreateCall(ThreadPointerFunc), Offset);
20871}
20872
20873Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20874 // Fuchsia provides a fixed TLS slot for the stack cookie.
20875 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20876 if (Subtarget.isTargetFuchsia())
20877 return useTpOffset(IRB, -0x10);
20878
20879 return TargetLowering::getIRStackGuard(IRB);
20880}
20881
20882bool RISCVTargetLowering::isLegalInterleavedAccessType(
20883 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20884 const DataLayout &DL) const {
20885 EVT VT = getValueType(DL, VTy);
20886 // Don't lower vlseg/vsseg for vector types that can't be split.
20887 if (!isTypeLegal(VT))
20888 return false;
20889
20890 if (!isLegalElementTypeForRVV(VT.getVectorElementType()) ||
20891 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
20892 Alignment))
20893 return false;
20894
20895 MVT ContainerVT = VT.getSimpleVT();
20896
20897 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20898 if (!Subtarget.useRVVForFixedLengthVectors())
20899 return false;
20900 // Sometimes the interleaved access pass picks up splats as interleaves of
20901 // one element. Don't lower these.
20902 if (FVTy->getNumElements() < 2)
20903 return false;
20904
20905 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
20906 }
20907
20908 // Need to make sure that EMUL * NFIELDS ≤ 8
20909 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
20910 if (Fractional)
20911 return true;
20912 return Factor * LMUL <= 8;
20913}
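// Illustrative sketch (not part of this file): the EMUL * NFIELDS <= 8 rule
// above on a few sample segment configurations. A factor-4 access at LMUL2
// needs 4 * 2 = 8 vector registers and is accepted, a factor-3 access at
// LMUL4 would need 12 and is rejected, and fractional LMUL always fits.
#include <cassert>

inline bool segmentRegisterCountFits(unsigned Factor, unsigned LMul,
                                     bool Fractional) {
  return Fractional || Factor * LMul <= 8;
}

inline void segmentExamples() {
  assert(segmentRegisterCountFits(4, 2, false));
  assert(!segmentRegisterCountFits(3, 4, false));
  assert(segmentRegisterCountFits(8, 1, false));
  assert(segmentRegisterCountFits(2, 1, /*Fractional=*/true));
}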
20914
20915bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20916 Align Alignment) const {
20917 if (!Subtarget.hasVInstructions())
20918 return false;
20919
20920 // Only support fixed vectors if we know the minimum vector size.
20921 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20922 return false;
20923
20924 EVT ScalarType = DataType.getScalarType();
20925 if (!isLegalElementTypeForRVV(ScalarType))
20926 return false;
20927
20928 if (!Subtarget.enableUnalignedVectorMem() &&
20929 Alignment < ScalarType.getStoreSize())
20930 return false;
20931
20932 return true;
20933}
20934
20935static const Intrinsic::ID FixedVlsegIntrIds[] = {
20936 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20937 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20938 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20939 Intrinsic::riscv_seg8_load};
20940
20941/// Lower an interleaved load into a vlsegN intrinsic.
20942///
20943/// E.g. Lower an interleaved load (Factor = 2):
20944/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20945/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20946/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20947///
20948/// Into:
20949/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20950/// %ptr, i64 4)
20951/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20952/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20953bool RISCVTargetLowering::lowerInterleavedLoad(
20954 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20955 ArrayRef<unsigned> Indices, unsigned Factor) const {
20956 IRBuilder<> Builder(LI);
20957
20958 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
20959 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
20960 LI->getPointerAddressSpace(),
20961 LI->getModule()->getDataLayout()))
20962 return false;
20963
20964 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20965
20966 Function *VlsegNFunc =
20967 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20968 {VTy, LI->getPointerOperandType(), XLenTy});
20969
20970 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20971
20972 CallInst *VlsegN =
20973 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
20974
20975 for (unsigned i = 0; i < Shuffles.size(); i++) {
20976 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
20977 Shuffles[i]->replaceAllUsesWith(SubVec);
20978 }
20979
20980 return true;
20981}
20982
20983static const Intrinsic::ID FixedVssegIntrIds[] = {
20984 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20985 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20986 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20987 Intrinsic::riscv_seg8_store};
20988
20989/// Lower an interleaved store into a vssegN intrinsic.
20990///
20991/// E.g. Lower an interleaved store (Factor = 3):
20992/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20993/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20994/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20995///
20996/// Into:
20997/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20998/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20999/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21000/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21001/// %ptr, i32 4)
21002///
21003/// Note that the new shufflevectors will be removed and we'll only generate one
21004/// vsseg3 instruction in CodeGen.
21005bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21006 ShuffleVectorInst *SVI,
21007 unsigned Factor) const {
21008 IRBuilder<> Builder(SI);
21009 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21010 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21011 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21012 ShuffleVTy->getNumElements() / Factor);
21013 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21014 SI->getPointerAddressSpace(),
21015 SI->getModule()->getDataLayout()))
21016 return false;
21017
21018 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21019
21020 Function *VssegNFunc =
21021 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21022 {VTy, SI->getPointerOperandType(), XLenTy});
21023
21024 auto Mask = SVI->getShuffleMask();
21025 SmallVector<Value *, 10> Ops;
21026
21027 for (unsigned i = 0; i < Factor; i++) {
21028 Value *Shuffle = Builder.CreateShuffleVector(
21029 SVI->getOperand(0), SVI->getOperand(1),
21030 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21031 Ops.push_back(Shuffle);
21032 }
21033 // This VL should be OK (should be executable in one vsseg instruction,
21034 // potentially under larger LMULs) because we checked that the fixed vector
21035 // type fits in isLegalInterleavedAccessType
21036 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21037 Ops.append({SI->getPointerOperand(), VL});
21038
21039 Builder.CreateCall(VssegNFunc, Ops);
21040
21041 return true;
21042}
21043
21044bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21045 LoadInst *LI) const {
21046 assert(LI->isSimple());
21047 IRBuilder<> Builder(LI);
21048
21049 // Only deinterleave2 supported at present.
21050 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
21051 return false;
21052
21053 unsigned Factor = 2;
21054
21055 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21056 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21057
21058 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21059 LI->getPointerAddressSpace(),
21060 LI->getModule()->getDataLayout()))
21061 return false;
21062
21063 Function *VlsegNFunc;
21064 Value *VL;
21065 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21066 SmallVector<Value *, 10> Ops;
21067
21068 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21069 VlsegNFunc = Intrinsic::getDeclaration(
21070 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21071 {ResVTy, LI->getPointerOperandType(), XLenTy});
21072 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21073 } else {
21074 static const Intrinsic::ID IntrIds[] = {
21075 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21076 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21077 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21078 Intrinsic::riscv_vlseg8};
21079
21080 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21081 {ResVTy, XLenTy});
21082 VL = Constant::getAllOnesValue(XLenTy);
21083 Ops.append(Factor, PoisonValue::get(ResVTy));
21084 }
21085
21086 Ops.append({LI->getPointerOperand(), VL});
21087
21088 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21089 DI->replaceAllUsesWith(Vlseg);
21090
21091 return true;
21092}
21093
21094bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21095 StoreInst *SI) const {
21096 assert(SI->isSimple());
21097 IRBuilder<> Builder(SI);
21098
21099 // Only interleave2 supported at present.
21100 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
21101 return false;
21102
21103 unsigned Factor = 2;
21104
21105 VectorType *VTy = cast<VectorType>(II->getType());
21106 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21107
21108 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21109 SI->getPointerAddressSpace(),
21110 SI->getModule()->getDataLayout()))
21111 return false;
21112
21113 Function *VssegNFunc;
21114 Value *VL;
21115 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21116
21117 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21118 VssegNFunc = Intrinsic::getDeclaration(
21119 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21120 {InVTy, SI->getPointerOperandType(), XLenTy});
21121 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21122 } else {
21123 static const Intrinsic::ID IntrIds[] = {
21124 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21125 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21126 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21127 Intrinsic::riscv_vsseg8};
21128
21129 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21130 {InVTy, XLenTy});
21131 VL = Constant::getAllOnesValue(XLenTy);
21132 }
21133
21134 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21135 SI->getPointerOperand(), VL});
21136
21137 return true;
21138}
21139
21140MachineInstr *
21141RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21142 MachineBasicBlock::iterator &MBBI,
21143 const TargetInstrInfo *TII) const {
21144 assert(MBBI->isCall() && MBBI->getCFIType() &&
21145 "Invalid call instruction for a KCFI check");
21146 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21147 MBBI->getOpcode()));
21148
21149 MachineOperand &Target = MBBI->getOperand(0);
21150 Target.setIsRenamable(false);
21151
21152 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21153 .addReg(Target.getReg())
21154 .addImm(MBBI->getCFIType())
21155 .getInstr();
21156}
21157
21158#define GET_REGISTER_MATCHER
21159#include "RISCVGenAsmMatcher.inc"
21160
21161Register
21162RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21163 const MachineFunction &MF) const {
21164 Register Reg = MatchRegisterAltName(RegName);
21165 if (Reg == RISCV::NoRegister)
21166 Reg = MatchRegisterName(RegName);
21167 if (Reg == RISCV::NoRegister)
21168 report_fatal_error(
21169 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21170 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21171 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21172 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21173 StringRef(RegName) + "\"."));
21174 return Reg;
21175}
21176
21177MachineMemOperand::Flags
21178RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21179 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21180
21181 if (NontemporalInfo == nullptr)
21182 return MachineMemOperand::MONone;
21183
21184 // 1 (the default value) behaves as __RISCV_NTLH_ALL
21185 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21186 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21187 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21188 // 5 -> __RISCV_NTLH_ALL
21189 int NontemporalLevel = 5;
21190 const MDNode *RISCVNontemporalInfo =
21191 I.getMetadata("riscv-nontemporal-domain");
21192 if (RISCVNontemporalInfo != nullptr)
21193 NontemporalLevel =
21194 cast<ConstantInt>(
21195 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21196 ->getValue())
21197 ->getZExtValue();
21198
21199 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21200 "RISC-V target doesn't support this non-temporal domain.");
21201
21202 NontemporalLevel -= 2;
21203 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21204 if (NontemporalLevel & 0b1)
21205 Flags |= MONontemporalBit0;
21206 if (NontemporalLevel & 0b10)
21207 Flags |= MONontemporalBit1;
21208
21209 return Flags;
21210}
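// Illustrative sketch (not part of this file): the domain-to-bits packing
// above. Levels 2..5 (__RISCV_NTLH_INNERMOST_PRIVATE through __RISCV_NTLH_ALL)
// are rebased to 0..3 and stored in the two nontemporal MMO flag bits, so
// level 3 sets only bit 0 while level 5 sets both bits.
namespace ntlh_sketch {
constexpr unsigned packNontemporalLevel(int Level) {
  Level -= 2;                            // rebase 2..5 onto 0..3
  return (Level & 0b1) | (Level & 0b10); // MONontemporalBit0 / MONontemporalBit1
}
static_assert(packNontemporalLevel(2) == 0b00 &&
                  packNontemporalLevel(3) == 0b01 &&
                  packNontemporalLevel(4) == 0b10 &&
                  packNontemporalLevel(5) == 0b11,
              "levels map onto the two nontemporal bits");
} // namespace ntlh_sketch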
21211
21212MachineMemOperand::Flags
21213RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21214
21215 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21216 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21217 TargetFlags |= (NodeFlags & MONontemporalBit0);
21218 TargetFlags |= (NodeFlags & MONontemporalBit1);
21219 return TargetFlags;
21220}
21221
21222bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21223 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21224 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21225}
21226
21227bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21228 if (VT.isScalableVector())
21229 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21230 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21231 return true;
21232 return Subtarget.hasStdExtZbb() &&
21233 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21234}
21235
21236unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21237 ISD::CondCode Cond) const {
21238 return isCtpopFast(VT) ? 0 : 1;
21239}
21240
21241bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21242
21243 // GISel support is in progress or complete for these opcodes.
21244 unsigned Op = Inst.getOpcode();
21245 if (Op == Instruction::Add || Op == Instruction::Sub ||
21246 Op == Instruction::And || Op == Instruction::Or ||
21247 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21248 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21249 return false;
21250
21251 if (Inst.getType()->isScalableTy())
21252 return true;
21253
21254 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21255 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21256 !isa<ReturnInst>(&Inst))
21257 return true;
21258
21259 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21260 if (AI->getAllocatedType()->isScalableTy())
21261 return true;
21262 }
21263
21264 return false;
21265}
21266
21267SDValue
21268RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21269 SelectionDAG &DAG,
21270 SmallVectorImpl<SDNode *> &Created) const {
21271 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21272 if (isIntDivCheap(N->getValueType(0), Attr))
21273 return SDValue(N, 0); // Lower SDIV as SDIV
21274
21275 // Only perform this transform if short forward branch opt is supported.
21276 if (!Subtarget.hasShortForwardBranchOpt())
21277 return SDValue();
21278 EVT VT = N->getValueType(0);
21279 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21280 return SDValue();
21281
21282 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21283 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21284 return SDValue();
21285 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21286}
21287
21288bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21289 EVT VT, const APInt &AndMask) const {
21290 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21291 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21292 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21293}
21294
21295unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21296 return Subtarget.getMinimumJumpTableEntries();
21297}
21298
21299// Handle single arg such as return value.
21300template <typename Arg>
21301void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21302 // This lambda determines whether an array of types are constructed by
21303 // homogeneous vector types.
21304 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21305 // First, extract the first element in the argument type.
21306 auto It = ArgList.begin();
21307 MVT FirstArgRegType = It->VT;
21308
21309 // Return if there is no return or the type needs split.
21310 if (It == ArgList.end() || It->Flags.isSplit())
21311 return false;
21312
21313 ++It;
21314
21315 // Return if this argument type contains only 1 element, or it's not a
21316 // vector type.
21317 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21318 return false;
21319
21320 // Second, check if the following elements in this argument type are all the
21321 // same.
21322 for (; It != ArgList.end(); ++It)
21323 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21324 return false;
21325
21326 return true;
21327 };
21328
21329 if (isHomogeneousScalableVectorType(ArgList)) {
21330 // Handle as tuple type
21331 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21332 } else {
21333 // Handle as normal vector type
21334 bool FirstVMaskAssigned = false;
21335 for (const auto &OutArg : ArgList) {
21336 MVT RegisterVT = OutArg.VT;
21337
21338 // Skip non-RVV register type
21339 if (!RegisterVT.isVector())
21340 continue;
21341
21342 if (RegisterVT.isFixedLengthVector())
21343 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21344
21345 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21346 RVVArgInfos.push_back({1, RegisterVT, true});
21347 FirstVMaskAssigned = true;
21348 continue;
21349 }
21350
21351 RVVArgInfos.push_back({1, RegisterVT, false});
21352 }
21353 }
21354}
21355
21356// Handle multiple args.
21357template <>
21358void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21359 const DataLayout &DL = MF->getDataLayout();
21360 const Function &F = MF->getFunction();
21361 LLVMContext &Context = F.getContext();
21362
21363 bool FirstVMaskAssigned = false;
21364 for (Type *Ty : TypeList) {
21365 StructType *STy = dyn_cast<StructType>(Ty);
21366 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21367 Type *ElemTy = STy->getTypeAtIndex(0U);
21368 EVT VT = TLI->getValueType(DL, ElemTy);
21369 MVT RegisterVT =
21370 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21371 unsigned NumRegs =
21372 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21373
21374 RVVArgInfos.push_back(
21375 {NumRegs * STy->getNumElements(), RegisterVT, false});
21376 } else {
21377 SmallVector<EVT, 4> ValueVTs;
21378 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21379
21380 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21381 ++Value) {
21382 EVT VT = ValueVTs[Value];
21383 MVT RegisterVT =
21384 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21385 unsigned NumRegs =
21386 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21387
21388 // Skip non-RVV register type
21389 if (!RegisterVT.isVector())
21390 continue;
21391
21392 if (RegisterVT.isFixedLengthVector())
21393 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21394
21395 if (!FirstVMaskAssigned &&
21396 RegisterVT.getVectorElementType() == MVT::i1) {
21397 RVVArgInfos.push_back({1, RegisterVT, true});
21398 FirstVMaskAssigned = true;
21399 --NumRegs;
21400 }
21401
21402 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21403 }
21404 }
21405 }
21406}
21407
21408void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21409 unsigned StartReg) {
21410 assert((StartReg % LMul) == 0 &&
21411 "Start register number should be multiple of lmul");
21412 const MCPhysReg *VRArrays;
21413 switch (LMul) {
21414 default:
21415 report_fatal_error("Invalid lmul");
21416 case 1:
21417 VRArrays = ArgVRs;
21418 break;
21419 case 2:
21420 VRArrays = ArgVRM2s;
21421 break;
21422 case 4:
21423 VRArrays = ArgVRM4s;
21424 break;
21425 case 8:
21426 VRArrays = ArgVRM8s;
21427 break;
21428 }
21429
21430 for (unsigned i = 0; i < NF; ++i)
21431 if (StartReg)
21432 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21433 else
21434 AllocatedPhysRegs.push_back(MCPhysReg());
21435}
21436
21437/// This function determines whether each RVV argument is passed by register.
21438/// If the argument can be assigned to a VR, it is given a specific register;
21439/// otherwise it is assigned 0, which is an invalid MCPhysReg.
21440void RVVArgDispatcher::compute() {
21441 uint32_t AssignedMap = 0;
21442 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21443 // Allocate first vector mask argument to V0.
21444 if (ArgInfo.FirstVMask) {
21445 AllocatedPhysRegs.push_back(RISCV::V0);
21446 return;
21447 }
21448
21449 unsigned RegsNeeded = divideCeil(
21450 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21451 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21452 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21453 StartReg += RegsNeeded) {
21454 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21455 if ((AssignedMap & Map) == 0) {
21456 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21457 AssignedMap |= Map;
21458 return;
21459 }
21460 }
21461
21462 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21463 };
21464
21465 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21466 allocate(RVVArgInfos[i]);
21467}
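// Illustrative sketch (not part of this file): the first-fit bitmap search in
// compute() above, restated over plain integers, assuming the 16 argument
// vector registers v8..v23. A request for NF fields of RegsNeeded registers
// each scans LMUL-aligned start positions and claims the first window whose
// bits are still free; if none fits, the argument is passed indirectly
// (denoted by -1 here). Names are invented for this sketch.
#include <cassert>
#include <cstdint>

inline int allocateVRWindow(uint32_t &AssignedMap, unsigned NF,
                            unsigned RegsNeeded, unsigned NumArgVRs = 16) {
  unsigned Total = NF * RegsNeeded;
  for (unsigned Start = 0; Start + Total <= NumArgVRs; Start += RegsNeeded) {
    uint32_t Window = ((1u << Total) - 1) << Start;
    if ((AssignedMap & Window) == 0) {
      AssignedMap |= Window;
      return (int)Start; // offset from v8
    }
  }
  return -1;
}

inline void vrAllocationExample() {
  uint32_t Map = 0;
  assert(allocateVRWindow(Map, /*NF=*/1, /*RegsNeeded=*/4) == 0);  // v8m4
  assert(allocateVRWindow(Map, /*NF=*/2, /*RegsNeeded=*/2) == 4);  // v12m2+v14m2
  assert(allocateVRWindow(Map, /*NF=*/1, /*RegsNeeded=*/8) == 8);  // v16m8
  assert(allocateVRWindow(Map, /*NF=*/1, /*RegsNeeded=*/2) == -1); // exhausted
}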
21468
21469MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21470 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21471 return AllocatedPhysRegs[CurIdx++];
21472}
21473
21474namespace llvm::RISCVVIntrinsicsTable {
21475
21476#define GET_RISCVVIntrinsicsTable_IMPL
21477#include "RISCVGenSearchableTables.inc"
21478
21479} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
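The APInt entries above cover the arbitrary-precision integer operations this lowering code leans on. A minimal standalone sketch (illustrative only, not taken from this file) exercising a few of them:

#include "llvm/ADT/APInt.h"
#include <cassert>

using namespace llvm;

static void apintSketch() {
  APInt A(64, 0x00FF);                      // 64-bit value 0x00FF
  assert(A.getZExtValue() == 0x00FF);
  assert(A.countr_zero() == 0);             // lowest set bit is bit 0

  A.setBitsFrom(60);                        // set bits [60, 64)
  assert(A.getActiveBits() == 64);

  APInt B = APInt::getLowBitsSet(64, 8);    // 0x00FF
  assert(B.isSubsetOf(A));                  // every bit of B is also set in A

  APInt C = B.trunc(16).sext(32);           // truncate, then sign extend
  assert(C.getBitWidth() == 32 && C.getZExtValue() == 0x00FF);

  assert(APInt::getSignedMinValue(8).slt(APInt::getSignedMaxValue(8)));
}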
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
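The AtomicRMWInst entries above describe the atomicrmw opcodes and accessors consulted when deciding how an atomic update should be expanded. A hedged sketch of such a query (the helper and its policy are hypothetical, not this file's logic):

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical predicate: returns true for RMW operations that typically have
// no single native instruction and are expanded via a cmpxchg loop instead.
static bool needsCmpXchgLoopSketch(const AtomicRMWInst *AI) {
  if (AI->isFloatingPointOperation())
    return true;
  switch (AI->getOperation()) {
  case AtomicRMWInst::Nand:       // ~(old & v)
  case AtomicRMWInst::UIncWrap:   // wrapping increment
  case AtomicRMWInst::UDecWrap:   // wrapping decrement
    return true;
  default:
    return false;
  }
}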
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
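The CCState/CCValAssign entries above are the building blocks of calling-convention analysis: each argument is either given a register or a stack offset, and the resulting CCValAssign records drive argument lowering. A simplified sketch, assuming it is compiled inside the RISC-V backend (the fixed GPR list, calling convention, and alignment are illustrative only):

#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"

using namespace llvm;

static void analyzeArgsSketch(MachineFunction &MF,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SmallVectorImpl<CCValAssign> &ArgLocs) {
  CCState CCInfo(CallingConv::C, /*IsVarArg=*/false, MF, ArgLocs,
                 MF.getFunction().getContext());

  // Illustrative subset of the integer argument registers (a0-a2).
  static const MCPhysReg GPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12};

  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
    MVT VT = Ins[I].VT;            // assumes fixed-size scalar arguments
    if (MCRegister Reg = CCInfo.AllocateReg(GPRs)) {
      CCInfo.addLoc(CCValAssign::getReg(I, VT, Reg, VT, CCValAssign::Full));
    } else {
      int64_t Off =
          CCInfo.AllocateStack(VT.getStoreSize().getFixedValue(), Align(8));
      CCInfo.addLoc(CCValAssign::getMem(I, VT, Off, VT, CCValAssign::Full));
    }
  }
}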
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:299
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:201
iterator_range< arg_iterator > args()
Definition: Function.h:838
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:263
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
Argument * getArg(unsigned i) const
Definition: Function.h:832
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
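The IRBuilderBase entries above are the IR-construction calls used by the atomic-lowering hooks further down in this index (emitLeadingFence, emitMaskedAtomicRMWIntrinsic, and friends). A small sketch, with hypothetical pointer and increment operands, showing the general shape:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *emitAtomicAddSketch(IRBuilderBase &Builder, Value *Ptr,
                                  Value *Incr) {
  // Read-modify-write with monotonic ordering...
  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(
      AtomicRMWInst::Add, Ptr, Incr, MaybeAlign(4), AtomicOrdering::Monotonic);
  // ...followed by an explicit trailing release fence.
  Builder.CreateFence(AtomicOrdering::Release);
  return RMW;
}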
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:82
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
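The MVT entries above are the machine value type queries used when classifying scalars and vectors. Two tiny illustrative helpers written from scratch (the mask-type shape is analogous to, but not the same as, the LLT-based getMaskTypeFor listed near the top; the size check assumes fixed-width scalars):

#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>

using namespace llvm;

// An i1 vector with the same element count as VecVT.
static MVT maskVTSketch(MVT VecVT) {
  assert(VecVT.isVector() && "expected a vector type");
  return MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
}

// True if VT is a plain integer that fits in one XLen-wide GPR.
static bool fitsInGPRSketch(MVT VT, unsigned XLen) {
  return VT.isScalarInteger() && VT.getFixedSizeInBits() <= XLen;
}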
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
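The MachineInstrBuilder entries above show the fluent addReg/addImm/addMBB interface used by the custom inserter helpers in this index. A hedged sketch that emits a conditional branch; the opcode, registers, and surrounding plumbing are illustrative and assume the in-tree RISC-V headers:

#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

static void emitBranchSketch(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator InsertPt,
                             const DebugLoc &DL, const TargetInstrInfo *TII,
                             Register CondReg, MachineBasicBlock *Target) {
  // BEQ CondReg, x0, Target -- branch to Target when CondReg is zero.
  BuildMI(MBB, InsertPt, DL, TII->get(RISCV::BEQ))
      .addReg(CondReg)
      .addReg(RISCV::X0)
      .addMBB(Target);
}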
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:386
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
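The MachineMemOperand flags above annotate every backend memory access; a new operand is obtained from the owning MachineFunction. A minimal sketch with placeholder pointer info, memory type, and alignment:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"

using namespace llvm;

static MachineMemOperand *makeLoadMMOSketch(MachineFunction &MF) {
  return MF.getMachineMemOperand(
      MachinePointerInfo(),                     // unknown memory location
      MachineMemOperand::MOLoad |
          MachineMemOperand::MODereferenceable, // plain, non-trapping load
      LLT::scalar(32),                          // 32-bit memory type
      Align(4));                                // known base alignment
}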
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
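The SDValue/SDNode accessors above are what DAG combines use to walk and match node trees. An illustrative matcher (a hypothetical helper, not one of the combines in this file) for the pattern (add X, (shl Y, C)):

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

static bool matchAddOfShlSketch(SDValue V, SDValue &X, SDValue &Y,
                                uint64_t &ShAmt) {
  if (V.getOpcode() != ISD::ADD)
    return false;
  SDValue Shl = V.getOperand(1);
  if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse() ||
      !isa<ConstantSDNode>(Shl.getOperand(1)))
    return false;
  X = V.getOperand(0);
  Y = Shl.getOperand(0);
  ShAmt = Shl.getConstantOperandVal(1);
  return true;
}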
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:477
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:386
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:731
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:827
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:471
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
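A minimal sketch, assuming DAG, DL and an integer type XLenVT are in scope:
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);        // ordinary constant node
  SDValue Imm  = DAG.getTargetConstant(11, DL, XLenVT); // consumed as an immediate by instruction selection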
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:658
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:861
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:472
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:772
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:468
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:798
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:844
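A minimal sketch, assuming DAG, DL, a scalar value Scalar and a fixed-length vector type VecVT with a matching element type:
  SDValue Splat = DAG.getSplatBuildVector(VecVT, DL, Scalar);  // Scalar duplicated into every lane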
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
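A minimal sketch, assuming DAG and an integer SDValue Op:
  KnownBits Known = DAG.computeKnownBits(Op);
  bool FitsIn32Bits = Known.countMaxActiveBits() <= 32;  // every possible value of Op needs at most 32 bits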
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:484
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:553
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:877
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
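A minimal sketch (the register number Reg is hypothetical):
  SmallSet<unsigned, 4> Seen;
  bool Inserted = Seen.insert(Reg).second;  // false if Reg was already in the set
  bool Present  = Seen.count(Reg) == 1;     // membership test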
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
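A minimal sketch (Chain and ExtraOps are hypothetical values):
  SmallVector<SDValue, 8> Ops;
  Ops.reserve(ExtraOps.size() + 1);
  Ops.push_back(Chain);
  Ops.append(ExtraOps.begin(), ExtraOps.end());  // Chain followed by the remaining operands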
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
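A minimal sketch with hypothetical constraint strings:
  unsigned Kind = StringSwitch<unsigned>(Constraint)
                      .Case("r", 0)
                      .Cases("f", "cf", 1)
                      .Default(~0u);  // no case matched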
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
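A minimal sketch of the typical pattern inside a TargetLowering constructor; the opcodes and actions below are illustrative only:
  setOperationAction(ISD::BSWAP, XLenVT, Expand);   // expanded to shifts/ors during legalization
  setOperationAction(ISD::SELECT, XLenVT, Custom);  // routed through the target's LowerOperation hook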
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
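A minimal sketch of the related per-type configuration calls, again with illustrative types and actions only:
  setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setCondCodeAction(ISD::SETGT, MVT::i64, Expand);
  setTargetDAGCombine(ISD::AND);  // request PerformDAGCombine callbacks for ISD::AND nodes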
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:243
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1132
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1128
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1345
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1376
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1161
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1277
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1278
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1037
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1361
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1365
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1234
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1239
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1375
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1273
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1274
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1406
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:885
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1194
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1358
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1227
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1362
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:994
@ STRICT_LROUND
Definition: ISDOpcodes.h:431
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1083
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1276
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1062
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:586
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:646
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1377
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1157
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1370
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1271
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1217
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1335
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1254
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1279
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1047
@ STRICT_LRINT
Definition: ISDOpcodes.h:433
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:591
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1378
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:429
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:922
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1269
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:990
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1270
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1188
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1214
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:636
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:434
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:612
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1268
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ STRICT_LLROUND
Definition: ISDOpcodes.h:432
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1366
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1152
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1076
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:580
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
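A minimal sketch, assuming an integer EVT VT:
  ISD::CondCode CC     = ISD::SETULT;
  ISD::CondCode InvCC  = ISD::getSetCCInverse(CC, VT);      // !(x <u y)  ->  SETUGE
  ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);  // condition for the swapped operands: SETUGT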
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1491
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1491
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1478
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1412
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1509
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1574
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:554
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
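A minimal sketch on an IR Value *V (the bound names are hypothetical):
  using namespace llvm::PatternMatch;
  Value *Vec, *Elt;
  if (match(V, m_InsertElt(m_Value(Vec), m_Value(Elt), m_ZeroInt()))) {
    // V is an insertelement of Elt into lane 0 of Vec.
  }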
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
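A minimal sketch of these integer helpers:
  uint64_t Size  = 32;
  unsigned Shift = isPowerOf2_64(Size) ? Log2_64(Size) : 0;  // 5
  uint64_t Parts = divideCeil(Size, 24);                     // 2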
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
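A minimal sketch, treating the low 12 bits of a raw encoding as a signed immediate:
  uint64_t Raw = 0xFFF;
  int64_t Imm  = SignExtend64<12>(Raw);  // -1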
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
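A minimal sketch:
  // Produces <2, 3, 4, 5> followed by two undef (-1) lanes.
  SmallVector<int, 16> Mask = createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);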
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
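A minimal sketch, assuming X and Y are SDValues of the same integer type:
  KnownBits KX   = DAG.computeKnownBits(X);
  KnownBits KY   = DAG.computeKnownBits(Y);
  KnownBits KShl = KnownBits::shl(KX, KY);         // facts about (shl X, Y)
  unsigned MaxActive = KShl.countMaxActiveBits();  // upper bound on significant bits of the shift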
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)