1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55 static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
67 static cl::opt<unsigned> NumRepeatedDivisors(
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
107 case RISCVABI::ABI_ILP32:
108 case RISCVABI::ABI_ILP32E:
109 case RISCVABI::ABI_ILP32F:
110 case RISCVABI::ABI_ILP32D:
111 case RISCVABI::ABI_LP64:
112 case RISCVABI::ABI_LP64E:
113 case RISCVABI::ABI_LP64F:
114 case RISCVABI::ABI_LP64D:
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
220 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
224 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32-bit case is efficient on 64-bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (Subtarget.hasVInstructions()) {
667
669 if (RV64LegalI32 && Subtarget.is64Bit())
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT};
702
703 static const unsigned FloatingPointVPOps[] = {
704 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
705 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
706 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
707 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
708 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
709 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
710 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
711 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
712 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
713 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716 ISD::EXPERIMENTAL_VP_SPLICE};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 if (!Subtarget.is64Bit()) {
728 // We must custom-lower certain vXi64 operations on RV32 due to the vector
729 // element type being illegal.
731 MVT::i64, Custom);
732
733 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
734
735 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
736 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
737 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
738 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
739 MVT::i64, Custom);
740 }
741
742 for (MVT VT : BoolVecVTs) {
743 if (!isTypeLegal(VT))
744 continue;
745
747
748 // Mask VTs are custom-expanded into a series of standard nodes
752 VT, Custom);
753
755 Custom);
756
759 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
760 Expand);
761
762 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
763
766 Custom);
767
769 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
770 Custom);
771
772 // RVV has native int->float & float->int conversions where the
773 // element type sizes are within one power-of-two of each other. Any
774 // wider distances between type sizes have to be lowered as sequences
775 // which progressively narrow the gap in stages.
780 VT, Custom);
782 Custom);
783
784 // Expand all extending loads to types larger than this, and truncating
785 // stores from types larger than this.
787 setTruncStoreAction(VT, OtherVT, Expand);
789 OtherVT, Expand);
790 }
791
792 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
793 ISD::VP_TRUNCATE, ISD::VP_SETCC},
794 VT, Custom);
795
798
800
801 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
802 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
803
806 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
807 }
808
809 for (MVT VT : IntVecVTs) {
810 if (!isTypeLegal(VT))
811 continue;
812
815
816 // Vectors implement MULHS/MULHU.
818
819 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
820 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
822
824 Legal);
825
827
828 // Custom-lower extensions and truncations from/to mask types.
830 VT, Custom);
831
832 // RVV has native int->float & float->int conversions where the
833 // element type sizes are within one power-of-two of each other. Any
834 // wider distances between type sizes have to be lowered as sequences
835 // which progressively narrow the gap in stages.
840 VT, Custom);
842 Custom);
845 VT, Legal);
846
847 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
848 // nodes which truncate by one power of two at a time.
850
851 // Custom-lower insert/extract operations to simplify patterns.
853 Custom);
854
855 // Custom-lower reduction operations to set up the corresponding custom
856 // nodes' operands.
857 setOperationAction(IntegerVecReduceOps, VT, Custom);
858
859 setOperationAction(IntegerVPOps, VT, Custom);
860
862
864 VT, Custom);
865
867 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
868 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
869 VT, Custom);
870
873 VT, Custom);
874
877
879
881 setTruncStoreAction(VT, OtherVT, Expand);
883 OtherVT, Expand);
884 }
885
888
889 // Splice
891
892 if (Subtarget.hasStdExtZvkb()) {
894 setOperationAction(ISD::VP_BSWAP, VT, Custom);
895 } else {
896 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
898 }
899
900 if (Subtarget.hasStdExtZvbb()) {
902 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
903 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
904 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
905 VT, Custom);
906 } else {
907 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
909 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
910 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
911 VT, Expand);
912
913 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
914 // in the range of f32.
915 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
916 if (isTypeLegal(FloatVT)) {
918 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
919 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
920 VT, Custom);
921 }
922 }
923 }
924
925 // Expand various CCs to best match the RVV ISA, which natively supports UNE
926 // but no other unordered comparisons, and supports all ordered comparisons
927 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
928 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
929 // and we pattern-match those back to the "original", swapping operands once
930 // more. This way we catch both operations and both "vf" and "fv" forms with
931 // fewer patterns.
932 static const ISD::CondCode VFPCCToExpand[] = {
936 };
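// Illustrative example of the expansion described above: a vector SETOGT
// compare is not kept as-is; it is expanded to SETOLT with the operands
// swapped, and the isel patterns then recognize that swapped form and select
// the greater-than flavour of the compare, covering both the "vf" and "fv"
// operand orders. The same applies to GT/GE/OGE. The concrete instruction
// choices live in the .td patterns, not here.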
937
938 // TODO: support more ops.
939 static const unsigned ZvfhminPromoteOps[] = {
947
948 // TODO: support more vp ops.
949 static const unsigned ZvfhminPromoteVPOps[] = {
950 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
951 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
952 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
953 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
954 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
955 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
956 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
957 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
958 ISD::VP_FMAXIMUM};
959
960 // Sets common operation actions on RVV floating-point vector types.
961 const auto SetCommonVFPActions = [&](MVT VT) {
963 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
964 // sizes are within one power-of-two of each other. Therefore conversions
965 // between vXf16 and vXf64 must be lowered as sequences which convert via
966 // vXf32.
969 // Custom-lower insert/extract operations to simplify patterns.
971 Custom);
972 // Expand various condition codes (explained above).
973 setCondCodeAction(VFPCCToExpand, VT, Expand);
974
977
981 VT, Custom);
982
983 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
984
985 // Expand FP operations that need libcalls.
997
999
1001
1003 VT, Custom);
1004
1006 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1007 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1008 VT, Custom);
1009
1012
1015 VT, Custom);
1016
1019
1021
1022 setOperationAction(FloatingPointVPOps, VT, Custom);
1023
1025 Custom);
1028 VT, Legal);
1033 VT, Custom);
1034 };
1035
1036 // Sets common extload/truncstore actions on RVV floating-point vector
1037 // types.
1038 const auto SetCommonVFPExtLoadTruncStoreActions =
1039 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1040 for (auto SmallVT : SmallerVTs) {
1041 setTruncStoreAction(VT, SmallVT, Expand);
1042 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1043 }
1044 };
1045
1046 if (Subtarget.hasVInstructionsF16()) {
1047 for (MVT VT : F16VecVTs) {
1048 if (!isTypeLegal(VT))
1049 continue;
1050 SetCommonVFPActions(VT);
1051 }
1052 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1053 for (MVT VT : F16VecVTs) {
1054 if (!isTypeLegal(VT))
1055 continue;
1058 Custom);
1059 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1060 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1061 Custom);
1064 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1065 VT, Custom);
1068 VT, Custom);
1069 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1071 // load/store
1073
1074 // Custom split nxv32f16 since nxv32f32 is not legal.
1075 if (VT == MVT::nxv32f16) {
1076 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1077 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1078 continue;
1079 }
1080 // Add more promote ops.
1081 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1082 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1083 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1084 }
1085 }
1086
1087 if (Subtarget.hasVInstructionsF32()) {
1088 for (MVT VT : F32VecVTs) {
1089 if (!isTypeLegal(VT))
1090 continue;
1091 SetCommonVFPActions(VT);
1092 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1093 }
1094 }
1095
1096 if (Subtarget.hasVInstructionsF64()) {
1097 for (MVT VT : F64VecVTs) {
1098 if (!isTypeLegal(VT))
1099 continue;
1100 SetCommonVFPActions(VT);
1101 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1102 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1103 }
1104 }
1105
1106 if (Subtarget.useRVVForFixedLengthVectors()) {
1107 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1108 if (!useRVVForFixedLengthVectorVT(VT))
1109 continue;
1110
1111 // By default everything must be expanded.
1112 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1115 setTruncStoreAction(VT, OtherVT, Expand);
1117 OtherVT, Expand);
1118 }
1119
1120 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1121 // expansion to a build_vector of 0s.
1123
1124 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1126 Custom);
1127
1129 Custom);
1130
1132 VT, Custom);
1133
1135
1137
1139
1141
1143
1145
1148 Custom);
1149
1151 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1152 Custom);
1153
1155 {
1164 },
1165 VT, Custom);
1167 Custom);
1168
1170
1171 // Operations below are different between masks and other vectors.
1172 if (VT.getVectorElementType() == MVT::i1) {
1173 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1174 ISD::OR, ISD::XOR},
1175 VT, Custom);
1176
1177 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1178 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1179 VT, Custom);
1180
1181 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1182 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1183 continue;
1184 }
1185
1186 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1187 // it before type legalization for i64 vectors on RV32. It will then be
1188 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1189 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1190 // improvements first.
1191 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1194 }
1195
1198
1199 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1200 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1201 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1202 ISD::VP_SCATTER},
1203 VT, Custom);
1204
1208 VT, Custom);
1209
1212
1214
1215 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1216 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1218
1221 VT, Custom);
1222
1225
1228
1229 // Custom-lower reduction operations to set up the corresponding custom
1230 // nodes' operands.
1234 VT, Custom);
1235
1236 setOperationAction(IntegerVPOps, VT, Custom);
1237
1238 if (Subtarget.hasStdExtZvkb())
1240
1241 if (Subtarget.hasStdExtZvbb()) {
1244 VT, Custom);
1245 } else {
1246 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1247 // in the range of f32.
1248 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1249 if (isTypeLegal(FloatVT))
1252 Custom);
1253 }
1254 }
1255
1256 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1257 // There are no extending loads or truncating stores.
1258 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1259 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1260 setTruncStoreAction(VT, InnerVT, Expand);
1261 }
1262
1263 if (!useRVVForFixedLengthVectorVT(VT))
1264 continue;
1265
1266 // By default everything must be expanded.
1267 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1269
1270 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1271 // expansion to a build_vector of 0s.
1273
1274 if (VT.getVectorElementType() == MVT::f16 &&
1275 !Subtarget.hasVInstructionsF16()) {
1278 Custom);
1279 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1281 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1282 Custom);
1284 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1285 VT, Custom);
1288 VT, Custom);
1291 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1292 // Don't promote f16 vector operations to f32 if the f32 vector type is
1293 // not legal.
1294 // TODO: could split the f16 vector into two vectors and do promotion.
1295 if (!isTypeLegal(F32VecVT))
1296 continue;
1297 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1298 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1299 continue;
1300 }
1301
1302 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1304 Custom);
1305
1309 VT, Custom);
1310
1313 VT, Custom);
1314
1315 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1316 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1317 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1318 ISD::VP_SCATTER},
1319 VT, Custom);
1320
1325 VT, Custom);
1326
1328
1331 VT, Custom);
1332
1333 setCondCodeAction(VFPCCToExpand, VT, Expand);
1334
1338
1340
1341 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1342
1343 setOperationAction(FloatingPointVPOps, VT, Custom);
1344
1346 Custom);
1353 VT, Custom);
1354 }
1355
1356 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1357 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1358 Custom);
1359 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1361 if (Subtarget.hasStdExtFOrZfinx())
1363 if (Subtarget.hasStdExtDOrZdinx())
1365 }
1366 }
1367
1368 if (Subtarget.hasStdExtA()) {
1370 if (RV64LegalI32 && Subtarget.is64Bit())
1372 }
1373
1374 if (Subtarget.hasForcedAtomics()) {
1375 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1381 XLenVT, LibCall);
1382 }
1383
1384 if (Subtarget.hasVendorXTHeadMemIdx()) {
1385 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1386 setIndexedLoadAction(im, MVT::i8, Legal);
1387 setIndexedStoreAction(im, MVT::i8, Legal);
1388 setIndexedLoadAction(im, MVT::i16, Legal);
1389 setIndexedStoreAction(im, MVT::i16, Legal);
1390 setIndexedLoadAction(im, MVT::i32, Legal);
1391 setIndexedStoreAction(im, MVT::i32, Legal);
1392
1393 if (Subtarget.is64Bit()) {
1394 setIndexedLoadAction(im, MVT::i64, Legal);
1395 setIndexedStoreAction(im, MVT::i64, Legal);
1396 }
1397 }
1398 }
1399
1400 // Function alignments.
1401 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1402 setMinFunctionAlignment(FunctionAlignment);
1403 // Set preferred alignments.
1406
1410 if (Subtarget.is64Bit())
1412
1413 if (Subtarget.hasStdExtFOrZfinx())
1415
1416 if (Subtarget.hasStdExtZbb())
1418
1419 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1421
1422 if (Subtarget.hasStdExtZbkb())
1424 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1426 if (Subtarget.hasStdExtFOrZfinx())
1429 if (Subtarget.hasVInstructions())
1431 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1434 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1437 if (Subtarget.hasVendorXTHeadMemPair())
1439 if (Subtarget.useRVVForFixedLengthVectors())
1441
1442 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1443 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1444
1445 // Disable strict node mutation.
1446 IsStrictFPEnabled = true;
1447}
1448
1450 LLVMContext &Context,
1451 EVT VT) const {
1452 if (!VT.isVector())
1453 return getPointerTy(DL);
1454 if (Subtarget.hasVInstructions() &&
1455 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1456 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1458}
1459
1460MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1461 return Subtarget.getXLenVT();
1462}
1463
1464// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1465bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1466 unsigned VF,
1467 bool IsScalable) const {
1468 if (!Subtarget.hasVInstructions())
1469 return true;
1470
1471 if (!IsScalable)
1472 return true;
1473
1474 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1475 return true;
1476
1477 // Don't allow VF=1 if those types aren't legal.
1478 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1479 return true;
1480
1481 // VLEN=32 support is incomplete.
1482 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1483 return true;
1484
1485 // The maximum VF is for the smallest element width with LMUL=8.
1486 // VF must be a power of 2.
1487 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
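 // Worked example (illustrative): with RVVBitsPerBlock = 64 the smallest
 // element width is 8 bits, so MaxVF = (64 / 8) * 8 = 64. A request for
 // VF = 48 is expanded (not a power of 2), VF = 128 is expanded (exceeds
 // MaxVF), while VF = 32 passes this check.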
1488 return VF > MaxVF || !isPowerOf2_32(VF);
1489}
1490
1491bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1492 return !Subtarget.hasVInstructions() ||
1493 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1494}
1495
1497 const CallInst &I,
1498 MachineFunction &MF,
1499 unsigned Intrinsic) const {
1500 auto &DL = I.getModule()->getDataLayout();
1501
1502 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1503 bool IsUnitStrided, bool UsePtrVal = false) {
1505 // We can't use ptrVal if the intrinsic can access memory before the
1506 // pointer. This means we can't use it for strided or indexed intrinsics.
1507 if (UsePtrVal)
1508 Info.ptrVal = I.getArgOperand(PtrOp);
1509 else
1510 Info.fallbackAddressSpace =
1511 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1512 Type *MemTy;
1513 if (IsStore) {
1514 // Store value is the first operand.
1515 MemTy = I.getArgOperand(0)->getType();
1516 } else {
1517 // Use the return type. If it's a segment load, the return type is a struct.
1518 MemTy = I.getType();
1519 if (MemTy->isStructTy())
1520 MemTy = MemTy->getStructElementType(0);
1521 }
1522 if (!IsUnitStrided)
1523 MemTy = MemTy->getScalarType();
1524
1525 Info.memVT = getValueType(DL, MemTy);
1526 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1528 Info.flags |=
1530 return true;
1531 };
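 // Example (illustrative): for a unit-strided load such as riscv_vle
 // returning <vscale x 4 x i32>, the lambda records the full vector type as
 // memVT, uses operand 1 as ptrVal, and derives the alignment from the
 // 32-bit element type (Align(4)). For strided/indexed forms IsUnitStrided
 // is false, so only the scalar element type is recorded and, since
 // UsePtrVal stays false, the fallback address space is used instead of
 // ptrVal.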
1532
1533 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1535
1537 switch (Intrinsic) {
1538 default:
1539 return false;
1540 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1545 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1546 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1547 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1548 case Intrinsic::riscv_masked_cmpxchg_i32:
1550 Info.memVT = MVT::i32;
1551 Info.ptrVal = I.getArgOperand(0);
1552 Info.offset = 0;
1553 Info.align = Align(4);
1556 return true;
1557 case Intrinsic::riscv_masked_strided_load:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1559 /*IsUnitStrided*/ false);
1560 case Intrinsic::riscv_masked_strided_store:
1561 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1562 /*IsUnitStrided*/ false);
1563 case Intrinsic::riscv_seg2_load:
1564 case Intrinsic::riscv_seg3_load:
1565 case Intrinsic::riscv_seg4_load:
1566 case Intrinsic::riscv_seg5_load:
1567 case Intrinsic::riscv_seg6_load:
1568 case Intrinsic::riscv_seg7_load:
1569 case Intrinsic::riscv_seg8_load:
1570 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1571 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1572 case Intrinsic::riscv_seg2_store:
1573 case Intrinsic::riscv_seg3_store:
1574 case Intrinsic::riscv_seg4_store:
1575 case Intrinsic::riscv_seg5_store:
1576 case Intrinsic::riscv_seg6_store:
1577 case Intrinsic::riscv_seg7_store:
1578 case Intrinsic::riscv_seg8_store:
1579 // Operands are (vec, ..., vec, ptr, vl)
1580 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1581 /*IsStore*/ true,
1582 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1583 case Intrinsic::riscv_vle:
1584 case Intrinsic::riscv_vle_mask:
1585 case Intrinsic::riscv_vleff:
1586 case Intrinsic::riscv_vleff_mask:
1587 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1588 /*IsStore*/ false,
1589 /*IsUnitStrided*/ true,
1590 /*UsePtrVal*/ true);
1591 case Intrinsic::riscv_vse:
1592 case Intrinsic::riscv_vse_mask:
1593 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1594 /*IsStore*/ true,
1595 /*IsUnitStrided*/ true,
1596 /*UsePtrVal*/ true);
1597 case Intrinsic::riscv_vlse:
1598 case Intrinsic::riscv_vlse_mask:
1599 case Intrinsic::riscv_vloxei:
1600 case Intrinsic::riscv_vloxei_mask:
1601 case Intrinsic::riscv_vluxei:
1602 case Intrinsic::riscv_vluxei_mask:
1603 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1604 /*IsStore*/ false,
1605 /*IsUnitStrided*/ false);
1606 case Intrinsic::riscv_vsse:
1607 case Intrinsic::riscv_vsse_mask:
1608 case Intrinsic::riscv_vsoxei:
1609 case Intrinsic::riscv_vsoxei_mask:
1610 case Intrinsic::riscv_vsuxei:
1611 case Intrinsic::riscv_vsuxei_mask:
1612 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1613 /*IsStore*/ true,
1614 /*IsUnitStrided*/ false);
1615 case Intrinsic::riscv_vlseg2:
1616 case Intrinsic::riscv_vlseg3:
1617 case Intrinsic::riscv_vlseg4:
1618 case Intrinsic::riscv_vlseg5:
1619 case Intrinsic::riscv_vlseg6:
1620 case Intrinsic::riscv_vlseg7:
1621 case Intrinsic::riscv_vlseg8:
1622 case Intrinsic::riscv_vlseg2ff:
1623 case Intrinsic::riscv_vlseg3ff:
1624 case Intrinsic::riscv_vlseg4ff:
1625 case Intrinsic::riscv_vlseg5ff:
1626 case Intrinsic::riscv_vlseg6ff:
1627 case Intrinsic::riscv_vlseg7ff:
1628 case Intrinsic::riscv_vlseg8ff:
1629 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1630 /*IsStore*/ false,
1631 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1632 case Intrinsic::riscv_vlseg2_mask:
1633 case Intrinsic::riscv_vlseg3_mask:
1634 case Intrinsic::riscv_vlseg4_mask:
1635 case Intrinsic::riscv_vlseg5_mask:
1636 case Intrinsic::riscv_vlseg6_mask:
1637 case Intrinsic::riscv_vlseg7_mask:
1638 case Intrinsic::riscv_vlseg8_mask:
1639 case Intrinsic::riscv_vlseg2ff_mask:
1640 case Intrinsic::riscv_vlseg3ff_mask:
1641 case Intrinsic::riscv_vlseg4ff_mask:
1642 case Intrinsic::riscv_vlseg5ff_mask:
1643 case Intrinsic::riscv_vlseg6ff_mask:
1644 case Intrinsic::riscv_vlseg7ff_mask:
1645 case Intrinsic::riscv_vlseg8ff_mask:
1646 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1647 /*IsStore*/ false,
1648 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1649 case Intrinsic::riscv_vlsseg2:
1650 case Intrinsic::riscv_vlsseg3:
1651 case Intrinsic::riscv_vlsseg4:
1652 case Intrinsic::riscv_vlsseg5:
1653 case Intrinsic::riscv_vlsseg6:
1654 case Intrinsic::riscv_vlsseg7:
1655 case Intrinsic::riscv_vlsseg8:
1656 case Intrinsic::riscv_vloxseg2:
1657 case Intrinsic::riscv_vloxseg3:
1658 case Intrinsic::riscv_vloxseg4:
1659 case Intrinsic::riscv_vloxseg5:
1660 case Intrinsic::riscv_vloxseg6:
1661 case Intrinsic::riscv_vloxseg7:
1662 case Intrinsic::riscv_vloxseg8:
1663 case Intrinsic::riscv_vluxseg2:
1664 case Intrinsic::riscv_vluxseg3:
1665 case Intrinsic::riscv_vluxseg4:
1666 case Intrinsic::riscv_vluxseg5:
1667 case Intrinsic::riscv_vluxseg6:
1668 case Intrinsic::riscv_vluxseg7:
1669 case Intrinsic::riscv_vluxseg8:
1670 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1671 /*IsStore*/ false,
1672 /*IsUnitStrided*/ false);
1673 case Intrinsic::riscv_vlsseg2_mask:
1674 case Intrinsic::riscv_vlsseg3_mask:
1675 case Intrinsic::riscv_vlsseg4_mask:
1676 case Intrinsic::riscv_vlsseg5_mask:
1677 case Intrinsic::riscv_vlsseg6_mask:
1678 case Intrinsic::riscv_vlsseg7_mask:
1679 case Intrinsic::riscv_vlsseg8_mask:
1680 case Intrinsic::riscv_vloxseg2_mask:
1681 case Intrinsic::riscv_vloxseg3_mask:
1682 case Intrinsic::riscv_vloxseg4_mask:
1683 case Intrinsic::riscv_vloxseg5_mask:
1684 case Intrinsic::riscv_vloxseg6_mask:
1685 case Intrinsic::riscv_vloxseg7_mask:
1686 case Intrinsic::riscv_vloxseg8_mask:
1687 case Intrinsic::riscv_vluxseg2_mask:
1688 case Intrinsic::riscv_vluxseg3_mask:
1689 case Intrinsic::riscv_vluxseg4_mask:
1690 case Intrinsic::riscv_vluxseg5_mask:
1691 case Intrinsic::riscv_vluxseg6_mask:
1692 case Intrinsic::riscv_vluxseg7_mask:
1693 case Intrinsic::riscv_vluxseg8_mask:
1694 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1695 /*IsStore*/ false,
1696 /*IsUnitStrided*/ false);
1697 case Intrinsic::riscv_vsseg2:
1698 case Intrinsic::riscv_vsseg3:
1699 case Intrinsic::riscv_vsseg4:
1700 case Intrinsic::riscv_vsseg5:
1701 case Intrinsic::riscv_vsseg6:
1702 case Intrinsic::riscv_vsseg7:
1703 case Intrinsic::riscv_vsseg8:
1704 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1705 /*IsStore*/ true,
1706 /*IsUnitStrided*/ false);
1707 case Intrinsic::riscv_vsseg2_mask:
1708 case Intrinsic::riscv_vsseg3_mask:
1709 case Intrinsic::riscv_vsseg4_mask:
1710 case Intrinsic::riscv_vsseg5_mask:
1711 case Intrinsic::riscv_vsseg6_mask:
1712 case Intrinsic::riscv_vsseg7_mask:
1713 case Intrinsic::riscv_vsseg8_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ false);
1717 case Intrinsic::riscv_vssseg2:
1718 case Intrinsic::riscv_vssseg3:
1719 case Intrinsic::riscv_vssseg4:
1720 case Intrinsic::riscv_vssseg5:
1721 case Intrinsic::riscv_vssseg6:
1722 case Intrinsic::riscv_vssseg7:
1723 case Intrinsic::riscv_vssseg8:
1724 case Intrinsic::riscv_vsoxseg2:
1725 case Intrinsic::riscv_vsoxseg3:
1726 case Intrinsic::riscv_vsoxseg4:
1727 case Intrinsic::riscv_vsoxseg5:
1728 case Intrinsic::riscv_vsoxseg6:
1729 case Intrinsic::riscv_vsoxseg7:
1730 case Intrinsic::riscv_vsoxseg8:
1731 case Intrinsic::riscv_vsuxseg2:
1732 case Intrinsic::riscv_vsuxseg3:
1733 case Intrinsic::riscv_vsuxseg4:
1734 case Intrinsic::riscv_vsuxseg5:
1735 case Intrinsic::riscv_vsuxseg6:
1736 case Intrinsic::riscv_vsuxseg7:
1737 case Intrinsic::riscv_vsuxseg8:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1739 /*IsStore*/ true,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vssseg2_mask:
1742 case Intrinsic::riscv_vssseg3_mask:
1743 case Intrinsic::riscv_vssseg4_mask:
1744 case Intrinsic::riscv_vssseg5_mask:
1745 case Intrinsic::riscv_vssseg6_mask:
1746 case Intrinsic::riscv_vssseg7_mask:
1747 case Intrinsic::riscv_vssseg8_mask:
1748 case Intrinsic::riscv_vsoxseg2_mask:
1749 case Intrinsic::riscv_vsoxseg3_mask:
1750 case Intrinsic::riscv_vsoxseg4_mask:
1751 case Intrinsic::riscv_vsoxseg5_mask:
1752 case Intrinsic::riscv_vsoxseg6_mask:
1753 case Intrinsic::riscv_vsoxseg7_mask:
1754 case Intrinsic::riscv_vsoxseg8_mask:
1755 case Intrinsic::riscv_vsuxseg2_mask:
1756 case Intrinsic::riscv_vsuxseg3_mask:
1757 case Intrinsic::riscv_vsuxseg4_mask:
1758 case Intrinsic::riscv_vsuxseg5_mask:
1759 case Intrinsic::riscv_vsuxseg6_mask:
1760 case Intrinsic::riscv_vsuxseg7_mask:
1761 case Intrinsic::riscv_vsuxseg8_mask:
1762 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1763 /*IsStore*/ true,
1764 /*IsUnitStrided*/ false);
1765 }
1766}
1767
1769 const AddrMode &AM, Type *Ty,
1770 unsigned AS,
1771 Instruction *I) const {
1772 // No global is ever allowed as a base.
1773 if (AM.BaseGV)
1774 return false;
1775
1776 // RVV instructions only support register addressing.
1777 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1778 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1779
1780 // Require a 12-bit signed offset.
1781 if (!isInt<12>(AM.BaseOffs))
1782 return false;
1783
1784 switch (AM.Scale) {
1785 case 0: // "r+i" or just "i", depending on HasBaseReg.
1786 break;
1787 case 1:
1788 if (!AM.HasBaseReg) // allow "r+i".
1789 break;
1790 return false; // disallow "r+r" or "r+r+i".
1791 default:
1792 return false;
1793 }
1794
1795 return true;
1796}
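// Examples (illustrative): "reg + 2040" is legal (the offset fits in a
// signed 12-bit immediate), "reg + 4096" is not, and "reg + reg" (Scale == 1
// with a base register) is rejected because scalar accesses have no
// register+register addressing. For RVV vector accesses only a plain base
// register with no offset is accepted.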
1797
1799 return isInt<12>(Imm);
1800}
1801
1803 return isInt<12>(Imm);
1804}
1805
1806// On RV32, 64-bit integers are split into their high and low parts and held
1807// in two different registers, so the trunc is free since the low register can
1808// just be used.
1809// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1810// isTruncateFree?
1812 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1813 return false;
1814 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1815 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1816 return (SrcBits == 64 && DestBits == 32);
1817}
1818
1820 // We consider i64->i32 free on RV64 since we have good selection of W
1821 // instructions that make promoting operations back to i64 free in many cases.
1822 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1823 !DstVT.isInteger())
1824 return false;
1825 unsigned SrcBits = SrcVT.getSizeInBits();
1826 unsigned DestBits = DstVT.getSizeInBits();
1827 return (SrcBits == 64 && DestBits == 32);
1828}
1829
1831 // Zexts are free if they can be combined with a load.
1832 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1833 // poorly with type legalization of compares preferring sext.
1834 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1835 EVT MemVT = LD->getMemoryVT();
1836 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1837 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1838 LD->getExtensionType() == ISD::ZEXTLOAD))
1839 return true;
1840 }
1841
1842 return TargetLowering::isZExtFree(Val, VT2);
1843}
1844
1846 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1847}
1848
1850 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1851}
1852
1854 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1855}
1856
1858 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1859 Subtarget.hasVendorXCVbitmanip();
1860}
1861
1863 const Instruction &AndI) const {
1864 // We expect to be able to match a bit extraction instruction if the Zbs
1865 // extension is supported and the mask is a power of two. However, we
1866 // conservatively return false if the mask would fit in an ANDI instruction,
1867 // on the basis that it's possible the sinking+duplication of the AND in
1868 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1869 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1870 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1871 return false;
1872 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1873 if (!Mask)
1874 return false;
1875 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1876}
1877
1879 EVT VT = Y.getValueType();
1880
1881 // FIXME: Support vectors once we have tests.
1882 if (VT.isVector())
1883 return false;
1884
1885 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1886 !isa<ConstantSDNode>(Y);
1887}
1888
1890 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1891 if (Subtarget.hasStdExtZbs())
1892 return X.getValueType().isScalarInteger();
1893 auto *C = dyn_cast<ConstantSDNode>(Y);
1894 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1895 if (Subtarget.hasVendorXTHeadBs())
1896 return C != nullptr;
1897 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1898 return C && C->getAPIntValue().ule(10);
1899}
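// Example (illustrative): testing bit 20 of a scalar, e.g. (x >> 20) & 1,
// is reported as a cheap bit test when Zbs (BEXT) is available. Without Zbs
// or XTheadBs, only small bit positions qualify, since the mask 1 << Y must
// still fit an ANDI immediate (hence the ule(10) bound above).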
1900
1902 EVT VT) const {
1903 // Only enable for rvv.
1904 if (!VT.isVector() || !Subtarget.hasVInstructions())
1905 return false;
1906
1907 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1908 return false;
1909
1910 return true;
1911}
1912
1914 Type *Ty) const {
1915 assert(Ty->isIntegerTy());
1916
1917 unsigned BitSize = Ty->getIntegerBitWidth();
1918 if (BitSize > Subtarget.getXLen())
1919 return false;
1920
1921 // Fast path, assume 32-bit immediates are cheap.
1922 int64_t Val = Imm.getSExtValue();
1923 if (isInt<32>(Val))
1924 return true;
1925
1926 // A constant pool entry may be more aligned than the load we're trying to
1927 // replace. If we don't support unaligned scalar mem, prefer the constant
1928 // pool.
1929 // TODO: Can the caller pass down the alignment?
1930 if (!Subtarget.enableUnalignedScalarMem())
1931 return true;
1932
1933 // Prefer to keep the load if it would require many instructions.
1934 // This uses the same threshold we use for constant pools but doesn't
1935 // check useConstantPoolForLargeInts.
1936 // TODO: Should we keep the load only when we're definitely going to emit a
1937 // constant pool?
1938
1939 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
1940 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1941}
1942
1946 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1947 SelectionDAG &DAG) const {
1948 // One interesting pattern that we'd want to form is 'bit extract':
1949 // ((1 >> Y) & 1) ==/!= 0
1950 // But we also need to be careful not to try to reverse that fold.
1951
1952 // Is this '((1 >> Y) & 1)'?
1953 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1954 return false; // Keep the 'bit extract' pattern.
1955
1956 // Will this be '((1 >> Y) & 1)' after the transform?
1957 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1958 return true; // Do form the 'bit extract' pattern.
1959
1960 // If 'X' is a constant, and we transform, then we will immediately
1961 // try to undo the fold, thus causing endless combine loop.
1962 // So only do the transform if X is not a constant. This matches the default
1963 // implementation of this function.
1964 return !XC;
1965}
1966
1967bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1968 switch (Opcode) {
1969 case Instruction::Add:
1970 case Instruction::Sub:
1971 case Instruction::Mul:
1972 case Instruction::And:
1973 case Instruction::Or:
1974 case Instruction::Xor:
1975 case Instruction::FAdd:
1976 case Instruction::FSub:
1977 case Instruction::FMul:
1978 case Instruction::FDiv:
1979 case Instruction::ICmp:
1980 case Instruction::FCmp:
1981 return true;
1982 case Instruction::Shl:
1983 case Instruction::LShr:
1984 case Instruction::AShr:
1985 case Instruction::UDiv:
1986 case Instruction::SDiv:
1987 case Instruction::URem:
1988 case Instruction::SRem:
1989 return Operand == 1;
1990 default:
1991 return false;
1992 }
1993}
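// Example (illustrative): for a shift such as Shl only operand 1 (the shift
// amount) may be a splat, matching the .vx/.vi instruction forms, whereas a
// commutative op such as Add or FMul can take the splat on either side.
// Division and remainder likewise only accept the scalar on the right.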
1994
1995
1997 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1998 return false;
1999
2000 if (canSplatOperand(I->getOpcode(), Operand))
2001 return true;
2002
2003 auto *II = dyn_cast<IntrinsicInst>(I);
2004 if (!II)
2005 return false;
2006
2007 switch (II->getIntrinsicID()) {
2008 case Intrinsic::fma:
2009 case Intrinsic::vp_fma:
2010 return Operand == 0 || Operand == 1;
2011 case Intrinsic::vp_shl:
2012 case Intrinsic::vp_lshr:
2013 case Intrinsic::vp_ashr:
2014 case Intrinsic::vp_udiv:
2015 case Intrinsic::vp_sdiv:
2016 case Intrinsic::vp_urem:
2017 case Intrinsic::vp_srem:
2018 case Intrinsic::ssub_sat:
2019 case Intrinsic::vp_ssub_sat:
2020 case Intrinsic::usub_sat:
2021 case Intrinsic::vp_usub_sat:
2022 return Operand == 1;
2023 // These intrinsics are commutative.
2024 case Intrinsic::vp_add:
2025 case Intrinsic::vp_mul:
2026 case Intrinsic::vp_and:
2027 case Intrinsic::vp_or:
2028 case Intrinsic::vp_xor:
2029 case Intrinsic::vp_fadd:
2030 case Intrinsic::vp_fmul:
2031 case Intrinsic::vp_icmp:
2032 case Intrinsic::vp_fcmp:
2033 case Intrinsic::smin:
2034 case Intrinsic::vp_smin:
2035 case Intrinsic::umin:
2036 case Intrinsic::vp_umin:
2037 case Intrinsic::smax:
2038 case Intrinsic::vp_smax:
2039 case Intrinsic::umax:
2040 case Intrinsic::vp_umax:
2041 case Intrinsic::sadd_sat:
2042 case Intrinsic::vp_sadd_sat:
2043 case Intrinsic::uadd_sat:
2044 case Intrinsic::vp_uadd_sat:
2045 // These intrinsics have 'vr' versions.
2046 case Intrinsic::vp_sub:
2047 case Intrinsic::vp_fsub:
2048 case Intrinsic::vp_fdiv:
2049 return Operand == 0 || Operand == 1;
2050 default:
2051 return false;
2052 }
2053}
2054
2055/// Check if sinking \p I's operands to I's basic block is profitable, because
2056/// the operands can be folded into a target instruction, e.g.
2057/// splats of scalars can fold into vector instructions.
2059 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2060 using namespace llvm::PatternMatch;
2061
2062 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2063 return false;
2064
2065 // Don't sink splat operands if the target prefers not to. Some targets
2066 // require S2V transfer buffers and we can run out of them copying the same
2067 // value repeatedly.
2068 // FIXME: It could still be worth doing if it would improve vector register
2069 // pressure and prevent a vector spill.
2070 if (!Subtarget.sinkSplatOperands())
2071 return false;
2072
2073 for (auto OpIdx : enumerate(I->operands())) {
2074 if (!canSplatOperand(I, OpIdx.index()))
2075 continue;
2076
2077 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2078 // Make sure we are not already sinking this operand
2079 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2080 continue;
2081
2082 // We are looking for a splat that can be sunk.
2084 m_Undef(), m_ZeroMask())))
2085 continue;
2086
2087 // Don't sink i1 splats.
2088 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2089 continue;
2090
2091 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2092 // and vector registers.
2093 for (Use &U : Op->uses()) {
2094 Instruction *Insn = cast<Instruction>(U.getUser());
2095 if (!canSplatOperand(Insn, U.getOperandNo()))
2096 return false;
2097 }
2098
2099 Ops.push_back(&Op->getOperandUse(0));
2100 Ops.push_back(&OpIdx.value());
2101 }
2102 return true;
2103}
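// Example (illustrative): for IR along the lines of
//   %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
//   %splat = shufflevector %ins, poison, zeroinitializer
//   %sum = add <vscale x 4 x i32> %v, %splat
// sinking the splat next to the add lets isel fold the scalar into a
// vadd.vx instead of materialising the splat in a vector register first.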
2104
2106 unsigned Opc = VecOp.getOpcode();
2107
2108 // Assume target opcodes can't be scalarized.
2109 // TODO - do we have any exceptions?
2110 if (Opc >= ISD::BUILTIN_OP_END)
2111 return false;
2112
2113 // If the vector op is not supported, try to convert to scalar.
2114 EVT VecVT = VecOp.getValueType();
2115 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2116 return true;
2117
2118 // If the vector op is supported, but the scalar op is not, the transform may
2119 // not be worthwhile.
2120 // Permit a vector binary operation to be converted to a scalar binary
2121 // operation that is custom lowered for an illegal type.
2122 EVT ScalarVT = VecVT.getScalarType();
2123 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2124 isOperationCustom(Opc, ScalarVT);
2125}
2126
2128 const GlobalAddressSDNode *GA) const {
2129 // In order to maximise the opportunity for common subexpression elimination,
2130 // keep a separate ADD node for the global address offset instead of folding
2131 // it in the global address node. Later peephole optimisations may choose to
2132 // fold it back in when profitable.
2133 return false;
2134}
2135
2136// Return one of the following:
2137// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2138// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2139// positive counterpart, which will be materialized from the first returned
2140// element. The second returned element indicates that an FNEG should
2141// follow.
2142// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2143std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2144 EVT VT) const {
2145 if (!Subtarget.hasStdExtZfa())
2146 return std::make_pair(-1, false);
2147
2148 bool IsSupportedVT = false;
2149 if (VT == MVT::f16) {
2150 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2151 } else if (VT == MVT::f32) {
2152 IsSupportedVT = true;
2153 } else if (VT == MVT::f64) {
2154 assert(Subtarget.hasStdExtD() && "Expect D extension");
2155 IsSupportedVT = true;
2156 }
2157
2158 if (!IsSupportedVT)
2159 return std::make_pair(-1, false);
2160
2162 if (Index < 0 && Imm.isNegative())
2163 // Try the combination of its positive counterpart + FNEG.
2164 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2165 else
2166 return std::make_pair(Index, false);
2167}
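// Example (illustrative): with Zfa, an f32 immediate of -0.5 has no direct
// FLI encoding, but its positive counterpart 0.5 does, so this returns the
// index of 0.5 paired with `true`, i.e. materialise 0.5 with fli.s and then
// negate it. A value with no usable encoding returns {-1, _}.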
2168
2170 bool ForCodeSize) const {
2171 bool IsLegalVT = false;
2172 if (VT == MVT::f16)
2173 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2174 else if (VT == MVT::f32)
2175 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2176 else if (VT == MVT::f64)
2177 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2178 else if (VT == MVT::bf16)
2179 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2180
2181 if (!IsLegalVT)
2182 return false;
2183
2184 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2185 return true;
2186
2187 // Cannot create a 64-bit floating-point immediate value for RV32.
2188 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2189 // td can handle +0.0 or -0.0 already.
2190 // -0.0 can be created by fmv + fneg.
2191 return Imm.isZero();
2192 }
2193
2194 // Special case: fmv + fneg
2195 if (Imm.isNegZero())
2196 return true;
2197
2198 // Building an integer and then converting requires a fmv at the end of
2199 // the integer sequence.
2200 const int Cost =
2201 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2202 Subtarget);
2203 return Cost <= FPImmCost;
2204}
2205
2206// TODO: This is very conservative.
2207bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2208 unsigned Index) const {
2209 if (!Subtarget.hasVInstructions())
2210 return false;
2211
2212 // Only support extracting a fixed from a fixed vector for now.
2213 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2214 return false;
2215
2216 EVT EltVT = ResVT.getVectorElementType();
2217 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2218
2219 // The smallest type we can slide is i8.
2220 // TODO: We can extract index 0 from a mask vector without a slide.
2221 if (EltVT == MVT::i1)
2222 return false;
2223
2224 unsigned ResElts = ResVT.getVectorNumElements();
2225 unsigned SrcElts = SrcVT.getVectorNumElements();
2226
2227 unsigned MinVLen = Subtarget.getRealMinVLen();
2228 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2229
2230 // If we're extracting only data from the first VLEN bits of the source
2231 // then we can always do this with an m1 vslidedown.vx. Restricting the
2232 // Index ensures we can use a vslidedown.vi.
2233 // TODO: We can generalize this when the exact VLEN is known.
2234 if (Index + ResElts <= MinVLMAX && Index < 31)
2235 return true;
2236
2237 // Conservatively only handle extracting half of a vector.
2238 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2239 // a cheap extract. However, this case is important in practice for
2240 // shuffled extracts of longer vectors. How should this be resolved?
2241 if ((ResElts * 2) != SrcElts)
2242 return false;
2243
2244 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2245 // cheap.
2246 if (Index >= 32)
2247 return false;
2248
2249 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2250 // the upper half of a vector until we have more test coverage.
2251 return Index == 0 || Index == ResElts;
2252}
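// Illustrative examples, assuming i32 elements and a minimum VLEN of 128
// (so an m1 register holds at least 4 such elements):
//   extract v4i32 from v8i32 at index 0 -> cheap (within the first VLEN bits)
//   extract v4i32 from v8i32 at index 4 -> cheap (upper half, vslidedown.vi 4)
//   extract v2i32 from v8i32 at index 6 -> not cheap (not exactly half the source)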
2253
2254MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2255 CallingConv::ID CC,
2256 EVT VT) const {
2257 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2258 // We might still end up using a GPR but that will be decided based on ABI.
2259 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2260 !Subtarget.hasStdExtZfhminOrZhinxmin())
2261 return MVT::f32;
2262
2263 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2264
2265 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2266 return MVT::i64;
2267
2268 return PartVT;
2269}
2270
2271unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
2272 LLVMContext &Context, CallingConv::ID CC,
2273 EVT VT) const {
2274 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2275 // We might still end up using a GPR but that will be decided based on ABI.
2276 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2277 !Subtarget.hasStdExtZfhminOrZhinxmin())
2278 return 1;
2279
2280 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2281}
2282
2283unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2284 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2285 unsigned &NumIntermediates, MVT &RegisterVT) const {
2286 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2287 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2288
2289 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2290 IntermediateVT = MVT::i64;
2291
2292 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2293 RegisterVT = MVT::i64;
2294
2295 return NumRegs;
2296}
2297
2298// Changes the condition code and swaps operands if necessary, so the SetCC
2299// operation matches one of the comparisons supported directly by branches
2300// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2301// with 1/-1.
2302static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2303 ISD::CondCode &CC, SelectionDAG &DAG) {
2304 // If this is a single bit test that can't be handled by ANDI, shift the
2305 // bit to be tested to the MSB and perform a signed compare with 0.
2306 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2307 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2308 isa<ConstantSDNode>(LHS.getOperand(1))) {
2309 uint64_t Mask = LHS.getConstantOperandVal(1);
2310 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2311 unsigned ShAmt = 0;
2312 if (isPowerOf2_64(Mask)) {
2313 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2314 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2315 } else {
2316 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2317 }
2318
2319 LHS = LHS.getOperand(0);
2320 if (ShAmt != 0)
2321 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2322 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2323 return;
2324 }
2325 }
2326
2327 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2328 int64_t C = RHSC->getSExtValue();
2329 switch (CC) {
2330 default: break;
2331 case ISD::SETGT:
2332 // Convert X > -1 to X >= 0.
2333 if (C == -1) {
2334 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2335 CC = ISD::SETGE;
2336 return;
2337 }
2338 break;
2339 case ISD::SETLT:
2340 // Convert X < 1 to 0 >= X.
2341 if (C == 1) {
2342 RHS = LHS;
2343 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2344 CC = ISD::SETGE;
2345 return;
2346 }
2347 break;
2348 }
2349 }
2350
2351 switch (CC) {
2352 default:
2353 break;
2354 case ISD::SETGT:
2355 case ISD::SETLE:
2356 case ISD::SETUGT:
2357 case ISD::SETULE:
2358 CC = ISD::getSetCCSwappedOperands(CC);
2359 std::swap(LHS, RHS);
2360 break;
2361 }
2362}
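// Illustrative example of the single-bit rewrite above on RV64:
//   (seteq (and X, 0x8000), 0) has a power-of-two mask that does not fit in
//   ANDI's simm12, so it becomes (setge (shl X, 48), 0): the tested bit is
//   moved to the sign bit and the branch checks the sign. Similarly,
//   (setgt X, -1) is canonicalized to (setge X, 0).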
2363
2364RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2365 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2366 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2367 if (VT.getVectorElementType() == MVT::i1)
2368 KnownSize *= 8;
2369
2370 switch (KnownSize) {
2371 default:
2372 llvm_unreachable("Invalid LMUL.");
2373 case 8:
2374 return RISCVII::VLMUL::LMUL_F8;
2375 case 16:
2376 return RISCVII::VLMUL::LMUL_F4;
2377 case 32:
2378 return RISCVII::VLMUL::LMUL_F2;
2379 case 64:
2380 return RISCVII::VLMUL::LMUL_1;
2381 case 128:
2382 return RISCVII::VLMUL::LMUL_2;
2383 case 256:
2384 return RISCVII::VLMUL::LMUL_4;
2385 case 512:
2386 return RISCVII::VLMUL::LMUL_8;
2387 }
2388}
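// Illustrative mapping, given RVV's 64-bit minimum block size:
//   nxv1i32 -> known minimum size  32 -> LMUL_F2
//   nxv2i32 -> known minimum size  64 -> LMUL_1
//   nxv8i32 -> known minimum size 256 -> LMUL_4
//   nxv16i1 -> 16 * 8 = 128            -> LMUL_2 (i1 sizes are scaled by 8)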
2389
2390static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2391 switch (LMul) {
2392 default:
2393 llvm_unreachable("Invalid LMUL.");
2394 case RISCVII::VLMUL::LMUL_F8:
2395 case RISCVII::VLMUL::LMUL_F4:
2396 case RISCVII::VLMUL::LMUL_F2:
2397 case RISCVII::VLMUL::LMUL_1:
2398 return RISCV::VRRegClassID;
2399 case RISCVII::VLMUL::LMUL_2:
2400 return RISCV::VRM2RegClassID;
2401 case RISCVII::VLMUL::LMUL_4:
2402 return RISCV::VRM4RegClassID;
2403 case RISCVII::VLMUL::LMUL_8:
2404 return RISCV::VRM8RegClassID;
2405 }
2406}
2407
2408static unsigned getSubregIndexByMVT(MVT VT, unsigned Index) {
2409 RISCVII::VLMUL LMUL = getLMUL(VT);
2410 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2411 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2412 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2413 LMUL == RISCVII::VLMUL::LMUL_1) {
2414 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2415 "Unexpected subreg numbering");
2416 return RISCV::sub_vrm1_0 + Index;
2417 }
2418 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2419 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2420 "Unexpected subreg numbering");
2421 return RISCV::sub_vrm2_0 + Index;
2422 }
2423 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2424 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2425 "Unexpected subreg numbering");
2426 return RISCV::sub_vrm4_0 + Index;
2427 }
2428 llvm_unreachable("Invalid vector type.");
2429}
2430
2431static unsigned getRegClassIDForVecVT(MVT VT) {
2432 if (VT.getVectorElementType() == MVT::i1)
2433 return RISCV::VRRegClassID;
2434 return getRegClassIDForLMUL(getLMUL(VT));
2435}
2436
2437// Attempt to decompose a subvector insert/extract between VecVT and
2438// SubVecVT via subregister indices. Returns the subregister index that
2439// can perform the subvector insert/extract with the given element index, as
2440// well as the index corresponding to any leftover subvectors that must be
2441// further inserted/extracted within the register class for SubVecVT.
2442std::pair<unsigned, unsigned>
2443RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2444 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2445 const RISCVRegisterInfo *TRI) {
2446 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2447 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2448 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2449 "Register classes not ordered");
2450 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2451 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2452 // Try to compose a subregister index that takes us from the incoming
2453 // LMUL>1 register class down to the outgoing one. At each step we half
2454 // the LMUL:
2455 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2456 // Note that this is not guaranteed to find a subregister index, such as
2457 // when we are extracting from one VR type to another.
2458 unsigned SubRegIdx = RISCV::NoSubRegister;
2459 for (const unsigned RCID :
2460 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2461 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2462 VecVT = VecVT.getHalfNumVectorElementsVT();
2463 bool IsHi =
2464 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2465 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2466 getSubregIndexByMVT(VecVT, IsHi));
2467 if (IsHi)
2468 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2469 }
2470 return {SubRegIdx, InsertExtractIdx};
2471}
2472
2473// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2474// stores for those types.
2475bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2476 return !Subtarget.useRVVForFixedLengthVectors() ||
2477 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2478}
2479
2480bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2481 if (!ScalarTy.isSimple())
2482 return false;
2483 switch (ScalarTy.getSimpleVT().SimpleTy) {
2484 case MVT::iPTR:
2485 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2486 case MVT::i8:
2487 case MVT::i16:
2488 case MVT::i32:
2489 return true;
2490 case MVT::i64:
2491 return Subtarget.hasVInstructionsI64();
2492 case MVT::f16:
2493 return Subtarget.hasVInstructionsF16();
2494 case MVT::f32:
2495 return Subtarget.hasVInstructionsF32();
2496 case MVT::f64:
2497 return Subtarget.hasVInstructionsF64();
2498 default:
2499 return false;
2500 }
2501}
2502
2503
2504unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2505 return NumRepeatedDivisors;
2506}
2507
2508static SDValue getVLOperand(SDValue Op) {
2509 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2510 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2511 "Unexpected opcode");
2512 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2513 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2514 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2515 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2516 if (!II)
2517 return SDValue();
2518 return Op.getOperand(II->VLOperand + 1 + HasChain);
2519}
2520
2521static bool useRVVForFixedLengthVectorVT(MVT VT,
2522 const RISCVSubtarget &Subtarget) {
2523 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2524 if (!Subtarget.useRVVForFixedLengthVectors())
2525 return false;
2526
2527 // We only support a set of vector types with a consistent maximum fixed size
2528 // across all supported vector element types to avoid legalization issues.
2529 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2530 // fixed-length vector type we support is 1024 bytes.
2531 if (VT.getFixedSizeInBits() > 1024 * 8)
2532 return false;
2533
2534 unsigned MinVLen = Subtarget.getRealMinVLen();
2535
2536 MVT EltVT = VT.getVectorElementType();
2537
2538 // Don't use RVV for vectors we cannot scalarize if required.
2539 switch (EltVT.SimpleTy) {
2540 // i1 is supported but has different rules.
2541 default:
2542 return false;
2543 case MVT::i1:
2544 // Masks can only use a single register.
2545 if (VT.getVectorNumElements() > MinVLen)
2546 return false;
2547 MinVLen /= 8;
2548 break;
2549 case MVT::i8:
2550 case MVT::i16:
2551 case MVT::i32:
2552 break;
2553 case MVT::i64:
2554 if (!Subtarget.hasVInstructionsI64())
2555 return false;
2556 break;
2557 case MVT::f16:
2558 if (!Subtarget.hasVInstructionsF16Minimal())
2559 return false;
2560 break;
2561 case MVT::f32:
2562 if (!Subtarget.hasVInstructionsF32())
2563 return false;
2564 break;
2565 case MVT::f64:
2566 if (!Subtarget.hasVInstructionsF64())
2567 return false;
2568 break;
2569 }
2570
2571 // Reject elements larger than ELEN.
2572 if (EltVT.getSizeInBits() > Subtarget.getELen())
2573 return false;
2574
2575 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2576 // Don't use RVV for types that don't fit.
2577 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2578 return false;
2579
2580 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2581 // the base fixed length RVV support in place.
2582 if (!VT.isPow2VectorType())
2583 return false;
2584
2585 return true;
2586}
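// Illustrative examples, assuming a minimum VLEN of 128 and the default
// maximum LMUL of 8 for fixed-length vectors:
//   v32i32 (1024 bits) -> LMul = 1024 / 128 = 8 -> usable with RVV
//   v64i32 (2048 bits) -> LMul = 16 > 8         -> rejected
//   v3i32              -> not a power-of-two element count -> rejected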
2587
2588bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2589 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2590}
2591
2592// Return the largest legal scalable vector type that matches VT's element type.
2593static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2594 const RISCVSubtarget &Subtarget) {
2595 // This may be called before legal types are setup.
2596 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2597 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2598 "Expected legal fixed length vector!");
2599
2600 unsigned MinVLen = Subtarget.getRealMinVLen();
2601 unsigned MaxELen = Subtarget.getELen();
2602
2603 MVT EltVT = VT.getVectorElementType();
2604 switch (EltVT.SimpleTy) {
2605 default:
2606 llvm_unreachable("unexpected element type for RVV container");
2607 case MVT::i1:
2608 case MVT::i8:
2609 case MVT::i16:
2610 case MVT::i32:
2611 case MVT::i64:
2612 case MVT::f16:
2613 case MVT::f32:
2614 case MVT::f64: {
2615 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2616 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2617 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2618 unsigned NumElts =
2619 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2620 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2621 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2622 return MVT::getScalableVectorVT(EltVT, NumElts);
2623 }
2624 }
2625}
2626
2627static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2628 const RISCVSubtarget &Subtarget) {
2629 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2630 Subtarget);
2631}
2632
2633MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2634 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2635}
2636
2637// Grow V to consume an entire RVV register.
2638static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2639 const RISCVSubtarget &Subtarget) {
2640 assert(VT.isScalableVector() &&
2641 "Expected to convert into a scalable vector!");
2642 assert(V.getValueType().isFixedLengthVector() &&
2643 "Expected a fixed length vector operand!");
2644 SDLoc DL(V);
2645 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2646 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2647}
2648
2649// Shrink V so it's just big enough to maintain a VT's worth of data.
2650static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2651 const RISCVSubtarget &Subtarget) {
2652 assert(VT.isFixedLengthVector() &&
2653 "Expected to convert into a fixed length vector!");
2654 assert(V.getValueType().isScalableVector() &&
2655 "Expected a scalable vector operand!");
2656 SDLoc DL(V);
2657 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2658 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2659}
2660
2661 /// Return the mask type suitable for masking the provided
2662/// vector type. This is simply an i1 element type vector of the same
2663/// (possibly scalable) length.
2664static MVT getMaskTypeFor(MVT VecVT) {
2665 assert(VecVT.isVector());
2666 ElementCount EC = VecVT.getVectorElementCount();
2667 return MVT::getVectorVT(MVT::i1, EC);
2668}
2669
2670/// Creates an all ones mask suitable for masking a vector of type VecTy with
2671 /// vector length VL.
2672static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2673 SelectionDAG &DAG) {
2674 MVT MaskVT = getMaskTypeFor(VecVT);
2675 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2676}
2677
2678static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2679 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2680 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2681 // canonicalize the representation. InsertVSETVLI will pick the immediate
2682 // encoding later if profitable.
2683 const auto [MinVLMAX, MaxVLMAX] =
2684 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2685 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2686 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2687
2688 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2689}
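// Illustrative example: with an exactly known VLEN of 128, a fixed v8i32 is
// contained in nxv4i32, whose VLMAX is (VLEN / 64) * 4 = 8; getVLOp(8,
// nxv4i32, ...) therefore returns the X0 register (the VLMAX encoding)
// instead of the constant 8, letting InsertVSETVLI pick the cheaper form.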
2690
2691static std::pair<SDValue, SDValue>
2692getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2693 const RISCVSubtarget &Subtarget) {
2694 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2695 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2696 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2697 return {Mask, VL};
2698}
2699
2700static std::pair<SDValue, SDValue>
2701getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2702 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2703 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2704 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2705 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2706 return {Mask, VL};
2707}
2708
2709// Gets the two common "VL" operands: an all-ones mask and the vector length.
2710 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
2711 // fixed-length, ContainerVT is the scalable container type it is lowered
2712 // into; if VecVT is scalable, ContainerVT should be the same as VecVT.
2713static std::pair<SDValue, SDValue>
2714getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2715 const RISCVSubtarget &Subtarget) {
2716 if (VecVT.isFixedLengthVector())
2717 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2718 Subtarget);
2719 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2720 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2721}
2722
2723SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2724 SelectionDAG &DAG) const {
2725 assert(VecVT.isScalableVector() && "Expected scalable vector");
2726 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2727 VecVT.getVectorElementCount());
2728}
2729
2730std::pair<unsigned, unsigned>
2731RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2732 const RISCVSubtarget &Subtarget) {
2733 assert(VecVT.isScalableVector() && "Expected scalable vector");
2734
2735 unsigned EltSize = VecVT.getScalarSizeInBits();
2736 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2737
2738 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2739 unsigned MaxVLMAX =
2740 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2741
2742 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2743 unsigned MinVLMAX =
2744 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2745
2746 return std::make_pair(MinVLMAX, MaxVLMAX);
2747}
2748
2749// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2750// of either is (currently) supported. This can get us into an infinite loop
2751// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2752// as a ..., etc.
2753// Until either (or both) of these can reliably lower any node, reporting that
2754// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2755// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2756// which is not desirable.
2757bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2758 EVT VT, unsigned DefinedValues) const {
2759 return false;
2760}
2761
2762InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2763 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2764 // implementation-defined.
2765 if (!VT.isVector())
2766 return InstructionCost::getInvalid();
2767 unsigned DLenFactor = Subtarget.getDLenFactor();
2768 unsigned Cost;
2769 if (VT.isScalableVector()) {
2770 unsigned LMul;
2771 bool Fractional;
2772 std::tie(LMul, Fractional) =
2773 RISCVVType::decodeVLMUL(getLMUL(VT));
2774 if (Fractional)
2775 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2776 else
2777 Cost = (LMul * DLenFactor);
2778 } else {
2779 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2780 }
2781 return Cost;
2782}
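// Illustrative costs, assuming DLEN == VLEN/2 (a DLenFactor of 2):
//   nxv8i32 (LMUL_4)  -> 4 * 2 = 8
//   nxv2i32 (LMUL_1)  -> 1 * 2 = 2
//   nxv1i32 (LMUL_F2) -> DLenFactor / 2 = 1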
2783
2784
2785/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2786/// is generally quadratic in the number of vreg implied by LMUL. Note that
2787/// operand (index and possibly mask) are handled separately.
2788InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2789 return getLMULCost(VT) * getLMULCost(VT);
2790}
2791
2792/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2793/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2794/// or may track the vrgather.vv cost. It is implementation-dependent.
2795InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2796 return getLMULCost(VT);
2797}
2798
2799/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2800/// for the type VT. (This does not cover the vslide1up or vslide1down
2801/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2802/// or may track the vrgather.vv cost. It is implementation-dependent.
2803InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2804 return getLMULCost(VT);
2805}
2806
2807/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2808/// for the type VT. (This does not cover the vslide1up or vslide1down
2809/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2810/// or may track the vrgather.vv cost. It is implementation-dependent.
2811InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2812 return getLMULCost(VT);
2813}
2814
2815static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2816 const RISCVSubtarget &Subtarget) {
2817 // RISC-V FP-to-int conversions saturate to the destination register size, but
2818 // don't produce 0 for nan. We can use a conversion instruction and fix the
2819 // nan case with a compare and a select.
2820 SDValue Src = Op.getOperand(0);
2821
2822 MVT DstVT = Op.getSimpleValueType();
2823 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2824
2825 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2826
2827 if (!DstVT.isVector()) {
2828 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2829 // the result.
2830 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2831 Src.getValueType() == MVT::bf16) {
2832 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2833 }
2834
2835 unsigned Opc;
2836 if (SatVT == DstVT)
2837 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2838 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2839 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2840 else
2841 return SDValue();
2842 // FIXME: Support other SatVTs by clamping before or after the conversion.
2843
2844 SDLoc DL(Op);
2845 SDValue FpToInt = DAG.getNode(
2846 Opc, DL, DstVT, Src,
2847 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2848
2849 if (Opc == RISCVISD::FCVT_WU_RV64)
2850 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2851
2852 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2853 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2854 ISD::CondCode::SETUO);
2855 }
2856
2857 // Vectors.
2858
2859 MVT DstEltVT = DstVT.getVectorElementType();
2860 MVT SrcVT = Src.getSimpleValueType();
2861 MVT SrcEltVT = SrcVT.getVectorElementType();
2862 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2863 unsigned DstEltSize = DstEltVT.getSizeInBits();
2864
2865 // Only handle saturating to the destination type.
2866 if (SatVT != DstEltVT)
2867 return SDValue();
2868
2869 // FIXME: Don't support narrowing by more than 1 step for now.
2870 if (SrcEltSize > (2 * DstEltSize))
2871 return SDValue();
2872
2873 MVT DstContainerVT = DstVT;
2874 MVT SrcContainerVT = SrcVT;
2875 if (DstVT.isFixedLengthVector()) {
2876 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2877 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2878 assert(DstContainerVT.getVectorElementCount() ==
2879 SrcContainerVT.getVectorElementCount() &&
2880 "Expected same element count");
2881 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2882 }
2883
2884 SDLoc DL(Op);
2885
2886 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2887
2888 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2889 {Src, Src, DAG.getCondCode(ISD::SETNE),
2890 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2891
2892 // Need to widen by more than 1 step, promote the FP type, then do a widening
2893 // convert.
2894 if (DstEltSize > (2 * SrcEltSize)) {
2895 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2896 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2897 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2898 }
2899
2900 unsigned RVVOpc =
2901 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2902 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2903
2904 SDValue SplatZero = DAG.getNode(
2905 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2906 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2907 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2908 Res, DAG.getUNDEF(DstContainerVT), VL);
2909
2910 if (DstVT.isFixedLengthVector())
2911 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2912
2913 return Res;
2914}
2915
2916static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2917 switch (Opc) {
2918 case ISD::FROUNDEVEN:
2919 case ISD::STRICT_FROUNDEVEN:
2920 case ISD::VP_FROUNDEVEN:
2921 return RISCVFPRndMode::RNE;
2922 case ISD::FTRUNC:
2923 case ISD::STRICT_FTRUNC:
2924 case ISD::VP_FROUNDTOZERO:
2925 return RISCVFPRndMode::RTZ;
2926 case ISD::FFLOOR:
2927 case ISD::STRICT_FFLOOR:
2928 case ISD::VP_FFLOOR:
2929 return RISCVFPRndMode::RDN;
2930 case ISD::FCEIL:
2931 case ISD::STRICT_FCEIL:
2932 case ISD::VP_FCEIL:
2933 return RISCVFPRndMode::RUP;
2934 case ISD::FROUND:
2935 case ISD::STRICT_FROUND:
2936 case ISD::VP_FROUND:
2937 return RISCVFPRndMode::RMM;
2938 case ISD::FRINT:
2939 return RISCVFPRndMode::DYN;
2940 }
2941
2942 return RISCVFPRndMode::Invalid;
2943}
2944
2945// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2946// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2947// the integer domain and back. Taking care to avoid converting values that are
2948// nan or already correct.
2949static SDValue
2950lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2951 const RISCVSubtarget &Subtarget) {
2952 MVT VT = Op.getSimpleValueType();
2953 assert(VT.isVector() && "Unexpected type");
2954
2955 SDLoc DL(Op);
2956
2957 SDValue Src = Op.getOperand(0);
2958
2959 MVT ContainerVT = VT;
2960 if (VT.isFixedLengthVector()) {
2961 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2962 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2963 }
2964
2965 SDValue Mask, VL;
2966 if (Op->isVPOpcode()) {
2967 Mask = Op.getOperand(1);
2968 if (VT.isFixedLengthVector())
2969 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2970 Subtarget);
2971 VL = Op.getOperand(2);
2972 } else {
2973 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2974 }
2975
2976 // Freeze the source since we are increasing the number of uses.
2977 Src = DAG.getFreeze(Src);
2978
2979 // We do the conversion on the absolute value and fix the sign at the end.
2980 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2981
2982 // Determine the largest integer that can be represented exactly. This and
2983 // values larger than it don't have any fractional bits so don't need to
2984 // be converted.
2985 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2986 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2987 APFloat MaxVal = APFloat(FltSem);
2988 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2989 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2990 SDValue MaxValNode =
2991 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2992 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2993 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2994
2995 // If abs(Src) was larger than MaxVal or nan, keep it.
2996 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2997 Mask =
2998 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2999 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3000 Mask, Mask, VL});
3001
3002 // Truncate to integer and convert back to FP.
3003 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3004 MVT XLenVT = Subtarget.getXLenVT();
3005 SDValue Truncated;
3006
3007 switch (Op.getOpcode()) {
3008 default:
3009 llvm_unreachable("Unexpected opcode");
3010 case ISD::FCEIL:
3011 case ISD::VP_FCEIL:
3012 case ISD::FFLOOR:
3013 case ISD::VP_FFLOOR:
3014 case ISD::FROUND:
3015 case ISD::FROUNDEVEN:
3016 case ISD::VP_FROUND:
3017 case ISD::VP_FROUNDEVEN:
3018 case ISD::VP_FROUNDTOZERO: {
3019 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3020 assert(FRM != RISCVFPRndMode::Invalid);
3021 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3022 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3023 break;
3024 }
3025 case ISD::FTRUNC:
3026 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3027 Mask, VL);
3028 break;
3029 case ISD::FRINT:
3030 case ISD::VP_FRINT:
3031 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3032 break;
3033 case ISD::FNEARBYINT:
3034 case ISD::VP_FNEARBYINT:
3035 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3036 Mask, VL);
3037 break;
3038 }
3039
3040 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3041 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3042 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3043 Mask, VL);
3044
3045 // Restore the original sign so that -0.0 is preserved.
3046 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3047 Src, Src, Mask, VL);
3048
3049 if (!VT.isFixedLengthVector())
3050 return Truncated;
3051
3052 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3053}
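// Illustrative example of the scheme above: lowering FCEIL on <2.25, -1.5>
// converts the masked lanes to integers with the RUP rounding mode (giving
// <3, -1>), converts back to FP, and copies the original signs back so that
// values like -0.0 are preserved; lanes that are NaN or already at least
// 2^(precision-1) in magnitude are excluded by the mask and kept unchanged.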
3054
3055// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3056 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
3057 // qNaN and converting the new source to integer and back to FP.
3058static SDValue
3059lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3060 const RISCVSubtarget &Subtarget) {
3061 SDLoc DL(Op);
3062 MVT VT = Op.getSimpleValueType();
3063 SDValue Chain = Op.getOperand(0);
3064 SDValue Src = Op.getOperand(1);
3065
3066 MVT ContainerVT = VT;
3067 if (VT.isFixedLengthVector()) {
3068 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3069 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3070 }
3071
3072 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3073
3074 // Freeze the source since we are increasing the number of uses.
3075 Src = DAG.getFreeze(Src);
3076
3077 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3078 MVT MaskVT = Mask.getSimpleValueType();
3079 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3080 DAG.getVTList(MaskVT, MVT::Other),
3081 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3082 DAG.getUNDEF(MaskVT), Mask, VL});
3083 Chain = Unorder.getValue(1);
3084 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3085 DAG.getVTList(ContainerVT, MVT::Other),
3086 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3087 Chain = Src.getValue(1);
3088
3089 // We do the conversion on the absolute value and fix the sign at the end.
3090 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3091
3092 // Determine the largest integer that can be represented exactly. This and
3093 // values larger than it don't have any fractional bits so don't need to
3094 // be converted.
3095 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3096 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3097 APFloat MaxVal = APFloat(FltSem);
3098 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3099 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3100 SDValue MaxValNode =
3101 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3102 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3103 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3104
3105 // If abs(Src) was larger than MaxVal or nan, keep it.
3106 Mask = DAG.getNode(
3107 RISCVISD::SETCC_VL, DL, MaskVT,
3108 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3109
3110 // Truncate to integer and convert back to FP.
3111 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3112 MVT XLenVT = Subtarget.getXLenVT();
3113 SDValue Truncated;
3114
3115 switch (Op.getOpcode()) {
3116 default:
3117 llvm_unreachable("Unexpected opcode");
3118 case ISD::STRICT_FCEIL:
3119 case ISD::STRICT_FFLOOR:
3120 case ISD::STRICT_FROUND:
3121 case ISD::STRICT_FROUNDEVEN: {
3122 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3123 assert(FRM != RISCVFPRndMode::Invalid);
3124 Truncated = DAG.getNode(
3125 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3126 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3127 break;
3128 }
3129 case ISD::STRICT_FTRUNC:
3130 Truncated =
3131 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3132 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3133 break;
3134 case ISD::STRICT_FNEARBYINT:
3135 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3136 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3137 Mask, VL);
3138 break;
3139 }
3140 Chain = Truncated.getValue(1);
3141
3142 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3143 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3144 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3145 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3146 Truncated, Mask, VL);
3147 Chain = Truncated.getValue(1);
3148 }
3149
3150 // Restore the original sign so that -0.0 is preserved.
3151 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3152 Src, Src, Mask, VL);
3153
3154 if (VT.isFixedLengthVector())
3155 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3156 return DAG.getMergeValues({Truncated, Chain}, DL);
3157}
3158
3159static SDValue
3160lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3161 const RISCVSubtarget &Subtarget) {
3162 MVT VT = Op.getSimpleValueType();
3163 if (VT.isVector())
3164 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3165
3166 if (DAG.shouldOptForSize())
3167 return SDValue();
3168
3169 SDLoc DL(Op);
3170 SDValue Src = Op.getOperand(0);
3171
3172 // Create an integer the size of the mantissa with the MSB set. This and all
3173 // values larger than it don't have any fractional bits so don't need to be
3174 // converted.
3175 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3176 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3177 APFloat MaxVal = APFloat(FltSem);
3178 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3179 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3180 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3181
3182 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3183 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3184 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3185}
3186
3187// Expand vector LRINT and LLRINT by converting to the integer domain.
3188static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3189 const RISCVSubtarget &Subtarget) {
3190 MVT VT = Op.getSimpleValueType();
3191 assert(VT.isVector() && "Unexpected type");
3192
3193 SDLoc DL(Op);
3194 SDValue Src = Op.getOperand(0);
3195 MVT ContainerVT = VT;
3196
3197 if (VT.isFixedLengthVector()) {
3198 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3199 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3200 }
3201
3202 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3203 SDValue Truncated =
3204 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3205
3206 if (!VT.isFixedLengthVector())
3207 return Truncated;
3208
3209 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3210}
3211
3212static SDValue
3213getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3214 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3215 SDValue Offset, SDValue Mask, SDValue VL,
3216 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3217 if (Merge.isUndef())
3218 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3219 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3220 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3221 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3222}
3223
3224static SDValue
3225getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3226 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3227 SDValue VL,
3228 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3229 if (Merge.isUndef())
3230 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3231 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3232 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3233 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3234}
3235
3236static MVT getLMUL1VT(MVT VT) {
3237 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3238 "Unexpected vector MVT");
3239 return MVT::getScalableVectorVT(
3240 VT.getVectorElementType(),
3241 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3242}
3243
3244struct VIDSequence {
3245 int64_t StepNumerator;
3246 unsigned StepDenominator;
3247 int64_t Addend;
3248};
3249
3250static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3251 uint32_t BitWidth) {
3252 // We will use a SINT_TO_FP to materialize this constant so we should use a
3253 // signed APSInt here.
3254 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3255 // We use an arbitrary rounding mode here. If a floating-point is an exact
3256 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3257 // the rounding mode changes the output value, then it is not an exact
3258 // integer.
3259 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3260 bool IsExact;
3261 // If it is out of signed integer range, it will return an invalid operation.
3262 // If it is not an exact integer, IsExact is false.
3263 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3265 !IsExact)
3266 return std::nullopt;
3267 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3268}
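// Illustrative results, using an 8-bit width:
//   getExactInteger(3.0, 8)  == 3
//   getExactInteger(-2.0, 8) == 0xFE   (the two's-complement bit pattern)
//   getExactInteger(2.5, 8)  == std::nullopt (not an exact integer)
//   getExactInteger(1e30, 8) == std::nullopt (out of signed range)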
3269
3270// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3271// to the (non-zero) step S and start value X. This can be then lowered as the
3272// RVV sequence (VID * S) + X, for example.
3273// The step S is represented as an integer numerator divided by a positive
3274// denominator. Note that the implementation currently only identifies
3275// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3276// cannot detect 2/3, for example.
3277// Note that this method will also match potentially unappealing index
3278// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3279// determine whether this is worth generating code for.
3280static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3281 unsigned EltSizeInBits) {
3282 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3283 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3284 return std::nullopt;
3285 bool IsInteger = Op.getValueType().isInteger();
3286
3287 std::optional<unsigned> SeqStepDenom;
3288 std::optional<int64_t> SeqStepNum, SeqAddend;
3289 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3290 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3291
3292 // First extract the ops into a list of constant integer values. This may not
3293 // be possible for floats if they're not all representable as integers.
3294 SmallVector<std::optional<uint64_t>, 64> Elts(Op.getNumOperands());
3295 const unsigned OpSize = Op.getScalarValueSizeInBits();
3296 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3297 if (Elt.isUndef()) {
3298 Elts[Idx] = std::nullopt;
3299 continue;
3300 }
3301 if (IsInteger) {
3302 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3303 } else {
3304 auto ExactInteger =
3305 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3306 if (!ExactInteger)
3307 return std::nullopt;
3308 Elts[Idx] = *ExactInteger;
3309 }
3310 }
3311
3312 for (auto [Idx, Elt] : enumerate(Elts)) {
3313 // Assume undef elements match the sequence; we just have to be careful
3314 // when interpolating across them.
3315 if (!Elt)
3316 continue;
3317
3318 if (PrevElt) {
3319 // Calculate the step since the last non-undef element, and ensure
3320 // it's consistent across the entire sequence.
3321 unsigned IdxDiff = Idx - PrevElt->second;
3322 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3323
3324 // A zero value difference means that we're somewhere in the middle
3325 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3326 // step change before evaluating the sequence.
3327 if (ValDiff == 0)
3328 continue;
3329
3330 int64_t Remainder = ValDiff % IdxDiff;
3331 // Normalize the step if it's greater than 1.
3332 if (Remainder != ValDiff) {
3333 // The difference must cleanly divide the element span.
3334 if (Remainder != 0)
3335 return std::nullopt;
3336 ValDiff /= IdxDiff;
3337 IdxDiff = 1;
3338 }
3339
3340 if (!SeqStepNum)
3341 SeqStepNum = ValDiff;
3342 else if (ValDiff != SeqStepNum)
3343 return std::nullopt;
3344
3345 if (!SeqStepDenom)
3346 SeqStepDenom = IdxDiff;
3347 else if (IdxDiff != *SeqStepDenom)
3348 return std::nullopt;
3349 }
3350
3351 // Record this non-undef element for later.
3352 if (!PrevElt || PrevElt->first != *Elt)
3353 PrevElt = std::make_pair(*Elt, Idx);
3354 }
3355
3356 // We need to have logged a step for this to count as a legal index sequence.
3357 if (!SeqStepNum || !SeqStepDenom)
3358 return std::nullopt;
3359
3360 // Loop back through the sequence and validate elements we might have skipped
3361 // while waiting for a valid step. While doing this, log any sequence addend.
3362 for (auto [Idx, Elt] : enumerate(Elts)) {
3363 if (!Elt)
3364 continue;
3365 uint64_t ExpectedVal =
3366 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3367 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3368 if (!SeqAddend)
3369 SeqAddend = Addend;
3370 else if (Addend != SeqAddend)
3371 return std::nullopt;
3372 }
3373
3374 assert(SeqAddend && "Must have an addend if we have a step");
3375
3376 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3377}
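// Illustrative matches for the routine above:
//   <1, 3, 5, 7> -> step 2/1, addend 1
//   <0, 0, 1, 1> -> step 1/2, addend 0 (a fractional step)
//   <0, 1, 4, 9> -> std::nullopt (the step is not constant)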
3378
3379// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3380// and lower it as a VRGATHER_VX_VL from the source vector.
3381static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3382 SelectionDAG &DAG,
3383 const RISCVSubtarget &Subtarget) {
3384 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3385 return SDValue();
3386 SDValue Vec = SplatVal.getOperand(0);
3387 // Only perform this optimization on vectors of the same size for simplicity.
3388 // Don't perform this optimization for i1 vectors.
3389 // FIXME: Support i1 vectors, maybe by promoting to i8?
3390 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3391 return SDValue();
3392 SDValue Idx = SplatVal.getOperand(1);
3393 // The index must be a legal type.
3394 if (Idx.getValueType() != Subtarget.getXLenVT())
3395 return SDValue();
3396
3397 MVT ContainerVT = VT;
3398 if (VT.isFixedLengthVector()) {
3399 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3400 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3401 }
3402
3403 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3404
3405 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3406 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3407
3408 if (!VT.isFixedLengthVector())
3409 return Gather;
3410
3411 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3412}
3413
3414
3415/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3416/// which constitute a large proportion of the elements. In such cases we can
3417/// splat a vector with the dominant element and make up the shortfall with
3418/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3419/// Note that this includes vectors of 2 elements by association. The
3420/// upper-most element is the "dominant" one, allowing us to use a splat to
3421/// "insert" the upper element, and an insert of the lower element at position
3422/// 0, which improves codegen.
3423static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3424 const RISCVSubtarget &Subtarget) {
3425 MVT VT = Op.getSimpleValueType();
3426 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3427
3428 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3429
3430 SDLoc DL(Op);
3431 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3432
3433 MVT XLenVT = Subtarget.getXLenVT();
3434 unsigned NumElts = Op.getNumOperands();
3435
3436 SDValue DominantValue;
3437 unsigned MostCommonCount = 0;
3438 DenseMap<SDValue, unsigned> ValueCounts;
3439 unsigned NumUndefElts =
3440 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3441
3442 // Track the number of scalar loads we know we'd be inserting, estimated as
3443 // any non-zero floating-point constant. Other kinds of element are either
3444 // already in registers or are materialized on demand. The threshold at which
3445 // a vector load is more desirable than several scalar materialization and
3446 // vector-insertion instructions is not known.
3447 unsigned NumScalarLoads = 0;
3448
3449 for (SDValue V : Op->op_values()) {
3450 if (V.isUndef())
3451 continue;
3452
3453 ValueCounts.insert(std::make_pair(V, 0));
3454 unsigned &Count = ValueCounts[V];
3455 if (0 == Count)
3456 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3457 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3458
3459 // Is this value dominant? In case of a tie, prefer the highest element as
3460 // it's cheaper to insert near the beginning of a vector than it is at the
3461 // end.
3462 if (++Count >= MostCommonCount) {
3463 DominantValue = V;
3464 MostCommonCount = Count;
3465 }
3466 }
3467
3468 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3469 unsigned NumDefElts = NumElts - NumUndefElts;
3470 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3471
3472 // Don't perform this optimization when optimizing for size, since
3473 // materializing elements and inserting them tends to cause code bloat.
3474 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3475 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3476 ((MostCommonCount > DominantValueCountThreshold) ||
3477 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3478 // Start by splatting the most common element.
3479 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3480
3481 DenseSet<SDValue> Processed{DominantValue};
3482
3483 // We can handle an insert into the last element (of a splat) via
3484 // v(f)slide1down. This is slightly better than the vslideup insert
3485 // lowering as it avoids the need for a vector group temporary. It
3486 // is also better than using vmerge.vx as it avoids the need to
3487 // materialize the mask in a vector register.
3488 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3489 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3490 LastOp != DominantValue) {
3491 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3492 auto OpCode =
3493 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3494 if (!VT.isFloatingPoint())
3495 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3496 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3497 LastOp, Mask, VL);
3498 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3499 Processed.insert(LastOp);
3500 }
3501
3502 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3503 for (const auto &OpIdx : enumerate(Op->ops())) {
3504 const SDValue &V = OpIdx.value();
3505 if (V.isUndef() || !Processed.insert(V).second)
3506 continue;
3507 if (ValueCounts[V] == 1) {
3508 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3509 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3510 } else {
3511 // Blend in all instances of this value using a VSELECT, using a
3512 // mask where each bit signals whether that element is the one
3513 // we're after.
3514 SmallVector<SDValue> Ops;
3515 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3516 return DAG.getConstant(V == V1, DL, XLenVT);
3517 });
3518 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3519 DAG.getBuildVector(SelMaskTy, DL, Ops),
3520 DAG.getSplatBuildVector(VT, DL, V), Vec);
3521 }
3522 }
3523
3524 return Vec;
3525 }
3526
3527 return SDValue();
3528}
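// Illustrative example: v4i32 <3, 3, 3, 7> takes the path above as a splat of
// the dominant value 3 followed by a single vslide1down.vx of the trailing 7,
// rather than four scalar inserts.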
3529
3530static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3531 const RISCVSubtarget &Subtarget) {
3532 MVT VT = Op.getSimpleValueType();
3533 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3534
3535 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3536
3537 SDLoc DL(Op);
3538 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3539
3540 MVT XLenVT = Subtarget.getXLenVT();
3541 unsigned NumElts = Op.getNumOperands();
3542
3543 if (VT.getVectorElementType() == MVT::i1) {
3544 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3545 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3546 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3547 }
3548
3549 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3550 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3551 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3552 }
3553
3554 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3555 // scalar integer chunks whose bit-width depends on the number of mask
3556 // bits and XLEN.
3557 // First, determine the most appropriate scalar integer type to use. This
3558 // is at most XLenVT, but may be shrunk to a smaller vector element type
3559 // according to the size of the final vector - use i8 chunks rather than
3560 // XLenVT if we're producing a v8i1. This results in more consistent
3561 // codegen across RV32 and RV64.
3562 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3563 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3564 // If we have to use more than one INSERT_VECTOR_ELT then this
3565 // optimization is likely to increase code size; avoid performing it in
3566 // such a case. We can use a load from a constant pool in this case.
3567 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3568 return SDValue();
3569 // Now we can create our integer vector type. Note that it may be larger
3570 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3571 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3572 MVT IntegerViaVecVT =
3573 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3574 IntegerViaVecElts);
3575
3576 uint64_t Bits = 0;
3577 unsigned BitPos = 0, IntegerEltIdx = 0;
3578 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3579
3580 for (unsigned I = 0; I < NumElts;) {
3581 SDValue V = Op.getOperand(I);
3582 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3583 Bits |= ((uint64_t)BitValue << BitPos);
3584 ++BitPos;
3585 ++I;
3586
3587 // Once we accumulate enough bits to fill our scalar type or process the
3588 // last element, insert into our vector and clear our accumulated data.
3589 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3590 if (NumViaIntegerBits <= 32)
3591 Bits = SignExtend64<32>(Bits);
3592 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3593 Elts[IntegerEltIdx] = Elt;
3594 Bits = 0;
3595 BitPos = 0;
3596 IntegerEltIdx++;
3597 }
3598 }
3599
3600 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3601
3602 if (NumElts < NumViaIntegerBits) {
3603 // If we're producing a smaller vector than our minimum legal integer
3604 // type, bitcast to the equivalent (known-legal) mask type, and extract
3605 // our final mask.
3606 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3607 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3608 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3609 DAG.getConstant(0, DL, XLenVT));
3610 } else {
3611 // Else we must have produced an integer type with the same size as the
3612 // mask type; bitcast for the final result.
3613 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3614 Vec = DAG.getBitcast(VT, Vec);
3615 }
3616
3617 return Vec;
3618 }
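// Illustrative example of the mask lowering above: v8i1 <1,0,1,1,0,0,1,0> is
// packed LSB-first into the i8 constant 0x4D, materialized as a v1i8
// build_vector, and then bitcast back to v8i1.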
3619
3620 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3621 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3622 : RISCVISD::VMV_V_X_VL;
3623 if (!VT.isFloatingPoint())
3624 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3625 Splat =
3626 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3627 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3628 }
3629
3630 // Try and match index sequences, which we can lower to the vid instruction
3631 // with optional modifications. An all-undef vector is matched by
3632 // getSplatValue, above.
3633 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3634 int64_t StepNumerator = SimpleVID->StepNumerator;
3635 unsigned StepDenominator = SimpleVID->StepDenominator;
3636 int64_t Addend = SimpleVID->Addend;
3637
3638 assert(StepNumerator != 0 && "Invalid step");
3639 bool Negate = false;
3640 int64_t SplatStepVal = StepNumerator;
3641 unsigned StepOpcode = ISD::MUL;
3642 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3643 // anyway as the shift of 63 won't fit in uimm5.
3644 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3645 isPowerOf2_64(std::abs(StepNumerator))) {
3646 Negate = StepNumerator < 0;
3647 StepOpcode = ISD::SHL;
3648 SplatStepVal = Log2_64(std::abs(StepNumerator));
3649 }
3650
3651 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3652 // threshold since it's the immediate value many RVV instructions accept.
3653 // There is no vmul.vi instruction so ensure multiply constant can fit in
3654 // a single addi instruction.
3655 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3656 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3657 isPowerOf2_32(StepDenominator) &&
3658 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3659 MVT VIDVT =
3660 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3661 MVT VIDContainerVT =
3662 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3663 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3664 // Convert right out of the scalable type so we can use standard ISD
3665 // nodes for the rest of the computation. If we used scalable types with
3666 // these, we'd lose the fixed-length vector info and generate worse
3667 // vsetvli code.
3668 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3669 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3670 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3671 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3672 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3673 }
3674 if (StepDenominator != 1) {
3675 SDValue SplatStep =
3676 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3677 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3678 }
3679 if (Addend != 0 || Negate) {
3680 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3681 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3682 VID);
3683 }
3684 if (VT.isFloatingPoint()) {
3685 // TODO: Use vfwcvt to reduce register pressure.
3686 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3687 }
3688 return VID;
3689 }
3690 }
3691
3692 // For very small build_vectors, use a single scalar insert of a constant.
3693 // TODO: Base this on constant rematerialization cost, not size.
3694 const unsigned EltBitSize = VT.getScalarSizeInBits();
3695 if (VT.getSizeInBits() <= 32 &&
3696 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3697 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3698 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3699 "Unexpected sequence type");
3700 // If we can use the original VL with the modified element type, this
3701 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3702 // be moved into InsertVSETVLI?
3703 unsigned ViaVecLen =
3704 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3705 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3706
3707 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3708 uint64_t SplatValue = 0;
3709 // Construct the amalgamated value at this larger vector type.
3710 for (const auto &OpIdx : enumerate(Op->op_values())) {
3711 const auto &SeqV = OpIdx.value();
3712 if (!SeqV.isUndef())
3713 SplatValue |=
3714 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3715 }
3716
3717 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3718 // achieve better constant materialization.
3719 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3720 SplatValue = SignExtend64<32>(SplatValue);
3721
3722 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3723 DAG.getUNDEF(ViaVecVT),
3724 DAG.getConstant(SplatValue, DL, XLenVT),
3725 DAG.getVectorIdxConstant(0, DL));
3726 if (ViaVecLen != 1)
3727 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3728 MVT::getVectorVT(ViaIntVT, 1), Vec,
3729 DAG.getConstant(0, DL, XLenVT));
3730 return DAG.getBitcast(VT, Vec);
3731 }
3732
3733
3734 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3735 // when re-interpreted as a vector with a larger element type. For example,
3736 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3737 // could be instead splat as
3738 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3739 // TODO: This optimization could also work on non-constant splats, but it
3740 // would require bit-manipulation instructions to construct the splat value.
3741 SmallVector<SDValue> Sequence;
3742 const auto *BV = cast<BuildVectorSDNode>(Op);
3743 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3744 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3745 BV->getRepeatedSequence(Sequence) &&
3746 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3747 unsigned SeqLen = Sequence.size();
3748 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3749 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3750 ViaIntVT == MVT::i64) &&
3751 "Unexpected sequence type");
3752
3753 // If we can use the original VL with the modified element type, this
3754 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3755 // be moved into InsertVSETVLI?
3756 const unsigned RequiredVL = NumElts / SeqLen;
3757 const unsigned ViaVecLen =
3758 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3759 NumElts : RequiredVL;
3760 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3761
3762 unsigned EltIdx = 0;
3763 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3764 uint64_t SplatValue = 0;
3765 // Construct the amalgamated value which can be splatted as this larger
3766 // vector type.
3767 for (const auto &SeqV : Sequence) {
3768 if (!SeqV.isUndef())
3769 SplatValue |=
3770 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3771 EltIdx++;
3772 }
3773
3774 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3775 // achieve better constant materialization.
3776 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3777 SplatValue = SignExtend64<32>(SplatValue);
3778
3779 // Since we can't introduce illegal i64 types at this stage, we can only
3780 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3781 // way we can use RVV instructions to splat.
3782 assert((ViaIntVT.bitsLE(XLenVT) ||
3783 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3784 "Unexpected bitcast sequence");
3785 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3786 SDValue ViaVL =
3787 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3788 MVT ViaContainerVT =
3789 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3790 SDValue Splat =
3791 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3792 DAG.getUNDEF(ViaContainerVT),
3793 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3794 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3795 if (ViaVecLen != RequiredVL)
3796         Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3797                             MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3798 DAG.getConstant(0, DL, XLenVT));
3799 return DAG.getBitcast(VT, Splat);
3800 }
3801 }
3802
3803 // If the number of signbits allows, see if we can lower as a <N x i8>.
3804 // Our main goal here is to reduce LMUL (and thus work) required to
3805 // build the constant, but we will also narrow if the resulting
3806 // narrow vector is known to materialize cheaply.
3807 // TODO: We really should be costing the smaller vector. There are
3808 // profitable cases this misses.
3809 if (EltBitSize > 8 && VT.isInteger() &&
3810 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3811 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3812 if (EltBitSize - SignBits < 8) {
3813 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3814 DL, Op->ops());
3815 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3816 Source, DAG, Subtarget);
3817 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3818 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3819 }
3820 }
3821
3822 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3823 return Res;
3824
3825 // For constant vectors, use generic constant pool lowering. Otherwise,
3826 // we'd have to materialize constants in GPRs just to move them into the
3827 // vector.
3828 return SDValue();
3829}
3830
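// Illustrative sketch (a hypothetical standalone helper, not part of the
// upstream file): the element-packing loops above reduced to plain integer
// arithmetic. For the v4i16 sequence {0, 1, 0, 1} with repeated sub-sequence
// {0, 1} and EltBitSize == 16, the packed value is 0x00010000, matching the
// v2i32 splat example in the comments above.
static uint64_t packRepeatedSequenceSketch(ArrayRef<uint64_t> Seq,
                                           unsigned EltBitSize) {
  assert(Seq.size() * EltBitSize <= 64 && "packed value must fit in 64 bits");
  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
  uint64_t SplatValue = 0;
  for (unsigned I = 0; I != Seq.size(); ++I)
    SplatValue |= (Seq[I] & EltMask) << (I * EltBitSize);
  return SplatValue; // {0, 1} at 16 bits per element -> 0x00010000
}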
3831 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3832                                  const RISCVSubtarget &Subtarget) {
3833 MVT VT = Op.getSimpleValueType();
3834 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3835
3836 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3837       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3838     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3839
3840 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3841
3842 SDLoc DL(Op);
3843 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3844
3845 MVT XLenVT = Subtarget.getXLenVT();
3846
3847 if (VT.getVectorElementType() == MVT::i1) {
3848 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3849 // vector type, we have a legal equivalently-sized i8 type, so we can use
3850 // that.
3851 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3852 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3853
3854 SDValue WideVec;
3855 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3856 // For a splat, perform a scalar truncate before creating the wider
3857 // vector.
3858 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3859 DAG.getConstant(1, DL, Splat.getValueType()));
3860 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3861 } else {
3862 SmallVector<SDValue, 8> Ops(Op->op_values());
3863 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3864 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3865 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3866 }
3867
3868 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3869 }
3870
3871 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3872 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3873 return Gather;
3874 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3875                                         : RISCVISD::VMV_V_X_VL;
3876     if (!VT.isFloatingPoint())
3877 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3878 Splat =
3879 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3880 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3881 }
3882
3883 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3884 return Res;
3885
3886 // If we're compiling for an exact VLEN value, we can split our work per
3887 // register in the register group.
3888 if (const auto VLen = Subtarget.getRealVLen();
3889 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3890 MVT ElemVT = VT.getVectorElementType();
3891 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3892 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3893 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3894 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3895 assert(M1VT == getLMUL1VT(M1VT));
3896
3897 // The following semantically builds up a fixed length concat_vector
3898 // of the component build_vectors. We eagerly lower to scalable and
3899 // insert_subvector here to avoid DAG combining it back to a large
3900 // build_vector.
3901 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3902 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3903 SDValue Vec = DAG.getUNDEF(ContainerVT);
3904 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3905 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3906 SDValue SubBV =
3907 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3908 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3909 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3910 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3911 DAG.getVectorIdxConstant(InsertIdx, DL));
3912 }
3913 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3914 }
3915
3916 // For m1 vectors, if we have non-undef values in both halves of our vector,
3917 // split the vector into low and high halves, build them separately, then
3918 // use a vselect to combine them. For long vectors, this cuts the critical
3919 // path of the vslide1down sequence in half, and gives us an opportunity
3920 // to special case each half independently. Note that we don't change the
3921   // length of the sub-vectors here, so if both halves fall back to the generic
3922 // vslide1down path, we should be able to fold the vselect into the final
3923 // vslidedown (for the undef tail) for the first half w/ masking.
3924 unsigned NumElts = VT.getVectorNumElements();
3925 unsigned NumUndefElts =
3926 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3927 unsigned NumDefElts = NumElts - NumUndefElts;
3928 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3929 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3930 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3931 SmallVector<SDValue> MaskVals;
3932 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3933 SubVecAOps.reserve(NumElts);
3934 SubVecBOps.reserve(NumElts);
3935 for (unsigned i = 0; i < NumElts; i++) {
3936 SDValue Elem = Op->getOperand(i);
3937 if (i < NumElts / 2) {
3938 SubVecAOps.push_back(Elem);
3939 SubVecBOps.push_back(UndefElem);
3940 } else {
3941 SubVecAOps.push_back(UndefElem);
3942 SubVecBOps.push_back(Elem);
3943 }
3944 bool SelectMaskVal = (i < NumElts / 2);
3945 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3946 }
3947 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3948 MaskVals.size() == NumElts);
3949
3950 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3951 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3952 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3953 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3954 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3955 }
3956
3957 // Cap the cost at a value linear to the number of elements in the vector.
3958 // The default lowering is to use the stack. The vector store + scalar loads
3959 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3960 // being (at least) linear in LMUL. As a result, using the vslidedown
3961   // lowering for every element ends up being VL*LMUL.
3962 // TODO: Should we be directly costing the stack alternative? Doing so might
3963 // give us a more accurate upper bound.
3964 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3965
3966 // TODO: unify with TTI getSlideCost.
3967 InstructionCost PerSlideCost = 1;
3968 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3969 default: break;
3970   case RISCVII::VLMUL::LMUL_2:
3971     PerSlideCost = 2;
3972     break;
3973   case RISCVII::VLMUL::LMUL_4:
3974     PerSlideCost = 4;
3975     break;
3976   case RISCVII::VLMUL::LMUL_8:
3977     PerSlideCost = 8;
3978     break;
3979 }
3980
3981 // TODO: Should we be using the build instseq then cost + evaluate scheme
3982 // we use for integer constants here?
3983 unsigned UndefCount = 0;
3984 for (const SDValue &V : Op->ops()) {
3985 if (V.isUndef()) {
3986 UndefCount++;
3987 continue;
3988 }
3989 if (UndefCount) {
3990 LinearBudget -= PerSlideCost;
3991 UndefCount = 0;
3992 }
3993 LinearBudget -= PerSlideCost;
3994 }
3995 if (UndefCount) {
3996 LinearBudget -= PerSlideCost;
3997 }
3998
3999 if (LinearBudget < 0)
4000 return SDValue();
4001
4002 assert((!VT.isFloatingPoint() ||
4003 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4004 "Illegal type which will result in reserved encoding");
4005
4006 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4007
4008 SDValue Vec;
4009 UndefCount = 0;
4010 for (SDValue V : Op->ops()) {
4011 if (V.isUndef()) {
4012 UndefCount++;
4013 continue;
4014 }
4015
4016 // Start our sequence with a TA splat in the hopes that hardware is able to
4017 // recognize there's no dependency on the prior value of our temporary
4018 // register.
4019 if (!Vec) {
4020 Vec = DAG.getSplatVector(VT, DL, V);
4021 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4022 UndefCount = 0;
4023 continue;
4024 }
4025
4026 if (UndefCount) {
4027 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4028 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4029 Vec, Offset, Mask, VL, Policy);
4030 UndefCount = 0;
4031 }
4032 auto OpCode =
4033         VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4034     if (!VT.isFloatingPoint())
4035 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4036 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4037 V, Mask, VL);
4038 }
4039 if (UndefCount) {
4040 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4041 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4042 Vec, Offset, Mask, VL, Policy);
4043 }
4044 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4045}
4046
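// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the budget accounting used at the end of lowerBUILD_VECTOR above. Each
// defined element is charged one slide, and each maximal run of undefs is
// collapsed into a single vslidedown, so the build is accepted only when the
// total stays within a budget linear in the element count.
static bool fitsLinearSlideBudgetSketch(ArrayRef<SDValue> Ops, int PerSlideCost,
                                        int Budget) {
  int Cost = 0;
  unsigned UndefRun = 0;
  for (SDValue V : Ops) {
    if (V.isUndef()) {
      UndefRun++;
      continue;
    }
    if (UndefRun) {
      Cost += PerSlideCost; // one vslidedown covers the whole undef run
      UndefRun = 0;
    }
    Cost += PerSlideCost; // one vslide1down per defined element
  }
  if (UndefRun)
    Cost += PerSlideCost; // trailing undef run
  return Cost <= Budget;
}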
4047static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4048                                    SDValue Lo, SDValue Hi, SDValue VL,
4049                                    SelectionDAG &DAG) {
4050 if (!Passthru)
4051 Passthru = DAG.getUNDEF(VT);
4052 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4053 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4054 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4055     // If Hi is just the sign bit of Lo replicated (i.e. Lo sign-extended), lower
4056     // this as a custom node in order to try and match RVV vector/scalar instructions.
4057 if ((LoC >> 31) == HiC)
4058 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4059
4060 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4061 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4062 // vlmax vsetvli or vsetivli to change the VL.
4063 // FIXME: Support larger constants?
4064 // FIXME: Support non-constant VLs by saturating?
4065 if (LoC == HiC) {
4066 SDValue NewVL;
4067 if (isAllOnesConstant(VL) ||
4068 (isa<RegisterSDNode>(VL) &&
4069 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4070 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4071 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4072 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4073
4074 if (NewVL) {
4075 MVT InterVT =
4076 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4077 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4078 DAG.getUNDEF(InterVT), Lo, NewVL);
4079 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4080 }
4081 }
4082 }
4083
4084 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4085 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4086 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4087 Hi.getConstantOperandVal(1) == 31)
4088 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4089
4090 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4091 // even if it might be sign extended.
4092 if (Hi.isUndef())
4093 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4094
4095 // Fall back to a stack store and stride x0 vector load.
4096 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4097 Hi, VL);
4098}
4099
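// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the constant test used by splatPartsI64WithVL above. On RV32 an i64 splat
// can be emitted as a single SEW=64 vmv.v.x of Lo only when Hi is the sign
// extension of Lo, i.e. every bit of Hi equals Lo's sign bit.
static bool hiIsSignExtensionOfLoSketch(int32_t LoC, int32_t HiC) {
  // LoC >> 31 is 0 for a non-negative Lo and -1 (all ones) for a negative Lo,
  // which is exactly the high word a sign extension would produce.
  return (LoC >> 31) == HiC;
}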
4100// Called by type legalization to handle splat of i64 on RV32.
4101// FIXME: We can optimize this when the type has sign or zero bits in one
4102// of the halves.
4103static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4104 SDValue Scalar, SDValue VL,
4105 SelectionDAG &DAG) {
4106 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4107 SDValue Lo, Hi;
4108 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4109 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4110}
4111
4112 // This function lowers a splat of a scalar operand Scalar with the vector
4113// length VL. It ensures the final sequence is type legal, which is useful when
4114// lowering a splat after type legalization.
4115static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4116 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4117 const RISCVSubtarget &Subtarget) {
4118 bool HasPassthru = Passthru && !Passthru.isUndef();
4119 if (!HasPassthru && !Passthru)
4120 Passthru = DAG.getUNDEF(VT);
4121 if (VT.isFloatingPoint())
4122 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4123
4124 MVT XLenVT = Subtarget.getXLenVT();
4125
4126 // Simplest case is that the operand needs to be promoted to XLenVT.
4127 if (Scalar.getValueType().bitsLE(XLenVT)) {
4128 // If the operand is a constant, sign extend to increase our chances
4129 // of being able to use a .vi instruction. ANY_EXTEND would become a
4130     // zero extend and the simm5 check in isel would fail.
4131 // FIXME: Should we ignore the upper bits in isel instead?
4132 unsigned ExtOpc =
4133 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4134 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4135 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4136 }
4137
4138 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4139 "Unexpected scalar for splat lowering!");
4140
4141 if (isOneConstant(VL) && isNullConstant(Scalar))
4142 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4143 DAG.getConstant(0, DL, XLenVT), VL);
4144
4145 // Otherwise use the more complicated splatting algorithm.
4146 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4147}
4148
4149// This function lowers an insert of a scalar operand Scalar into lane
4150// 0 of the vector regardless of the value of VL. The contents of the
4151// remaining lanes of the result vector are unspecified. VL is assumed
4152// to be non-zero.
4153 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4154                                  const SDLoc &DL, SelectionDAG &DAG,
4155 const RISCVSubtarget &Subtarget) {
4156 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4157
4158 const MVT XLenVT = Subtarget.getXLenVT();
4159 SDValue Passthru = DAG.getUNDEF(VT);
4160
4161 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4162 isNullConstant(Scalar.getOperand(1))) {
4163 SDValue ExtractedVal = Scalar.getOperand(0);
4164 // The element types must be the same.
4165 if (ExtractedVal.getValueType().getVectorElementType() ==
4166 VT.getVectorElementType()) {
4167 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4168 MVT ExtractedContainerVT = ExtractedVT;
4169 if (ExtractedContainerVT.isFixedLengthVector()) {
4170 ExtractedContainerVT = getContainerForFixedLengthVector(
4171 DAG, ExtractedContainerVT, Subtarget);
4172 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4173 ExtractedVal, DAG, Subtarget);
4174 }
4175 if (ExtractedContainerVT.bitsLE(VT))
4176 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4177 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4178 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4179 DAG.getVectorIdxConstant(0, DL));
4180 }
4181 }
4182
4183
4184 if (VT.isFloatingPoint())
4185 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4186 DAG.getUNDEF(VT), Scalar, VL);
4187
4188 // Avoid the tricky legalization cases by falling back to using the
4189 // splat code which already handles it gracefully.
4190 if (!Scalar.getValueType().bitsLE(XLenVT))
4191 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4192 DAG.getConstant(1, DL, XLenVT),
4193 VT, DL, DAG, Subtarget);
4194
4195 // If the operand is a constant, sign extend to increase our chances
4196 // of being able to use a .vi instruction. ANY_EXTEND would become a
4197   // zero extend and the simm5 check in isel would fail.
4198 // FIXME: Should we ignore the upper bits in isel instead?
4199 unsigned ExtOpc =
4200 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4201 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4202 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4203 DAG.getUNDEF(VT), Scalar, VL);
4204}
4205
4206 // Does this shuffle extract either the even or odd elements of a vector?
4207// That is, specifically, either (a) or (b) below.
4208// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4209// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4210// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4211// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4212 // Returns {Src Vector, Even Elements} on success.
4213static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4214 SDValue V2, ArrayRef<int> Mask,
4215 const RISCVSubtarget &Subtarget) {
4216 // Need to be able to widen the vector.
4217 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4218 return false;
4219
4220   // Both inputs must be extracts.
4221 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4222 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4223 return false;
4224
4225 // Extracting from the same source.
4226 SDValue Src = V1.getOperand(0);
4227 if (Src != V2.getOperand(0))
4228 return false;
4229
4230 // Src needs to have twice the number of elements.
4231 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4232 return false;
4233
4234 // The extracts must extract the two halves of the source.
4235 if (V1.getConstantOperandVal(1) != 0 ||
4236 V2.getConstantOperandVal(1) != Mask.size())
4237 return false;
4238
4239 // First index must be the first even or odd element from V1.
4240 if (Mask[0] != 0 && Mask[0] != 1)
4241 return false;
4242
4243 // The others must increase by 2 each time.
4244 // TODO: Support undef elements?
4245 for (unsigned i = 1; i != Mask.size(); ++i)
4246 if (Mask[i] != Mask[i - 1] + 2)
4247 return false;
4248
4249 return true;
4250}
4251
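// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the mask shape accepted by isDeinterleaveShuffle above, checked on plain
// integers. <0, 2, 4, 6, ...> selects the even elements of the wider source
// and <1, 3, 5, 7, ...> selects the odd elements.
static bool isEvenOddDeinterleaveMaskSketch(ArrayRef<int> Mask) {
  if (Mask.empty() || (Mask[0] != 0 && Mask[0] != 1))
    return false;
  for (unsigned I = 1; I != Mask.size(); ++I)
    if (Mask[I] != Mask[I - 1] + 2)
      return false; // indices must increase by 2 each step
  return true;
}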
4252/// Is this shuffle interleaving contiguous elements from one vector into the
4253/// even elements and contiguous elements from another vector into the odd
4254/// elements. \p EvenSrc will contain the element that should be in the first
4255/// even element. \p OddSrc will contain the element that should be in the first
4256/// odd element. These can be the first element in a source or the element half
4257/// way through the source.
4258static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4259 int &OddSrc, const RISCVSubtarget &Subtarget) {
4260 // We need to be able to widen elements to the next larger integer type.
4261 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4262 return false;
4263
4264 int Size = Mask.size();
4265 int NumElts = VT.getVectorNumElements();
4266 assert(Size == (int)NumElts && "Unexpected mask size");
4267
4268 SmallVector<unsigned, 2> StartIndexes;
4269 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4270 return false;
4271
4272 EvenSrc = StartIndexes[0];
4273 OddSrc = StartIndexes[1];
4274
4275 // One source should be low half of first vector.
4276 if (EvenSrc != 0 && OddSrc != 0)
4277 return false;
4278
4279   // Subvectors will be extracted from either the start of the two input
4280   // vectors, or from the start and middle of the first vector if it's a unary
4281   // interleave.
4282 // In both cases, HalfNumElts will be extracted.
4283 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4284 // we'll create an illegal extract_subvector.
4285 // FIXME: We could support other values using a slidedown first.
4286 int HalfNumElts = NumElts / 2;
4287 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4288}
4289
4290/// Match shuffles that concatenate two vectors, rotate the concatenation,
4291/// and then extract the original number of elements from the rotated result.
4292/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4293/// returned rotation amount is for a rotate right, where elements move from
4294/// higher elements to lower elements. \p LoSrc indicates the first source
4295/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4296/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4297/// 0 or 1 if a rotation is found.
4298///
4299/// NOTE: We talk about rotate to the right which matches how bit shift and
4300/// rotate instructions are described where LSBs are on the right, but LLVM IR
4301/// and the table below write vectors with the lowest elements on the left.
4302static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4303 int Size = Mask.size();
4304
4305 // We need to detect various ways of spelling a rotation:
4306 // [11, 12, 13, 14, 15, 0, 1, 2]
4307 // [-1, 12, 13, 14, -1, -1, 1, -1]
4308 // [-1, -1, -1, -1, -1, -1, 1, 2]
4309 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4310 // [-1, 4, 5, 6, -1, -1, 9, -1]
4311 // [-1, 4, 5, 6, -1, -1, -1, -1]
4312 int Rotation = 0;
4313 LoSrc = -1;
4314 HiSrc = -1;
4315 for (int i = 0; i != Size; ++i) {
4316 int M = Mask[i];
4317 if (M < 0)
4318 continue;
4319
4320 // Determine where a rotate vector would have started.
4321 int StartIdx = i - (M % Size);
4322 // The identity rotation isn't interesting, stop.
4323 if (StartIdx == 0)
4324 return -1;
4325
4326 // If we found the tail of a vector the rotation must be the missing
4327 // front. If we found the head of a vector, it must be how much of the
4328 // head.
4329 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4330
4331 if (Rotation == 0)
4332 Rotation = CandidateRotation;
4333 else if (Rotation != CandidateRotation)
4334 // The rotations don't match, so we can't match this mask.
4335 return -1;
4336
4337 // Compute which value this mask is pointing at.
4338 int MaskSrc = M < Size ? 0 : 1;
4339
4340 // Compute which of the two target values this index should be assigned to.
4341     // This reflects whether the high elements are remaining or the low elements
4342 // are remaining.
4343 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4344
4345 // Either set up this value if we've not encountered it before, or check
4346 // that it remains consistent.
4347 if (TargetSrc < 0)
4348 TargetSrc = MaskSrc;
4349 else if (TargetSrc != MaskSrc)
4350 // This may be a rotation, but it pulls from the inputs in some
4351 // unsupported interleaving.
4352 return -1;
4353 }
4354
4355 // Check that we successfully analyzed the mask, and normalize the results.
4356 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4357 assert((LoSrc >= 0 || HiSrc >= 0) &&
4358 "Failed to find a rotated input vector!");
4359
4360 return Rotation;
4361}
4362
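// Illustrative sketch (hypothetical helper, not part of the upstream file): a
// simplified, undef-free version of the rotation-amount check above. For the
// 8-element mask <11, 12, 13, 14, 15, 0, 1, 2> it returns 3: the lowering
// later slides V2 down by 3 and V1 up by NumElts - 3 = 5.
static int matchSimpleRotationSketch(ArrayRef<int> Mask) {
  int Size = Mask.size();
  int Rotation = 0;
  for (int I = 0; I != Size; ++I) {
    int StartIdx = I - (Mask[I] % Size);
    if (StartIdx == 0)
      return -1; // the identity rotation is not interesting
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // inconsistent rotation amounts
  }
  return Rotation;
}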
4363// Lower a deinterleave shuffle to vnsrl.
4364// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4365// -> [p, q, r, s] (EvenElts == false)
4366// VT is the type of the vector to return, <[vscale x ]n x ty>
4367// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4368 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4369                                        bool EvenElts,
4370 const RISCVSubtarget &Subtarget,
4371 SelectionDAG &DAG) {
4372 // The result is a vector of type <m x n x ty>
4373 MVT ContainerVT = VT;
4374 // Convert fixed vectors to scalable if needed
4375 if (ContainerVT.isFixedLengthVector()) {
4376 assert(Src.getSimpleValueType().isFixedLengthVector());
4377 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4378
4379 // The source is a vector of type <m x n*2 x ty>
4380 MVT SrcContainerVT =
4381         MVT::getVectorVT(ContainerVT.getVectorElementType(),
4382                          ContainerVT.getVectorElementCount() * 2);
4383 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4384 }
4385
4386 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4387
4388 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4389 // This also converts FP to int.
4390 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4391 MVT WideSrcContainerVT = MVT::getVectorVT(
4392 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4393 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4394
4395 // The integer version of the container type.
4396 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4397
4398 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4399 // the original element size.
4400 unsigned Shift = EvenElts ? 0 : EltBits;
4401 SDValue SplatShift = DAG.getNode(
4402 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4403 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4404 SDValue Res =
4405 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4406 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4407 // Cast back to FP if needed.
4408 Res = DAG.getBitcast(ContainerVT, Res);
4409
4410 if (VT.isFixedLengthVector())
4411 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4412 return Res;
4413}
4414
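// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the scalar analogue of the vnsrl trick above. A pair of adjacent narrow
// elements is viewed as one element of twice the width; a narrowing shift by
// 0 keeps the even (low) halves and a shift by EltBits keeps the odd (high)
// halves.
static uint64_t extractPairHalfSketch(uint64_t WideElt, unsigned EltBits,
                                      bool EvenElts) {
  assert(EltBits <= 32 && "a pair must fit in the 64-bit wide element");
  unsigned Shift = EvenElts ? 0 : EltBits;
  return (WideElt >> Shift) & maskTrailingOnes<uint64_t>(EltBits);
}
// E.g. with EltBits == 8 and WideElt == 0xB1A0 (the pair {0xA0, 0xB1}),
// EvenElts == true yields 0xA0 and EvenElts == false yields 0xB1.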
4415// Lower the following shuffle to vslidedown.
4416// a)
4417// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4418// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4419// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4420// b)
4421// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4422// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4423// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4424// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4425// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4426// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4427 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4428                                                SDValue V1, SDValue V2,
4429 ArrayRef<int> Mask,
4430 const RISCVSubtarget &Subtarget,
4431 SelectionDAG &DAG) {
4432 auto findNonEXTRACT_SUBVECTORParent =
4433 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4434 uint64_t Offset = 0;
4435 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4436 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4437 // a scalable vector. But we don't want to match the case.
4438 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4439 Offset += Parent.getConstantOperandVal(1);
4440 Parent = Parent.getOperand(0);
4441 }
4442 return std::make_pair(Parent, Offset);
4443 };
4444
4445 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4446 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4447
4448 // Extracting from the same source.
4449 SDValue Src = V1Src;
4450 if (Src != V2Src)
4451 return SDValue();
4452
4453 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4454 SmallVector<int, 16> NewMask(Mask);
4455 for (size_t i = 0; i != NewMask.size(); ++i) {
4456 if (NewMask[i] == -1)
4457 continue;
4458
4459 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4460 NewMask[i] = NewMask[i] + V1IndexOffset;
4461 } else {
4462 // Minus NewMask.size() is needed. Otherwise, the b case would be
4463 // <5,6,7,12> instead of <5,6,7,8>.
4464 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4465 }
4466 }
4467
4468 // First index must be known and non-zero. It will be used as the slidedown
4469 // amount.
4470 if (NewMask[0] <= 0)
4471 return SDValue();
4472
4473 // NewMask is also continuous.
4474 for (unsigned i = 1; i != NewMask.size(); ++i)
4475 if (NewMask[i - 1] + 1 != NewMask[i])
4476 return SDValue();
4477
4478 MVT XLenVT = Subtarget.getXLenVT();
4479 MVT SrcVT = Src.getSimpleValueType();
4480 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4481 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4482 SDValue Slidedown =
4483 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4484 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4485 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4486 return DAG.getNode(
4487       ISD::EXTRACT_SUBVECTOR, DL, VT,
4488       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4489 DAG.getConstant(0, DL, XLenVT));
4490}
4491
4492// Because vslideup leaves the destination elements at the start intact, we can
4493// use it to perform shuffles that insert subvectors:
4494//
4495// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4496// ->
4497// vsetvli zero, 8, e8, mf2, ta, ma
4498// vslideup.vi v8, v9, 4
4499//
4500// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4501// ->
4502// vsetvli zero, 5, e8, mf2, tu, ma
4503 // vslideup.vi v8, v9, 2
4504 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4505                                              SDValue V1, SDValue V2,
4506 ArrayRef<int> Mask,
4507 const RISCVSubtarget &Subtarget,
4508 SelectionDAG &DAG) {
4509 unsigned NumElts = VT.getVectorNumElements();
4510 int NumSubElts, Index;
4511 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4512 Index))
4513 return SDValue();
4514
4515 bool OpsSwapped = Mask[Index] < (int)NumElts;
4516 SDValue InPlace = OpsSwapped ? V2 : V1;
4517 SDValue ToInsert = OpsSwapped ? V1 : V2;
4518
4519 MVT XLenVT = Subtarget.getXLenVT();
4520 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4521 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4522 // We slide up by the index that the subvector is being inserted at, and set
4523 // VL to the index + the number of elements being inserted.
4524   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4525   // If we're adding a suffix to the in-place vector, i.e. inserting right
4526 // up to the very end of it, then we don't actually care about the tail.
4527 if (NumSubElts + Index >= (int)NumElts)
4528 Policy |= RISCVII::TAIL_AGNOSTIC;
4529
4530 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4531 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4532 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4533
4534 SDValue Res;
4535 // If we're inserting into the lowest elements, use a tail undisturbed
4536 // vmv.v.v.
4537 if (Index == 0)
4538 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4539 VL);
4540 else
4541 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4542 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4543 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4544}
4545
4546/// Match v(f)slide1up/down idioms. These operations involve sliding
4547/// N-1 elements to make room for an inserted scalar at one end.
4548 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4549                                             SDValue V1, SDValue V2,
4550 ArrayRef<int> Mask,
4551 const RISCVSubtarget &Subtarget,
4552 SelectionDAG &DAG) {
4553 bool OpsSwapped = false;
4554 if (!isa<BuildVectorSDNode>(V1)) {
4555 if (!isa<BuildVectorSDNode>(V2))
4556 return SDValue();
4557 std::swap(V1, V2);
4558 OpsSwapped = true;
4559 }
4560 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4561 if (!Splat)
4562 return SDValue();
4563
4564 // Return true if the mask could describe a slide of Mask.size() - 1
4565 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4566 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4567 const unsigned S = (Offset > 0) ? 0 : -Offset;
4568 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4569 for (unsigned i = S; i != E; ++i)
4570 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4571 return false;
4572 return true;
4573 };
4574
4575 const unsigned NumElts = VT.getVectorNumElements();
4576 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4577 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4578 return SDValue();
4579
4580 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4581   // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4582 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4583 return SDValue();
4584
4585 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4586 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4587 auto OpCode = IsVSlidedown ?
4588     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4589     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4590   if (!VT.isFloatingPoint())
4591 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4592 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4593 DAG.getUNDEF(ContainerVT),
4594 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4595 Splat, TrueMask, VL);
4596 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4597}
4598
4599// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4600// to create an interleaved vector of <[vscale x] n*2 x ty>.
4601// This requires that the size of ty is less than the subtarget's maximum ELEN.
4602 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4603                                      const SDLoc &DL, SelectionDAG &DAG,
4604 const RISCVSubtarget &Subtarget) {
4605 MVT VecVT = EvenV.getSimpleValueType();
4606 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4607 // Convert fixed vectors to scalable if needed
4608 if (VecContainerVT.isFixedLengthVector()) {
4609 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4610 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4611 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4612 }
4613
4614 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4615
4616 // We're working with a vector of the same size as the resulting
4617 // interleaved vector, but with half the number of elements and
4618 // twice the SEW (Hence the restriction on not using the maximum
4619 // ELEN)
4620 MVT WideVT =
4621       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4622                        VecVT.getVectorElementCount());
4623 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4624 if (WideContainerVT.isFixedLengthVector())
4625 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4626
4627 // Bitcast the input vectors to integers in case they are FP
4628 VecContainerVT = VecContainerVT.changeTypeToInteger();
4629 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4630 OddV = DAG.getBitcast(VecContainerVT, OddV);
4631
4632 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4633 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4634
4635 SDValue Interleaved;
4636 if (OddV.isUndef()) {
4637 // If OddV is undef, this is a zero extend.
4638 // FIXME: Not only does this optimize the code, it fixes some correctness
4639 // issues because MIR does not have freeze.
4640 Interleaved =
4641 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4642 } else if (Subtarget.hasStdExtZvbb()) {
4643 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4644 SDValue OffsetVec =
4645 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4646 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4647 OffsetVec, Passthru, Mask, VL);
4648 if (!EvenV.isUndef())
4649 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4650 Interleaved, EvenV, Passthru, Mask, VL);
4651 } else if (EvenV.isUndef()) {
4652 Interleaved =
4653 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4654
4655 SDValue OffsetVec =
4656 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4657 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4658 Interleaved, OffsetVec, Passthru, Mask, VL);
4659 } else {
4660 // FIXME: We should freeze the odd vector here. We already handled the case
4661 // of provably undef/poison above.
4662
4663 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4664 // vwaddu.vv
4665 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4666 OddV, Passthru, Mask, VL);
4667
4668     // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
4669 SDValue AllOnesVec = DAG.getSplatVector(
4670 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4671 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4672 OddV, AllOnesVec, Passthru, Mask, VL);
4673
4674 // Add the two together so we get
4675 // (OddV * 0xff...ff) + (OddV + EvenV)
4676 // = (OddV * 0x100...00) + EvenV
4677 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4678     // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4679 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4680 Interleaved, OddsMul, Passthru, Mask, VL);
4681 }
4682
4683 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4684 MVT ResultContainerVT = MVT::getVectorVT(
4685 VecVT.getVectorElementType(), // Make sure to use original type
4686 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4687 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4688
4689 // Convert back to a fixed vector if needed
4690 MVT ResultVT =
4691       MVT::getVectorVT(VecVT.getVectorElementType(),
4692                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4693   if (ResultVT.isFixedLengthVector())
4694 Interleaved =
4695 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4696
4697 return Interleaved;
4698}
4699
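// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the per-element arithmetic behind the vwaddu.vv + vwmaccu.vx sequence
// above, on plain 64-bit integers. Since
//   Odd * (2^SEW - 1) + (Odd + Even) == (Odd << SEW) + Even,
// the result is exactly the pair {Even, Odd} viewed as one element of twice
// the width.
static uint64_t interleavePairSketch(uint64_t Even, uint64_t Odd,
                                     unsigned SEW) {
  assert(SEW <= 32 && "result must fit in 64 bits for this sketch");
  uint64_t AllOnes = maskTrailingOnes<uint64_t>(SEW); // 2^SEW - 1
  Even &= AllOnes;
  Odd &= AllOnes;
  uint64_t WideAdd = Odd + Even;             // vwaddu.vv
  uint64_t Result = Odd * AllOnes + WideAdd; // vwmaccu.vx by all-ones
  assert(Result == ((Odd << SEW) | Even) && "identity check");
  return Result;
}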
4700// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4701// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4702 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4703                                       SelectionDAG &DAG,
4704 const RISCVSubtarget &Subtarget) {
4705 SDLoc DL(SVN);
4706 MVT VT = SVN->getSimpleValueType(0);
4707 SDValue V = SVN->getOperand(0);
4708 unsigned NumElts = VT.getVectorNumElements();
4709
4710 assert(VT.getVectorElementType() == MVT::i1);
4711
4712   if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4713                                         SVN->getMask().size()) ||
4714 !SVN->getOperand(1).isUndef())
4715 return SDValue();
4716
4717 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4718 EVT ViaVT = EVT::getVectorVT(
4719 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4720 EVT ViaBitVT =
4721 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4722
4723 // If we don't have zvbb or the larger element type > ELEN, the operation will
4724 // be illegal.
4725   if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4726                                                                ViaVT) ||
4727 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4728 return SDValue();
4729
4730 // If the bit vector doesn't fit exactly into the larger element type, we need
4731 // to insert it into the larger vector and then shift the reversed bits down
4732 // afterwards to get rid of the gap introduced.
4733 if (ViaEltSize > NumElts)
4734 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4735 V, DAG.getVectorIdxConstant(0, DL));
4736
4737 SDValue Res =
4738 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4739
4740   // Shift the reversed bits down if the vector didn't exactly fit into the larger
4741 // element type.
4742 if (ViaEltSize > NumElts)
4743 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4744 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4745
4746 Res = DAG.getBitcast(ViaBitVT, Res);
4747
4748 if (ViaEltSize > NumElts)
4749 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4750 DAG.getVectorIdxConstant(0, DL));
4751 return Res;
4752}
4753
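// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the scalar bit manipulation performed by lowerBitreverseShuffle above when
// the mask does not fill the wider element. Reversing NumElts mask bits held
// in the low bits of a ViaEltSize-bit value is a full bit reverse followed by
// a right shift that removes the introduced gap.
static uint64_t reverseMaskBitsSketch(uint64_t Bits, unsigned NumElts,
                                      unsigned ViaEltSize) {
  assert(NumElts <= ViaEltSize && ViaEltSize <= 64);
  uint64_t Rev = 0;
  for (unsigned I = 0; I != ViaEltSize; ++I) // ISD::BITREVERSE analogue
    if (Bits & (uint64_t(1) << I))
      Rev |= uint64_t(1) << (ViaEltSize - 1 - I);
  return Rev >> (ViaEltSize - NumElts); // shift out the gap
}
// E.g. NumElts == 4, ViaEltSize == 8: Bits == 0b00001101 (elements {1,0,1,1})
// reverses to 0b10110000 and shifts down to 0b1011 (elements {1,1,0,1}).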
4754 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4755                              SelectionDAG &DAG,
4756 const RISCVSubtarget &Subtarget,
4757 MVT &RotateVT, unsigned &RotateAmt) {
4758 SDLoc DL(SVN);
4759
4760 EVT VT = SVN->getValueType(0);
4761 unsigned NumElts = VT.getVectorNumElements();
4762 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4763 unsigned NumSubElts;
4764 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4765 NumElts, NumSubElts, RotateAmt))
4766 return false;
4767 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4768 NumElts / NumSubElts);
4769
4770 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4771 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4772}
4773
4774// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4775// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4776// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4777 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4778                                            SelectionDAG &DAG,
4779 const RISCVSubtarget &Subtarget) {
4780 SDLoc DL(SVN);
4781
4782 EVT VT = SVN->getValueType(0);
4783 unsigned RotateAmt;
4784 MVT RotateVT;
4785 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4786 return SDValue();
4787
4788 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4789
4790 SDValue Rotate;
4791 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4792 // so canonicalize to vrev8.
4793 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4794 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4795 else
4796 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4797 DAG.getConstant(RotateAmt, DL, RotateVT));
4798
4799 return DAG.getBitcast(VT, Rotate);
4800}
4801
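// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the per-group arithmetic behind the v8i8 <3, 0, 1, 2, 7, 4, 5, 6> example
// above. Moving every source byte one position higher within its group of
// four (with wraparound) is, on the little-endian i32 view of that group, a
// rotate left by 8 bits, which is what the ISD::ROTL node above encodes.
static uint32_t rotateByteGroupSketch(uint32_t GroupLE) {
  // Byte view: {b0, b1, b2, b3} -> {b3, b0, b1, b2}.
  return (GroupLE << 8) | (GroupLE >> 24);
}
// E.g. the bytes {0x00, 0x11, 0x22, 0x33} (GroupLE == 0x33221100) become
// {0x33, 0x00, 0x11, 0x22}, i.e. 0x22110033.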
4802// If compiling with an exactly known VLEN, see if we can split a
4803// shuffle on m2 or larger into a small number of m1 sized shuffles
4804 // which write each destination register exactly once.
4805 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4806                                             SelectionDAG &DAG,
4807 const RISCVSubtarget &Subtarget) {
4808 SDLoc DL(SVN);
4809 MVT VT = SVN->getSimpleValueType(0);
4810 SDValue V1 = SVN->getOperand(0);
4811 SDValue V2 = SVN->getOperand(1);
4812 ArrayRef<int> Mask = SVN->getMask();
4813 unsigned NumElts = VT.getVectorNumElements();
4814
4815 // If we don't know exact data layout, not much we can do. If this
4816 // is already m1 or smaller, no point in splitting further.
4817 const auto VLen = Subtarget.getRealVLen();
4818 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4819 return SDValue();
4820
4821 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4822 // expansion for.
4823 unsigned RotateAmt;
4824 MVT RotateVT;
4825 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4826 return SDValue();
4827
4828 MVT ElemVT = VT.getVectorElementType();
4829 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4830 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4831
4832   SmallVector<std::pair<int, SmallVector<int>>>
4833     OutMasks(VRegsPerSrc, {-1, {}});
4834
4835 // Check if our mask can be done as a 1-to-1 mapping from source
4836 // to destination registers in the group without needing to
4837 // write each destination more than once.
4838 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4839 int DstVecIdx = DstIdx / ElemsPerVReg;
4840 int DstSubIdx = DstIdx % ElemsPerVReg;
4841 int SrcIdx = Mask[DstIdx];
4842 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4843 continue;
4844 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4845 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4846 if (OutMasks[DstVecIdx].first == -1)
4847 OutMasks[DstVecIdx].first = SrcVecIdx;
4848 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4849 // Note: This case could easily be handled by keeping track of a chain
4850 // of source values and generating two element shuffles below. This is
4851 // less an implementation question, and more a profitability one.
4852 return SDValue();
4853
4854 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4855 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4856 }
4857
4858 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4859 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4860 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4861 assert(M1VT == getLMUL1VT(M1VT));
4862 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4863 SDValue Vec = DAG.getUNDEF(ContainerVT);
4864 // The following semantically builds up a fixed length concat_vector
4865 // of the component shuffle_vectors. We eagerly lower to scalable here
4866 // to avoid DAG combining it back to a large shuffle_vector again.
4867 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4868 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4869 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4870 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4871 if (SrcVecIdx == -1)
4872 continue;
4873 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4874 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4875 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4876 DAG.getVectorIdxConstant(ExtractIdx, DL));
4877 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4878 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4879 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4880 unsigned InsertIdx = DstVecIdx * NumOpElts;
4881 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4882 DAG.getVectorIdxConstant(InsertIdx, DL));
4883 }
4884 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4885}
4886
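// Illustrative sketch (hypothetical helper, not part of the upstream file):
// the index bookkeeping used by lowerShuffleViaVRegSplitting above. With an
// exact VLEN, a flat shuffle index is split into the m1 register it lives in
// and its position inside that register.
static std::pair<int, int> splitShuffleIdxSketch(int Idx,
                                                 unsigned ElemsPerVReg) {
  // {source register within the group, element index inside that register}
  return {Idx / (int)ElemsPerVReg, Idx % (int)ElemsPerVReg};
}
// E.g. with VLEN == 128 and v16i32 (ElemsPerVReg == 4), mask index 9 maps to
// register 2, element 1 of the four-register group.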
4887 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4888                                    const RISCVSubtarget &Subtarget) {
4889 SDValue V1 = Op.getOperand(0);
4890 SDValue V2 = Op.getOperand(1);
4891 SDLoc DL(Op);
4892 MVT XLenVT = Subtarget.getXLenVT();
4893 MVT VT = Op.getSimpleValueType();
4894 unsigned NumElts = VT.getVectorNumElements();
4895 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4896
4897 if (VT.getVectorElementType() == MVT::i1) {
4898 // Lower to a vror.vi of a larger element type if possible before we promote
4899 // i1s to i8s.
4900 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4901 return V;
4902 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4903 return V;
4904
4905 // Promote i1 shuffle to i8 shuffle.
4906 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4907 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4908 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4909 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4910 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4911 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4912 ISD::SETNE);
4913 }
4914
4915 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4916
4917 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4918
4919 if (SVN->isSplat()) {
4920 const int Lane = SVN->getSplatIndex();
4921 if (Lane >= 0) {
4922 MVT SVT = VT.getVectorElementType();
4923
4924 // Turn splatted vector load into a strided load with an X0 stride.
4925 SDValue V = V1;
4926 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4927 // with undef.
4928 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4929 int Offset = Lane;
4930 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4931 int OpElements =
4932 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4933 V = V.getOperand(Offset / OpElements);
4934 Offset %= OpElements;
4935 }
4936
4937 // We need to ensure the load isn't atomic or volatile.
4938 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4939 auto *Ld = cast<LoadSDNode>(V);
4940 Offset *= SVT.getStoreSize();
4941 SDValue NewAddr = DAG.getMemBasePlusOffset(
4942 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4943
4944 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4945 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4946 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4947 SDValue IntID =
4948 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4949 SDValue Ops[] = {Ld->getChain(),
4950 IntID,
4951 DAG.getUNDEF(ContainerVT),
4952 NewAddr,
4953 DAG.getRegister(RISCV::X0, XLenVT),
4954 VL};
4955 SDValue NewLoad = DAG.getMemIntrinsicNode(
4956 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4957             DAG.getMachineFunction().getMachineMemOperand(
4958                 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4959 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4960 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4961 }
4962
4963 // Otherwise use a scalar load and splat. This will give the best
4964 // opportunity to fold a splat into the operation. ISel can turn it into
4965 // the x0 strided load if we aren't able to fold away the select.
4966 if (SVT.isFloatingPoint())
4967 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4968 Ld->getPointerInfo().getWithOffset(Offset),
4969 Ld->getOriginalAlign(),
4970 Ld->getMemOperand()->getFlags());
4971 else
4972 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4973 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4974 Ld->getOriginalAlign(),
4975 Ld->getMemOperand()->getFlags());
4976         DAG.makeEquivalentMemoryOrdering(Ld, V);
4977
4978 unsigned Opc =
4979             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4980         SDValue Splat =
4981 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4982 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4983 }
4984
4985 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4986 assert(Lane < (int)NumElts && "Unexpected lane!");
4987 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4988 V1, DAG.getConstant(Lane, DL, XLenVT),
4989 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4990 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4991 }
4992 }
4993
4994 // For exact VLEN m2 or greater, try to split to m1 operations if we
4995 // can split cleanly.
4996 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4997 return V;
4998
4999 ArrayRef<int> Mask = SVN->getMask();
5000
5001 if (SDValue V =
5002 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5003 return V;
5004
5005 if (SDValue V =
5006 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5007 return V;
5008
5009 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5010 // available.
5011 if (Subtarget.hasStdExtZvkb())
5012 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5013 return V;
5014
5015 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5016 // be undef which can be handled with a single SLIDEDOWN/UP.
5017 int LoSrc, HiSrc;
5018 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5019 if (Rotation > 0) {
5020 SDValue LoV, HiV;
5021 if (LoSrc >= 0) {
5022 LoV = LoSrc == 0 ? V1 : V2;
5023 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5024 }
5025 if (HiSrc >= 0) {
5026 HiV = HiSrc == 0 ? V1 : V2;
5027 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5028 }
5029
5030 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5031 // to slide LoV up by (NumElts - Rotation).
5032 unsigned InvRotate = NumElts - Rotation;
5033
5034 SDValue Res = DAG.getUNDEF(ContainerVT);
5035 if (HiV) {
5036       // Even though we could use a smaller VL, don't, in order to avoid a vsetivli
5037 // toggle.
5038 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5039 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5040 }
5041 if (LoV)
5042 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5043 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5044                         RISCVII::TAIL_AGNOSTIC);
5045
5046 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5047 }
5048
5049 // If this is a deinterleave and we can widen the vector, then we can use
5050 // vnsrl to deinterleave.
5051 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5052 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5053 Subtarget, DAG);
5054 }
5055
5056 if (SDValue V =
5057 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5058 return V;
5059
5060 // Detect an interleave shuffle and lower to
5061 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5062 int EvenSrc, OddSrc;
5063 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5064 // Extract the halves of the vectors.
5065 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5066
5067 int Size = Mask.size();
5068 SDValue EvenV, OddV;
5069 assert(EvenSrc >= 0 && "Undef source?");
5070 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5071 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5072 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5073
5074 assert(OddSrc >= 0 && "Undef source?");
5075 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5076 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5077 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5078
5079 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5080 }
5081
5082
5083 // Handle any remaining single source shuffles
5084 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5085 if (V2.isUndef()) {
5086 // We might be able to express the shuffle as a bitrotate. But even if we
5087 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5088 // shifts and a vor will have a higher throughput than a vrgather.
5089 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5090 return V;
5091
5092 if (VT.getScalarSizeInBits() == 8 &&
5093 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5094 // On such a vector we're unable to use i8 as the index type.
5095 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5096 // may involve vector splitting if we're already at LMUL=8, or our
5097 // user-supplied maximum fixed-length LMUL.
5098 return SDValue();
5099 }
5100
5101 // Base case for the two operand recursion below - handle the worst case
5102 // single source shuffle.
5103 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5104 MVT IndexVT = VT.changeTypeToInteger();
5105 // Since we can't introduce illegal index types at this stage, use i16 and
5106 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5107 // than XLenVT.
5108 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5109 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5110 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5111 }
5112
5113 // If the mask allows, we can do all the index computation in 16 bits. This
5114 // requires less work and less register pressure at high LMUL, and creates
5115 // smaller constants which may be cheaper to materialize.
5116 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5117 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5118 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5119 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5120 }
5121
5122 MVT IndexContainerVT =
5123 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5124
5125 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5126 SmallVector<SDValue> GatherIndicesLHS;
5127 for (int MaskIndex : Mask) {
5128 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5129 GatherIndicesLHS.push_back(IsLHSIndex
5130 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5131 : DAG.getUNDEF(XLenVT));
5132 }
5133 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5134 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5135 Subtarget);
5136 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5137 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5138 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5139 }
5140
5141 // By default we preserve the original operand order, and use a mask to
5142 // select LHS as true and RHS as false. However, since RVV vector selects may
5143 // feature splats but only on the LHS, we may choose to invert our mask and
5144 // instead select between RHS and LHS.
5145 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5146
5147 // Detect shuffles which can be re-expressed as vector selects; these are
5148 // shuffles in which each element in the destination is taken from an element
5149   // at the corresponding index in either source vector.
5150 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5151 int MaskIndex = MaskIdx.value();
5152 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5153 });
5154 if (IsSelect) {
5155 // Now construct the mask that will be used by the vselect operation.
5156 SmallVector<SDValue> MaskVals;
5157 for (int MaskIndex : Mask) {
5158 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5159 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5160 }
5161
5162 if (SwapOps)
5163 std::swap(V1, V2);
5164
5165 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5166 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5167 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5168 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5169 }
5170
5171 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5172 // merged with a second vrgather.
5173 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5174 SmallVector<SDValue> MaskVals;
5175
5176 // Now construct the mask that will be used by the blended vrgather operation.
5177 // Construct the appropriate indices into each vector.
5178 for (int MaskIndex : Mask) {
5179 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5180 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5181 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5182 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5183 ? MaskIndex : -1);
5184 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5185 }
5186
5187 if (SwapOps) {
5188 std::swap(V1, V2);
5189 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5190 }
5191
5192 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5193 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5194 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5195
5196 // Recursively invoke lowering for each operand if we had two
5197 // independent single source shuffles, and then combine the result via a
5198 // vselect. Note that the vselect will likely be folded back into the
5199 // second permute (vrgather, or other) by the post-isel combine.
5200 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5201 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5202 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5203}
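// Illustrative sketch (not part of this file): the mask decomposition used by
// the two-source fallback above, written against plain std::vector so it can
// be tested in isolation. All names here are hypothetical; -1 marks an undef
// lane and NumElts is the element count of one source vector.
#include <vector>
struct ShuffleSplit {
  std::vector<int> LHSMask, RHSMask; // single-source gather masks
  std::vector<bool> SelectLHS;       // true -> take the lane from the LHS gather
};
static ShuffleSplit splitTwoSourceMask(const std::vector<int> &Mask,
                                       int NumElts) {
  ShuffleSplit S;
  for (int Idx : Mask) {
    bool FromLHS = Idx < NumElts;    // undef (-1) lanes are grouped with the LHS
    S.SelectLHS.push_back(FromLHS);
    S.LHSMask.push_back((FromLHS && Idx >= 0) ? Idx : -1);
    S.RHSMask.push_back(FromLHS ? -1 : Idx - NumElts);
  }
  return S;
}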
5204
5205 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5206 // Support splats for any type. These should type legalize well.
5207 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5208 return true;
5209
5210 // Only support legal VTs for other shuffles for now.
5211 if (!isTypeLegal(VT))
5212 return false;
5213
5214 MVT SVT = VT.getSimpleVT();
5215
5216 // Not for i1 vectors.
5217 if (SVT.getScalarType() == MVT::i1)
5218 return false;
5219
5220 int Dummy1, Dummy2;
5221 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5222 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5223}
5224
5225// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5226// the exponent.
5227SDValue
5228RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5229 SelectionDAG &DAG) const {
5230 MVT VT = Op.getSimpleValueType();
5231 unsigned EltSize = VT.getScalarSizeInBits();
5232 SDValue Src = Op.getOperand(0);
5233 SDLoc DL(Op);
5234 MVT ContainerVT = VT;
5235
5236 SDValue Mask, VL;
5237 if (Op->isVPOpcode()) {
5238 Mask = Op.getOperand(1);
5239 if (VT.isFixedLengthVector())
5240 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5241 Subtarget);
5242 VL = Op.getOperand(2);
5243 }
5244
5245 // We choose an FP type that can represent the value exactly when possible.
5246 // Otherwise, we use a round-towards-zero conversion so the exponent of the result is correct.
5247 // TODO: Use f16 for i8 when possible?
5248 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5249 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5250 FloatEltVT = MVT::f32;
5251 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5252
5253 // Legal types should have been checked in the RISCVTargetLowering
5254 // constructor.
5255 // TODO: Splitting may make sense in some cases.
5256 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5257 "Expected legal float type!");
5258
5259 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5260 // The trailing zero count is equal to log2 of this single bit value.
5261 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5262 SDValue Neg = DAG.getNegative(Src, DL, VT);
5263 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5264 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5265 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5266 Src, Mask, VL);
5267 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5268 }
5269
5270 // We have a legal FP type, convert to it.
5271 SDValue FloatVal;
5272 if (FloatVT.bitsGT(VT)) {
5273 if (Op->isVPOpcode())
5274 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5275 else
5276 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5277 } else {
5278 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5279 if (VT.isFixedLengthVector()) {
5280 ContainerVT = getContainerForFixedLengthVector(VT);
5281 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5282 }
5283 if (!Op->isVPOpcode())
5284 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5285 SDValue RTZRM =
5286 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5287 MVT ContainerFloatVT =
5288 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5289 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5290 Src, Mask, RTZRM, VL);
5291 if (VT.isFixedLengthVector())
5292 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5293 }
5294 // Bitcast to integer and shift the exponent to the LSB.
5295 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5296 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5297 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5298
5299 SDValue Exp;
5300 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5301 if (Op->isVPOpcode()) {
5302 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5303 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5304 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5305 } else {
5306 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5307 DAG.getConstant(ShiftAmt, DL, IntVT));
5308 if (IntVT.bitsLT(VT))
5309 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5310 else if (IntVT.bitsGT(VT))
5311 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5312 }
5313
5314 // The exponent contains log2 of the value in biased form.
5315 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5316 // For trailing zeros, we just need to subtract the bias.
5317 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5318 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5319 DAG.getConstant(ExponentBias, DL, VT));
5320 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5321 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5322 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5323
5324 // For leading zeros, we need to remove the bias and convert from log2 to
5325 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5326 unsigned Adjust = ExponentBias + (EltSize - 1);
5327 SDValue Res;
5328 if (Op->isVPOpcode())
5329 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5330 Mask, VL);
5331 else
5332 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5333
5334 // For a zero input, the result above equals Adjust, which is greater than
5335 // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5336 if (Op.getOpcode() == ISD::CTLZ)
5337 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5338 else if (Op.getOpcode() == ISD::VP_CTLZ)
5339 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5340 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5341 return Res;
5342}
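// Illustrative scalar sketch (not part of this file): the same
// exponent-extraction trick on a plain 32-bit integer, assuming IEEE-754
// binary32 (23 mantissa bits, bias 127) and X != 0, mirroring the
// CTTZ_ZERO_UNDEF path above. Helper name is hypothetical.
#include <cstdint>
#include <cstring>
static unsigned cttzViaFloatExponent(uint32_t X) {
  uint32_t LowBit = X & (~X + 1);       // isolate the lowest set bit; a power of
                                        // two is always exactly representable
  float F = static_cast<float>(LowBit);
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast to integer
  return (Bits >> 23) - 127;            // unbiased exponent == trailing zero count
}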
5343
5344// While RVV has alignment restrictions, we should always be able to load as a
5345// legal equivalently-sized byte-typed vector instead. This method is
5346 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5347// the load is already correctly-aligned, it returns SDValue().
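// For example (illustrative): a misaligned nxv2i64 load can be issued as an
// nxv16i8 load of the same number of bytes and the result bitcast back.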
5348SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5349 SelectionDAG &DAG) const {
5350 auto *Load = cast<LoadSDNode>(Op);
5351 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5352
5353 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5354 Load->getMemoryVT(),
5355 *Load->getMemOperand()))
5356 return SDValue();
5357
5358 SDLoc DL(Op);
5359 MVT VT = Op.getSimpleValueType();
5360 unsigned EltSizeBits = VT.getScalarSizeInBits();
5361 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5362 "Unexpected unaligned RVV load type");
5363 MVT NewVT =
5364 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5365 assert(NewVT.isValid() &&
5366 "Expecting equally-sized RVV vector types to be legal");
5367 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5368 Load->getPointerInfo(), Load->getOriginalAlign(),
5369 Load->getMemOperand()->getFlags());
5370 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5371}
5372
5373// While RVV has alignment restrictions, we should always be able to store as a
5374// legal equivalently-sized byte-typed vector instead. This method is
5375 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5376// returns SDValue() if the store is already correctly aligned.
5377SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5378 SelectionDAG &DAG) const {
5379 auto *Store = cast<StoreSDNode>(Op);
5380 assert(Store && Store->getValue().getValueType().isVector() &&
5381 "Expected vector store");
5382
5383 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5384 Store->getMemoryVT(),
5385 *Store->getMemOperand()))
5386 return SDValue();
5387
5388 SDLoc DL(Op);
5389 SDValue StoredVal = Store->getValue();
5390 MVT VT = StoredVal.getSimpleValueType();
5391 unsigned EltSizeBits = VT.getScalarSizeInBits();
5392 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5393 "Unexpected unaligned RVV store type");
5394 MVT NewVT =
5395 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5396 assert(NewVT.isValid() &&
5397 "Expecting equally-sized RVV vector types to be legal");
5398 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5399 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5400 Store->getPointerInfo(), Store->getOriginalAlign(),
5401 Store->getMemOperand()->getFlags());
5402}
5403
5404 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5405 const RISCVSubtarget &Subtarget) {
5406 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5407
5408 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5409
5410 // All simm32 constants should be handled by isel.
5411 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5412 // this check redundant, but small immediates are common so this check
5413 // should have better compile time.
5414 if (isInt<32>(Imm))
5415 return Op;
5416
5417 // We only need to cost the immediate, if constant pool lowering is enabled.
5418 if (!Subtarget.useConstantPoolForLargeInts())
5419 return Op;
5420
5421 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5422 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5423 return Op;
5424
5425 // Optimizations below are disabled for opt size. If we're optimizing for
5426 // size, use a constant pool.
5427 if (DAG.shouldOptForSize())
5428 return SDValue();
5429
5430 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
5431 // that if it will avoid a constant pool.
5432 // It will require an extra temporary register though.
5433 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5434 // low and high 32 bits are the same and bit 31 and 63 are set.
5435 unsigned ShiftAmt, AddOpc;
5436 RISCVMatInt::InstSeq SeqLo =
5437 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5438 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5439 return Op;
5440
5441 return SDValue();
5442}
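// Illustrative sketch (not part of this file): the shape of constant the
// (ADD_UW X, (SLLI X, 32)) special case above is aiming for - both 32-bit
// halves identical with bit 31 (and therefore bit 63) set. Helper name is
// hypothetical.
#include <cstdint>
static bool isRepeatedHalvesWithBit31Set(int64_t Imm) {
  uint64_t U = static_cast<uint64_t>(Imm);
  uint32_t Lo = static_cast<uint32_t>(U);
  uint32_t Hi = static_cast<uint32_t>(U >> 32);
  return Lo == Hi && (Lo & 0x80000000u) != 0;
}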
5443
5444 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5445 const RISCVSubtarget &Subtarget) {
5446 SDLoc dl(Op);
5447 AtomicOrdering FenceOrdering =
5448 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5449 SyncScope::ID FenceSSID =
5450 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5451
5452 if (Subtarget.hasStdExtZtso()) {
5453 // The only fence that needs an instruction is a sequentially-consistent
5454 // cross-thread fence.
5455 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5456 FenceSSID == SyncScope::System)
5457 return Op;
5458
5459 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5460 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5461 }
5462
5463 // singlethread fences only synchronize with signal handlers on the same
5464 // thread and thus only need to preserve instruction order, not actually
5465 // enforce memory ordering.
5466 if (FenceSSID == SyncScope::SingleThread)
5467 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5468 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5469
5470 return Op;
5471}
5472
5473 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5474 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5475 "Unexpected custom legalisation");
5476
5477 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
5478 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5479 SDLoc DL(Op);
5480 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5481 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5482 SDValue Result =
5483 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5484
5485 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5486 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5487 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5488 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5489 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5490 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5491 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5492}
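// Illustrative scalar sketch (not part of this file): the widen-then-clamp
// strategy used above, for a plain i32 saturating add. Helper name is
// hypothetical.
#include <algorithm>
#include <cstdint>
static int32_t saddsat32(int32_t A, int32_t B) {
  int64_t Wide = static_cast<int64_t>(A) + B; // cannot overflow in 64 bits
  Wide = std::min<int64_t>(Wide, INT32_MAX);  // smin with INT32_MAX
  Wide = std::max<int64_t>(Wide, INT32_MIN);  // smax with INT32_MIN
  return static_cast<int32_t>(Wide);          // truncate back to i32
}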
5493
5494 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5495 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5496 "Unexpected custom legalisation");
5497
5498 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5499 // sign extend allows overflow of the lower 32 bits to be detected on
5500 // the promoted size.
5501 SDLoc DL(Op);
5502 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5503 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5504 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5505 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5506}
5507
5508// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5509 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5510 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5511 "Unexpected custom legalisation");
5512 if (isa<ConstantSDNode>(Op.getOperand(1)))
5513 return SDValue();
5514
5515 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5516 SDLoc DL(Op);
5517 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5518 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5519 SDValue WideOp =
5520 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5521 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5522 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5523 DAG.getValueType(MVT::i32));
5524 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5525 ISD::SETNE);
5526 return DAG.getMergeValues({Res, Ovf}, DL);
5527}
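// Illustrative scalar sketch (not part of this file): the overflow test used
// above - do the arithmetic in 64 bits, then compare the wide result against
// the sign-extension of its low 32 bits. Helper name is hypothetical.
#include <cstdint>
static bool saddOverflow32(int32_t A, int32_t B, int32_t &Result) {
  int64_t Wide = static_cast<int64_t>(A) + B;
  Result = static_cast<int32_t>(Wide);         // TRUNCATE
  return Wide != static_cast<int64_t>(Result); // SETNE against sext(trunc)
}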
5528
5529// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5530 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5531 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5532 "Unexpected custom legalisation");
5533 SDLoc DL(Op);
5534 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5535 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5536 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5537 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5538 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5539 DAG.getValueType(MVT::i32));
5540 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5541 ISD::SETNE);
5542 return DAG.getMergeValues({Res, Ovf}, DL);
5543}
5544
5545SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5546 SelectionDAG &DAG) const {
5547 SDLoc DL(Op);
5548 MVT VT = Op.getSimpleValueType();
5549 MVT XLenVT = Subtarget.getXLenVT();
5550 unsigned Check = Op.getConstantOperandVal(1);
5551 unsigned TDCMask = 0;
5552 if (Check & fcSNan)
5553 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5554 if (Check & fcQNan)
5555 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5556 if (Check & fcPosInf)
5557 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5558 if (Check & fcNegInf)
5559 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5560 if (Check & fcPosNormal)
5561 TDCMask |= RISCV::FPMASK_Positive_Normal;
5562 if (Check & fcNegNormal)
5563 TDCMask |= RISCV::FPMASK_Negative_Normal;
5564 if (Check & fcPosSubnormal)
5565 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5566 if (Check & fcNegSubnormal)
5567 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5568 if (Check & fcPosZero)
5569 TDCMask |= RISCV::FPMASK_Positive_Zero;
5570 if (Check & fcNegZero)
5571 TDCMask |= RISCV::FPMASK_Negative_Zero;
5572
5573 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5574
5575 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5576
5577 if (VT.isVector()) {
5578 SDValue Op0 = Op.getOperand(0);
5579 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5580
5581 if (VT.isScalableVector()) {
5582 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5583 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5584 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5585 Mask = Op.getOperand(2);
5586 VL = Op.getOperand(3);
5587 }
5588 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5589 VL, Op->getFlags());
5590 if (IsOneBitMask)
5591 return DAG.getSetCC(DL, VT, FPCLASS,
5592 DAG.getConstant(TDCMask, DL, DstVT),
5593 ISD::SETEQ);
5594 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5595 DAG.getConstant(TDCMask, DL, DstVT));
5596 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5597 ISD::SETNE);
5598 }
5599
5600 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5601 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5602 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5603 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5604 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5605 Mask = Op.getOperand(2);
5606 MVT MaskContainerVT =
5607 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5608 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5609 VL = Op.getOperand(3);
5610 }
5611 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5612
5613 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5614 Mask, VL, Op->getFlags());
5615
5616 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5617 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5618 if (IsOneBitMask) {
5619 SDValue VMSEQ =
5620 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5621 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5622 DAG.getUNDEF(ContainerVT), Mask, VL});
5623 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5624 }
5625 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5626 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5627
5628 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5629 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5630 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5631
5632 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5633 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5634 DAG.getUNDEF(ContainerVT), Mask, VL});
5635 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5636 }
5637
5638 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5639 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5640 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5641 ISD::SETNE);
5642 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5643}
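// Illustrative sketch (not part of this file): how the fclass result is tested
// against the accumulated TDC mask above - a single-class query can use an
// equality compare, everything else needs an and plus a compare-with-zero.
// Helper name is hypothetical; FClassBits is assumed to have exactly one bit
// set, as fclass.{s,d,h} guarantees.
static bool fclassMatches(unsigned FClassBits, unsigned TDCMask) {
  bool IsOneBitMask = TDCMask != 0 && (TDCMask & (TDCMask - 1)) == 0;
  if (IsOneBitMask)
    return FClassBits == TDCMask;     // SETEQ form
  return (FClassBits & TDCMask) != 0; // AND + SETNE form
}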
5644
5645// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5646// operations propagate nans.
5647 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5648 const RISCVSubtarget &Subtarget) {
5649 SDLoc DL(Op);
5650 MVT VT = Op.getSimpleValueType();
5651
5652 SDValue X = Op.getOperand(0);
5653 SDValue Y = Op.getOperand(1);
5654
5655 if (!VT.isVector()) {
5656 MVT XLenVT = Subtarget.getXLenVT();
5657
5658 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5659 // ensures that when one input is a nan, the other will also be a nan
5660 // allowing the nan to propagate. If both inputs are nan, this will swap the
5661 // inputs which is harmless.
5662
5663 SDValue NewY = Y;
5664 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5665 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5666 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5667 }
5668
5669 SDValue NewX = X;
5670 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5671 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5672 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5673 }
5674
5675 unsigned Opc =
5676 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5677 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5678 }
5679
5680 // Record whether the inputs are known never-NaN before converting any fixed-length vectors to scalable ones.
5681 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5682 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5683
5684 MVT ContainerVT = VT;
5685 if (VT.isFixedLengthVector()) {
5686 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5687 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5688 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5689 }
5690
5691 SDValue Mask, VL;
5692 if (Op->isVPOpcode()) {
5693 Mask = Op.getOperand(2);
5694 if (VT.isFixedLengthVector())
5695 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5696 Subtarget);
5697 VL = Op.getOperand(3);
5698 } else {
5699 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5700 }
5701
5702 SDValue NewY = Y;
5703 if (!XIsNeverNan) {
5704 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5705 {X, X, DAG.getCondCode(ISD::SETOEQ),
5706 DAG.getUNDEF(ContainerVT), Mask, VL});
5707 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5708 DAG.getUNDEF(ContainerVT), VL);
5709 }
5710
5711 SDValue NewX = X;
5712 if (!YIsNeverNan) {
5713 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5714 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5715 DAG.getUNDEF(ContainerVT), Mask, VL});
5716 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5717 DAG.getUNDEF(ContainerVT), VL);
5718 }
5719
5720 unsigned Opc =
5721 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5722 ? RISCVISD::VFMAX_VL
5723 : RISCVISD::VFMIN_VL;
5724 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5725 DAG.getUNDEF(ContainerVT), Mask, VL);
5726 if (VT.isFixedLengthVector())
5727 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5728 return Res;
5729}
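// Illustrative scalar sketch (not part of this file): the operand-swapping
// trick above applied to a plain float maximum, assuming <cmath> fmax which,
// like the RISC-V fmax instruction, would otherwise ignore a quiet NaN.
#include <cmath>
static float fmaximumScalar(float X, float Y) {
  float NewY = (X == X) ? Y : X; // if X is NaN, feed X into both operands
  float NewX = (Y == Y) ? X : Y; // if Y is NaN, feed Y into both operands
  return std::fmax(NewX, NewY);  // NaNs now propagate, as fmaximum requires
}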
5730
5731/// Get a RISC-V target specified VL op for a given SDNode.
5732static unsigned getRISCVVLOp(SDValue Op) {
5733#define OP_CASE(NODE) \
5734 case ISD::NODE: \
5735 return RISCVISD::NODE##_VL;
5736#define VP_CASE(NODE) \
5737 case ISD::VP_##NODE: \
5738 return RISCVISD::NODE##_VL;
5739 // clang-format off
5740 switch (Op.getOpcode()) {
5741 default:
5742 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5743 OP_CASE(ADD)
5744 OP_CASE(SUB)
5745 OP_CASE(MUL)
5746 OP_CASE(MULHS)
5747 OP_CASE(MULHU)
5748 OP_CASE(SDIV)
5749 OP_CASE(SREM)
5750 OP_CASE(UDIV)
5751 OP_CASE(UREM)
5752 OP_CASE(SHL)
5753 OP_CASE(SRA)
5754 OP_CASE(SRL)
5755 OP_CASE(ROTL)
5756 OP_CASE(ROTR)
5757 OP_CASE(BSWAP)
5758 OP_CASE(CTTZ)
5759 OP_CASE(CTLZ)
5760 OP_CASE(CTPOP)
5761 OP_CASE(BITREVERSE)
5762 OP_CASE(SADDSAT)
5763 OP_CASE(UADDSAT)
5764 OP_CASE(SSUBSAT)
5765 OP_CASE(USUBSAT)
5766 OP_CASE(AVGFLOORU)
5767 OP_CASE(AVGCEILU)
5768 OP_CASE(FADD)
5769 OP_CASE(FSUB)
5770 OP_CASE(FMUL)
5771 OP_CASE(FDIV)
5772 OP_CASE(FNEG)
5773 OP_CASE(FABS)
5774 OP_CASE(FSQRT)
5775 OP_CASE(SMIN)
5776 OP_CASE(SMAX)
5777 OP_CASE(UMIN)
5778 OP_CASE(UMAX)
5779 OP_CASE(STRICT_FADD)
5780 OP_CASE(STRICT_FSUB)
5781 OP_CASE(STRICT_FMUL)
5782 OP_CASE(STRICT_FDIV)
5783 OP_CASE(STRICT_FSQRT)
5784 VP_CASE(ADD) // VP_ADD
5785 VP_CASE(SUB) // VP_SUB
5786 VP_CASE(MUL) // VP_MUL
5787 VP_CASE(SDIV) // VP_SDIV
5788 VP_CASE(SREM) // VP_SREM
5789 VP_CASE(UDIV) // VP_UDIV
5790 VP_CASE(UREM) // VP_UREM
5791 VP_CASE(SHL) // VP_SHL
5792 VP_CASE(FADD) // VP_FADD
5793 VP_CASE(FSUB) // VP_FSUB
5794 VP_CASE(FMUL) // VP_FMUL
5795 VP_CASE(FDIV) // VP_FDIV
5796 VP_CASE(FNEG) // VP_FNEG
5797 VP_CASE(FABS) // VP_FABS
5798 VP_CASE(SMIN) // VP_SMIN
5799 VP_CASE(SMAX) // VP_SMAX
5800 VP_CASE(UMIN) // VP_UMIN
5801 VP_CASE(UMAX) // VP_UMAX
5802 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5803 VP_CASE(SETCC) // VP_SETCC
5804 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5805 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5806 VP_CASE(BITREVERSE) // VP_BITREVERSE
5807 VP_CASE(SADDSAT) // VP_SADDSAT
5808 VP_CASE(UADDSAT) // VP_UADDSAT
5809 VP_CASE(SSUBSAT) // VP_SSUBSAT
5810 VP_CASE(USUBSAT) // VP_USUBSAT
5811 VP_CASE(BSWAP) // VP_BSWAP
5812 VP_CASE(CTLZ) // VP_CTLZ
5813 VP_CASE(CTTZ) // VP_CTTZ
5814 VP_CASE(CTPOP) // VP_CTPOP
5815 case ISD::CTLZ_ZERO_UNDEF:
5816 case ISD::VP_CTLZ_ZERO_UNDEF:
5817 return RISCVISD::CTLZ_VL;
5818 case ISD::CTTZ_ZERO_UNDEF:
5819 case ISD::VP_CTTZ_ZERO_UNDEF:
5820 return RISCVISD::CTTZ_VL;
5821 case ISD::FMA:
5822 case ISD::VP_FMA:
5823 return RISCVISD::VFMADD_VL;
5824 case ISD::STRICT_FMA:
5825 return RISCVISD::STRICT_VFMADD_VL;
5826 case ISD::AND:
5827 case ISD::VP_AND:
5828 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5829 return RISCVISD::VMAND_VL;
5830 return RISCVISD::AND_VL;
5831 case ISD::OR:
5832 case ISD::VP_OR:
5833 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5834 return RISCVISD::VMOR_VL;
5835 return RISCVISD::OR_VL;
5836 case ISD::XOR:
5837 case ISD::VP_XOR:
5838 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5839 return RISCVISD::VMXOR_VL;
5840 return RISCVISD::XOR_VL;
5841 case ISD::VP_SELECT:
5842 case ISD::VP_MERGE:
5843 return RISCVISD::VMERGE_VL;
5844 case ISD::VP_ASHR:
5845 return RISCVISD::SRA_VL;
5846 case ISD::VP_LSHR:
5847 return RISCVISD::SRL_VL;
5848 case ISD::VP_SQRT:
5849 return RISCVISD::FSQRT_VL;
5850 case ISD::VP_SIGN_EXTEND:
5851 return RISCVISD::VSEXT_VL;
5852 case ISD::VP_ZERO_EXTEND:
5853 return RISCVISD::VZEXT_VL;
5854 case ISD::VP_FP_TO_SINT:
5855 return RISCVISD::VFCVT_RTZ_X_F_VL;
5856 case ISD::VP_FP_TO_UINT:
5857 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5858 case ISD::FMINNUM:
5859 case ISD::VP_FMINNUM:
5860 return RISCVISD::VFMIN_VL;
5861 case ISD::FMAXNUM:
5862 case ISD::VP_FMAXNUM:
5863 return RISCVISD::VFMAX_VL;
5864 case ISD::LRINT:
5865 case ISD::VP_LRINT:
5866 case ISD::LLRINT:
5867 case ISD::VP_LLRINT:
5868 return RISCVISD::VFCVT_X_F_VL;
5869 }
5870 // clang-format on
5871#undef OP_CASE
5872#undef VP_CASE
5873}
5874
5875/// Return true if a RISC-V target specified op has a merge operand.
5876static bool hasMergeOp(unsigned Opcode) {
5877 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5878 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5879 "not a RISC-V target specific op");
5880 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5881 126 &&
5882 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5883 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5884 21 &&
5885 "adding target specific op should update this function");
5886 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5887 return true;
5888 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5889 return true;
5890 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5891 return true;
5892 if (Opcode == RISCVISD::SETCC_VL)
5893 return true;
5894 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5895 return true;
5896 if (Opcode == RISCVISD::VMERGE_VL)
5897 return true;
5898 return false;
5899}
5900
5901/// Return true if a RISC-V target specified op has a mask operand.
5902static bool hasMaskOp(unsigned Opcode) {
5903 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5904 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5905 "not a RISC-V target specific op");
5906 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5907 126 &&
5908 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5909 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5910 21 &&
5911 "adding target specific op should update this function");
5912 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5913 return true;
5914 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5915 return true;
5916 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5917 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5918 return true;
5919 return false;
5920}
5921
5923 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5924 SDLoc DL(Op);
5925
5928
5929 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5930 if (!Op.getOperand(j).getValueType().isVector()) {
5931 LoOperands[j] = Op.getOperand(j);
5932 HiOperands[j] = Op.getOperand(j);
5933 continue;
5934 }
5935 std::tie(LoOperands[j], HiOperands[j]) =
5936 DAG.SplitVector(Op.getOperand(j), DL);
5937 }
5938
5939 SDValue LoRes =
5940 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5941 SDValue HiRes =
5942 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5943
5944 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5945}
5946
5948 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5949 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5950 SDLoc DL(Op);
5951
5954
5955 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5956 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5957 std::tie(LoOperands[j], HiOperands[j]) =
5958 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5959 continue;
5960 }
5961 if (!Op.getOperand(j).getValueType().isVector()) {
5962 LoOperands[j] = Op.getOperand(j);
5963 HiOperands[j] = Op.getOperand(j);
5964 continue;
5965 }
5966 std::tie(LoOperands[j], HiOperands[j]) =
5967 DAG.SplitVector(Op.getOperand(j), DL);
5968 }
5969
5970 SDValue LoRes =
5971 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5972 SDValue HiRes =
5973 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5974
5975 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5976}
5977
5979 SDLoc DL(Op);
5980
5981 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5982 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5983 auto [EVLLo, EVLHi] =
5984 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5985
5986 SDValue ResLo =
5987 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5988 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5989 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5990 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5991}
5992
5994
5995 assert(Op->isStrictFPOpcode());
5996
5997 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5998
5999 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6000 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6001
6002 SDLoc DL(Op);
6003
6006
6007 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6008 if (!Op.getOperand(j).getValueType().isVector()) {
6009 LoOperands[j] = Op.getOperand(j);
6010 HiOperands[j] = Op.getOperand(j);
6011 continue;
6012 }
6013 std::tie(LoOperands[j], HiOperands[j]) =
6014 DAG.SplitVector(Op.getOperand(j), DL);
6015 }
6016
6017 SDValue LoRes =
6018 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6019 HiOperands[0] = LoRes.getValue(1);
6020 SDValue HiRes =
6021 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6022
6023 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6024 LoRes.getValue(0), HiRes.getValue(0));
6025 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6026}
6027
6028 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6029 SelectionDAG &DAG) const {
6030 switch (Op.getOpcode()) {
6031 default:
6032 report_fatal_error("unimplemented operand");
6033 case ISD::ATOMIC_FENCE:
6034 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6035 case ISD::GlobalAddress:
6036 return lowerGlobalAddress(Op, DAG);
6037 case ISD::BlockAddress:
6038 return lowerBlockAddress(Op, DAG);
6039 case ISD::ConstantPool:
6040 return lowerConstantPool(Op, DAG);
6041 case ISD::JumpTable:
6042 return lowerJumpTable(Op, DAG);
6043 case ISD::GlobalTLSAddress:
6044 return lowerGlobalTLSAddress(Op, DAG);
6045 case ISD::Constant:
6046 return lowerConstant(Op, DAG, Subtarget);
6047 case ISD::SELECT:
6048 return lowerSELECT(Op, DAG);
6049 case ISD::BRCOND:
6050 return lowerBRCOND(Op, DAG);
6051 case ISD::VASTART:
6052 return lowerVASTART(Op, DAG);
6053 case ISD::FRAMEADDR:
6054 return lowerFRAMEADDR(Op, DAG);
6055 case ISD::RETURNADDR:
6056 return lowerRETURNADDR(Op, DAG);
6057 case ISD::SADDO:
6058 case ISD::SSUBO:
6059 return lowerSADDO_SSUBO(Op, DAG);
6060 case ISD::SMULO:
6061 return lowerSMULO(Op, DAG);
6062 case ISD::SHL_PARTS:
6063 return lowerShiftLeftParts(Op, DAG);
6064 case ISD::SRA_PARTS:
6065 return lowerShiftRightParts(Op, DAG, true);
6066 case ISD::SRL_PARTS:
6067 return lowerShiftRightParts(Op, DAG, false);
6068 case ISD::ROTL:
6069 case ISD::ROTR:
6070 if (Op.getValueType().isFixedLengthVector()) {
6071 assert(Subtarget.hasStdExtZvkb());
6072 return lowerToScalableOp(Op, DAG);
6073 }
6074 assert(Subtarget.hasVendorXTHeadBb() &&
6075 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6076 "Unexpected custom legalization");
6077 // XTHeadBb only supports rotate by constant.
6078 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6079 return SDValue();
6080 return Op;
6081 case ISD::BITCAST: {
6082 SDLoc DL(Op);
6083 EVT VT = Op.getValueType();
6084 SDValue Op0 = Op.getOperand(0);
6085 EVT Op0VT = Op0.getValueType();
6086 MVT XLenVT = Subtarget.getXLenVT();
6087 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6088 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6089 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6090 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6091 return FPConv;
6092 }
6093 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6094 Subtarget.hasStdExtZfbfmin()) {
6095 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6096 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6097 return FPConv;
6098 }
6099 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6100 Subtarget.hasStdExtFOrZfinx()) {
6101 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6102 SDValue FPConv =
6103 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6104 return FPConv;
6105 }
6106 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6107 SDValue Lo, Hi;
6108 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6109 SDValue RetReg =
6110 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6111 return RetReg;
6112 }
6113
6114 // Consider other scalar<->scalar casts as legal if the types are legal.
6115 // Otherwise expand them.
6116 if (!VT.isVector() && !Op0VT.isVector()) {
6117 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6118 return Op;
6119 return SDValue();
6120 }
6121
6122 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6123 "Unexpected types");
6124
6125 if (VT.isFixedLengthVector()) {
6126 // We can handle fixed length vector bitcasts with a simple replacement
6127 // in isel.
6128 if (Op0VT.isFixedLengthVector())
6129 return Op;
6130 // When bitcasting from scalar to fixed-length vector, insert the scalar
6131 // into a one-element vector of the result type, and perform a vector
6132 // bitcast.
6133 if (!Op0VT.isVector()) {
6134 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6135 if (!isTypeLegal(BVT))
6136 return SDValue();
6137 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6138 DAG.getUNDEF(BVT), Op0,
6139 DAG.getVectorIdxConstant(0, DL)));
6140 }
6141 return SDValue();
6142 }
6143 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6144 // thus: bitcast the vector to a one-element vector type whose element type
6145 // is the same as the result type, and extract the first element.
6146 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6147 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6148 if (!isTypeLegal(BVT))
6149 return SDValue();
6150 SDValue BVec = DAG.getBitcast(BVT, Op0);
6151 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6152 DAG.getVectorIdxConstant(0, DL));
6153 }
6154 return SDValue();
6155 }
6156 case ISD::INTRINSIC_WO_CHAIN:
6157 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6158 case ISD::INTRINSIC_W_CHAIN:
6159 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6160 case ISD::INTRINSIC_VOID:
6161 return LowerINTRINSIC_VOID(Op, DAG);
6162 case ISD::IS_FPCLASS:
6163 return LowerIS_FPCLASS(Op, DAG);
6164 case ISD::BITREVERSE: {
6165 MVT VT = Op.getSimpleValueType();
6166 if (VT.isFixedLengthVector()) {
6167 assert(Subtarget.hasStdExtZvbb());
6168 return lowerToScalableOp(Op, DAG);
6169 }
6170 SDLoc DL(Op);
6171 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6172 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6173 // Expand bitreverse to a bswap(rev8) followed by brev8.
6174 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6175 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6176 }
6177 case ISD::TRUNCATE:
6178 // Only custom-lower vector truncates
6179 if (!Op.getSimpleValueType().isVector())
6180 return Op;
6181 return lowerVectorTruncLike(Op, DAG);
6182 case ISD::ANY_EXTEND:
6183 case ISD::ZERO_EXTEND:
6184 if (Op.getOperand(0).getValueType().isVector() &&
6185 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6186 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6187 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6188 case ISD::SIGN_EXTEND:
6189 if (Op.getOperand(0).getValueType().isVector() &&
6190 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6191 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6192 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6193 case ISD::SPLAT_VECTOR_PARTS:
6194 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6195 case ISD::INSERT_VECTOR_ELT:
6196 return lowerINSERT_VECTOR_ELT(Op, DAG);
6197 case ISD::EXTRACT_VECTOR_ELT:
6198 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6199 case ISD::SCALAR_TO_VECTOR: {
6200 MVT VT = Op.getSimpleValueType();
6201 SDLoc DL(Op);
6202 SDValue Scalar = Op.getOperand(0);
6203 if (VT.getVectorElementType() == MVT::i1) {
6204 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6205 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6206 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6207 }
6208 MVT ContainerVT = VT;
6209 if (VT.isFixedLengthVector())
6210 ContainerVT = getContainerForFixedLengthVector(VT);
6211 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6212 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6213 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6214 DAG.getUNDEF(ContainerVT), Scalar, VL);
6215 if (VT.isFixedLengthVector())
6216 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6217 return V;
6218 }
6219 case ISD::VSCALE: {
6220 MVT XLenVT = Subtarget.getXLenVT();
6221 MVT VT = Op.getSimpleValueType();
6222 SDLoc DL(Op);
6223 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6224 // We define our scalable vector types for lmul=1 to use a 64 bit known
6225 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6226 // vscale as VLENB / 8.
6227 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6228 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6229 report_fatal_error("Support for VLEN==32 is incomplete.");
6230 // We assume VLENB is a multiple of 8. We manually choose the best shift
6231 // here because SimplifyDemandedBits isn't always able to simplify it.
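// Worked example (illustrative): with VLEN=128, VLENB=16 and vscale=2.
// vscale * 4 -> VLENB >> 1 = 8, vscale * 24 -> VLENB * 3 = 48, and
// vscale * 6 -> (VLENB >> 3) * 6 = 12, matching the three cases below.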
6232 uint64_t Val = Op.getConstantOperandVal(0);
6233 if (isPowerOf2_64(Val)) {
6234 uint64_t Log2 = Log2_64(Val);
6235 if (Log2 < 3)
6236 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6237 DAG.getConstant(3 - Log2, DL, VT));
6238 else if (Log2 > 3)
6239 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6240 DAG.getConstant(Log2 - 3, DL, XLenVT));
6241 } else if ((Val % 8) == 0) {
6242 // If the multiplier is a multiple of 8, scale it down to avoid needing
6243 // to shift the VLENB value.
6244 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6245 DAG.getConstant(Val / 8, DL, XLenVT));
6246 } else {
6247 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6248 DAG.getConstant(3, DL, XLenVT));
6249 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6250 DAG.getConstant(Val, DL, XLenVT));
6251 }
6252 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6253 }
6254 case ISD::FPOWI: {
6255 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6256 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6257 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6258 Op.getOperand(1).getValueType() == MVT::i32) {
6259 SDLoc DL(Op);
6260 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6261 SDValue Powi =
6262 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6263 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6264 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6265 }
6266 return SDValue();
6267 }
6268 case ISD::FMAXIMUM:
6269 case ISD::FMINIMUM:
6270 if (Op.getValueType() == MVT::nxv32f16 &&
6271 (Subtarget.hasVInstructionsF16Minimal() &&
6272 !Subtarget.hasVInstructionsF16()))
6273 return SplitVectorOp(Op, DAG);
6274 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6275 case ISD::FP_EXTEND: {
6276 SDLoc DL(Op);
6277 EVT VT = Op.getValueType();
6278 SDValue Op0 = Op.getOperand(0);
6279 EVT Op0VT = Op0.getValueType();
6280 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6281 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6282 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6283 SDValue FloatVal =
6284 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6285 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6286 }
6287
6288 if (!Op.getValueType().isVector())
6289 return Op;
6290 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6291 }
6292 case ISD::FP_ROUND: {
6293 SDLoc DL(Op);
6294 EVT VT = Op.getValueType();
6295 SDValue Op0 = Op.getOperand(0);
6296 EVT Op0VT = Op0.getValueType();
6297 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6298 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6299 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6300 Subtarget.hasStdExtDOrZdinx()) {
6301 SDValue FloatVal =
6302 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6303 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6304 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6305 }
6306
6307 if (!Op.getValueType().isVector())
6308 return Op;
6309 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6310 }
6311 case ISD::STRICT_FP_ROUND:
6312 case ISD::STRICT_FP_EXTEND:
6313 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6314 case ISD::SINT_TO_FP:
6315 case ISD::UINT_TO_FP:
6316 if (Op.getValueType().isVector() &&
6317 Op.getValueType().getScalarType() == MVT::f16 &&
6318 (Subtarget.hasVInstructionsF16Minimal() &&
6319 !Subtarget.hasVInstructionsF16())) {
6320 if (Op.getValueType() == MVT::nxv32f16)
6321 return SplitVectorOp(Op, DAG);
6322 // int -> f32
6323 SDLoc DL(Op);
6324 MVT NVT =
6325 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6326 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6327 // f32 -> f16
6328 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6329 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6330 }
6331 [[fallthrough]];
6332 case ISD::FP_TO_SINT:
6333 case ISD::FP_TO_UINT:
6334 if (SDValue Op1 = Op.getOperand(0);
6335 Op1.getValueType().isVector() &&
6336 Op1.getValueType().getScalarType() == MVT::f16 &&
6337 (Subtarget.hasVInstructionsF16Minimal() &&
6338 !Subtarget.hasVInstructionsF16())) {
6339 if (Op1.getValueType() == MVT::nxv32f16)
6340 return SplitVectorOp(Op, DAG);
6341 // f16 -> f32
6342 SDLoc DL(Op);
6343 MVT NVT = MVT::getVectorVT(MVT::f32,
6344 Op1.getValueType().getVectorElementCount());
6345 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6346 // f32 -> int
6347 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6348 }
6349 [[fallthrough]];
6350 case ISD::STRICT_FP_TO_SINT:
6351 case ISD::STRICT_FP_TO_UINT:
6352 case ISD::STRICT_SINT_TO_FP:
6353 case ISD::STRICT_UINT_TO_FP: {
6354 // RVV can only do fp<->int conversions to types half/double the size as
6355 // the source. We custom-lower any conversions that do two hops into
6356 // sequences.
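// For example (illustrative): nxv2i8 -> nxv2f64 is lowered as a sign/zero
// extend to nxv2i32 followed by a single widening convert, and nxv2f64 ->
// nxv2i8 as a narrowing convert to nxv2i32 followed by a truncate.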
6357 MVT VT = Op.getSimpleValueType();
6358 if (!VT.isVector())
6359 return Op;
6360 SDLoc DL(Op);
6361 bool IsStrict = Op->isStrictFPOpcode();
6362 SDValue Src = Op.getOperand(0 + IsStrict);
6363 MVT EltVT = VT.getVectorElementType();
6364 MVT SrcVT = Src.getSimpleValueType();
6365 MVT SrcEltVT = SrcVT.getVectorElementType();
6366 unsigned EltSize = EltVT.getSizeInBits();
6367 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6368 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6369 "Unexpected vector element types");
6370
6371 bool IsInt2FP = SrcEltVT.isInteger();
6372 // Widening conversions
6373 if (EltSize > (2 * SrcEltSize)) {
6374 if (IsInt2FP) {
6375 // Do a regular integer sign/zero extension then convert to float.
6376 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6377 VT.getVectorElementCount());
6378 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6379 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6380 ? ISD::ZERO_EXTEND
6381 : ISD::SIGN_EXTEND;
6382 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6383 if (IsStrict)
6384 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6385 Op.getOperand(0), Ext);
6386 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6387 }
6388 // FP2Int
6389 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6390 // Do one doubling fp_extend then complete the operation by converting
6391 // to int.
6392 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6393 if (IsStrict) {
6394 auto [FExt, Chain] =
6395 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6396 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6397 }
6398 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6399 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6400 }
6401
6402 // Narrowing conversions
6403 if (SrcEltSize > (2 * EltSize)) {
6404 if (IsInt2FP) {
6405 // One narrowing int_to_fp, then an fp_round.
6406 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6407 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6408 if (IsStrict) {
6409 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6410 DAG.getVTList(InterimFVT, MVT::Other),
6411 Op.getOperand(0), Src);
6412 SDValue Chain = Int2FP.getValue(1);
6413 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6414 }
6415 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6416 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6417 }
6418 // FP2Int
6419 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6420 // representable by the integer, the result is poison.
6421 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6422 VT.getVectorElementCount());
6423 if (IsStrict) {
6424 SDValue FP2Int =
6425 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6426 Op.getOperand(0), Src);
6427 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6428 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6429 }
6430 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6431 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6432 }
6433
6434 // Scalable vectors can exit here. Patterns will handle equally-sized
6435 // conversions halving/doubling ones.
6436 if (!VT.isFixedLengthVector())
6437 return Op;
6438
6439 // For fixed-length vectors we lower to a custom "VL" node.
6440 unsigned RVVOpc = 0;
6441 switch (Op.getOpcode()) {
6442 default:
6443 llvm_unreachable("Impossible opcode");
6444 case ISD::FP_TO_SINT:
6445 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6446 break;
6447 case ISD::FP_TO_UINT:
6448 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6449 break;
6450 case ISD::SINT_TO_FP:
6451 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6452 break;
6453 case ISD::UINT_TO_FP:
6454 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6455 break;
6456 case ISD::STRICT_FP_TO_SINT:
6457 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6458 break;
6459 case ISD::STRICT_FP_TO_UINT:
6460 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6461 break;
6462 case ISD::STRICT_SINT_TO_FP:
6463 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6464 break;
6465 case ISD::STRICT_UINT_TO_FP:
6466 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6467 break;
6468 }
6469
6470 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6471 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6472 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6473 "Expected same element count");
6474
6475 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6476
6477 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6478 if (IsStrict) {
6479 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6480 Op.getOperand(0), Src, Mask, VL);
6481 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6482 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6483 }
6484 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6485 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6486 }
6487 case ISD::FP_TO_SINT_SAT:
6488 case ISD::FP_TO_UINT_SAT:
6489 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6490 case ISD::FP_TO_BF16: {
6491 // Custom lower to ensure the libcall return is passed in an FPR on hard
6492 // float ABIs.
6493 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6494 SDLoc DL(Op);
6495 MakeLibCallOptions CallOptions;
6496 RTLIB::Libcall LC =
6497 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6498 SDValue Res =
6499 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6500 if (Subtarget.is64Bit() && !RV64LegalI32)
6501 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6502 return DAG.getBitcast(MVT::i32, Res);
6503 }
6504 case ISD::BF16_TO_FP: {
6505 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6506 MVT VT = Op.getSimpleValueType();
6507 SDLoc DL(Op);
6508 Op = DAG.getNode(
6509 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6510 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6511 SDValue Res = Subtarget.is64Bit()
6512 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6513 : DAG.getBitcast(MVT::f32, Op);
6514 // fp_extend if the target VT is bigger than f32.
6515 if (VT != MVT::f32)
6516 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6517 return Res;
6518 }
6519 case ISD::FP_TO_FP16: {
6520 // Custom lower to ensure the libcall return is passed in an FPR on hard
6521 // float ABIs.
6522 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6523 SDLoc DL(Op);
6524 MakeLibCallOptions CallOptions;
6525 RTLIB::Libcall LC =
6526 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6527 SDValue Res =
6528 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6529 if (Subtarget.is64Bit() && !RV64LegalI32)
6530 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6531 return DAG.getBitcast(MVT::i32, Res);
6532 }
6533 case ISD::FP16_TO_FP: {
6534 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6535 // float ABIs.
6536 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6537 SDLoc DL(Op);
6538 MakeLibCallOptions CallOptions;
6539 SDValue Arg = Subtarget.is64Bit()
6540 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6541 Op.getOperand(0))
6542 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6543 SDValue Res =
6544 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6545 .first;
6546 return Res;
6547 }
6548 case ISD::FTRUNC:
6549 case ISD::FCEIL:
6550 case ISD::FFLOOR:
6551 case ISD::FNEARBYINT:
6552 case ISD::FRINT:
6553 case ISD::FROUND:
6554 case ISD::FROUNDEVEN:
6555 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6556 case ISD::LRINT:
6557 case ISD::LLRINT:
6558 return lowerVectorXRINT(Op, DAG, Subtarget);
6559 case ISD::VECREDUCE_ADD:
6560 case ISD::VECREDUCE_UMAX:
6561 case ISD::VECREDUCE_SMAX:
6562 case ISD::VECREDUCE_UMIN:
6563 case ISD::VECREDUCE_SMIN:
6564 return lowerVECREDUCE(Op, DAG);
6565 case ISD::VECREDUCE_AND:
6566 case ISD::VECREDUCE_OR:
6567 case ISD::VECREDUCE_XOR:
6568 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6569 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6570 return lowerVECREDUCE(Op, DAG);
6571 case ISD::VECREDUCE_FADD:
6572 case ISD::VECREDUCE_SEQ_FADD:
6573 case ISD::VECREDUCE_FMIN:
6574 case ISD::VECREDUCE_FMAX:
6575 case ISD::VECREDUCE_FMAXIMUM:
6576 case ISD::VECREDUCE_FMINIMUM:
6577 return lowerFPVECREDUCE(Op, DAG);
6578 case ISD::VP_REDUCE_ADD:
6579 case ISD::VP_REDUCE_UMAX:
6580 case ISD::VP_REDUCE_SMAX:
6581 case ISD::VP_REDUCE_UMIN:
6582 case ISD::VP_REDUCE_SMIN:
6583 case ISD::VP_REDUCE_FADD:
6584 case ISD::VP_REDUCE_SEQ_FADD:
6585 case ISD::VP_REDUCE_FMIN:
6586 case ISD::VP_REDUCE_FMAX:
6587 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6588 (Subtarget.hasVInstructionsF16Minimal() &&
6589 !Subtarget.hasVInstructionsF16()))
6590 return SplitVectorReductionOp(Op, DAG);
6591 return lowerVPREDUCE(Op, DAG);
6592 case ISD::VP_REDUCE_AND:
6593 case ISD::VP_REDUCE_OR:
6594 case ISD::VP_REDUCE_XOR:
6595 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6596 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6597 return lowerVPREDUCE(Op, DAG);
6598 case ISD::UNDEF: {
6599 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6600 return convertFromScalableVector(Op.getSimpleValueType(),
6601 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6602 }
6603 case ISD::INSERT_SUBVECTOR:
6604 return lowerINSERT_SUBVECTOR(Op, DAG);
6605 case ISD::EXTRACT_SUBVECTOR:
6606 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6607 case ISD::VECTOR_DEINTERLEAVE:
6608 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6609 case ISD::VECTOR_INTERLEAVE:
6610 return lowerVECTOR_INTERLEAVE(Op, DAG);
6611 case ISD::STEP_VECTOR:
6612 return lowerSTEP_VECTOR(Op, DAG);
6613 case ISD::VECTOR_REVERSE:
6614 return lowerVECTOR_REVERSE(Op, DAG);
6615 case ISD::VECTOR_SPLICE:
6616 return lowerVECTOR_SPLICE(Op, DAG);
6617 case ISD::BUILD_VECTOR:
6618 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6619 case ISD::SPLAT_VECTOR:
6620 if (Op.getValueType().getScalarType() == MVT::f16 &&
6621 (Subtarget.hasVInstructionsF16Minimal() &&
6622 !Subtarget.hasVInstructionsF16())) {
6623 if (Op.getValueType() == MVT::nxv32f16)
6624 return SplitVectorOp(Op, DAG);
6625 SDLoc DL(Op);
6626 SDValue NewScalar =
6627 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6628 SDValue NewSplat = DAG.getNode(
6629 ISD::SPLAT_VECTOR, DL,
6630 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6631 NewScalar);
6632 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6633 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6634 }
6635 if (Op.getValueType().getVectorElementType() == MVT::i1)
6636 return lowerVectorMaskSplat(Op, DAG);
6637 return SDValue();
6638 case ISD::VECTOR_SHUFFLE:
6639 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6640 case ISD::CONCAT_VECTORS: {
6641 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6642 // better than going through the stack, as the default expansion does.
6643 SDLoc DL(Op);
6644 MVT VT = Op.getSimpleValueType();
6645 MVT ContainerVT = VT;
6646 if (VT.isFixedLengthVector())
6647 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6648
6649 // Recursively split concat_vectors with more than 2 operands:
6650 //
6651 // concat_vector op1, op2, op3, op4
6652 // ->
6653 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6654 //
6655 // This reduces the length of the chain of vslideups and allows us to
6656 // perform the vslideups at a smaller LMUL, limited to MF2.
6657 if (Op.getNumOperands() > 2 &&
6658 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6659 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6661 size_t HalfNumOps = Op.getNumOperands() / 2;
6662 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6663 Op->ops().take_front(HalfNumOps));
6664 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6665 Op->ops().drop_front(HalfNumOps));
6666 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6667 }
6668
6669 unsigned NumOpElts =
6670 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6671 SDValue Vec = DAG.getUNDEF(VT);
6672 for (const auto &OpIdx : enumerate(Op->ops())) {
6673 SDValue SubVec = OpIdx.value();
6674 // Don't insert undef subvectors.
6675 if (SubVec.isUndef())
6676 continue;
6677 Vec =
6678 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6679 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6680 }
6681 return Vec;
6682 }
6683 case ISD::LOAD:
6684 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6685 return V;
6686 if (Op.getValueType().isFixedLengthVector())
6687 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6688 return Op;
6689 case ISD::STORE:
6690 if (auto V = expandUnalignedRVVStore(Op, DAG))
6691 return V;
6692 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6693 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6694 return Op;
6695 case ISD::MLOAD:
6696 case ISD::VP_LOAD:
6697 return lowerMaskedLoad(Op, DAG);
6698 case ISD::MSTORE:
6699 case ISD::VP_STORE:
6700 return lowerMaskedStore(Op, DAG);
6701 case ISD::SELECT_CC: {
6702 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6703 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6704 // into separate SETCC+SELECT just like LegalizeDAG.
6705 SDValue Tmp1 = Op.getOperand(0);
6706 SDValue Tmp2 = Op.getOperand(1);
6707 SDValue True = Op.getOperand(2);
6708 SDValue False = Op.getOperand(3);
6709 EVT VT = Op.getValueType();
6710 SDValue CC = Op.getOperand(4);
6711 EVT CmpVT = Tmp1.getValueType();
6712 EVT CCVT =
6713 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6714 SDLoc DL(Op);
6715 SDValue Cond =
6716 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6717 return DAG.getSelect(DL, VT, Cond, True, False);
6718 }
6719 case ISD::SETCC: {
6720 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6721 if (OpVT.isScalarInteger()) {
6722 MVT VT = Op.getSimpleValueType();
6723 SDValue LHS = Op.getOperand(0);
6724 SDValue RHS = Op.getOperand(1);
6725 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6726 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6727 "Unexpected CondCode");
6728
6729 SDLoc DL(Op);
6730
6731 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6732 // convert this to the equivalent of (set(u)ge X, C+1) by using
6733 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6734 // in a register.
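// For example, (setugt X, 7) becomes (xori (sltiu X, 8), 1): SLTIU computes
// X <u 8 and the XORI inverts it, yielding X >u 7 with no constant
// materialization.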
6735 if (isa<ConstantSDNode>(RHS)) {
6736 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6737 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6738 // If this is an unsigned compare and the constant is -1, incrementing
6739 // the constant would change behavior. The result should be false.
6740 if (CCVal == ISD::SETUGT && Imm == -1)
6741 return DAG.getConstant(0, DL, VT);
6742 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6743 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6744 SDValue SetCC = DAG.getSetCC(
6745 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6746 return DAG.getLogicalNOT(DL, SetCC, VT);
6747 }
6748 }
6749
6750 // Not a constant we could handle, swap the operands and condition code to
6751 // SETLT/SETULT.
6752 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6753 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6754 }
6755
6756 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6757 (Subtarget.hasVInstructionsF16Minimal() &&
6758 !Subtarget.hasVInstructionsF16()))
6759 return SplitVectorOp(Op, DAG);
6760
6761 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6762 }
6763 case ISD::ADD:
6764 case ISD::SUB:
6765 case ISD::MUL:
6766 case ISD::MULHS:
6767 case ISD::MULHU:
6768 case ISD::AND:
6769 case ISD::OR:
6770 case ISD::XOR:
6771 case ISD::SDIV:
6772 case ISD::SREM:
6773 case ISD::UDIV:
6774 case ISD::UREM:
6775 case ISD::BSWAP:
6776 case ISD::CTPOP:
6777 return lowerToScalableOp(Op, DAG);
6778 case ISD::SHL:
6779 case ISD::SRA:
6780 case ISD::SRL:
6781 if (Op.getSimpleValueType().isFixedLengthVector())
6782 return lowerToScalableOp(Op, DAG);
6783 // This can be called for an i32 shift amount that needs to be promoted.
6784 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6785 "Unexpected custom legalisation");
6786 return SDValue();
6787 case ISD::FADD:
6788 case ISD::FSUB:
6789 case ISD::FMUL:
6790 case ISD::FDIV:
6791 case ISD::FNEG:
6792 case ISD::FABS:
6793 case ISD::FSQRT:
6794 case ISD::FMA:
6795 case ISD::FMINNUM:
6796 case ISD::FMAXNUM:
6797 if (Op.getValueType() == MVT::nxv32f16 &&
6798 (Subtarget.hasVInstructionsF16Minimal() &&
6799 !Subtarget.hasVInstructionsF16()))
6800 return SplitVectorOp(Op, DAG);
6801 [[fallthrough]];
6802 case ISD::AVGFLOORU:
6803 case ISD::AVGCEILU:
6804 case ISD::SMIN:
6805 case ISD::SMAX:
6806 case ISD::UMIN:
6807 case ISD::UMAX:
6808 return lowerToScalableOp(Op, DAG);
6809 case ISD::UADDSAT:
6810 case ISD::USUBSAT:
6811 if (!Op.getValueType().isVector())
6812 return lowerUADDSAT_USUBSAT(Op, DAG);
6813 return lowerToScalableOp(Op, DAG);
6814 case ISD::SADDSAT:
6815 case ISD::SSUBSAT:
6816 if (!Op.getValueType().isVector())
6817 return lowerSADDSAT_SSUBSAT(Op, DAG);
6818 return lowerToScalableOp(Op, DAG);
6819 case ISD::ABDS:
6820 case ISD::ABDU: {
6821 SDLoc dl(Op);
6822 EVT VT = Op->getValueType(0);
6823 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6824 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6825 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6826
6827 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6828 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
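// e.g. abds(3, -5) = smax(3, -5) - smin(3, -5) = 3 - (-5) = 8 = |3 - (-5)|.
// The freezes above ensure both the max and the min observe the same value
// if an operand is poison.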
6829 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6830 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6831 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6832 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6833 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6834 }
6835 case ISD::ABS:
6836 case ISD::VP_ABS:
6837 return lowerABS(Op, DAG);
6838 case ISD::CTLZ:
6839 case ISD::CTLZ_ZERO_UNDEF:
6840 case ISD::CTTZ:
6841 case ISD::CTTZ_ZERO_UNDEF:
6842 if (Subtarget.hasStdExtZvbb())
6843 return lowerToScalableOp(Op, DAG);
6844 assert(Op.getOpcode() != ISD::CTTZ);
6845 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6846 case ISD::VSELECT:
6847 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6848 case ISD::FCOPYSIGN:
6849 if (Op.getValueType() == MVT::nxv32f16 &&
6850 (Subtarget.hasVInstructionsF16Minimal() &&
6851 !Subtarget.hasVInstructionsF16()))
6852 return SplitVectorOp(Op, DAG);
6853 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6854 case ISD::STRICT_FADD:
6855 case ISD::STRICT_FSUB:
6856 case ISD::STRICT_FMUL:
6857 case ISD::STRICT_FDIV:
6858 case ISD::STRICT_FSQRT:
6859 case ISD::STRICT_FMA:
6860 if (Op.getValueType() == MVT::nxv32f16 &&
6861 (Subtarget.hasVInstructionsF16Minimal() &&
6862 !Subtarget.hasVInstructionsF16()))
6863 return SplitStrictFPVectorOp(Op, DAG);
6864 return lowerToScalableOp(Op, DAG);
6865 case ISD::STRICT_FSETCC:
6866 case ISD::STRICT_FSETCCS:
6867 return lowerVectorStrictFSetcc(Op, DAG);
6868 case ISD::STRICT_FCEIL:
6869 case ISD::STRICT_FRINT:
6870 case ISD::STRICT_FFLOOR:
6871 case ISD::STRICT_FTRUNC:
6872 case ISD::STRICT_FNEARBYINT:
6873 case ISD::STRICT_FROUND:
6874 case ISD::STRICT_FROUNDEVEN:
6875 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6876 case ISD::MGATHER:
6877 case ISD::VP_GATHER:
6878 return lowerMaskedGather(Op, DAG);
6879 case ISD::MSCATTER:
6880 case ISD::VP_SCATTER:
6881 return lowerMaskedScatter(Op, DAG);
6882 case ISD::GET_ROUNDING:
6883 return lowerGET_ROUNDING(Op, DAG);
6884 case ISD::SET_ROUNDING:
6885 return lowerSET_ROUNDING(Op, DAG);
6886 case ISD::EH_DWARF_CFA:
6887 return lowerEH_DWARF_CFA(Op, DAG);
6888 case ISD::VP_SELECT:
6889 case ISD::VP_MERGE:
6890 case ISD::VP_ADD:
6891 case ISD::VP_SUB:
6892 case ISD::VP_MUL:
6893 case ISD::VP_SDIV:
6894 case ISD::VP_UDIV:
6895 case ISD::VP_SREM:
6896 case ISD::VP_UREM:
6897 case ISD::VP_UADDSAT:
6898 case ISD::VP_USUBSAT:
6899 case ISD::VP_SADDSAT:
6900 case ISD::VP_SSUBSAT:
6901 case ISD::VP_LRINT:
6902 case ISD::VP_LLRINT:
6903 return lowerVPOp(Op, DAG);
6904 case ISD::VP_AND:
6905 case ISD::VP_OR:
6906 case ISD::VP_XOR:
6907 return lowerLogicVPOp(Op, DAG);
6908 case ISD::VP_FADD:
6909 case ISD::VP_FSUB:
6910 case ISD::VP_FMUL:
6911 case ISD::VP_FDIV:
6912 case ISD::VP_FNEG:
6913 case ISD::VP_FABS:
6914 case ISD::VP_SQRT:
6915 case ISD::VP_FMA:
6916 case ISD::VP_FMINNUM:
6917 case ISD::VP_FMAXNUM:
6918 case ISD::VP_FCOPYSIGN:
6919 if (Op.getValueType() == MVT::nxv32f16 &&
6920 (Subtarget.hasVInstructionsF16Minimal() &&
6921 !Subtarget.hasVInstructionsF16()))
6922 return SplitVPOp(Op, DAG);
6923 [[fallthrough]];
6924 case ISD::VP_ASHR:
6925 case ISD::VP_LSHR:
6926 case ISD::VP_SHL:
6927 return lowerVPOp(Op, DAG);
6928 case ISD::VP_IS_FPCLASS:
6929 return LowerIS_FPCLASS(Op, DAG);
6930 case ISD::VP_SIGN_EXTEND:
6931 case ISD::VP_ZERO_EXTEND:
6932 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6933 return lowerVPExtMaskOp(Op, DAG);
6934 return lowerVPOp(Op, DAG);
6935 case ISD::VP_TRUNCATE:
6936 return lowerVectorTruncLike(Op, DAG);
6937 case ISD::VP_FP_EXTEND:
6938 case ISD::VP_FP_ROUND:
6939 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6940 case ISD::VP_SINT_TO_FP:
6941 case ISD::VP_UINT_TO_FP:
6942 if (Op.getValueType().isVector() &&
6943 Op.getValueType().getScalarType() == MVT::f16 &&
6944 (Subtarget.hasVInstructionsF16Minimal() &&
6945 !Subtarget.hasVInstructionsF16())) {
6946 if (Op.getValueType() == MVT::nxv32f16)
6947 return SplitVPOp(Op, DAG);
6948 // int -> f32
6949 SDLoc DL(Op);
6950 MVT NVT =
6951 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6952 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6953 // f32 -> f16
6954 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6955 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6956 }
6957 [[fallthrough]];
6958 case ISD::VP_FP_TO_SINT:
6959 case ISD::VP_FP_TO_UINT:
6960 if (SDValue Op1 = Op.getOperand(0);
6961 Op1.getValueType().isVector() &&
6962 Op1.getValueType().getScalarType() == MVT::f16 &&
6963 (Subtarget.hasVInstructionsF16Minimal() &&
6964 !Subtarget.hasVInstructionsF16())) {
6965 if (Op1.getValueType() == MVT::nxv32f16)
6966 return SplitVPOp(Op, DAG);
6967 // f16 -> f32
6968 SDLoc DL(Op);
6969 MVT NVT = MVT::getVectorVT(MVT::f32,
6970 Op1.getValueType().getVectorElementCount());
6971 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6972 // f32 -> int
6973 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6974 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6975 }
6976 return lowerVPFPIntConvOp(Op, DAG);
6977 case ISD::VP_SETCC:
6978 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6979 (Subtarget.hasVInstructionsF16Minimal() &&
6980 !Subtarget.hasVInstructionsF16()))
6981 return SplitVPOp(Op, DAG);
6982 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6983 return lowerVPSetCCMaskOp(Op, DAG);
6984 [[fallthrough]];
6985 case ISD::VP_SMIN:
6986 case ISD::VP_SMAX:
6987 case ISD::VP_UMIN:
6988 case ISD::VP_UMAX:
6989 case ISD::VP_BITREVERSE:
6990 case ISD::VP_BSWAP:
6991 return lowerVPOp(Op, DAG);
6992 case ISD::VP_CTLZ:
6993 case ISD::VP_CTLZ_ZERO_UNDEF:
6994 if (Subtarget.hasStdExtZvbb())
6995 return lowerVPOp(Op, DAG);
6996 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6997 case ISD::VP_CTTZ:
6998 case ISD::VP_CTTZ_ZERO_UNDEF:
6999 if (Subtarget.hasStdExtZvbb())
7000 return lowerVPOp(Op, DAG);
7001 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7002 case ISD::VP_CTPOP:
7003 return lowerVPOp(Op, DAG);
7004 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7005 return lowerVPStridedLoad(Op, DAG);
7006 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7007 return lowerVPStridedStore(Op, DAG);
7008 case ISD::VP_FCEIL:
7009 case ISD::VP_FFLOOR:
7010 case ISD::VP_FRINT:
7011 case ISD::VP_FNEARBYINT:
7012 case ISD::VP_FROUND:
7013 case ISD::VP_FROUNDEVEN:
7014 case ISD::VP_FROUNDTOZERO:
7015 if (Op.getValueType() == MVT::nxv32f16 &&
7016 (Subtarget.hasVInstructionsF16Minimal() &&
7017 !Subtarget.hasVInstructionsF16()))
7018 return SplitVPOp(Op, DAG);
7019 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7020 case ISD::VP_FMAXIMUM:
7021 case ISD::VP_FMINIMUM:
7022 if (Op.getValueType() == MVT::nxv32f16 &&
7023 (Subtarget.hasVInstructionsF16Minimal() &&
7024 !Subtarget.hasVInstructionsF16()))
7025 return SplitVPOp(Op, DAG);
7026 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7027 case ISD::EXPERIMENTAL_VP_SPLICE:
7028 return lowerVPSpliceExperimental(Op, DAG);
7029 case ISD::EXPERIMENTAL_VP_REVERSE:
7030 return lowerVPReverseExperimental(Op, DAG);
7031 }
7032}
7033
7034 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7035 SelectionDAG &DAG, unsigned Flags) {
7036 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7037}
7038
7039 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7040 SelectionDAG &DAG, unsigned Flags) {
7041 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7042 Flags);
7043}
7044
7045 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7046 SelectionDAG &DAG, unsigned Flags) {
7047 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7048 N->getOffset(), Flags);
7049}
7050
7051 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7052 SelectionDAG &DAG, unsigned Flags) {
7053 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7054}
7055
7056template <class NodeTy>
7057SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7058 bool IsLocal, bool IsExternWeak) const {
7059 SDLoc DL(N);
7060 EVT Ty = getPointerTy(DAG.getDataLayout());
7061
7062 // When HWASAN is used and tagging of global variables is enabled,
7063 // they should be accessed via the GOT, since the tagged address of a global
7064 // is incompatible with existing code models. This also applies to non-pic
7065 // mode.
7066 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7067 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7068 if (IsLocal && !Subtarget.allowTaggedGlobals())
7069 // Use PC-relative addressing to access the symbol. This generates the
7070 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7071 // %pcrel_lo(auipc)).
7072 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7073
7074 // Use PC-relative addressing to access the GOT for this symbol, then load
7075 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7076 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7077 SDValue Load =
7078 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7079 MachineFunction &MF = DAG.getMachineFunction();
7080 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7081 MachinePointerInfo::getGOT(MF),
7082 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7083 MachineMemOperand::MOInvariant,
7084 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7085 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7086 return Load;
7087 }
7088
7089 switch (getTargetMachine().getCodeModel()) {
7090 default:
7091 report_fatal_error("Unsupported code model for lowering");
7092 case CodeModel::Small: {
7093 // Generate a sequence for accessing addresses within the first 2 GiB of
7094 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7095 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7096 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7097 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7098 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7099 }
7100 case CodeModel::Medium: {
7101 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7102 if (IsExternWeak) {
7103 // An extern weak symbol may be undefined, i.e. have value 0, which may
7104 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7105 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7106 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7107 SDValue Load =
7108 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7109 MachineFunction &MF = DAG.getMachineFunction();
7110 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7111 MachinePointerInfo::getGOT(MF),
7112 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7113 MachineMemOperand::MOInvariant,
7114 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7115 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7116 return Load;
7117 }
7118
7119 // Generate a sequence for accessing addresses within any 2GiB range within
7120 // the address space. This generates the pattern (PseudoLLA sym), which
7121 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7122 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7123 }
7124 }
7125}
7126
7127SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7128 SelectionDAG &DAG) const {
7129 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7130 assert(N->getOffset() == 0 && "unexpected offset in global node");
7131 const GlobalValue *GV = N->getGlobal();
7132 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7133}
7134
7135SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7136 SelectionDAG &DAG) const {
7137 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7138
7139 return getAddr(N, DAG);
7140}
7141
7142SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7143 SelectionDAG &DAG) const {
7144 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7145
7146 return getAddr(N, DAG);
7147}
7148
7149SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7150 SelectionDAG &DAG) const {
7151 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7152
7153 return getAddr(N, DAG);
7154}
7155
7156SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7157 SelectionDAG &DAG,
7158 bool UseGOT) const {
7159 SDLoc DL(N);
7160 EVT Ty = getPointerTy(DAG.getDataLayout());
7161 const GlobalValue *GV = N->getGlobal();
7162 MVT XLenVT = Subtarget.getXLenVT();
7163
7164 if (UseGOT) {
7165 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7166 // load the address from the GOT and add the thread pointer. This generates
7167 // the pattern (PseudoLA_TLS_IE sym), which expands to
7168 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7169 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7170 SDValue Load =
7171 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7172 MachineFunction &MF = DAG.getMachineFunction();
7173 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7174 MachinePointerInfo::getGOT(MF),
7175 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7176 MachineMemOperand::MOInvariant,
7177 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7178 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7179
7180 // Add the thread pointer.
7181 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7182 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7183 }
7184
7185 // Generate a sequence for accessing the address relative to the thread
7186 // pointer, with the appropriate adjustment for the thread pointer offset.
7187 // This generates the pattern
7188 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
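// i.e. a local-exec access such as
//   lui  a0, %tprel_hi(sym)
//   add  a0, a0, tp, %tprel_add(sym)
//   addi a0, a0, %tprel_lo(sym)
// (the register choice here is illustrative).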
7189 SDValue AddrHi =
7190 getTargetNode(N, DL, Ty, DAG, RISCVII::MO_TPREL_HI);
7191 SDValue AddrAdd =
7192 getTargetNode(N, DL, Ty, DAG, RISCVII::MO_TPREL_ADD);
7193 SDValue AddrLo =
7194 getTargetNode(N, DL, Ty, DAG, RISCVII::MO_TPREL_LO);
7195
7196 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7197 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7198 SDValue MNAdd =
7199 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7200 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7201}
7202
7203SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7204 SelectionDAG &DAG) const {
7205 SDLoc DL(N);
7206 EVT Ty = getPointerTy(DAG.getDataLayout());
7207 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7208 const GlobalValue *GV = N->getGlobal();
7209
7210 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7211 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7212 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7213 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7214 SDValue Load =
7215 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7216
7217 // Prepare argument list to generate call.
7218 ArgListTy Args;
7219 ArgListEntry Entry;
7220 Entry.Node = Load;
7221 Entry.Ty = CallTy;
7222 Args.push_back(Entry);
7223
7224 // Setup call to __tls_get_addr.
7225 TargetLowering::CallLoweringInfo CLI(DAG);
7226 CLI.setDebugLoc(DL)
7227 .setChain(DAG.getEntryNode())
7228 .setLibCallee(CallingConv::C, CallTy,
7229 DAG.getExternalSymbol("__tls_get_addr", Ty),
7230 std::move(Args));
7231
7232 return LowerCallTo(CLI).first;
7233}
7234
7235SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7236 SelectionDAG &DAG) const {
7237 SDLoc DL(N);
7238 EVT Ty = getPointerTy(DAG.getDataLayout());
7239 const GlobalValue *GV = N->getGlobal();
7240
7241 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7242 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7243 //
7244 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7245 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7246 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7247 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7248 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7249 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7250}
7251
7252SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7253 SelectionDAG &DAG) const {
7254 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7255 assert(N->getOffset() == 0 && "unexpected offset in global node");
7256
7257 if (DAG.getTarget().useEmulatedTLS())
7258 return LowerToTLSEmulatedModel(N, DAG);
7259
7260 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7261
7262 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7263 CallingConv::GHC)
7264 report_fatal_error("In GHC calling convention TLS is not supported");
7265
7266 SDValue Addr;
7267 switch (Model) {
7268 case TLSModel::LocalExec:
7269 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7270 break;
7271 case TLSModel::InitialExec:
7272 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7273 break;
7274 case TLSModel::LocalDynamic:
7275 case TLSModel::GeneralDynamic:
7276 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7277 : getDynamicTLSAddr(N, DAG);
7278 break;
7279 }
7280
7281 return Addr;
7282}
7283
7284// Return true if Val is equal to (setcc LHS, RHS, CC).
7285// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7286// Otherwise, return std::nullopt.
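// e.g. for Val = (setcc a, b, setlt): CC == setlt yields true, CC == setge
// (the inverse) yields false, and a comparison of unrelated operands yields
// std::nullopt.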
7287static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7288 ISD::CondCode CC, SDValue Val) {
7289 assert(Val->getOpcode() == ISD::SETCC);
7290 SDValue LHS2 = Val.getOperand(0);
7291 SDValue RHS2 = Val.getOperand(1);
7292 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7293
7294 if (LHS == LHS2 && RHS == RHS2) {
7295 if (CC == CC2)
7296 return true;
7297 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7298 return false;
7299 } else if (LHS == RHS2 && RHS == LHS2) {
7300 CC2 = ISD::getSetCCSwappedOperands(CC2);
7301 if (CC == CC2)
7302 return true;
7303 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7304 return false;
7305 }
7306
7307 return std::nullopt;
7308}
7309
7310 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7311 const RISCVSubtarget &Subtarget) {
7312 SDValue CondV = N->getOperand(0);
7313 SDValue TrueV = N->getOperand(1);
7314 SDValue FalseV = N->getOperand(2);
7315 MVT VT = N->getSimpleValueType(0);
7316 SDLoc DL(N);
7317
7318 if (!Subtarget.hasConditionalMoveFusion()) {
7319 // (select c, -1, y) -> -c | y
7320 if (isAllOnesConstant(TrueV)) {
7321 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7322 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7323 }
7324 // (select c, y, -1) -> (c-1) | y
7325 if (isAllOnesConstant(FalseV)) {
7326 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7327 DAG.getAllOnesConstant(DL, VT));
7328 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7329 }
7330
7331 // (select c, 0, y) -> (c-1) & y
7332 if (isNullConstant(TrueV)) {
7333 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7334 DAG.getAllOnesConstant(DL, VT));
7335 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7336 }
7337 // (select c, y, 0) -> -c & y
7338 if (isNullConstant(FalseV)) {
7339 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7340 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7341 }
7342 }
7343
7344 // select c, ~x, x --> xor -c, x
7345 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7346 const APInt &TrueVal = TrueV->getAsAPIntVal();
7347 const APInt &FalseVal = FalseV->getAsAPIntVal();
7348 if (~TrueVal == FalseVal) {
7349 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7350 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7351 }
7352 }
7353
7354 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7355 // when both truev and falsev are also setcc.
7356 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7357 FalseV.getOpcode() == ISD::SETCC) {
7358 SDValue LHS = CondV.getOperand(0);
7359 SDValue RHS = CondV.getOperand(1);
7360 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7361
7362 // (select x, x, y) -> x | y
7363 // (select !x, x, y) -> x & y
7364 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7365 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7366 DAG.getFreeze(FalseV));
7367 }
7368 // (select x, y, x) -> x & y
7369 // (select !x, y, x) -> x | y
7370 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7371 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7372 DAG.getFreeze(TrueV), FalseV);
7373 }
7374 }
7375
7376 return SDValue();
7377}
7378
7379// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7380// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7381// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7382// being `0` or `-1`. In such cases we can replace `select` with `and`.
7383// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7384// than `c0`?
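// e.g. (and (select cond, x, 0), 1) -> (select cond, (and x, 1), 0): the
// folded false arm is the constant 0, so the select can later become a mask.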
7385 static SDValue
7386 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7387 const RISCVSubtarget &Subtarget) {
7388 if (Subtarget.hasShortForwardBranchOpt())
7389 return SDValue();
7390
7391 unsigned SelOpNo = 0;
7392 SDValue Sel = BO->getOperand(0);
7393 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7394 SelOpNo = 1;
7395 Sel = BO->getOperand(1);
7396 }
7397
7398 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7399 return SDValue();
7400
7401 unsigned ConstSelOpNo = 1;
7402 unsigned OtherSelOpNo = 2;
7403 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7404 ConstSelOpNo = 2;
7405 OtherSelOpNo = 1;
7406 }
7407 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7408 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7409 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7410 return SDValue();
7411
7412 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7413 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7414 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7415 return SDValue();
7416
7417 SDLoc DL(Sel);
7418 EVT VT = BO->getValueType(0);
7419
7420 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7421 if (SelOpNo == 1)
7422 std::swap(NewConstOps[0], NewConstOps[1]);
7423
7424 SDValue NewConstOp =
7425 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7426 if (!NewConstOp)
7427 return SDValue();
7428
7429 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7430 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7431 return SDValue();
7432
7433 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7434 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7435 if (SelOpNo == 1)
7436 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7437 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7438
7439 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7440 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7441 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7442}
7443
7444SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7445 SDValue CondV = Op.getOperand(0);
7446 SDValue TrueV = Op.getOperand(1);
7447 SDValue FalseV = Op.getOperand(2);
7448 SDLoc DL(Op);
7449 MVT VT = Op.getSimpleValueType();
7450 MVT XLenVT = Subtarget.getXLenVT();
7451
7452 // Lower vector SELECTs to VSELECTs by splatting the condition.
7453 if (VT.isVector()) {
7454 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7455 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7456 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7457 }
7458
7459 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7460 // nodes to implement the SELECT. Performing the lowering here allows for
7461 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7462 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7463 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7464 VT.isScalarInteger()) {
7465 // (select c, t, 0) -> (czero_eqz t, c)
7466 if (isNullConstant(FalseV))
7467 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7468 // (select c, 0, f) -> (czero_nez f, c)
7469 if (isNullConstant(TrueV))
7470 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7471
7472 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7473 if (TrueV.getOpcode() == ISD::AND &&
7474 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7475 return DAG.getNode(
7476 ISD::OR, DL, VT, TrueV,
7477 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7478 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7479 if (FalseV.getOpcode() == ISD::AND &&
7480 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7481 return DAG.getNode(
7482 ISD::OR, DL, VT, FalseV,
7483 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7484
7485 // Try some other optimizations before falling back to generic lowering.
7486 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7487 return V;
7488
7489 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7490 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
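// e.g. (select c, 10, 3) -> (add (czero_nez -7, c), 10): the CZERO_NEZ
// yields -7 when c is zero and 0 otherwise, so the sum is 3 or 10 as
// required.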
7491 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7492 const APInt &TrueVal = TrueV->getAsAPIntVal();
7493 const APInt &FalseVal = FalseV->getAsAPIntVal();
7494 const int TrueValCost = RISCVMatInt::getIntMatCost(
7495 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7496 const int FalseValCost = RISCVMatInt::getIntMatCost(
7497 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7498 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7499 SDValue LHSVal = DAG.getConstant(
7500 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7501 SDValue RHSVal =
7502 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7503 SDValue CMOV =
7504 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7505 DL, VT, LHSVal, CondV);
7506 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7507 }
7508
7509 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7510 // Unless we have the short forward branch optimization.
7511 if (!Subtarget.hasConditionalMoveFusion())
7512 return DAG.getNode(
7513 ISD::OR, DL, VT,
7514 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7515 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7516 }
7517
7518 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7519 return V;
7520
7521 if (Op.hasOneUse()) {
7522 unsigned UseOpc = Op->use_begin()->getOpcode();
7523 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7524 SDNode *BinOp = *Op->use_begin();
7525 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7526 DAG, Subtarget)) {
7527 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7528 return lowerSELECT(NewSel, DAG);
7529 }
7530 }
7531 }
7532
7533 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7534 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7535 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7536 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7537 if (FPTV && FPFV) {
7538 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7539 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7540 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7541 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7542 DAG.getConstant(1, DL, XLenVT));
7543 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7544 }
7545 }
7546
7547 // If the condition is not an integer SETCC which operates on XLenVT, we need
7548 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7549 // (select condv, truev, falsev)
7550 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7551 if (CondV.getOpcode() != ISD::SETCC ||
7552 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7553 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7554 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7555
7556 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7557
7558 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7559 }
7560
7561 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7562 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7563 // advantage of the integer compare+branch instructions. i.e.:
7564 // (select (setcc lhs, rhs, cc), truev, falsev)
7565 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7566 SDValue LHS = CondV.getOperand(0);
7567 SDValue RHS = CondV.getOperand(1);
7568 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7569
7570 // Special case for a select of 2 constants that have a difference of 1.
7571 // Normally this is done by DAGCombine, but if the select is introduced by
7572 // type legalization or op legalization, we miss it. Restricting to SETLT
7573 // case for now because that is what signed saturating add/sub need.
7574 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7575 // but we would probably want to swap the true/false values if the condition
7576 // is SETGE/SETLE to avoid an XORI.
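// e.g. (select (setlt a, b), 5, 4) -> (add (setlt a, b), 4), since the setcc
// result is 0 or 1.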
7577 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7578 CCVal == ISD::SETLT) {
7579 const APInt &TrueVal = TrueV->getAsAPIntVal();
7580 const APInt &FalseVal = FalseV->getAsAPIntVal();
7581 if (TrueVal - 1 == FalseVal)
7582 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7583 if (TrueVal + 1 == FalseVal)
7584 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7585 }
7586
7587 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7588 // 1 < x ? x : 1 -> 0 < x ? x : 1
7589 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7590 RHS == TrueV && LHS == FalseV) {
7591 LHS = DAG.getConstant(0, DL, VT);
7592 // 0 <u x is the same as x != 0.
7593 if (CCVal == ISD::SETULT) {
7594 std::swap(LHS, RHS);
7595 CCVal = ISD::SETNE;
7596 }
7597 }
7598
7599 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7600 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7601 RHS == FalseV) {
7602 RHS = DAG.getConstant(0, DL, VT);
7603 }
7604
7605 SDValue TargetCC = DAG.getCondCode(CCVal);
7606
7607 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7608 // (select (setcc lhs, rhs, CC), constant, falsev)
7609 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7610 std::swap(TrueV, FalseV);
7611 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7612 }
7613
7614 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7615 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7616}
7617
7618SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7619 SDValue CondV = Op.getOperand(1);
7620 SDLoc DL(Op);
7621 MVT XLenVT = Subtarget.getXLenVT();
7622
7623 if (CondV.getOpcode() == ISD::SETCC &&
7624 CondV.getOperand(0).getValueType() == XLenVT) {
7625 SDValue LHS = CondV.getOperand(0);
7626 SDValue RHS = CondV.getOperand(1);
7627 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7628
7629 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7630
7631 SDValue TargetCC = DAG.getCondCode(CCVal);
7632 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7633 LHS, RHS, TargetCC, Op.getOperand(2));
7634 }
7635
7636 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7637 CondV, DAG.getConstant(0, DL, XLenVT),
7638 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7639}
7640
7641 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7642 MachineFunction &MF = DAG.getMachineFunction();
7643 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7644
7645 SDLoc DL(Op);
7646 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7647 getPointerTy(MF.getDataLayout()));
7648
7649 // vastart just stores the address of the VarArgsFrameIndex slot into the
7650 // memory location argument.
7651 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7652 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7653 MachinePointerInfo(SV));
7654}
7655
7656SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7657 SelectionDAG &DAG) const {
7658 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7659 MachineFunction &MF = DAG.getMachineFunction();
7660 MachineFrameInfo &MFI = MF.getFrameInfo();
7661 MFI.setFrameAddressIsTaken(true);
7662 Register FrameReg = RI.getFrameRegister(MF);
7663 int XLenInBytes = Subtarget.getXLen() / 8;
7664
7665 EVT VT = Op.getValueType();
7666 SDLoc DL(Op);
7667 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7668 unsigned Depth = Op.getConstantOperandVal(0);
7669 while (Depth--) {
7670 int Offset = -(XLenInBytes * 2);
7671 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7672 DAG.getIntPtrConstant(Offset, DL));
7673 FrameAddr =
7674 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7675 }
7676 return FrameAddr;
7677}
7678
7679SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7680 SelectionDAG &DAG) const {
7681 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7682 MachineFunction &MF = DAG.getMachineFunction();
7683 MachineFrameInfo &MFI = MF.getFrameInfo();
7684 MFI.setReturnAddressIsTaken(true);
7685 MVT XLenVT = Subtarget.getXLenVT();
7686 int XLenInBytes = Subtarget.getXLen() / 8;
7687
7688 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7689 return SDValue();
7690
7691 EVT VT = Op.getValueType();
7692 SDLoc DL(Op);
7693 unsigned Depth = Op.getConstantOperandVal(0);
7694 if (Depth) {
7695 int Off = -XLenInBytes;
7696 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7697 SDValue Offset = DAG.getConstant(Off, DL, VT);
7698 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7699 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7700 MachinePointerInfo());
7701 }
7702
7703 // Return the value of the return address register, marking it an implicit
7704 // live-in.
7705 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7706 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7707}
7708
7709SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7710 SelectionDAG &DAG) const {
7711 SDLoc DL(Op);
7712 SDValue Lo = Op.getOperand(0);
7713 SDValue Hi = Op.getOperand(1);
7714 SDValue Shamt = Op.getOperand(2);
7715 EVT VT = Lo.getValueType();
7716
7717 // if Shamt-XLEN < 0: // Shamt < XLEN
7718 // Lo = Lo << Shamt
7719 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7720 // else:
7721 // Lo = 0
7722 // Hi = Lo << (Shamt-XLEN)
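// e.g. on RV32 with Shamt = 40 this gives Lo = 0 and Hi = Lo << 8, i.e. the
// low word shifted entirely into the high word.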
7723
7724 SDValue Zero = DAG.getConstant(0, DL, VT);
7725 SDValue One = DAG.getConstant(1, DL, VT);
7726 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7727 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7728 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7729 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7730
7731 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7732 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7733 SDValue ShiftRightLo =
7734 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7735 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7736 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7737 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7738
7739 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7740
7741 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7742 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7743
7744 SDValue Parts[2] = {Lo, Hi};
7745 return DAG.getMergeValues(Parts, DL);
7746}
7747
7748SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7749 bool IsSRA) const {
7750 SDLoc DL(Op);
7751 SDValue Lo = Op.getOperand(0);
7752 SDValue Hi = Op.getOperand(1);
7753 SDValue Shamt = Op.getOperand(2);
7754 EVT VT = Lo.getValueType();
7755
7756 // SRA expansion:
7757 // if Shamt-XLEN < 0: // Shamt < XLEN
7758 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7759 // Hi = Hi >>s Shamt
7760 // else:
7761 // Lo = Hi >>s (Shamt-XLEN);
7762 // Hi = Hi >>s (XLEN-1)
7763 //
7764 // SRL expansion:
7765 // if Shamt-XLEN < 0: // Shamt < XLEN
7766 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7767 // Hi = Hi >>u Shamt
7768 // else:
7769 // Lo = Hi >>u (Shamt-XLEN);
7770 // Hi = 0;
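// e.g. for SRL on RV32: Shamt = 35 gives Lo = Hi >>u 3 and Hi = 0, while
// Shamt = 3 merges the three low bits of Hi into the top of Lo via
// (Hi << 1) << 28.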
7771
7772 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7773
7774 SDValue Zero = DAG.getConstant(0, DL, VT);
7775 SDValue One = DAG.getConstant(1, DL, VT);
7776 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7777 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7778 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7779 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7780
7781 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7782 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7783 SDValue ShiftLeftHi =
7784 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7785 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7786 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7787 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7788 SDValue HiFalse =
7789 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7790
7791 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7792
7793 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7794 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7795
7796 SDValue Parts[2] = {Lo, Hi};
7797 return DAG.getMergeValues(Parts, DL);
7798}
7799
7800// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7801// legal equivalently-sized i8 type, so we can use that as a go-between.
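// e.g. (splat i1 %x) -> (setcc (splat_vector (and %x, 1)), (splat_vector 0),
// ne) over the equivalently-sized i8 container type.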
7802SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7803 SelectionDAG &DAG) const {
7804 SDLoc DL(Op);
7805 MVT VT = Op.getSimpleValueType();
7806 SDValue SplatVal = Op.getOperand(0);
7807 // All-zeros or all-ones splats are handled specially.
7808 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7809 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7810 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7811 }
7812 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7813 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7814 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7815 }
7816 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7817 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7818 DAG.getConstant(1, DL, SplatVal.getValueType()));
7819 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7820 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7821 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7822}
7823
7824// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7825// illegal (currently only vXi64 RV32).
7826// FIXME: We could also catch non-constant sign-extended i32 values and lower
7827// them to VMV_V_X_VL.
7828SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7829 SelectionDAG &DAG) const {
7830 SDLoc DL(Op);
7831 MVT VecVT = Op.getSimpleValueType();
7832 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7833 "Unexpected SPLAT_VECTOR_PARTS lowering");
7834
7835 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7836 SDValue Lo = Op.getOperand(0);
7837 SDValue Hi = Op.getOperand(1);
7838
7839 MVT ContainerVT = VecVT;
7840 if (VecVT.isFixedLengthVector())
7841 ContainerVT = getContainerForFixedLengthVector(VecVT);
7842
7843 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7844
7845 SDValue Res =
7846 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7847
7848 if (VecVT.isFixedLengthVector())
7849 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7850
7851 return Res;
7852}
7853
7854// Custom-lower extensions from mask vectors by using a vselect either with 1
7855// for zero/any-extension or -1 for sign-extension:
7856// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7857// Note that any-extension is lowered identically to zero-extension.
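// e.g. (sext nxv4i1 %m to nxv4i32) -> (vselect %m, (splat -1), (splat 0)),
// which ultimately becomes a masked merge (vmerge) of the two splats.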
7858SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7859 int64_t ExtTrueVal) const {
7860 SDLoc DL(Op);
7861 MVT VecVT = Op.getSimpleValueType();
7862 SDValue Src = Op.getOperand(0);
7863 // Only custom-lower extensions from mask types
7864 assert(Src.getValueType().isVector() &&
7865 Src.getValueType().getVectorElementType() == MVT::i1);
7866
7867 if (VecVT.isScalableVector()) {
7868 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7869 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7870 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7871 }
7872
7873 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7874 MVT I1ContainerVT =
7875 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7876
7877 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7878
7879 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7880
7881 MVT XLenVT = Subtarget.getXLenVT();
7882 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7883 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7884
7885 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7886 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7887 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7888 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7889 SDValue Select =
7890 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7891 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7892
7893 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7894}
7895
7896SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7897 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7898 MVT ExtVT = Op.getSimpleValueType();
7899 // Only custom-lower extensions from fixed-length vector types.
7900 if (!ExtVT.isFixedLengthVector())
7901 return Op;
7902 MVT VT = Op.getOperand(0).getSimpleValueType();
7903 // Grab the canonical container type for the extended type. Infer the smaller
7904 // type from that to ensure the same number of vector elements, as we know
7905 // the LMUL will be sufficient to hold the smaller type.
7906 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7907 // Get the extended container type manually to ensure the same number of
7908 // vector elements between source and dest.
7909 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7910 ContainerExtVT.getVectorElementCount());
7911
7912 SDValue Op1 =
7913 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7914
7915 SDLoc DL(Op);
7916 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7917
7918 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7919
7920 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7921}
7922
7923// Custom-lower truncations from vectors to mask vectors by using a mask and a
7924// setcc operation:
7925// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7926SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7927 SelectionDAG &DAG) const {
7928 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7929 SDLoc DL(Op);
7930 EVT MaskVT = Op.getValueType();
7931 // Only expect to custom-lower truncations to mask types
7932 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7933 "Unexpected type for vector mask lowering");
7934 SDValue Src = Op.getOperand(0);
7935 MVT VecVT = Src.getSimpleValueType();
7936 SDValue Mask, VL;
7937 if (IsVPTrunc) {
7938 Mask = Op.getOperand(1);
7939 VL = Op.getOperand(2);
7940 }
7941 // If this is a fixed vector, we need to convert it to a scalable vector.
7942 MVT ContainerVT = VecVT;
7943
7944 if (VecVT.isFixedLengthVector()) {
7945 ContainerVT = getContainerForFixedLengthVector(VecVT);
7946 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7947 if (IsVPTrunc) {
7948 MVT MaskContainerVT =
7949 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7950 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7951 }
7952 }
7953
7954 if (!IsVPTrunc) {
7955 std::tie(Mask, VL) =
7956 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7957 }
7958
7959 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7960 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7961
7962 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7963 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7964 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7965 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7966
7967 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7968 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7969 DAG.getUNDEF(ContainerVT), Mask, VL);
7970 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7971 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7972 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7973 if (MaskVT.isFixedLengthVector())
7974 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7975 return Trunc;
7976}
7977
7978SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7979 SelectionDAG &DAG) const {
7980 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7981 SDLoc DL(Op);
7982
7983 MVT VT = Op.getSimpleValueType();
7984 // Only custom-lower vector truncates
7985 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7986
7987 // Truncates to mask types are handled differently
7988 if (VT.getVectorElementType() == MVT::i1)
7989 return lowerVectorMaskTruncLike(Op, DAG);
7990
7991 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7992 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7993 // truncate by one power of two at a time.
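// e.g. an i64 -> i8 element truncate is emitted as three narrowing steps:
// i64 -> i32 -> i16 -> i8.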
7994 MVT DstEltVT = VT.getVectorElementType();
7995
7996 SDValue Src = Op.getOperand(0);
7997 MVT SrcVT = Src.getSimpleValueType();
7998 MVT SrcEltVT = SrcVT.getVectorElementType();
7999
8000 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8001 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8002 "Unexpected vector truncate lowering");
8003
8004 MVT ContainerVT = SrcVT;
8005 SDValue Mask, VL;
8006 if (IsVPTrunc) {
8007 Mask = Op.getOperand(1);
8008 VL = Op.getOperand(2);
8009 }
8010 if (SrcVT.isFixedLengthVector()) {
8011 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8012 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8013 if (IsVPTrunc) {
8014 MVT MaskVT = getMaskTypeFor(ContainerVT);
8015 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8016 }
8017 }
8018
8019 SDValue Result = Src;
8020 if (!IsVPTrunc) {
8021 std::tie(Mask, VL) =
8022 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8023 }
8024
8025 LLVMContext &Context = *DAG.getContext();
8026 const ElementCount Count = ContainerVT.getVectorElementCount();
8027 do {
8028 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8029 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8030 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8031 Mask, VL);
8032 } while (SrcEltVT != DstEltVT);
8033
8034 if (SrcVT.isFixedLengthVector())
8035 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8036
8037 return Result;
8038}
8039
8040SDValue
8041RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8042 SelectionDAG &DAG) const {
8043 SDLoc DL(Op);
8044 SDValue Chain = Op.getOperand(0);
8045 SDValue Src = Op.getOperand(1);
8046 MVT VT = Op.getSimpleValueType();
8047 MVT SrcVT = Src.getSimpleValueType();
8048 MVT ContainerVT = VT;
8049 if (VT.isFixedLengthVector()) {
8050 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8051 ContainerVT =
8052 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8053 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8054 }
8055
8056 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8057
8058 // RVV can only widen/truncate fp to types double/half the size of the source.
8059 if ((VT.getVectorElementType() == MVT::f64 &&
8060 SrcVT.getVectorElementType() == MVT::f16) ||
8061 (VT.getVectorElementType() == MVT::f16 &&
8062 SrcVT.getVectorElementType() == MVT::f64)) {
8063 // For double rounding, the intermediate rounding should be round-to-odd.
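// Round-to-odd keeps a sticky low bit in the f32 intermediate, so the final
// f32->f16 rounding still yields the correctly rounded f64->f16 result.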
8064 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8065 ? RISCVISD::STRICT_FP_EXTEND_VL
8066 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8067 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8068 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8069 Chain, Src, Mask, VL);
8070 Chain = Src.getValue(1);
8071 }
8072
8073 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8074 ? RISCVISD::STRICT_FP_EXTEND_VL
8075 : RISCVISD::STRICT_FP_ROUND_VL;
8076 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8077 Chain, Src, Mask, VL);
8078 if (VT.isFixedLengthVector()) {
8079 // StrictFP operations have two result values (the value and the chain), so
8080 // their lowered form must return the same number of results.
8081 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8082 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8083 }
8084 return Res;
8085}
8086
8087SDValue
8088RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8089 SelectionDAG &DAG) const {
8090 bool IsVP =
8091 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8092 bool IsExtend =
8093 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8094 // RVV can only truncate fp to types half the size of the source. We
8095 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8096 // conversion instruction.
8097 SDLoc DL(Op);
8098 MVT VT = Op.getSimpleValueType();
8099
8100 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8101
8102 SDValue Src = Op.getOperand(0);
8103 MVT SrcVT = Src.getSimpleValueType();
8104
8105 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8106 SrcVT.getVectorElementType() != MVT::f16);
8107 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8108 SrcVT.getVectorElementType() != MVT::f64);
8109
8110 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8111
8112 // Prepare any fixed-length vector operands.
8113 MVT ContainerVT = VT;
8114 SDValue Mask, VL;
8115 if (IsVP) {
8116 Mask = Op.getOperand(1);
8117 VL = Op.getOperand(2);
8118 }
8119 if (VT.isFixedLengthVector()) {
8120 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8121 ContainerVT =
8122 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8123 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8124 if (IsVP) {
8125 MVT MaskVT = getMaskTypeFor(ContainerVT);
8126 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8127 }
8128 }
8129
8130 if (!IsVP)
8131 std::tie(Mask, VL) =
8132 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8133
8134 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8135
8136 if (IsDirectConv) {
8137 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8138 if (VT.isFixedLengthVector())
8139 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8140 return Src;
8141 }
8142
8143 unsigned InterConvOpc =
8144 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8145
8146 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8147 SDValue IntermediateConv =
8148 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8149 SDValue Result =
8150 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8151 if (VT.isFixedLengthVector())
8152 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8153 return Result;
8154}
8155
8156// Given a scalable vector type and an index into it, returns the type for the
8157// smallest subvector that the index fits in. This can be used to reduce LMUL
8158// for operations like vslidedown.
8159//
8160// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8161static std::optional<MVT>
8162getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8163 const RISCVSubtarget &Subtarget) {
8164 assert(VecVT.isScalableVector());
8165 const unsigned EltSize = VecVT.getScalarSizeInBits();
8166 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8167 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8168 MVT SmallerVT;
8169 if (MaxIdx < MinVLMAX)
8170 SmallerVT = getLMUL1VT(VecVT);
8171 else if (MaxIdx < MinVLMAX * 2)
8172 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8173 else if (MaxIdx < MinVLMAX * 4)
8174 SmallerVT = getLMUL1VT(VecVT)
8175 .getDoubleNumVectorElementsVT()
8176 .getDoubleNumVectorElementsVT();
8177 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8178 return std::nullopt;
8179 return SmallerVT;
8180}
8181
8182// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8183// first position of a vector, and that vector is slid up to the insert index.
8184// By limiting the active vector length to index+1 and merging with the
8185// original vector (with an undisturbed tail policy for elements >= VL), we
8186// achieve the desired result of leaving all elements untouched except the one
8187// at VL-1, which is replaced with the desired value.
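// e.g. inserting into element 2 of a v8i32: the value is placed in lane 0 of
// a temporary, which is vslideup'd by 2 with VL = 3 under a tail-undisturbed
// policy, so only lane 2 of the destination is written; lanes 0..1 sit below
// the slide offset and lanes 3..7 are beyond VL.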
8188SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8189 SelectionDAG &DAG) const {
8190 SDLoc DL(Op);
8191 MVT VecVT = Op.getSimpleValueType();
8192 SDValue Vec = Op.getOperand(0);
8193 SDValue Val = Op.getOperand(1);
8194 SDValue Idx = Op.getOperand(2);
8195
8196 if (VecVT.getVectorElementType() == MVT::i1) {
8197 // FIXME: For now we just promote to an i8 vector and insert into that,
8198 // but this is probably not optimal.
8199 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8200 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8201 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8202 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8203 }
8204
8205 MVT ContainerVT = VecVT;
8206 // If the operand is a fixed-length vector, convert to a scalable one.
8207 if (VecVT.isFixedLengthVector()) {
8208 ContainerVT = getContainerForFixedLengthVector(VecVT);
8209 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8210 }
8211
8212 // If we know the index we're going to insert at, we can shrink Vec so that
8213 // we're performing the scalar inserts and slideup on a smaller LMUL.
8214 MVT OrigContainerVT = ContainerVT;
8215 SDValue OrigVec = Vec;
8216 SDValue AlignedIdx;
8217 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8218 const unsigned OrigIdx = IdxC->getZExtValue();
8219 // Do we know an upper bound on LMUL?
8220 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8221 DL, DAG, Subtarget)) {
8222 ContainerVT = *ShrunkVT;
8223 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8224 }
8225
8226 // If we're compiling for an exact VLEN value, we can always perform
8227 // the insert in m1 as we can determine the register corresponding to
8228 // the index in the register group.
8229 const MVT M1VT = getLMUL1VT(ContainerVT);
8230 if (auto VLEN = Subtarget.getRealVLen();
8231 VLEN && ContainerVT.bitsGT(M1VT)) {
8232 EVT ElemVT = VecVT.getVectorElementType();
8233 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8234 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8235 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8236 unsigned ExtractIdx =
8237 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8238 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8239 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8240 ContainerVT = M1VT;
8241 }
8242
8243 if (AlignedIdx)
8244 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8245 AlignedIdx);
8246 }
8247
8248 MVT XLenVT = Subtarget.getXLenVT();
8249
8250 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8251 // Even i64-element vectors on RV32 can be lowered without scalar
8252 // legalization if the most-significant 32 bits of the value are not affected
8253 // by the sign-extension of the lower 32 bits.
8254 // TODO: We could also catch sign extensions of a 32-bit value.
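  // For example, inserting the constant -1 into a v2i64 on RV32 can simply
  // use the 32-bit value -1, since sign-extending it reproduces the full
  // 64-bit value; something like 0x1_00000000 does not survive that sign
  // extension and must take the two-instruction vslide1down path below.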
8255 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8256 const auto *CVal = cast<ConstantSDNode>(Val);
8257 if (isInt<32>(CVal->getSExtValue())) {
8258 IsLegalInsert = true;
8259 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8260 }
8261 }
8262
8263 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8264
8265 SDValue ValInVec;
8266
8267 if (IsLegalInsert) {
8268    unsigned Opc =
8269        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8270    if (isNullConstant(Idx)) {
8271 if (!VecVT.isFloatingPoint())
8272 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8273 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8274
8275 if (AlignedIdx)
8276 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8277 Vec, AlignedIdx);
8278 if (!VecVT.isFixedLengthVector())
8279 return Vec;
8280 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8281 }
8282 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8283 } else {
8284 // On RV32, i64-element vectors must be specially handled to place the
8285 // value at element 0, by using two vslide1down instructions in sequence on
8286 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8287 // this.
8288 SDValue ValLo, ValHi;
8289 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8290 MVT I32ContainerVT =
8291 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8292 SDValue I32Mask =
8293 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8294 // Limit the active VL to two.
8295 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8296 // If the Idx is 0 we can insert directly into the vector.
8297 if (isNullConstant(Idx)) {
8298      // First slide in the lo value, then the hi value above it. We use slide1down
8299 // to avoid the register group overlap constraint of vslide1up.
8300 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8301 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8302 // If the source vector is undef don't pass along the tail elements from
8303 // the previous slide1down.
8304 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8305 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8306 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8307 // Bitcast back to the right container type.
8308 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8309
8310 if (AlignedIdx)
8311 ValInVec =
8312 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8313 ValInVec, AlignedIdx);
8314 if (!VecVT.isFixedLengthVector())
8315 return ValInVec;
8316 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8317 }
8318
8319    // First slide in the lo value, then the hi value above it. We use slide1down
8320 // to avoid the register group overlap constraint of vslide1up.
8321 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8322 DAG.getUNDEF(I32ContainerVT),
8323 DAG.getUNDEF(I32ContainerVT), ValLo,
8324 I32Mask, InsertI64VL);
8325 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8326 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8327 I32Mask, InsertI64VL);
8328 // Bitcast back to the right container type.
8329 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8330 }
8331
8332 // Now that the value is in a vector, slide it into position.
8333 SDValue InsertVL =
8334 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8335
8336  // Use tail agnostic policy if Idx is the last index of Vec.
8337  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8338  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8339 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8340 Policy = RISCVII::TAIL_AGNOSTIC;
8341 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8342 Idx, Mask, InsertVL, Policy);
8343
8344 if (AlignedIdx)
8345 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8346 Slideup, AlignedIdx);
8347 if (!VecVT.isFixedLengthVector())
8348 return Slideup;
8349 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8350}
8351
8352// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8353// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8354// types this is done using VMV_X_S to allow us to glean information about the
8355// sign bits of the result.
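// As a rough sketch (register names are arbitrary), extracting element 2 of a
// v4i32 held in v8 is expected to become something like:
//
//   vsetivli      zero, 1, e32, m1, ta, ma
//   vslidedown.vi v9, v8, 2
//   vmv.x.s       a0, v9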
8356SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8357 SelectionDAG &DAG) const {
8358 SDLoc DL(Op);
8359 SDValue Idx = Op.getOperand(1);
8360 SDValue Vec = Op.getOperand(0);
8361 EVT EltVT = Op.getValueType();
8362 MVT VecVT = Vec.getSimpleValueType();
8363 MVT XLenVT = Subtarget.getXLenVT();
8364
8365 if (VecVT.getVectorElementType() == MVT::i1) {
8366 // Use vfirst.m to extract the first bit.
8367 if (isNullConstant(Idx)) {
8368 MVT ContainerVT = VecVT;
8369 if (VecVT.isFixedLengthVector()) {
8370 ContainerVT = getContainerForFixedLengthVector(VecVT);
8371 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8372 }
8373 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8374 SDValue Vfirst =
8375 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8376 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8377 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8378 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8379 }
8380 if (VecVT.isFixedLengthVector()) {
8381 unsigned NumElts = VecVT.getVectorNumElements();
8382 if (NumElts >= 8) {
8383 MVT WideEltVT;
8384 unsigned WidenVecLen;
8385 SDValue ExtractElementIdx;
8386 SDValue ExtractBitIdx;
8387 unsigned MaxEEW = Subtarget.getELen();
8388 MVT LargestEltVT = MVT::getIntegerVT(
8389 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8390 if (NumElts <= LargestEltVT.getSizeInBits()) {
8391 assert(isPowerOf2_32(NumElts) &&
8392 "the number of elements should be power of 2");
8393 WideEltVT = MVT::getIntegerVT(NumElts);
8394 WidenVecLen = 1;
8395 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8396 ExtractBitIdx = Idx;
8397 } else {
8398 WideEltVT = LargestEltVT;
8399 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8400 // extract element index = index / element width
8401 ExtractElementIdx = DAG.getNode(
8402 ISD::SRL, DL, XLenVT, Idx,
8403 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8404 // mask bit index = index % element width
8405 ExtractBitIdx = DAG.getNode(
8406 ISD::AND, DL, XLenVT, Idx,
8407 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8408 }
8409 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8410 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8411 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8412 Vec, ExtractElementIdx);
8413 // Extract the bit from GPR.
8414 SDValue ShiftRight =
8415 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8416 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8417 DAG.getConstant(1, DL, XLenVT));
8418 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8419 }
8420 }
8421 // Otherwise, promote to an i8 vector and extract from that.
8422 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8423 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8424 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8425 }
8426
8427 // If this is a fixed vector, we need to convert it to a scalable vector.
8428 MVT ContainerVT = VecVT;
8429 if (VecVT.isFixedLengthVector()) {
8430 ContainerVT = getContainerForFixedLengthVector(VecVT);
8431 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8432 }
8433
8434 // If we're compiling for an exact VLEN value and we have a known
8435 // constant index, we can always perform the extract in m1 (or
8436 // smaller) as we can determine the register corresponding to
8437 // the index in the register group.
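  // For example, with VLEN=128 an extract of element 5 from a v8i64 (an m4
  // register group) has 128/64 = 2 elements per register, so the element
  // lives in register 5/2 = 2 of the group at offset 5%2 = 1; extracting that
  // single m1 register and reading element 1 from it is sufficient.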
8438 const auto VLen = Subtarget.getRealVLen();
8439 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8440 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8441 MVT M1VT = getLMUL1VT(ContainerVT);
8442 unsigned OrigIdx = IdxC->getZExtValue();
8443 EVT ElemVT = VecVT.getVectorElementType();
8444 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8445 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8446 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8447 unsigned ExtractIdx =
8448 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8449 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8450 DAG.getVectorIdxConstant(ExtractIdx, DL));
8451 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8452 ContainerVT = M1VT;
8453 }
8454
8455 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8456 // contains our index.
8457 std::optional<uint64_t> MaxIdx;
8458 if (VecVT.isFixedLengthVector())
8459 MaxIdx = VecVT.getVectorNumElements() - 1;
8460 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8461 MaxIdx = IdxC->getZExtValue();
8462 if (MaxIdx) {
8463 if (auto SmallerVT =
8464 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8465 ContainerVT = *SmallerVT;
8466 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8467 DAG.getConstant(0, DL, XLenVT));
8468 }
8469 }
8470
8471 // If after narrowing, the required slide is still greater than LMUL2,
8472  // fall back to generic expansion and go through the stack. This is done
8473 // for a subtle reason: extracting *all* elements out of a vector is
8474 // widely expected to be linear in vector size, but because vslidedown
8475 // is linear in LMUL, performing N extracts using vslidedown becomes
8476 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8477 // seems to have the same problem (the store is linear in LMUL), but the
8478 // generic expansion *memoizes* the store, and thus for many extracts of
8479 // the same vector we end up with one store and a bunch of loads.
8480 // TODO: We don't have the same code for insert_vector_elt because we
8481 // have BUILD_VECTOR and handle the degenerate case there. Should we
8482 // consider adding an inverse BUILD_VECTOR node?
8483 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8484 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8485 return SDValue();
8486
8487 // If the index is 0, the vector is already in the right position.
8488 if (!isNullConstant(Idx)) {
8489 // Use a VL of 1 to avoid processing more elements than we need.
8490 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8491 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8492 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8493 }
8494
8495 if (!EltVT.isInteger()) {
8496 // Floating-point extracts are handled in TableGen.
8497 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8498 DAG.getVectorIdxConstant(0, DL));
8499 }
8500
8501 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8502 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8503}
8504
8505// Some RVV intrinsics may claim that they want an integer operand to be
8506// promoted or expanded.
8507static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8508                                           const RISCVSubtarget &Subtarget) {
8509 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8510 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8511 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8512 "Unexpected opcode");
8513
8514 if (!Subtarget.hasVInstructions())
8515 return SDValue();
8516
8517 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8518 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8519 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8520
8521 SDLoc DL(Op);
8522
8523  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8524      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8525 if (!II || !II->hasScalarOperand())
8526 return SDValue();
8527
8528 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8529 assert(SplatOp < Op.getNumOperands());
8530
8531 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8532 SDValue &ScalarOp = Operands[SplatOp];
8533 MVT OpVT = ScalarOp.getSimpleValueType();
8534 MVT XLenVT = Subtarget.getXLenVT();
8535
8536  // If this isn't a scalar, or its type is XLenVT, we're done.
8537 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8538 return SDValue();
8539
8540 // Simplest case is that the operand needs to be promoted to XLenVT.
8541 if (OpVT.bitsLT(XLenVT)) {
8542 // If the operand is a constant, sign extend to increase our chances
8543    // of being able to use a .vi instruction. ANY_EXTEND would become a
8544    // zero extend and the simm5 check in isel would fail.
8545 // FIXME: Should we ignore the upper bits in isel instead?
8546 unsigned ExtOpc =
8547 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8548 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8549 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8550 }
8551
8552 // Use the previous operand to get the vXi64 VT. The result might be a mask
8553 // VT for compares. Using the previous operand assumes that the previous
8554 // operand will never have a smaller element size than a scalar operand and
8555 // that a widening operation never uses SEW=64.
8556 // NOTE: If this fails the below assert, we can probably just find the
8557 // element count from any operand or result and use it to construct the VT.
8558 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8559 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8560
8561 // The more complex case is when the scalar is larger than XLenVT.
8562 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8563 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8564
8565 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8566 // instruction to sign-extend since SEW>XLEN.
8567 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8568 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8569 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8570 }
8571
8572 switch (IntNo) {
8573 case Intrinsic::riscv_vslide1up:
8574 case Intrinsic::riscv_vslide1down:
8575 case Intrinsic::riscv_vslide1up_mask:
8576 case Intrinsic::riscv_vslide1down_mask: {
8577 // We need to special case these when the scalar is larger than XLen.
8578 unsigned NumOps = Op.getNumOperands();
8579 bool IsMasked = NumOps == 7;
8580
8581 // Convert the vector source to the equivalent nxvXi32 vector.
8582 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8583 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8584 SDValue ScalarLo, ScalarHi;
8585 std::tie(ScalarLo, ScalarHi) =
8586 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8587
8588 // Double the VL since we halved SEW.
8589 SDValue AVL = getVLOperand(Op);
8590 SDValue I32VL;
8591
8592 // Optimize for constant AVL
8593 if (isa<ConstantSDNode>(AVL)) {
8594      const auto [MinVLMAX, MaxVLMAX] =
8595          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8596
8597 uint64_t AVLInt = AVL->getAsZExtVal();
8598 if (AVLInt <= MinVLMAX) {
8599 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8600 } else if (AVLInt >= 2 * MaxVLMAX) {
8601 // Just set vl to VLMAX in this situation
8602        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8603        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8604 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8605 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8606 SDValue SETVLMAX = DAG.getTargetConstant(
8607 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8608 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8609 LMUL);
8610 } else {
8611 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8612 // is related to the hardware implementation.
8613        // So let the code below handle it.
8614 }
8615 }
8616 if (!I32VL) {
8617      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8618      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8619 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8620 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8621 SDValue SETVL =
8622 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8623      // Use the vsetvli instruction to get the actually-used length, which is
8624      // related to the hardware implementation.
8625 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8626 SEW, LMUL);
8627 I32VL =
8628 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8629 }
8630
8631 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8632
8633 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8634 // instructions.
8635 SDValue Passthru;
8636 if (IsMasked)
8637 Passthru = DAG.getUNDEF(I32VT);
8638 else
8639 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8640
8641 if (IntNo == Intrinsic::riscv_vslide1up ||
8642 IntNo == Intrinsic::riscv_vslide1up_mask) {
8643 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8644 ScalarHi, I32Mask, I32VL);
8645 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8646 ScalarLo, I32Mask, I32VL);
8647 } else {
8648 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8649 ScalarLo, I32Mask, I32VL);
8650 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8651 ScalarHi, I32Mask, I32VL);
8652 }
8653
8654 // Convert back to nxvXi64.
8655 Vec = DAG.getBitcast(VT, Vec);
8656
8657 if (!IsMasked)
8658 return Vec;
8659 // Apply mask after the operation.
8660 SDValue Mask = Operands[NumOps - 3];
8661 SDValue MaskedOff = Operands[1];
8662 // Assume Policy operand is the last operand.
8663 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8664 // We don't need to select maskedoff if it's undef.
8665 if (MaskedOff.isUndef())
8666 return Vec;
8667 // TAMU
8668 if (Policy == RISCVII::TAIL_AGNOSTIC)
8669 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8670 DAG.getUNDEF(VT), AVL);
8671 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8672    // This is fine because vmerge does not care about the mask policy.
8673 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8674 MaskedOff, AVL);
8675 }
8676 }
8677
8678 // We need to convert the scalar to a splat vector.
8679 SDValue VL = getVLOperand(Op);
8680 assert(VL.getValueType() == XLenVT);
8681 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8682 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8683}
8684
8685// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8686// scalable vector llvm.get.vector.length for now.
8687//
8688// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8689// (vscale * VF). The vscale and VF are independent of element width. We use
8690// SEW=8 for the vsetvli because it is the only element width that supports all
8691// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8692// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8693// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8694// SEW and LMUL are better for the surrounding vector instructions.
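//
// Worked example (assuming RVVBitsPerBlock is 64): for a scalable VF of 2,
// LMul1VF = 64/8 = 8, so the VF is fractional and we emit a vsetvli with
// e8, mf4, giving VLMax = (VLEN/64) * 2 = vscale * 2 as required.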
8695static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8696                                    const RISCVSubtarget &Subtarget) {
8697 MVT XLenVT = Subtarget.getXLenVT();
8698
8699 // The smallest LMUL is only valid for the smallest element width.
8700 const unsigned ElementWidth = 8;
8701
8702 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8703 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8704 // We don't support VF==1 with ELEN==32.
8705 [[maybe_unused]] unsigned MinVF =
8706 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8707
8708 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8709 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8710 "Unexpected VF");
8711
8712 bool Fractional = VF < LMul1VF;
8713 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8714 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8715 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8716
8717 SDLoc DL(N);
8718
8719 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8720 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8721
8722 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8723
8724 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8725 SDValue Res =
8726 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8727 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8728}
8729
8730static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8731                             const RISCVSubtarget &Subtarget) {
8732 SDValue Op0 = N->getOperand(1);
8733 MVT OpVT = Op0.getSimpleValueType();
8734 MVT ContainerVT = OpVT;
8735 if (OpVT.isFixedLengthVector()) {
8736 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8737 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8738 }
8739 MVT XLenVT = Subtarget.getXLenVT();
8740 SDLoc DL(N);
8741 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8742 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8743 if (isOneConstant(N->getOperand(2)))
8744 return Res;
8745
8746 // Convert -1 to VL.
8747 SDValue Setcc =
8748 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8749 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8750 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8751}
8752
8753static inline void promoteVCIXScalar(const SDValue &Op,
8754                                     SmallVectorImpl<SDValue> &Operands,
8755                                     SelectionDAG &DAG) {
8756  const RISCVSubtarget &Subtarget =
8757      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8758
8759 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8760 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8761 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8762 SDLoc DL(Op);
8763
8764  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8765      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8766 if (!II || !II->hasScalarOperand())
8767 return;
8768
8769 unsigned SplatOp = II->ScalarOperand + 1;
8770 assert(SplatOp < Op.getNumOperands());
8771
8772 SDValue &ScalarOp = Operands[SplatOp];
8773 MVT OpVT = ScalarOp.getSimpleValueType();
8774 MVT XLenVT = Subtarget.getXLenVT();
8775
8776 // The code below is partially copied from lowerVectorIntrinsicScalars.
8777 // If this isn't a scalar, or its type is XLenVT we're done.
8778 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8779 return;
8780
8781 // Manually emit promote operation for scalar operation.
8782 if (OpVT.bitsLT(XLenVT)) {
8783 unsigned ExtOpc =
8784 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8785 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8786 }
8787
8788 return;
8789}
8790
8791static void processVCIXOperands(SDValue &OrigOp,
8792                                SmallVectorImpl<SDValue> &Operands,
8793                                SelectionDAG &DAG) {
8794  promoteVCIXScalar(OrigOp, Operands, DAG);
8795  const RISCVSubtarget &Subtarget =
8796      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8797  for (SDValue &V : Operands) {
8798 EVT ValType = V.getValueType();
8799 if (ValType.isVector() && ValType.isFloatingPoint()) {
8800 MVT InterimIVT =
8801 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8802 ValType.getVectorElementCount());
8803 V = DAG.getBitcast(InterimIVT, V);
8804 }
8805 if (ValType.isFixedLengthVector()) {
8806 MVT OpContainerVT = getContainerForFixedLengthVector(
8807 DAG, V.getSimpleValueType(), Subtarget);
8808 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8809 }
8810 }
8811}
8812
8813// LMUL * VLEN should be greater than or equal to EGS * SEW
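// For example, an nxv4i32 operand spans LMUL=2 registers, so with a minimum
// VLEN of 64 its EGW is 2 * 64 = 128 bits: enough for EGS=4 at SEW=32
// (4 * 32 = 128), but not for EGS=8 at SEW=32.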
8814static inline bool isValidEGW(int EGS, EVT VT,
8815 const RISCVSubtarget &Subtarget) {
8816  return (Subtarget.getRealMinVLen() *
8817             VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8818         EGS * VT.getScalarSizeInBits();
8819}
8820
8821SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8822 SelectionDAG &DAG) const {
8823 unsigned IntNo = Op.getConstantOperandVal(0);
8824 SDLoc DL(Op);
8825 MVT XLenVT = Subtarget.getXLenVT();
8826
8827 switch (IntNo) {
8828 default:
8829 break; // Don't custom lower most intrinsics.
8830 case Intrinsic::thread_pointer: {
8831 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8832 return DAG.getRegister(RISCV::X4, PtrVT);
8833 }
8834 case Intrinsic::riscv_orc_b:
8835 case Intrinsic::riscv_brev8:
8836 case Intrinsic::riscv_sha256sig0:
8837 case Intrinsic::riscv_sha256sig1:
8838 case Intrinsic::riscv_sha256sum0:
8839 case Intrinsic::riscv_sha256sum1:
8840 case Intrinsic::riscv_sm3p0:
8841 case Intrinsic::riscv_sm3p1: {
8842 unsigned Opc;
8843 switch (IntNo) {
8844 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8845 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8846 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8847 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8848 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8849 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8850 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8851 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8852 }
8853
8854 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8855 SDValue NewOp =
8856 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8857 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8858 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8859 }
8860
8861 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8862 }
8863 case Intrinsic::riscv_sm4ks:
8864 case Intrinsic::riscv_sm4ed: {
8865 unsigned Opc =
8866 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8867
8868 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8869 SDValue NewOp0 =
8870 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8871 SDValue NewOp1 =
8872 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8873 SDValue Res =
8874 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8875 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8876 }
8877
8878 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8879 Op.getOperand(3));
8880 }
8881 case Intrinsic::riscv_zip:
8882 case Intrinsic::riscv_unzip: {
8883 unsigned Opc =
8884 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8885 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8886 }
8887 case Intrinsic::riscv_mopr: {
8888 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8889 SDValue NewOp =
8890 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8891 SDValue Res = DAG.getNode(
8892 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8893 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8894 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8895 }
8896 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8897 Op.getOperand(2));
8898 }
8899
8900 case Intrinsic::riscv_moprr: {
8901 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8902 SDValue NewOp0 =
8903 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8904 SDValue NewOp1 =
8905 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8906 SDValue Res = DAG.getNode(
8907 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8908 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8909 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8910 }
8911 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8912 Op.getOperand(2), Op.getOperand(3));
8913 }
8914 case Intrinsic::riscv_clmul:
8915 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8916 SDValue NewOp0 =
8917 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8918 SDValue NewOp1 =
8919 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8920 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8921 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8922 }
8923 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8924 Op.getOperand(2));
8925 case Intrinsic::riscv_clmulh:
8926 case Intrinsic::riscv_clmulr: {
8927 unsigned Opc =
8928 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8929 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8930 SDValue NewOp0 =
8931 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8932 SDValue NewOp1 =
8933 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8934 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8935 DAG.getConstant(32, DL, MVT::i64));
8936 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8937 DAG.getConstant(32, DL, MVT::i64));
8938 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8939 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8940 DAG.getConstant(32, DL, MVT::i64));
8941 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8942 }
8943
8944 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8945 }
8946 case Intrinsic::experimental_get_vector_length:
8947 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8948 case Intrinsic::experimental_cttz_elts:
8949 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8950 case Intrinsic::riscv_vmv_x_s: {
8951 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8952 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8953 }
8954 case Intrinsic::riscv_vfmv_f_s:
8955 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8956 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
8957 case Intrinsic::riscv_vmv_v_x:
8958 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8959 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8960 Subtarget);
8961 case Intrinsic::riscv_vfmv_v_f:
8962 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8963 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8964 case Intrinsic::riscv_vmv_s_x: {
8965 SDValue Scalar = Op.getOperand(2);
8966
8967 if (Scalar.getValueType().bitsLE(XLenVT)) {
8968 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8969 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8970 Op.getOperand(1), Scalar, Op.getOperand(3));
8971 }
8972
8973 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8974
8975 // This is an i64 value that lives in two scalar registers. We have to
8976    // insert this in a convoluted way. First we build a vXi64 splat containing
8977 // the two values that we assemble using some bit math. Next we'll use
8978 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8979 // to merge element 0 from our splat into the source vector.
8980 // FIXME: This is probably not the best way to do this, but it is
8981 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8982 // point.
8983 // sw lo, (a0)
8984 // sw hi, 4(a0)
8985 // vlse vX, (a0)
8986 //
8987 // vid.v vVid
8988 // vmseq.vx mMask, vVid, 0
8989 // vmerge.vvm vDest, vSrc, vVal, mMask
8990 MVT VT = Op.getSimpleValueType();
8991 SDValue Vec = Op.getOperand(1);
8992 SDValue VL = getVLOperand(Op);
8993
8994 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8995 if (Op.getOperand(1).isUndef())
8996 return SplattedVal;
8997 SDValue SplattedIdx =
8998 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8999 DAG.getConstant(0, DL, MVT::i32), VL);
9000
9001 MVT MaskVT = getMaskTypeFor(VT);
9002 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9003 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9004 SDValue SelectCond =
9005 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9006 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9007 DAG.getUNDEF(MaskVT), Mask, VL});
9008 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9009 Vec, DAG.getUNDEF(VT), VL);
9010 }
9011 case Intrinsic::riscv_vfmv_s_f:
9012 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9013 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9014 // EGS * EEW >= 128 bits
9015 case Intrinsic::riscv_vaesdf_vv:
9016 case Intrinsic::riscv_vaesdf_vs:
9017 case Intrinsic::riscv_vaesdm_vv:
9018 case Intrinsic::riscv_vaesdm_vs:
9019 case Intrinsic::riscv_vaesef_vv:
9020 case Intrinsic::riscv_vaesef_vs:
9021 case Intrinsic::riscv_vaesem_vv:
9022 case Intrinsic::riscv_vaesem_vs:
9023 case Intrinsic::riscv_vaeskf1:
9024 case Intrinsic::riscv_vaeskf2:
9025 case Intrinsic::riscv_vaesz_vs:
9026 case Intrinsic::riscv_vsm4k:
9027 case Intrinsic::riscv_vsm4r_vv:
9028 case Intrinsic::riscv_vsm4r_vs: {
9029 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9030 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9031 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9032 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9033 return Op;
9034 }
9035 // EGS * EEW >= 256 bits
9036 case Intrinsic::riscv_vsm3c:
9037 case Intrinsic::riscv_vsm3me: {
9038 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9039 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9040 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9041 return Op;
9042 }
9043 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9044 case Intrinsic::riscv_vsha2ch:
9045 case Intrinsic::riscv_vsha2cl:
9046 case Intrinsic::riscv_vsha2ms: {
9047 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9048 !Subtarget.hasStdExtZvknhb())
9049 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9050 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9051 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9052 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9053 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9054 return Op;
9055 }
9056 case Intrinsic::riscv_sf_vc_v_x:
9057 case Intrinsic::riscv_sf_vc_v_i:
9058 case Intrinsic::riscv_sf_vc_v_xv:
9059 case Intrinsic::riscv_sf_vc_v_iv:
9060 case Intrinsic::riscv_sf_vc_v_vv:
9061 case Intrinsic::riscv_sf_vc_v_fv:
9062 case Intrinsic::riscv_sf_vc_v_xvv:
9063 case Intrinsic::riscv_sf_vc_v_ivv:
9064 case Intrinsic::riscv_sf_vc_v_vvv:
9065 case Intrinsic::riscv_sf_vc_v_fvv:
9066 case Intrinsic::riscv_sf_vc_v_xvw:
9067 case Intrinsic::riscv_sf_vc_v_ivw:
9068 case Intrinsic::riscv_sf_vc_v_vvw:
9069 case Intrinsic::riscv_sf_vc_v_fvw: {
9070 MVT VT = Op.getSimpleValueType();
9071
9072 SmallVector<SDValue> Operands{Op->op_values()};
9073    processVCIXOperands(Op, Operands, DAG);
9074
9075    MVT RetVT = VT;
9076    if (VT.isFixedLengthVector())
9077      RetVT = getContainerForFixedLengthVector(VT);
9078    else if (VT.isFloatingPoint())
9079      RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9080                               VT.getVectorElementCount());
9081
9082 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9083
9084 if (VT.isFixedLengthVector())
9085 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9086 else if (VT.isFloatingPoint())
9087 NewNode = DAG.getBitcast(VT, NewNode);
9088
9089 if (Op == NewNode)
9090 break;
9091
9092 return NewNode;
9093 }
9094 }
9095
9096 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9097}
9098
9099static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9100                                    unsigned Type) {
9101 SDLoc DL(Op);
9102 SmallVector<SDValue> Operands{Op->op_values()};
9103 Operands.erase(Operands.begin() + 1);
9104
9105 const RISCVSubtarget &Subtarget =
9106      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9107  MVT VT = Op.getSimpleValueType();
9108 MVT RetVT = VT;
9109 MVT FloatVT = VT;
9110
9111 if (VT.isFloatingPoint()) {
9112    RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9113                             VT.getVectorElementCount());
9114    FloatVT = RetVT;
9115  }
9116  if (VT.isFixedLengthVector())
9117    RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
9118                                             Subtarget);
9119
9120  processVCIXOperands(Op, Operands, DAG);
9121
9122 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9123 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9124 SDValue Chain = NewNode.getValue(1);
9125
9126 if (VT.isFixedLengthVector())
9127 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9128 if (VT.isFloatingPoint())
9129 NewNode = DAG.getBitcast(VT, NewNode);
9130
9131 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9132
9133 return NewNode;
9134}
9135
9136static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9137                                  unsigned Type) {
9138  SmallVector<SDValue> Operands{Op->op_values()};
9139  Operands.erase(Operands.begin() + 1);
9140  processVCIXOperands(Op, Operands, DAG);
9141
9142 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9143}
9144
9145SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9146 SelectionDAG &DAG) const {
9147 unsigned IntNo = Op.getConstantOperandVal(1);
9148 switch (IntNo) {
9149 default:
9150 break;
9151 case Intrinsic::riscv_masked_strided_load: {
9152 SDLoc DL(Op);
9153 MVT XLenVT = Subtarget.getXLenVT();
9154
9155 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9156 // the selection of the masked intrinsics doesn't do this for us.
9157 SDValue Mask = Op.getOperand(5);
9158 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9159
9160 MVT VT = Op->getSimpleValueType(0);
9161 MVT ContainerVT = VT;
9162 if (VT.isFixedLengthVector())
9163 ContainerVT = getContainerForFixedLengthVector(VT);
9164
9165 SDValue PassThru = Op.getOperand(2);
9166 if (!IsUnmasked) {
9167 MVT MaskVT = getMaskTypeFor(ContainerVT);
9168 if (VT.isFixedLengthVector()) {
9169 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9170 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9171 }
9172 }
9173
9174 auto *Load = cast<MemIntrinsicSDNode>(Op);
9175 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9176 SDValue Ptr = Op.getOperand(3);
9177 SDValue Stride = Op.getOperand(4);
9178 SDValue Result, Chain;
9179
9180 // TODO: We restrict this to unmasked loads currently in consideration of
9181    // the complexity of handling all-false masks.
9182 MVT ScalarVT = ContainerVT.getVectorElementType();
9183 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9184 SDValue ScalarLoad =
9185 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9186 ScalarVT, Load->getMemOperand());
9187 Chain = ScalarLoad.getValue(1);
9188 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9189 Subtarget);
9190 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9191 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9192 Load->getMemOperand());
9193 Chain = ScalarLoad.getValue(1);
9194 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9195 } else {
9196 SDValue IntID = DAG.getTargetConstant(
9197 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9198 XLenVT);
9199
9200 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9201 if (IsUnmasked)
9202 Ops.push_back(DAG.getUNDEF(ContainerVT));
9203 else
9204 Ops.push_back(PassThru);
9205 Ops.push_back(Ptr);
9206 Ops.push_back(Stride);
9207 if (!IsUnmasked)
9208 Ops.push_back(Mask);
9209 Ops.push_back(VL);
9210 if (!IsUnmasked) {
9211 SDValue Policy =
9212          DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9213      Ops.push_back(Policy);
9214 }
9215
9216 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9217 Result =
9218        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9219                                Load->getMemoryVT(), Load->getMemOperand());
9220 Chain = Result.getValue(1);
9221 }
9222 if (VT.isFixedLengthVector())
9223 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9224 return DAG.getMergeValues({Result, Chain}, DL);
9225 }
9226 case Intrinsic::riscv_seg2_load:
9227 case Intrinsic::riscv_seg3_load:
9228 case Intrinsic::riscv_seg4_load:
9229 case Intrinsic::riscv_seg5_load:
9230 case Intrinsic::riscv_seg6_load:
9231 case Intrinsic::riscv_seg7_load:
9232 case Intrinsic::riscv_seg8_load: {
9233 SDLoc DL(Op);
9234 static const Intrinsic::ID VlsegInts[7] = {
9235 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9236 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9237 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9238 Intrinsic::riscv_vlseg8};
9239 unsigned NF = Op->getNumValues() - 1;
9240 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9241 MVT XLenVT = Subtarget.getXLenVT();
9242 MVT VT = Op->getSimpleValueType(0);
9243 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9244
9245 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9246 Subtarget);
9247 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9248 auto *Load = cast<MemIntrinsicSDNode>(Op);
9249 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9250 ContainerVTs.push_back(MVT::Other);
9251 SDVTList VTs = DAG.getVTList(ContainerVTs);
9252 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9253 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9254 Ops.push_back(Op.getOperand(2));
9255 Ops.push_back(VL);
9256 SDValue Result =
9257        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9258                                Load->getMemoryVT(), Load->getMemOperand());
9259    SmallVector<SDValue, 9> Results;
9260    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9261 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9262 DAG, Subtarget));
9263 Results.push_back(Result.getValue(NF));
9264 return DAG.getMergeValues(Results, DL);
9265 }
9266  case Intrinsic::riscv_sf_vc_v_x_se:
9267    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9268  case Intrinsic::riscv_sf_vc_v_i_se:
9269    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9270  case Intrinsic::riscv_sf_vc_v_xv_se:
9271    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9272  case Intrinsic::riscv_sf_vc_v_iv_se:
9273    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9274  case Intrinsic::riscv_sf_vc_v_vv_se:
9275    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9276  case Intrinsic::riscv_sf_vc_v_fv_se:
9277    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9278  case Intrinsic::riscv_sf_vc_v_xvv_se:
9279    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9280  case Intrinsic::riscv_sf_vc_v_ivv_se:
9281    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9282  case Intrinsic::riscv_sf_vc_v_vvv_se:
9283    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9284  case Intrinsic::riscv_sf_vc_v_fvv_se:
9285    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9286  case Intrinsic::riscv_sf_vc_v_xvw_se:
9287    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9288  case Intrinsic::riscv_sf_vc_v_ivw_se:
9289    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9290  case Intrinsic::riscv_sf_vc_v_vvw_se:
9291    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9292  case Intrinsic::riscv_sf_vc_v_fvw_se:
9293    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9294  }
9295
9296 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9297}
9298
9299SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9300 SelectionDAG &DAG) const {
9301 unsigned IntNo = Op.getConstantOperandVal(1);
9302 switch (IntNo) {
9303 default:
9304 break;
9305 case Intrinsic::riscv_masked_strided_store: {
9306 SDLoc DL(Op);
9307 MVT XLenVT = Subtarget.getXLenVT();
9308
9309 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9310 // the selection of the masked intrinsics doesn't do this for us.
9311 SDValue Mask = Op.getOperand(5);
9312 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9313
9314 SDValue Val = Op.getOperand(2);
9315 MVT VT = Val.getSimpleValueType();
9316 MVT ContainerVT = VT;
9317 if (VT.isFixedLengthVector()) {
9318 ContainerVT = getContainerForFixedLengthVector(VT);
9319 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9320 }
9321 if (!IsUnmasked) {
9322 MVT MaskVT = getMaskTypeFor(ContainerVT);
9323 if (VT.isFixedLengthVector())
9324 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9325 }
9326
9327 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9328
9329 SDValue IntID = DAG.getTargetConstant(
9330 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9331 XLenVT);
9332
9333 auto *Store = cast<MemIntrinsicSDNode>(Op);
9334 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9335 Ops.push_back(Val);
9336 Ops.push_back(Op.getOperand(3)); // Ptr
9337 Ops.push_back(Op.getOperand(4)); // Stride
9338 if (!IsUnmasked)
9339 Ops.push_back(Mask);
9340 Ops.push_back(VL);
9341
9342 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9343 Ops, Store->getMemoryVT(),
9344 Store->getMemOperand());
9345 }
9346 case Intrinsic::riscv_seg2_store:
9347 case Intrinsic::riscv_seg3_store:
9348 case Intrinsic::riscv_seg4_store:
9349 case Intrinsic::riscv_seg5_store:
9350 case Intrinsic::riscv_seg6_store:
9351 case Intrinsic::riscv_seg7_store:
9352 case Intrinsic::riscv_seg8_store: {
9353 SDLoc DL(Op);
9354 static const Intrinsic::ID VssegInts[] = {
9355 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9356 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9357 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9358 Intrinsic::riscv_vsseg8};
9359 // Operands are (chain, int_id, vec*, ptr, vl)
9360 unsigned NF = Op->getNumOperands() - 4;
9361 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9362 MVT XLenVT = Subtarget.getXLenVT();
9363 MVT VT = Op->getOperand(2).getSimpleValueType();
9364 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9365
9366 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9367 Subtarget);
9368 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9369 SDValue Ptr = Op->getOperand(NF + 2);
9370
9371 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9372 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9373 for (unsigned i = 0; i < NF; i++)
9374      Ops.push_back(convertToScalableVector(
9375          ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9376 Ops.append({Ptr, VL});
9377
9378 return DAG.getMemIntrinsicNode(
9379 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9380 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9381 }
9382  case Intrinsic::riscv_sf_vc_xv_se:
9383    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9384  case Intrinsic::riscv_sf_vc_iv_se:
9385    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9386  case Intrinsic::riscv_sf_vc_vv_se:
9387    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9388  case Intrinsic::riscv_sf_vc_fv_se:
9389    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9390  case Intrinsic::riscv_sf_vc_xvv_se:
9391    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9392  case Intrinsic::riscv_sf_vc_ivv_se:
9393    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9394  case Intrinsic::riscv_sf_vc_vvv_se:
9395    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9396  case Intrinsic::riscv_sf_vc_fvv_se:
9397    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9398  case Intrinsic::riscv_sf_vc_xvw_se:
9399    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9400  case Intrinsic::riscv_sf_vc_ivw_se:
9401    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9402  case Intrinsic::riscv_sf_vc_vvw_se:
9403    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9404  case Intrinsic::riscv_sf_vc_fvw_se:
9405    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9406  }
9407
9408 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9409}
9410
9411static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9412 switch (ISDOpcode) {
9413 default:
9414 llvm_unreachable("Unhandled reduction");
9415 case ISD::VP_REDUCE_ADD:
9416  case ISD::VECREDUCE_ADD:
9417    return RISCVISD::VECREDUCE_ADD_VL;
9418  case ISD::VP_REDUCE_UMAX:
9419  case ISD::VECREDUCE_UMAX:
9420    return RISCVISD::VECREDUCE_UMAX_VL;
9421  case ISD::VP_REDUCE_SMAX:
9422  case ISD::VECREDUCE_SMAX:
9423    return RISCVISD::VECREDUCE_SMAX_VL;
9424  case ISD::VP_REDUCE_UMIN:
9425  case ISD::VECREDUCE_UMIN:
9426    return RISCVISD::VECREDUCE_UMIN_VL;
9427  case ISD::VP_REDUCE_SMIN:
9428  case ISD::VECREDUCE_SMIN:
9429    return RISCVISD::VECREDUCE_SMIN_VL;
9430  case ISD::VP_REDUCE_AND:
9431  case ISD::VECREDUCE_AND:
9432    return RISCVISD::VECREDUCE_AND_VL;
9433  case ISD::VP_REDUCE_OR:
9434  case ISD::VECREDUCE_OR:
9435    return RISCVISD::VECREDUCE_OR_VL;
9436  case ISD::VP_REDUCE_XOR:
9437  case ISD::VECREDUCE_XOR:
9438    return RISCVISD::VECREDUCE_XOR_VL;
9439  case ISD::VP_REDUCE_FADD:
9440    return RISCVISD::VECREDUCE_FADD_VL;
9441  case ISD::VP_REDUCE_SEQ_FADD:
9442    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9443  case ISD::VP_REDUCE_FMAX:
9444    return RISCVISD::VECREDUCE_FMAX_VL;
9445  case ISD::VP_REDUCE_FMIN:
9446    return RISCVISD::VECREDUCE_FMIN_VL;
9447 }
9448
9449}
9450
9451SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9452 SelectionDAG &DAG,
9453 bool IsVP) const {
9454 SDLoc DL(Op);
9455 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9456 MVT VecVT = Vec.getSimpleValueType();
9457 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9458 Op.getOpcode() == ISD::VECREDUCE_OR ||
9459 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9460 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9461 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9462 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9463 "Unexpected reduction lowering");
9464
9465 MVT XLenVT = Subtarget.getXLenVT();
9466
9467 MVT ContainerVT = VecVT;
9468 if (VecVT.isFixedLengthVector()) {
9469 ContainerVT = getContainerForFixedLengthVector(VecVT);
9470 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9471 }
9472
9473 SDValue Mask, VL;
9474 if (IsVP) {
9475 Mask = Op.getOperand(2);
9476 VL = Op.getOperand(3);
9477 } else {
9478 std::tie(Mask, VL) =
9479 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9480 }
9481
9482 unsigned BaseOpc;
9483  ISD::CondCode CC;
9484  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9485
9486 switch (Op.getOpcode()) {
9487 default:
9488 llvm_unreachable("Unhandled reduction");
9489 case ISD::VECREDUCE_AND:
9490 case ISD::VP_REDUCE_AND: {
9491 // vcpop ~x == 0
9492 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9493 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9494 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9495 CC = ISD::SETEQ;
9496 BaseOpc = ISD::AND;
9497 break;
9498 }
9499 case ISD::VECREDUCE_OR:
9500 case ISD::VP_REDUCE_OR:
9501 // vcpop x != 0
9502 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9503 CC = ISD::SETNE;
9504 BaseOpc = ISD::OR;
9505 break;
9506 case ISD::VECREDUCE_XOR:
9507 case ISD::VP_REDUCE_XOR: {
9508 // ((vcpop x) & 1) != 0
9509 SDValue One = DAG.getConstant(1, DL, XLenVT);
9510 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9511 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9512 CC = ISD::SETNE;
9513 BaseOpc = ISD::XOR;
9514 break;
9515 }
9516 }
9517
9518 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9519 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9520
9521 if (!IsVP)
9522 return SetCC;
9523
9524 // Now include the start value in the operation.
9525 // Note that we must return the start value when no elements are operated
9526 // upon. The vcpop instructions we've emitted in each case above will return
9527 // 0 for an inactive vector, and so we've already received the neutral value:
9528 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9529 // can simply include the start value.
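  // As a sketch (register choices are arbitrary), a vp.reduce.or over an i1
  // vector therefore ends up as roughly:
  //   vcpop.m a1, v8, v0.t   # count the active true bits
  //   snez    a1, a1         # (vcpop x) != 0
  //   or      a0, a0, a1     # fold in the start value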
9530 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9531}
9532
9533static bool isNonZeroAVL(SDValue AVL) {
9534 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9535 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9536 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9537 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9538}
9539
9540/// Helper to lower a reduction sequence of the form:
9541/// scalar = reduce_op vec, scalar_start
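/// For instance, a plain vecreduce.add over an i32 vector is expected to come
/// out as roughly: a vmv.s.x of the start value into an LMUL1 temporary, a
/// vredsum.vs of the source vector against that temporary, and a final
/// vmv.x.s to move the scalar result back into a GPR.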
9542static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9543 SDValue StartValue, SDValue Vec, SDValue Mask,
9544 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9545 const RISCVSubtarget &Subtarget) {
9546 const MVT VecVT = Vec.getSimpleValueType();
9547 const MVT M1VT = getLMUL1VT(VecVT);
9548 const MVT XLenVT = Subtarget.getXLenVT();
9549 const bool NonZeroAVL = isNonZeroAVL(VL);
9550
9551 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9552 // or the original VT if fractional.
9553 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9554 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9555 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9556 // be the result of the reduction operation.
9557 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9558 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9559 DAG, Subtarget);
9560 if (M1VT != InnerVT)
9561 InitialValue =
9562 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9563 InitialValue, DAG.getVectorIdxConstant(0, DL));
9564 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9565 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9566 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9567 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9568 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9569 DAG.getVectorIdxConstant(0, DL));
9570}
9571
9572SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9573 SelectionDAG &DAG) const {
9574 SDLoc DL(Op);
9575 SDValue Vec = Op.getOperand(0);
9576 EVT VecEVT = Vec.getValueType();
9577
9578 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9579
9580 // Due to ordering in legalize types we may have a vector type that needs to
9581 // be split. Do that manually so we can get down to a legal type.
9582 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9583         TargetLowering::TypeSplitVector) {
9584    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9585 VecEVT = Lo.getValueType();
9586 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9587 }
9588
9589 // TODO: The type may need to be widened rather than split. Or widened before
9590 // it can be split.
9591 if (!isTypeLegal(VecEVT))
9592 return SDValue();
9593
9594 MVT VecVT = VecEVT.getSimpleVT();
9595 MVT VecEltVT = VecVT.getVectorElementType();
9596 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9597
9598 MVT ContainerVT = VecVT;
9599 if (VecVT.isFixedLengthVector()) {
9600 ContainerVT = getContainerForFixedLengthVector(VecVT);
9601 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9602 }
9603
9604 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9605
9606 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9607 switch (BaseOpc) {
9608 case ISD::AND:
9609 case ISD::OR:
9610 case ISD::UMAX:
9611 case ISD::UMIN:
9612 case ISD::SMAX:
9613 case ISD::SMIN:
9614 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9615 DAG.getVectorIdxConstant(0, DL));
9616 }
9617 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9618 Mask, VL, DL, DAG, Subtarget);
9619}
9620
9621// Given a reduction op, this function returns the matching reduction opcode,
9622// the vector SDValue and the scalar SDValue required to lower this to a
9623// RISCVISD node.
9624static std::tuple<unsigned, SDValue, SDValue>
9625getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9626                               const RISCVSubtarget &Subtarget) {
9627 SDLoc DL(Op);
9628 auto Flags = Op->getFlags();
9629 unsigned Opcode = Op.getOpcode();
9630 switch (Opcode) {
9631 default:
9632 llvm_unreachable("Unhandled reduction");
9633 case ISD::VECREDUCE_FADD: {
9634 // Use positive zero if we can. It is cheaper to materialize.
9635 SDValue Zero =
9636 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9637 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9638 }
9639  case ISD::VECREDUCE_SEQ_FADD:
9640    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9641                           Op.getOperand(0));
9642  case ISD::VECREDUCE_FMINIMUM:
9643  case ISD::VECREDUCE_FMAXIMUM:
9644  case ISD::VECREDUCE_FMIN:
9645  case ISD::VECREDUCE_FMAX: {
9646    SDValue Front =
9647        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9648                    DAG.getVectorIdxConstant(0, DL));
9649    unsigned RVVOpc =
9650        (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9651            ? RISCVISD::VECREDUCE_FMIN_VL
9652            : RISCVISD::VECREDUCE_FMAX_VL;
9653    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9654 }
9655 }
9656}
9657
9658SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9659 SelectionDAG &DAG) const {
9660 SDLoc DL(Op);
9661 MVT VecEltVT = Op.getSimpleValueType();
9662
9663 unsigned RVVOpcode;
9664 SDValue VectorVal, ScalarVal;
9665 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9666 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9667 MVT VecVT = VectorVal.getSimpleValueType();
9668
9669 MVT ContainerVT = VecVT;
9670 if (VecVT.isFixedLengthVector()) {
9671 ContainerVT = getContainerForFixedLengthVector(VecVT);
9672 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9673 }
9674
9675 MVT ResVT = Op.getSimpleValueType();
9676 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9677 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9678 VL, DL, DAG, Subtarget);
9679 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9680 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9681 return Res;
9682
9683 if (Op->getFlags().hasNoNaNs())
9684 return Res;
9685
9686 // Force output to NaN if any element is Nan.
9687 SDValue IsNan =
9688 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9689 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9690 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9691 MVT XLenVT = Subtarget.getXLenVT();
9692 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9693 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9694 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9695 return DAG.getSelect(
9696 DL, ResVT, NoNaNs, Res,
9697      DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9698                        ResVT));
9699}
9700
9701SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9702 SelectionDAG &DAG) const {
9703 SDLoc DL(Op);
9704 SDValue Vec = Op.getOperand(1);
9705 EVT VecEVT = Vec.getValueType();
9706
9707 // TODO: The type may need to be widened rather than split. Or widened before
9708 // it can be split.
9709 if (!isTypeLegal(VecEVT))
9710 return SDValue();
9711
9712 MVT VecVT = VecEVT.getSimpleVT();
9713 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9714
9715 if (VecVT.isFixedLengthVector()) {
9716 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9717 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9718 }
9719
9720 SDValue VL = Op.getOperand(3);
9721 SDValue Mask = Op.getOperand(2);
9722 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9723 Vec, Mask, VL, DL, DAG, Subtarget);
9724}
9725
9726SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9727 SelectionDAG &DAG) const {
9728 SDValue Vec = Op.getOperand(0);
9729 SDValue SubVec = Op.getOperand(1);
9730 MVT VecVT = Vec.getSimpleValueType();
9731 MVT SubVecVT = SubVec.getSimpleValueType();
9732
9733 SDLoc DL(Op);
9734 MVT XLenVT = Subtarget.getXLenVT();
9735 unsigned OrigIdx = Op.getConstantOperandVal(2);
9736 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9737
9738 // We don't have the ability to slide mask vectors up indexed by their i1
9739 // elements; the smallest we can do is i8. Often we are able to bitcast to
9740 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9741 // into a scalable one, we might not necessarily have enough scalable
9742 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
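// As an illustration of the bitcast path below (example types only):
// inserting v16i1 into nxv8i1 at index 8 can be re-expressed as inserting
// v2i8 into nxv1i8 at index 1, since both element counts and the index are
// divisible by 8.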
9743 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9744 (OrigIdx != 0 || !Vec.isUndef())) {
9745 if (VecVT.getVectorMinNumElements() >= 8 &&
9746 SubVecVT.getVectorMinNumElements() >= 8) {
9747 assert(OrigIdx % 8 == 0 && "Invalid index");
9748 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9749 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9750 "Unexpected mask vector lowering");
9751 OrigIdx /= 8;
9752 SubVecVT =
9753 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9754 SubVecVT.isScalableVector());
9755 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9756 VecVT.isScalableVector());
9757 Vec = DAG.getBitcast(VecVT, Vec);
9758 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9759 } else {
9760 // We can't slide this mask vector up indexed by its i1 elements.
9761 // This poses a problem when we wish to insert a scalable vector which
9762 // can't be re-expressed as a larger type. Just choose the slow path and
9763 // extend to a larger type, then truncate back down.
9764 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9765 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9766 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9767 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9768 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9769 Op.getOperand(2));
9770 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9771 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9772 }
9773 }
9774
9775 // If the subvector is a fixed-length type, we cannot use subregister
9776 // manipulation to simplify the codegen; we don't know which register of a
9777 // LMUL group contains the specific subvector as we only know the minimum
9778 // register size. Therefore we must slide the vector group up the full
9779 // amount.
9780 if (SubVecVT.isFixedLengthVector()) {
9781 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9782 return Op;
9783 MVT ContainerVT = VecVT;
9784 if (VecVT.isFixedLengthVector()) {
9785 ContainerVT = getContainerForFixedLengthVector(VecVT);
9786 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9787 }
9788
9789 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9790 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9791 DAG.getUNDEF(ContainerVT), SubVec,
9792 DAG.getVectorIdxConstant(0, DL));
9793 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9794 return DAG.getBitcast(Op.getValueType(), SubVec);
9795 }
9796
9797 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9798 DAG.getUNDEF(ContainerVT), SubVec,
9799 DAG.getVectorIdxConstant(0, DL));
9800 SDValue Mask =
9801 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9802 // Set the vector length to only the number of elements we care about. Note
9803 // that for slideup this includes the offset.
9804 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9805 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9806
9807 // Use tail agnostic policy if we're inserting over Vec's tail.
9808 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9809 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9810 Policy = RISCVII::TAIL_AGNOSTIC;
9811
9812 // If we're inserting into the lowest elements, use a tail undisturbed
9813 // vmv.v.v.
9814 if (OrigIdx == 0) {
9815 SubVec =
9816 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9817 } else {
9818 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9819 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9820 SlideupAmt, Mask, VL, Policy);
9821 }
9822
9823 if (VecVT.isFixedLengthVector())
9824 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9825 return DAG.getBitcast(Op.getValueType(), SubVec);
9826 }
9827
9828 unsigned SubRegIdx, RemIdx;
9829 std::tie(SubRegIdx, RemIdx) =
9830 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9831 VecVT, SubVecVT, OrigIdx, TRI);
9832
9833 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9834 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9835 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9836 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9837
9838 // 1. If the Idx has been completely eliminated and this subvector's size is
9839 // a vector register or a multiple thereof, or the surrounding elements are
9840 // undef, then this is a subvector insert which naturally aligns to a vector
9841 // register. These can easily be handled using subregister manipulation.
9842 // 2. If the subvector is smaller than a vector register, then the insertion
9843 // must preserve the undisturbed elements of the register. We do this by
9844 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9845 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9846 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9847 // LMUL=1 type back into the larger vector (resolving to another subregister
9848 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9849 // to avoid allocating a large register group to hold our subvector.
9850 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9851 return Op;
9852
9853 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9854 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9855 // (in our case undisturbed). This means we can set up a subvector insertion
9856 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9857 // size of the subvector.
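// Concrete illustration (example numbers only): inserting a 4-element
// subvector at offset 2 uses OFFSET=2 and VL=2+4=6, so elements 0..1 of the
// destination are untouched, elements 2..5 receive the subvector, and
// elements >= 6 follow the (undisturbed) tail policy.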
9858 MVT InterSubVT = VecVT;
9859 SDValue AlignedExtract = Vec;
9860 unsigned AlignedIdx = OrigIdx - RemIdx;
9861 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9862 InterSubVT = getLMUL1VT(VecVT);
9863 // Extract a subvector equal to the nearest full vector register type. This
9864 // should resolve to a EXTRACT_SUBREG instruction.
9865 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9866 DAG.getVectorIdxConstant(AlignedIdx, DL));
9867 }
9868
9869 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9870 DAG.getUNDEF(InterSubVT), SubVec,
9871 DAG.getVectorIdxConstant(0, DL));
9872
9873 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9874
9875 ElementCount EndIndex =
9876 ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9877 VL = computeVLMax(SubVecVT, DL, DAG);
9878
9879 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9880 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9881 if (EndIndex == InterSubVT.getVectorElementCount())
9882 Policy = RISCVII::TAIL_AGNOSTIC;
9883
9884 // If we're inserting into the lowest elements, use a tail undisturbed
9885 // vmv.v.v.
9886 if (RemIdx == 0) {
9887 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9888 SubVec, VL);
9889 } else {
9890 SDValue SlideupAmt =
9891 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9892
9893 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9894 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9895
9896 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9897 SlideupAmt, Mask, VL, Policy);
9898 }
9899
9900 // If required, insert this subvector back into the correct vector register.
9901 // This should resolve to an INSERT_SUBREG instruction.
9902 if (VecVT.bitsGT(InterSubVT))
9903 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9904 DAG.getVectorIdxConstant(AlignedIdx, DL));
9905
9906 // We might have bitcast from a mask type: cast back to the original type if
9907 // required.
9908 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9909}
9910
9911SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9912 SelectionDAG &DAG) const {
9913 SDValue Vec = Op.getOperand(0);
9914 MVT SubVecVT = Op.getSimpleValueType();
9915 MVT VecVT = Vec.getSimpleValueType();
9916
9917 SDLoc DL(Op);
9918 MVT XLenVT = Subtarget.getXLenVT();
9919 unsigned OrigIdx = Op.getConstantOperandVal(1);
9920 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9921
9922 // We don't have the ability to slide mask vectors down indexed by their i1
9923 // elements; the smallest we can do is i8. Often we are able to bitcast to
9924 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9925 // from a scalable one, we might not necessarily have enough scalable
9926 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
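// As an illustration of the bitcast path below (example types only):
// extracting v8i1 at index 8 from nxv16i1 can instead extract v1i8 at
// index 1 from nxv2i8.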
9927 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9928 if (VecVT.getVectorMinNumElements() >= 8 &&
9929 SubVecVT.getVectorMinNumElements() >= 8) {
9930 assert(OrigIdx % 8 == 0 && "Invalid index");
9931 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9932 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9933 "Unexpected mask vector lowering");
9934 OrigIdx /= 8;
9935 SubVecVT =
9936 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9937 SubVecVT.isScalableVector());
9938 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9939 VecVT.isScalableVector());
9940 Vec = DAG.getBitcast(VecVT, Vec);
9941 } else {
9942 // We can't slide this mask vector down, indexed by its i1 elements.
9943 // This poses a problem when we wish to extract a scalable vector which
9944 // can't be re-expressed as a larger type. Just choose the slow path and
9945 // extend to a larger type, then truncate back down.
9946 // TODO: We could probably improve this when extracting a fixed-length vector
9947 // from a fixed-length vector, where we can extract as i8 and shift the
9948 // correct element right to reach the desired subvector.
9949 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9950 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9951 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9952 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9953 Op.getOperand(1));
9954 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9955 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9956 }
9957 }
9958
9959 // With an index of 0 this is a cast-like subvector, which can be performed
9960 // with subregister operations.
9961 if (OrigIdx == 0)
9962 return Op;
9963
9964 const auto VLen = Subtarget.getRealVLen();
9965
9966 // If the subvector is a fixed-length type and we don't know VLEN
9967 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9968 // don't know which register of a LMUL group contains the specific subvector
9969 // as we only know the minimum register size. Therefore we must slide the
9970 // vector group down the full amount.
9971 if (SubVecVT.isFixedLengthVector() && !VLen) {
9972 MVT ContainerVT = VecVT;
9973 if (VecVT.isFixedLengthVector()) {
9974 ContainerVT = getContainerForFixedLengthVector(VecVT);
9975 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9976 }
9977
9978 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9979 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9980 if (auto ShrunkVT =
9981 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9982 ContainerVT = *ShrunkVT;
9983 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9984 DAG.getVectorIdxConstant(0, DL));
9985 }
9986
9987 SDValue Mask =
9988 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9989 // Set the vector length to only the number of elements we care about. This
9990 // avoids sliding down elements we're going to discard straight away.
9991 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9992 Subtarget);
9993 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9994 SDValue Slidedown =
9995 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9996 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9997 // Now we can use a cast-like subvector extract to get the result.
9998 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9999 DAG.getVectorIdxConstant(0, DL));
10000 return DAG.getBitcast(Op.getValueType(), Slidedown);
10001 }
10002
10003 if (VecVT.isFixedLengthVector()) {
10004 VecVT = getContainerForFixedLengthVector(VecVT);
10005 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10006 }
10007
10008 MVT ContainerSubVecVT = SubVecVT;
10009 if (SubVecVT.isFixedLengthVector())
10010 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10011
10012 unsigned SubRegIdx;
10013 ElementCount RemIdx;
10014 // extract_subvector scales the index by vscale if the subvector is scalable,
10015 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10016 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
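// Worked example (assuming VLEN=128, i.e. Vscale=2, for illustration): a
// fixed subvector index of 6 is scaled down to 6/2=3 scalable units for the
// decomposition, and the fixed remainder is rebuilt as
// Decompose.second*2 + (6 % 2).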
10017 if (SubVecVT.isFixedLengthVector()) {
10018 assert(VLen);
10019 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10020 auto Decompose =
10021 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10022 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10023 SubRegIdx = Decompose.first;
10024 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10025 (OrigIdx % Vscale));
10026 } else {
10027 auto Decompose =
10028 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10029 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10030 SubRegIdx = Decompose.first;
10031 RemIdx = ElementCount::getScalable(Decompose.second);
10032 }
10033
10034 // If the Idx has been completely eliminated then this is a subvector extract
10035 // which naturally aligns to a vector register. These can easily be handled
10036 // using subregister manipulation.
10037 if (RemIdx.isZero()) {
10038 if (SubVecVT.isFixedLengthVector()) {
10039 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10040 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10041 }
10042 return Op;
10043 }
10044
10045 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10046 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10047 // divide exactly.
10048 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10049 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10050
10051 // If the vector type is an LMUL-group type, extract a subvector equal to the
10052 // nearest full vector register type.
10053 MVT InterSubVT = VecVT;
10054 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10055 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10056 // we should have successfully decomposed the extract into a subregister.
10057 assert(SubRegIdx != RISCV::NoSubRegister);
10058 InterSubVT = getLMUL1VT(VecVT);
10059 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10060 }
10061
10062 // Slide this vector register down by the desired number of elements in order
10063 // to place the desired subvector starting at element 0.
10064 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10065 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10066 if (SubVecVT.isFixedLengthVector())
10067 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10068 Subtarget);
10069 SDValue Slidedown =
10070 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10071 Vec, SlidedownAmt, Mask, VL);
10072
10073 // Now the vector is in the right position, extract our final subvector. This
10074 // should resolve to a COPY.
10075 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10076 DAG.getVectorIdxConstant(0, DL));
10077
10078 // We might have bitcast from a mask type: cast back to the original type if
10079 // required.
10080 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10081}
10082
10083// Widen a vector's operands to i8, then truncate its results back to the
10084// original type, typically i1. All operand and result types must be the same.
10085 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10086 SelectionDAG &DAG) {
10087 MVT VT = N.getSimpleValueType();
10088 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10089 SmallVector<SDValue, 4> WideOps;
10090 for (SDValue Op : N->ops()) {
10091 assert(Op.getSimpleValueType() == VT &&
10092 "Operands and result must be same type");
10093 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10094 }
10095
10096 unsigned NumVals = N->getNumValues();
10097
10098 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10099 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10100 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10101 SmallVector<SDValue, 4> TruncVals;
10102 for (unsigned I = 0; I < NumVals; I++) {
10103 TruncVals.push_back(
10104 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10105 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10106 }
10107
10108 if (TruncVals.size() > 1)
10109 return DAG.getMergeValues(TruncVals, DL);
10110 return TruncVals.front();
10111}
10112
10113SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10114 SelectionDAG &DAG) const {
10115 SDLoc DL(Op);
10116 MVT VecVT = Op.getSimpleValueType();
10117
10118 assert(VecVT.isScalableVector() &&
10119 "vector_interleave on non-scalable vector!");
10120
10121 // 1 bit element vectors need to be widened to e8
10122 if (VecVT.getVectorElementType() == MVT::i1)
10123 return widenVectorOpsToi8(Op, DL, DAG);
10124
10125 // If the VT is LMUL=8, we need to split and reassemble.
10126 if (VecVT.getSizeInBits().getKnownMinValue() ==
10127 (8 * RISCV::RVVBitsPerBlock)) {
10128 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10129 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10130 EVT SplitVT = Op0Lo.getValueType();
10131
10132 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10133 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10134 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10135 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10136
10137 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10138 ResLo.getValue(0), ResHi.getValue(0));
10139 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10140 ResHi.getValue(1));
10141 return DAG.getMergeValues({Even, Odd}, DL);
10142 }
10143
10144 // Concatenate the two vectors as one vector to deinterleave
10145 MVT ConcatVT =
10146 MVT::getVectorVT(VecVT.getVectorElementType(),
10147 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10148 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10149 Op.getOperand(0), Op.getOperand(1));
10150
10151 // We want to operate on all lanes, so get the mask and VL for the concatenated vector
10152 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10153 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10154
10155 // We can deinterleave through vnsrl.wi if the element type is smaller than
10156 // ELEN
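// Sketch of the idea (illustrative): viewing the concatenated vector as
// elements of twice the SEW, a narrowing shift (vnsrl) by 0 keeps the even
// elements and a shift by SEW keeps the odd elements, which yields the two
// deinterleaved halves.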
10157 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10158 SDValue Even =
10159 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10160 SDValue Odd =
10161 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10162 return DAG.getMergeValues({Even, Odd}, DL);
10163 }
10164
10165 // For the indices, use the same SEW to avoid an extra vsetvli
10166 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10167 // Create a vector of even indices {0, 2, 4, ...}
10168 SDValue EvenIdx =
10169 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10170 // Create a vector of odd indices {1, 3, 5, ... }
10171 SDValue OddIdx =
10172 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10173
10174 // Gather the even and odd elements into two separate vectors
10175 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10176 Concat, EvenIdx, Passthru, Mask, VL);
10177 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10178 Concat, OddIdx, Passthru, Mask, VL);
10179
10180 // Extract the result half of the gather for even and odd
10181 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10182 DAG.getVectorIdxConstant(0, DL));
10183 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10184 DAG.getVectorIdxConstant(0, DL));
10185
10186 return DAG.getMergeValues({Even, Odd}, DL);
10187}
10188
10189SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10190 SelectionDAG &DAG) const {
10191 SDLoc DL(Op);
10192 MVT VecVT = Op.getSimpleValueType();
10193
10194 assert(VecVT.isScalableVector() &&
10195 "vector_interleave on non-scalable vector!");
10196
10197 // i1 vectors need to be widened to i8
10198 if (VecVT.getVectorElementType() == MVT::i1)
10199 return widenVectorOpsToi8(Op, DL, DAG);
10200
10201 MVT XLenVT = Subtarget.getXLenVT();
10202 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10203
10204 // If the VT is LMUL=8, we need to split and reassemble.
10205 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10206 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10207 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10208 EVT SplitVT = Op0Lo.getValueType();
10209
10210 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10211 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10212 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10213 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10214
10215 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10216 ResLo.getValue(0), ResLo.getValue(1));
10217 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10218 ResHi.getValue(0), ResHi.getValue(1));
10219 return DAG.getMergeValues({Lo, Hi}, DL);
10220 }
10221
10222 SDValue Interleaved;
10223
10224 // If the element type is smaller than ELEN, then we can interleave with
10225 // vwaddu.vv and vwmaccu.vx
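// Roughly (illustrative): vwaddu.vv adds Even and Odd into 2*SEW-wide
// elements, and vwmaccu.vx with an all-ones scalar adds (2^SEW - 1) * Odd,
// so each wide element becomes Even + 2^SEW * Odd, i.e. the odd element in
// the high half and the even element in the low half.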
10226 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10227 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10228 DAG, Subtarget);
10229 } else {
10230 // Otherwise, fall back to using vrgatherei16.vv
10231 MVT ConcatVT =
10232 MVT::getVectorVT(VecVT.getVectorElementType(),
10233 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10234 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10235 Op.getOperand(0), Op.getOperand(1));
10236
10237 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10238
10239 // 0 1 2 3 4 5 6 7 ...
10240 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10241
10242 // 1 1 1 1 1 1 1 1 ...
10243 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10244
10245 // 1 0 1 0 1 0 1 0 ...
10246 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10247 OddMask = DAG.getSetCC(
10248 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10249 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10250 ISD::SETNE);
10251
10252 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10253
10254 // Build up the index vector for interleaving the concatenated vector
10255 // 0 0 1 1 2 2 3 3 ...
10256 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10257 // 0 n 1 n+1 2 n+2 3 n+3 ...
10258 Idx =
10259 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10260
10261 // Then perform the interleave
10262 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10263 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10264 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10265 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10266 }
10267
10268 // Extract the two halves from the interleaved result
10269 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10270 DAG.getVectorIdxConstant(0, DL));
10271 SDValue Hi = DAG.getNode(
10272 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10273 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10274
10275 return DAG.getMergeValues({Lo, Hi}, DL);
10276}
10277
10278// Lower step_vector to the vid instruction. Any non-identity step value must
10279 // be accounted for by manual expansion.
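// For example (illustrative): a step of 8 lowers to vid.v followed by a
// shift left by 3, while a step of 6 lowers to vid.v followed by a multiply
// by a splat of 6.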
10280SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10281 SelectionDAG &DAG) const {
10282 SDLoc DL(Op);
10283 MVT VT = Op.getSimpleValueType();
10284 assert(VT.isScalableVector() && "Expected scalable vector");
10285 MVT XLenVT = Subtarget.getXLenVT();
10286 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10287 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10288 uint64_t StepValImm = Op.getConstantOperandVal(0);
10289 if (StepValImm != 1) {
10290 if (isPowerOf2_64(StepValImm)) {
10291 SDValue StepVal =
10292 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10293 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10294 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10295 } else {
10296 SDValue StepVal = lowerScalarSplat(
10297 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10298 VL, VT, DL, DAG, Subtarget);
10299 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10300 }
10301 }
10302 return StepVec;
10303}
10304
10305// Implement vector_reverse using vrgather.vv with indices determined by
10306// subtracting the id of each element from (VLMAX-1). This will convert
10307// the indices like so:
10308// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10309// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10310SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10311 SelectionDAG &DAG) const {
10312 SDLoc DL(Op);
10313 MVT VecVT = Op.getSimpleValueType();
10314 if (VecVT.getVectorElementType() == MVT::i1) {
10315 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10316 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10317 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10318 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10319 }
10320 unsigned EltSize = VecVT.getScalarSizeInBits();
10321 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10322 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10323 unsigned MaxVLMAX =
10324 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10325
10326 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10327 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10328
10329 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10330 // to use vrgatherei16.vv.
10331 // TODO: It's also possible to use vrgatherei16.vv for other types to
10332 // decrease register width for the index calculation.
10333 if (MaxVLMAX > 256 && EltSize == 8) {
10334 // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
10335 // Reverse each half, then reassemble them in reverse order.
10336 // NOTE: It's also possible that after splitting that VLMAX no longer
10337 // requires vrgatherei16.vv.
10338 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10339 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10340 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10341 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10342 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10343 // Reassemble the low and high pieces reversed.
10344 // FIXME: This is a CONCAT_VECTORS.
10345 SDValue Res =
10346 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10347 DAG.getVectorIdxConstant(0, DL));
10348 return DAG.getNode(
10349 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10350 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10351 }
10352
10353 // Just promote the int type to i16 which will double the LMUL.
10354 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10355 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10356 }
10357
10358 MVT XLenVT = Subtarget.getXLenVT();
10359 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10360
10361 // Calculate VLMAX-1 for the desired SEW.
10362 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10363 computeVLMax(VecVT, DL, DAG),
10364 DAG.getConstant(1, DL, XLenVT));
10365
10366 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10367 bool IsRV32E64 =
10368 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10369 SDValue SplatVL;
10370 if (!IsRV32E64)
10371 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10372 else
10373 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10374 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10375
10376 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10377 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10378 DAG.getUNDEF(IntVT), Mask, VL);
10379
10380 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10381 DAG.getUNDEF(VecVT), Mask, VL);
10382}
10383
10384SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10385 SelectionDAG &DAG) const {
10386 SDLoc DL(Op);
10387 SDValue V1 = Op.getOperand(0);
10388 SDValue V2 = Op.getOperand(1);
10389 MVT XLenVT = Subtarget.getXLenVT();
10390 MVT VecVT = Op.getSimpleValueType();
10391
10392 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10393
10394 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10395 SDValue DownOffset, UpOffset;
10396 if (ImmValue >= 0) {
10397 // The operand is a TargetConstant, we need to rebuild it as a regular
10398 // constant.
10399 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10400 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10401 } else {
10402 // The operand is a TargetConstant, we need to rebuild it as a regular
10403 // constant rather than negating the original operand.
10404 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10405 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10406 }
10407
10408 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10409
10410 SDValue SlideDown =
10411 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10412 DownOffset, TrueMask, UpOffset);
10413 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10414 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10415 RISCVII::TAIL_AGNOSTIC);
10416}
10417
10418SDValue
10419RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10420 SelectionDAG &DAG) const {
10421 SDLoc DL(Op);
10422 auto *Load = cast<LoadSDNode>(Op);
10423
10424 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10425 Load->getMemoryVT(),
10426 *Load->getMemOperand()) &&
10427 "Expecting a correctly-aligned load");
10428
10429 MVT VT = Op.getSimpleValueType();
10430 MVT XLenVT = Subtarget.getXLenVT();
10431 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10432
10433 // If we know the exact VLEN and our fixed length vector completely fills
10434 // the container, use a whole register load instead.
10435 const auto [MinVLMAX, MaxVLMAX] =
10436 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10437 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10438 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10439 MachineMemOperand *MMO = Load->getMemOperand();
10440 SDValue NewLoad =
10441 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10442 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10443 MMO->getAAInfo(), MMO->getRanges());
10444 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10445 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10446 }
10447
10448 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10449
10450 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10451 SDValue IntID = DAG.getTargetConstant(
10452 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10453 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10454 if (!IsMaskOp)
10455 Ops.push_back(DAG.getUNDEF(ContainerVT));
10456 Ops.push_back(Load->getBasePtr());
10457 Ops.push_back(VL);
10458 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10459 SDValue NewLoad =
10460 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10461 Load->getMemoryVT(), Load->getMemOperand());
10462
10463 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10464 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10465}
10466
10467SDValue
10468RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10469 SelectionDAG &DAG) const {
10470 SDLoc DL(Op);
10471 auto *Store = cast<StoreSDNode>(Op);
10472
10473 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10474 Store->getMemoryVT(),
10475 *Store->getMemOperand()) &&
10476 "Expecting a correctly-aligned store");
10477
10478 SDValue StoreVal = Store->getValue();
10479 MVT VT = StoreVal.getSimpleValueType();
10480 MVT XLenVT = Subtarget.getXLenVT();
10481
10482 // If the size is less than a byte, we need to pad with zeros to make a byte.
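// e.g. (illustrative): a v4i1 store value is widened to v8i1 by inserting
// it at element 0 of an all-zero vector, so the unused high bits of the
// stored byte are cleared.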
10483 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10484 VT = MVT::v8i1;
10485 StoreVal =
10486 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10487 StoreVal, DAG.getVectorIdxConstant(0, DL));
10488 }
10489
10490 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10491
10492 SDValue NewValue =
10493 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10494
10495
10496 // If we know the exact VLEN and our fixed length vector completely fills
10497 // the container, use a whole register store instead.
10498 const auto [MinVLMAX, MaxVLMAX] =
10499 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10500 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10501 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10502 MachineMemOperand *MMO = Store->getMemOperand();
10503 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10504 MMO->getPointerInfo(), MMO->getBaseAlign(),
10505 MMO->getFlags(), MMO->getAAInfo());
10506 }
10507
10508 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10509 Subtarget);
10510
10511 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10512 SDValue IntID = DAG.getTargetConstant(
10513 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10514 return DAG.getMemIntrinsicNode(
10515 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10516 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10517 Store->getMemoryVT(), Store->getMemOperand());
10518}
10519
10520SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10521 SelectionDAG &DAG) const {
10522 SDLoc DL(Op);
10523 MVT VT = Op.getSimpleValueType();
10524
10525 const auto *MemSD = cast<MemSDNode>(Op);
10526 EVT MemVT = MemSD->getMemoryVT();
10527 MachineMemOperand *MMO = MemSD->getMemOperand();
10528 SDValue Chain = MemSD->getChain();
10529 SDValue BasePtr = MemSD->getBasePtr();
10530
10531 SDValue Mask, PassThru, VL;
10532 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10533 Mask = VPLoad->getMask();
10534 PassThru = DAG.getUNDEF(VT);
10535 VL = VPLoad->getVectorLength();
10536 } else {
10537 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10538 Mask = MLoad->getMask();
10539 PassThru = MLoad->getPassThru();
10540 }
10541
10542 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10543
10544 MVT XLenVT = Subtarget.getXLenVT();
10545
10546 MVT ContainerVT = VT;
10547 if (VT.isFixedLengthVector()) {
10548 ContainerVT = getContainerForFixedLengthVector(VT);
10549 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10550 if (!IsUnmasked) {
10551 MVT MaskVT = getMaskTypeFor(ContainerVT);
10552 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10553 }
10554 }
10555
10556 if (!VL)
10557 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10558
10559 unsigned IntID =
10560 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10561 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10562 if (IsUnmasked)
10563 Ops.push_back(DAG.getUNDEF(ContainerVT));
10564 else
10565 Ops.push_back(PassThru);
10566 Ops.push_back(BasePtr);
10567 if (!IsUnmasked)
10568 Ops.push_back(Mask);
10569 Ops.push_back(VL);
10570 if (!IsUnmasked)
10571 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10572
10573 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10574
10575 SDValue Result =
10576 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10577 Chain = Result.getValue(1);
10578
10579 if (VT.isFixedLengthVector())
10580 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10581
10582 return DAG.getMergeValues({Result, Chain}, DL);
10583}
10584
10585SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10586 SelectionDAG &DAG) const {
10587 SDLoc DL(Op);
10588
10589 const auto *MemSD = cast<MemSDNode>(Op);
10590 EVT MemVT = MemSD->getMemoryVT();
10591 MachineMemOperand *MMO = MemSD->getMemOperand();
10592 SDValue Chain = MemSD->getChain();
10593 SDValue BasePtr = MemSD->getBasePtr();
10594 SDValue Val, Mask, VL;
10595
10596 bool IsCompressingStore = false;
10597 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10598 Val = VPStore->getValue();
10599 Mask = VPStore->getMask();
10600 VL = VPStore->getVectorLength();
10601 } else {
10602 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10603 Val = MStore->getValue();
10604 Mask = MStore->getMask();
10605 IsCompressingStore = MStore->isCompressingStore();
10606 }
10607
10608 bool IsUnmasked =
10609 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10610
10611 MVT VT = Val.getSimpleValueType();
10612 MVT XLenVT = Subtarget.getXLenVT();
10613
10614 MVT ContainerVT = VT;
10615 if (VT.isFixedLengthVector()) {
10616 ContainerVT = getContainerForFixedLengthVector(VT);
10617
10618 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10619 if (!IsUnmasked || IsCompressingStore) {
10620 MVT MaskVT = getMaskTypeFor(ContainerVT);
10621 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10622 }
10623 }
10624
10625 if (!VL)
10626 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10627
10628 if (IsCompressingStore) {
10629 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10630 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10631 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10632 VL =
10633 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10634 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10635 }
10636
10637 unsigned IntID =
10638 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10639 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10640 Ops.push_back(Val);
10641 Ops.push_back(BasePtr);
10642 if (!IsUnmasked)
10643 Ops.push_back(Mask);
10644 Ops.push_back(VL);
10645
10646 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10647 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10648}
10649
10650SDValue
10651RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10652 SelectionDAG &DAG) const {
10653 MVT InVT = Op.getOperand(0).getSimpleValueType();
10654 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10655
10656 MVT VT = Op.getSimpleValueType();
10657
10658 SDValue Op1 =
10659 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10660 SDValue Op2 =
10661 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10662
10663 SDLoc DL(Op);
10664 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10665 DAG, Subtarget);
10666 MVT MaskVT = getMaskTypeFor(ContainerVT);
10667
10668 SDValue Cmp =
10669 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10670 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10671
10672 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10673}
10674
10675SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10676 SelectionDAG &DAG) const {
10677 unsigned Opc = Op.getOpcode();
10678 SDLoc DL(Op);
10679 SDValue Chain = Op.getOperand(0);
10680 SDValue Op1 = Op.getOperand(1);
10681 SDValue Op2 = Op.getOperand(2);
10682 SDValue CC = Op.getOperand(3);
10683 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10684 MVT VT = Op.getSimpleValueType();
10685 MVT InVT = Op1.getSimpleValueType();
10686
10687 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
10688 // condition codes.
10689 if (Opc == ISD::STRICT_FSETCCS) {
10690 // Expand strict_fsetccs(x, oeq) to
10691 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10692 SDVTList VTList = Op->getVTList();
10693 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10694 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10695 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10696 Op2, OLECCVal);
10697 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10698 Op1, OLECCVal);
10699 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10700 Tmp1.getValue(1), Tmp2.getValue(1));
10701 // Tmp1 and Tmp2 might be the same node.
10702 if (Tmp1 != Tmp2)
10703 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10704 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10705 }
10706
10707 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10708 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10709 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10710 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10711 Op2, OEQCCVal);
10712 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10713 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10714 }
10715 }
10716
10717 MVT ContainerInVT = InVT;
10718 if (InVT.isFixedLengthVector()) {
10719 ContainerInVT = getContainerForFixedLengthVector(InVT);
10720 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10721 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10722 }
10723 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10724
10725 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10726
10727 SDValue Res;
10728 if (Opc == ISD::STRICT_FSETCC &&
10729 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10730 CCVal == ISD::SETOLE)) {
10731 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10732 // is only active when both input elements are ordered.
10733 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10734 SDValue OrderMask1 = DAG.getNode(
10735 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10736 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10737 True, VL});
10738 SDValue OrderMask2 = DAG.getNode(
10739 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10740 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10741 True, VL});
10742 Mask =
10743 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10744 // Use Mask as the merge operand to let the result be 0 if either of the
10745 // inputs is unordered.
10746 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10747 DAG.getVTList(MaskVT, MVT::Other),
10748 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10749 } else {
10750 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10751 : RISCVISD::STRICT_FSETCCS_VL;
10752 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10753 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10754 }
10755
10756 if (VT.isFixedLengthVector()) {
10757 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10758 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10759 }
10760 return Res;
10761}
10762
10763// Lower vector ABS to smax(X, sub(0, X)).
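// e.g. (illustrative): for X = <1, -2, 3, -4>, sub(0, X) = <-1, 2, -3, 4>
// and smax(X, sub(0, X)) = <1, 2, 3, 4>.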
10764SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10765 SDLoc DL(Op);
10766 MVT VT = Op.getSimpleValueType();
10767 SDValue X = Op.getOperand(0);
10768
10769 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10770 "Unexpected type for ISD::ABS");
10771
10772 MVT ContainerVT = VT;
10773 if (VT.isFixedLengthVector()) {
10774 ContainerVT = getContainerForFixedLengthVector(VT);
10775 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10776 }
10777
10778 SDValue Mask, VL;
10779 if (Op->getOpcode() == ISD::VP_ABS) {
10780 Mask = Op->getOperand(1);
10781 if (VT.isFixedLengthVector())
10782 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10783 Subtarget);
10784 VL = Op->getOperand(2);
10785 } else
10786 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10787
10788 SDValue SplatZero = DAG.getNode(
10789 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10790 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10791 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10792 DAG.getUNDEF(ContainerVT), Mask, VL);
10793 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10794 DAG.getUNDEF(ContainerVT), Mask, VL);
10795
10796 if (VT.isFixedLengthVector())
10797 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10798 return Max;
10799}
10800
10801SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10802 SDValue Op, SelectionDAG &DAG) const {
10803 SDLoc DL(Op);
10804 MVT VT = Op.getSimpleValueType();
10805 SDValue Mag = Op.getOperand(0);
10806 SDValue Sign = Op.getOperand(1);
10807 assert(Mag.getValueType() == Sign.getValueType() &&
10808 "Can only handle COPYSIGN with matching types.");
10809
10810 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10811 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10812 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10813
10814 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10815
10816 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10817 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10818
10819 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10820}
10821
10822SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10823 SDValue Op, SelectionDAG &DAG) const {
10824 MVT VT = Op.getSimpleValueType();
10825 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10826
10827 MVT I1ContainerVT =
10828 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10829
10830 SDValue CC =
10831 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10832 SDValue Op1 =
10833 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10834 SDValue Op2 =
10835 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10836
10837 SDLoc DL(Op);
10838 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10839
10840 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10841 Op2, DAG.getUNDEF(ContainerVT), VL);
10842
10843 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10844}
10845
10846SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10847 SelectionDAG &DAG) const {
10848 unsigned NewOpc = getRISCVVLOp(Op);
10849 bool HasMergeOp = hasMergeOp(NewOpc);
10850 bool HasMask = hasMaskOp(NewOpc);
10851
10852 MVT VT = Op.getSimpleValueType();
10853 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10854
10855 // Create list of operands by converting existing ones to scalable types.
10856 SmallVector<SDValue, 6> Ops;
10857 for (const SDValue &V : Op->op_values()) {
10858 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10859
10860 // Pass through non-vector operands.
10861 if (!V.getValueType().isVector()) {
10862 Ops.push_back(V);
10863 continue;
10864 }
10865
10866 // "cast" fixed length vector to a scalable vector.
10867 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10868 "Only fixed length vectors are supported!");
10869 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10870 }
10871
10872 SDLoc DL(Op);
10873 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10874 if (HasMergeOp)
10875 Ops.push_back(DAG.getUNDEF(ContainerVT));
10876 if (HasMask)
10877 Ops.push_back(Mask);
10878 Ops.push_back(VL);
10879
10880 // StrictFP operations have two result values. Their lowered result should
10881 // have the same result count.
10882 if (Op->isStrictFPOpcode()) {
10883 SDValue ScalableRes =
10884 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10885 Op->getFlags());
10886 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10887 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10888 }
10889
10890 SDValue ScalableRes =
10891 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10892 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10893}
10894
10895// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10896// * Operands of each node are assumed to be in the same order.
10897// * The EVL operand is promoted from i32 to i64 on RV64.
10898// * Fixed-length vectors are converted to their scalable-vector container
10899// types.
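// For instance (illustrative): a fixed-length vp.add is rebuilt as
// RISCVISD::ADD_VL on the scalable container type, with the mask and EVL
// operands passed through and a merge operand inserted where the VL node
// expects one.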
10900SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10901 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10902 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10903
10904 SDLoc DL(Op);
10905 MVT VT = Op.getSimpleValueType();
10906 SmallVector<SDValue, 16> Ops;
10907
10908 MVT ContainerVT = VT;
10909 if (VT.isFixedLengthVector())
10910 ContainerVT = getContainerForFixedLengthVector(VT);
10911
10912 for (const auto &OpIdx : enumerate(Op->ops())) {
10913 SDValue V = OpIdx.value();
10914 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10915 // Add a dummy merge value before the mask, or, if there isn't a mask, before
10916 // the EVL.
10917 if (HasMergeOp) {
10918 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10919 if (MaskIdx) {
10920 if (*MaskIdx == OpIdx.index())
10921 Ops.push_back(DAG.getUNDEF(ContainerVT));
10922 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10923 OpIdx.index()) {
10924 if (Op.getOpcode() == ISD::VP_MERGE) {
10925 // For VP_MERGE, copy the false operand instead of an undef value.
10926 Ops.push_back(Ops.back());
10927 } else {
10928 assert(Op.getOpcode() == ISD::VP_SELECT);
10929 // For VP_SELECT, add an undef value.
10930 Ops.push_back(DAG.getUNDEF(ContainerVT));
10931 }
10932 }
10933 }
10934 // Pass through operands which aren't fixed-length vectors.
10935 if (!V.getValueType().isFixedLengthVector()) {
10936 Ops.push_back(V);
10937 continue;
10938 }
10939 // "cast" fixed length vector to a scalable vector.
10940 MVT OpVT = V.getSimpleValueType();
10941 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10942 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10943 "Only fixed length vectors are supported!");
10944 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10945 }
10946
10947 if (!VT.isFixedLengthVector())
10948 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10949
10950 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10951
10952 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10953}
10954
10955SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10956 SelectionDAG &DAG) const {
10957 SDLoc DL(Op);
10958 MVT VT = Op.getSimpleValueType();
10959
10960 SDValue Src = Op.getOperand(0);
10961 // NOTE: Mask is dropped.
10962 SDValue VL = Op.getOperand(2);
10963
10964 MVT ContainerVT = VT;
10965 if (VT.isFixedLengthVector()) {
10966 ContainerVT = getContainerForFixedLengthVector(VT);
10967 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10968 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10969 }
10970
10971 MVT XLenVT = Subtarget.getXLenVT();
10972 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10973 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10974 DAG.getUNDEF(ContainerVT), Zero, VL);
10975
10976 SDValue SplatValue = DAG.getConstant(
10977 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10978 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10979 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10980
10981 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10982 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10983 if (!VT.isFixedLengthVector())
10984 return Result;
10985 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10986}
10987
10988SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10989 SelectionDAG &DAG) const {
10990 SDLoc DL(Op);
10991 MVT VT = Op.getSimpleValueType();
10992
10993 SDValue Op1 = Op.getOperand(0);
10994 SDValue Op2 = Op.getOperand(1);
10995 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10996 // NOTE: Mask is dropped.
10997 SDValue VL = Op.getOperand(4);
10998
10999 MVT ContainerVT = VT;
11000 if (VT.isFixedLengthVector()) {
11001 ContainerVT = getContainerForFixedLengthVector(VT);
11002 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11003 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11004 }
11005
11006 SDValue Result;
11007 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11008
11009 switch (Condition) {
11010 default:
11011 break;
11012 // X != Y --> (X^Y)
11013 case ISD::SETNE:
11014 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11015 break;
11016 // X == Y --> ~(X^Y)
11017 case ISD::SETEQ: {
11018 SDValue Temp =
11019 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11020 Result =
11021 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11022 break;
11023 }
11024 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11025 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11026 case ISD::SETGT:
11027 case ISD::SETULT: {
11028 SDValue Temp =
11029 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11030 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11031 break;
11032 }
11033 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11034 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11035 case ISD::SETLT:
11036 case ISD::SETUGT: {
11037 SDValue Temp =
11038 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11039 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11040 break;
11041 }
11042 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11043 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11044 case ISD::SETGE:
11045 case ISD::SETULE: {
11046 SDValue Temp =
11047 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11048 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11049 break;
11050 }
11051 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11052 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11053 case ISD::SETLE:
11054 case ISD::SETUGE: {
11055 SDValue Temp =
11056 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11057 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11058 break;
11059 }
11060 }
11061
11062 if (!VT.isFixedLengthVector())
11063 return Result;
11064 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11065}
11066
11067// Lower Floating-Point/Integer Type-Convert VP SDNodes
11068SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11069 SelectionDAG &DAG) const {
11070 SDLoc DL(Op);
11071
11072 SDValue Src = Op.getOperand(0);
11073 SDValue Mask = Op.getOperand(1);
11074 SDValue VL = Op.getOperand(2);
11075 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11076
11077 MVT DstVT = Op.getSimpleValueType();
11078 MVT SrcVT = Src.getSimpleValueType();
11079 if (DstVT.isFixedLengthVector()) {
11080 DstVT = getContainerForFixedLengthVector(DstVT);
11081 SrcVT = getContainerForFixedLengthVector(SrcVT);
11082 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11083 MVT MaskVT = getMaskTypeFor(DstVT);
11084 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11085 }
11086
11087 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11088 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11089
11090 SDValue Result;
11091 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11092 if (SrcVT.isInteger()) {
11093 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11094
11095 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11096 ? RISCVISD::VSEXT_VL
11097 : RISCVISD::VZEXT_VL;
11098
11099 // Do we need to do any pre-widening before converting?
11100 if (SrcEltSize == 1) {
11101 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11102 MVT XLenVT = Subtarget.getXLenVT();
11103 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11104 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11105 DAG.getUNDEF(IntVT), Zero, VL);
11106 SDValue One = DAG.getConstant(
11107 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11108 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11109 DAG.getUNDEF(IntVT), One, VL);
11110 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11111 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11112 } else if (DstEltSize > (2 * SrcEltSize)) {
11113 // Widen before converting.
11114 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11115 DstVT.getVectorElementCount());
11116 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11117 }
11118
11119 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11120 } else {
11121 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11122 "Wrong input/output vector types");
11123
11124 // Convert f16 to f32 then convert f32 to i64.
11125 if (DstEltSize > (2 * SrcEltSize)) {
11126 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11127 MVT InterimFVT =
11128 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11129 Src =
11130 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11131 }
11132
11133 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11134 }
11135 } else { // Narrowing + Conversion
11136 if (SrcVT.isInteger()) {
11137 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11138 // First do a narrowing convert to an FP type half the size, then round
11139 // the FP type to a small FP type if needed.
11140
11141 MVT InterimFVT = DstVT;
11142 if (SrcEltSize > (2 * DstEltSize)) {
11143 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11144 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11145 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11146 }
11147
11148 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11149
11150 if (InterimFVT != DstVT) {
11151 Src = Result;
11152 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11153 }
11154 } else {
11155 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11156 "Wrong input/output vector types");
11157 // First do a narrowing conversion to an integer half the size, then
11158 // truncate if needed.
11159
11160 if (DstEltSize == 1) {
11161 // First convert to the same size integer, then convert to mask using
11162 // setcc.
11163 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11164 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11165 DstVT.getVectorElementCount());
11166 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11167
11168 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11169 // otherwise the conversion was undefined.
11170 MVT XLenVT = Subtarget.getXLenVT();
11171 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11172 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11173 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11174 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11175 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11176 DAG.getUNDEF(DstVT), Mask, VL});
11177 } else {
11178 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11179 DstVT.getVectorElementCount());
11180
11181 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11182
11183 while (InterimIVT != DstVT) {
11184 SrcEltSize /= 2;
11185 Src = Result;
11186 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11187 DstVT.getVectorElementCount());
11188 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11189 Src, Mask, VL);
11190 }
11191 }
11192 }
11193 }
11194
11195 MVT VT = Op.getSimpleValueType();
11196 if (!VT.isFixedLengthVector())
11197 return Result;
11198 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11199}
11200
11201SDValue
11202RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11203 SelectionDAG &DAG) const {
11204 SDLoc DL(Op);
11205
11206 SDValue Op1 = Op.getOperand(0);
11207 SDValue Op2 = Op.getOperand(1);
11208 SDValue Offset = Op.getOperand(2);
11209 SDValue Mask = Op.getOperand(3);
11210 SDValue EVL1 = Op.getOperand(4);
11211 SDValue EVL2 = Op.getOperand(5);
11212
11213 const MVT XLenVT = Subtarget.getXLenVT();
11214 MVT VT = Op.getSimpleValueType();
11215 MVT ContainerVT = VT;
11216 if (VT.isFixedLengthVector()) {
11217 ContainerVT = getContainerForFixedLengthVector(VT);
11218 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11219 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11220 MVT MaskVT = getMaskTypeFor(ContainerVT);
11221 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11222 }
11223
11224 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11225 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11226
11227 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11228 if (IsMaskVector) {
11229 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11230
11231 // Expand input operands
11232 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11233 DAG.getUNDEF(ContainerVT),
11234 DAG.getConstant(1, DL, XLenVT), EVL1);
11235 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11236 DAG.getUNDEF(ContainerVT),
11237 DAG.getConstant(0, DL, XLenVT), EVL1);
11238 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11239 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11240
11241 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11242 DAG.getUNDEF(ContainerVT),
11243 DAG.getConstant(1, DL, XLenVT), EVL2);
11244 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11245 DAG.getUNDEF(ContainerVT),
11246 DAG.getConstant(0, DL, XLenVT), EVL2);
11247 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11248 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11249 }
11250
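// The splice itself is lowered as a vslidedown of Op1 by DownOffset followed
// by a tail-agnostic vslideup of Op2 at UpOffset; both offsets are derived
// below from the immediate offset and EVL1.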
11251 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11252 SDValue DownOffset, UpOffset;
11253 if (ImmValue >= 0) {
11254 // The operand is a TargetConstant; we need to rebuild it as a regular
11255 // constant.
11256 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11257 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11258 } else {
11259 // The operand is a TargetConstant; we need to rebuild it as a regular
11260 // constant rather than negating the original operand.
11261 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11262 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11263 }
11264
11265 SDValue SlideDown =
11266 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11267 Op1, DownOffset, Mask, UpOffset);
11268 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11269 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11270
11271 if (IsMaskVector) {
11272 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11273 Result = DAG.getNode(
11274 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11275 {Result, DAG.getConstant(0, DL, ContainerVT),
11276 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11277 Mask, EVL2});
11278 }
11279
11280 if (!VT.isFixedLengthVector())
11281 return Result;
11282 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11283}
11284
11285SDValue
11286RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11287 SelectionDAG &DAG) const {
11288 SDLoc DL(Op);
11289 MVT VT = Op.getSimpleValueType();
11290 MVT XLenVT = Subtarget.getXLenVT();
11291
11292 SDValue Op1 = Op.getOperand(0);
11293 SDValue Mask = Op.getOperand(1);
11294 SDValue EVL = Op.getOperand(2);
11295
11296 MVT ContainerVT = VT;
11297 if (VT.isFixedLengthVector()) {
11298 ContainerVT = getContainerForFixedLengthVector(VT);
11299 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11300 MVT MaskVT = getMaskTypeFor(ContainerVT);
11301 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11302 }
11303
11304 MVT GatherVT = ContainerVT;
11305 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11306 // Check if we are working with mask vectors
11307 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11308 if (IsMaskVector) {
11309 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11310
11311 // Expand input operand
11312 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11313 DAG.getUNDEF(IndicesVT),
11314 DAG.getConstant(1, DL, XLenVT), EVL);
11315 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11316 DAG.getUNDEF(IndicesVT),
11317 DAG.getConstant(0, DL, XLenVT), EVL);
11318 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11319 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11320 }
11321
11322 unsigned EltSize = GatherVT.getScalarSizeInBits();
11323 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11324 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11325 unsigned MaxVLMAX =
11326 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11327
11328 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11329 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11330 // to use vrgatherei16.vv.
11331 // TODO: It's also possible to use vrgatherei16.vv for other types to
11332 // decrease register width for the index calculation.
11333 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11334 if (MaxVLMAX > 256 && EltSize == 8) {
11335 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11336 // Split the vector in half and reverse each half using a full register
11337 // reverse.
11338 // Swap the halves and concatenate them.
11339 // Slide the concatenated result by (VLMax - VL).
11340 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11341 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11342 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11343
11344 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11345 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11346
11347 // Reassemble the low and high pieces reversed.
11348 // NOTE: this Result is unmasked (because we do not need masks for
11349 // shuffles). If in the future this has to change, we can use a SELECT_VL
11350 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11351 SDValue Result =
11352 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11353
11354 // Slide off any elements from past EVL that were reversed into the low
11355 // elements.
11356 unsigned MinElts = GatherVT.getVectorMinNumElements();
11357 SDValue VLMax =
11358 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11359 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11360
11361 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11362 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11363
11364 if (IsMaskVector) {
11365 // Truncate Result back to a mask vector
11366 Result =
11367 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11368 {Result, DAG.getConstant(0, DL, GatherVT),
11369 DAG.getCondCode(ISD::SETNE),
11370 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11371 }
11372
11373 if (!VT.isFixedLengthVector())
11374 return Result;
11375 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11376 }
11377
11378 // Just promote the int type to i16 which will double the LMUL.
11379 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11380 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11381 }
11382
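// General case: reverse with a gather. Build the index vector (EVL-1) - vid
// from vid.v, a splat of EVL-1, and a vector subtract, then vrgather from Op1.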
11383 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11384 SDValue VecLen =
11385 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11386 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11387 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11388 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11389 DAG.getUNDEF(IndicesVT), Mask, EVL);
11390 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11391 DAG.getUNDEF(GatherVT), Mask, EVL);
11392
11393 if (IsMaskVector) {
11394 // Truncate Result back to a mask vector
11395 Result = DAG.getNode(
11396 RISCVISD::SETCC_VL, DL, ContainerVT,
11397 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11398 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11399 }
11400
11401 if (!VT.isFixedLengthVector())
11402 return Result;
11403 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11404}
11405
11406SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11407 SelectionDAG &DAG) const {
11408 MVT VT = Op.getSimpleValueType();
11409 if (VT.getVectorElementType() != MVT::i1)
11410 return lowerVPOp(Op, DAG);
11411
11412 // It is safe to drop mask parameter as masked-off elements are undef.
11413 SDValue Op1 = Op->getOperand(0);
11414 SDValue Op2 = Op->getOperand(1);
11415 SDValue VL = Op->getOperand(3);
11416
11417 MVT ContainerVT = VT;
11418 const bool IsFixed = VT.isFixedLengthVector();
11419 if (IsFixed) {
11420 ContainerVT = getContainerForFixedLengthVector(VT);
11421 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11422 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11423 }
11424
11425 SDLoc DL(Op);
11426 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11427 if (!IsFixed)
11428 return Val;
11429 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11430}
11431
11432SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11433 SelectionDAG &DAG) const {
11434 SDLoc DL(Op);
11435 MVT XLenVT = Subtarget.getXLenVT();
11436 MVT VT = Op.getSimpleValueType();
11437 MVT ContainerVT = VT;
11438 if (VT.isFixedLengthVector())
11439 ContainerVT = getContainerForFixedLengthVector(VT);
11440
11441 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11442
11443 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11444 // Check if the mask is known to be all ones
11445 SDValue Mask = VPNode->getMask();
11446 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11447
11448 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11449 : Intrinsic::riscv_vlse_mask,
11450 DL, XLenVT);
11451 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11452 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11453 VPNode->getStride()};
11454 if (!IsUnmasked) {
11455 if (VT.isFixedLengthVector()) {
11456 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11457 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11458 }
11459 Ops.push_back(Mask);
11460 }
11461 Ops.push_back(VPNode->getVectorLength());
11462 if (!IsUnmasked) {
11463 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11464 Ops.push_back(Policy);
11465 }
11466
11467 SDValue Result =
11468 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11469 VPNode->getMemoryVT(), VPNode->getMemOperand());
11470 SDValue Chain = Result.getValue(1);
11471
11472 if (VT.isFixedLengthVector())
11473 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11474
11475 return DAG.getMergeValues({Result, Chain}, DL);
11476}
11477
11478SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11479 SelectionDAG &DAG) const {
11480 SDLoc DL(Op);
11481 MVT XLenVT = Subtarget.getXLenVT();
11482
11483 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11484 SDValue StoreVal = VPNode->getValue();
11485 MVT VT = StoreVal.getSimpleValueType();
11486 MVT ContainerVT = VT;
11487 if (VT.isFixedLengthVector()) {
11488 ContainerVT = getContainerForFixedLengthVector(VT);
11489 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11490 }
11491
11492 // Check if the mask is known to be all ones
11493 SDValue Mask = VPNode->getMask();
11494 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11495
11496 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11497 : Intrinsic::riscv_vsse_mask,
11498 DL, XLenVT);
11499 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11500 VPNode->getBasePtr(), VPNode->getStride()};
11501 if (!IsUnmasked) {
11502 if (VT.isFixedLengthVector()) {
11503 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11504 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11505 }
11506 Ops.push_back(Mask);
11507 }
11508 Ops.push_back(VPNode->getVectorLength());
11509
11510 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11511 Ops, VPNode->getMemoryVT(),
11512 VPNode->getMemOperand());
11513}
11514
11515// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11516// matched to a RVV indexed load. The RVV indexed load instructions only
11517// support the "unsigned unscaled" addressing mode; indices are implicitly
11518// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11519// signed or scaled indexing is extended to the XLEN value type and scaled
11520// accordingly.
11521SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11522 SelectionDAG &DAG) const {
11523 SDLoc DL(Op);
11524 MVT VT = Op.getSimpleValueType();
11525
11526 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11527 EVT MemVT = MemSD->getMemoryVT();
11528 MachineMemOperand *MMO = MemSD->getMemOperand();
11529 SDValue Chain = MemSD->getChain();
11530 SDValue BasePtr = MemSD->getBasePtr();
11531
11532 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11533 SDValue Index, Mask, PassThru, VL;
11534
11535 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11536 Index = VPGN->getIndex();
11537 Mask = VPGN->getMask();
11538 PassThru = DAG.getUNDEF(VT);
11539 VL = VPGN->getVectorLength();
11540 // VP doesn't support extending loads.
11541 LoadExtType = ISD::NON_EXTLOAD;
11542 } else {
11543 // Else it must be a MGATHER.
11544 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11545 Index = MGN->getIndex();
11546 Mask = MGN->getMask();
11547 PassThru = MGN->getPassThru();
11548 LoadExtType = MGN->getExtensionType();
11549 }
11550
11551 MVT IndexVT = Index.getSimpleValueType();
11552 MVT XLenVT = Subtarget.getXLenVT();
11553
11554 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11555 "Unexpected VTs!");
11556 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11557 // Targets have to explicitly opt-in for extending vector loads.
11558 assert(LoadExtType == ISD::NON_EXTLOAD &&
11559 "Unexpected extending MGATHER/VP_GATHER");
11560
11561 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11562 // the selection of the masked intrinsics doesn't do this for us.
11563 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11564
11565 MVT ContainerVT = VT;
11566 if (VT.isFixedLengthVector()) {
11567 ContainerVT = getContainerForFixedLengthVector(VT);
11568 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11569 ContainerVT.getVectorElementCount());
11570
11571 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11572
11573 if (!IsUnmasked) {
11574 MVT MaskVT = getMaskTypeFor(ContainerVT);
11575 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11576 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11577 }
11578 }
11579
11580 if (!VL)
11581 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11582
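// On RV32, i64 indices are truncated to XLEN here; per the addressing-mode
// contract described above, only the low XLEN bits are used as byte offsets.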
11583 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11584 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11585 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11586 }
11587
11588 unsigned IntID =
11589 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11590 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11591 if (IsUnmasked)
11592 Ops.push_back(DAG.getUNDEF(ContainerVT));
11593 else
11594 Ops.push_back(PassThru);
11595 Ops.push_back(BasePtr);
11596 Ops.push_back(Index);
11597 if (!IsUnmasked)
11598 Ops.push_back(Mask);
11599 Ops.push_back(VL);
11600 if (!IsUnmasked)
11601 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11602
11603 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11604 SDValue Result =
11605 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11606 Chain = Result.getValue(1);
11607
11608 if (VT.isFixedLengthVector())
11609 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11610
11611 return DAG.getMergeValues({Result, Chain}, DL);
11612}
11613
11614// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11615// matched to a RVV indexed store. The RVV indexed store instructions only
11616// support the "unsigned unscaled" addressing mode; indices are implicitly
11617// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11618// signed or scaled indexing is extended to the XLEN value type and scaled
11619// accordingly.
11620SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11621 SelectionDAG &DAG) const {
11622 SDLoc DL(Op);
11623 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11624 EVT MemVT = MemSD->getMemoryVT();
11625 MachineMemOperand *MMO = MemSD->getMemOperand();
11626 SDValue Chain = MemSD->getChain();
11627 SDValue BasePtr = MemSD->getBasePtr();
11628
11629 [[maybe_unused]] bool IsTruncatingStore = false;
11630 SDValue Index, Mask, Val, VL;
11631
11632 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11633 Index = VPSN->getIndex();
11634 Mask = VPSN->getMask();
11635 Val = VPSN->getValue();
11636 VL = VPSN->getVectorLength();
11637 // VP doesn't support truncating stores.
11638 IsTruncatingStore = false;
11639 } else {
11640 // Else it must be a MSCATTER.
11641 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11642 Index = MSN->getIndex();
11643 Mask = MSN->getMask();
11644 Val = MSN->getValue();
11645 IsTruncatingStore = MSN->isTruncatingStore();
11646 }
11647
11648 MVT VT = Val.getSimpleValueType();
11649 MVT IndexVT = Index.getSimpleValueType();
11650 MVT XLenVT = Subtarget.getXLenVT();
11651
11652 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11653 "Unexpected VTs!");
11654 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11655 // Targets have to explicitly opt-in for extending vector loads and
11656 // truncating vector stores.
11657 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11658
11659 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11660 // the selection of the masked intrinsics doesn't do this for us.
11661 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11662
11663 MVT ContainerVT = VT;
11664 if (VT.isFixedLengthVector()) {
11665 ContainerVT = getContainerForFixedLengthVector(VT);
11666 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11667 ContainerVT.getVectorElementCount());
11668
11669 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11670 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11671
11672 if (!IsUnmasked) {
11673 MVT MaskVT = getMaskTypeFor(ContainerVT);
11674 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11675 }
11676 }
11677
11678 if (!VL)
11679 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11680
11681 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11682 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11683 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11684 }
11685
11686 unsigned IntID =
11687 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11688 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11689 Ops.push_back(Val);
11690 Ops.push_back(BasePtr);
11691 Ops.push_back(Index);
11692 if (!IsUnmasked)
11693 Ops.push_back(Mask);
11694 Ops.push_back(VL);
11695
11696 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11697 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11698}
11699
11700SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11701 SelectionDAG &DAG) const {
11702 const MVT XLenVT = Subtarget.getXLenVT();
11703 SDLoc DL(Op);
11704 SDValue Chain = Op->getOperand(0);
11705 SDValue SysRegNo = DAG.getTargetConstant(
11706 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11707 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11708 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11709
11710 // Encoding used for rounding mode in RISC-V differs from that used in
11711 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11712 // table, which consists of a sequence of 4-bit fields, each representing
11713 // corresponding FLT_ROUNDS mode.
11714 static const int Table =
11715 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11716 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11717 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11718 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11719 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11720
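// For example, FRM = RTZ (1) selects nibble 1 of the table: the code below
// computes (Table >> 4) & 7 == int(RoundingMode::TowardZero) == 0, which is
// the FLT_ROUNDS encoding for round-toward-zero.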
11721 SDValue Shift =
11722 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11723 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11724 DAG.getConstant(Table, DL, XLenVT), Shift);
11725 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11726 DAG.getConstant(7, DL, XLenVT));
11727
11728 return DAG.getMergeValues({Masked, Chain}, DL);
11729}
11730
11731SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11732 SelectionDAG &DAG) const {
11733 const MVT XLenVT = Subtarget.getXLenVT();
11734 SDLoc DL(Op);
11735 SDValue Chain = Op->getOperand(0);
11736 SDValue RMValue = Op->getOperand(1);
11737 SDValue SysRegNo = DAG.getTargetConstant(
11738 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11739
11740 // Encoding used for rounding mode in RISC-V differs from that used in
11741 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11742 // a table, which consists of a sequence of 4-bit fields, each representing
11743 // corresponding RISC-V mode.
11744 static const unsigned Table =
11745 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11746 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11747 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11748 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11749 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11750
11751 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11752
11753 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11754 DAG.getConstant(2, DL, XLenVT));
11755 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11756 DAG.getConstant(Table, DL, XLenVT), Shift);
11757 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11758 DAG.getConstant(0x7, DL, XLenVT));
11759 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11760 RMValue);
11761}
11762
11763SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11764 SelectionDAG &DAG) const {
11765 MachineFunction &MF = DAG.getMachineFunction();
11766
11767 bool isRISCV64 = Subtarget.is64Bit();
11768 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11769
11770 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11771 return DAG.getFrameIndex(FI, PtrVT);
11772}
11773
11774// Returns the opcode of the target-specific SDNode that implements the 32-bit
11775// form of the given Opcode.
11776static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11777 switch (Opcode) {
11778 default:
11779 llvm_unreachable("Unexpected opcode");
11780 case ISD::SHL:
11781 return RISCVISD::SLLW;
11782 case ISD::SRA:
11783 return RISCVISD::SRAW;
11784 case ISD::SRL:
11785 return RISCVISD::SRLW;
11786 case ISD::SDIV:
11787 return RISCVISD::DIVW;
11788 case ISD::UDIV:
11789 return RISCVISD::DIVUW;
11790 case ISD::UREM:
11791 return RISCVISD::REMUW;
11792 case ISD::ROTL:
11793 return RISCVISD::ROLW;
11794 case ISD::ROTR:
11795 return RISCVISD::RORW;
11796 }
11797}
11798
11799// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11800// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11801// otherwise be promoted to i64, making it difficult to select the
11802 // SLLW/DIVUW/.../*W instructions later, because the fact that the operation
11803 // was originally of type i8/i16/i32 is lost.
11804 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11805 unsigned ExtOpc = ISD::ANY_EXTEND) {
11806 SDLoc DL(N);
11807 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11808 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11809 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11810 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11811 // ReplaceNodeResults requires we maintain the same type for the return value.
11812 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11813}
11814
11815 // Converts the given 32-bit operation to an i64 operation with signed-extension
11816 // semantics, to reduce the number of sign-extension instructions.
11817 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11818 SDLoc DL(N);
11819 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11820 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11821 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11822 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11823 DAG.getValueType(MVT::i32));
11824 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11825}
11826
11827 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11828 SmallVectorImpl<SDValue> &Results,
11829 SelectionDAG &DAG) const {
11830 SDLoc DL(N);
11831 switch (N->getOpcode()) {
11832 default:
11833 llvm_unreachable("Don't know how to custom type legalize this operation!");
11834 case ISD::STRICT_FP_TO_SINT:
11835 case ISD::STRICT_FP_TO_UINT:
11836 case ISD::FP_TO_SINT:
11837 case ISD::FP_TO_UINT: {
11838 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11839 "Unexpected custom legalisation");
11840 bool IsStrict = N->isStrictFPOpcode();
11841 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11842 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11843 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11844 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11845 TargetLowering::TypeSoftenFloat) {
11846 if (!isTypeLegal(Op0.getValueType()))
11847 return;
11848 if (IsStrict) {
11849 SDValue Chain = N->getOperand(0);
11850 // In the absence of Zfh, promote f16 to f32, then convert.
11851 if (Op0.getValueType() == MVT::f16 &&
11852 !Subtarget.hasStdExtZfhOrZhinx()) {
11853 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11854 {Chain, Op0});
11855 Chain = Op0.getValue(1);
11856 }
11857 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11858 : RISCVISD::STRICT_FCVT_WU_RV64;
11859 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11860 SDValue Res = DAG.getNode(
11861 Opc, DL, VTs, Chain, Op0,
11862 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11863 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11864 Results.push_back(Res.getValue(1));
11865 return;
11866 }
11867 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11868 // convert.
11869 if ((Op0.getValueType() == MVT::f16 &&
11870 !Subtarget.hasStdExtZfhOrZhinx()) ||
11871 Op0.getValueType() == MVT::bf16)
11872 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11873
11874 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11875 SDValue Res =
11876 DAG.getNode(Opc, DL, MVT::i64, Op0,
11877 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11878 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11879 return;
11880 }
11881 // If the FP type needs to be softened, emit a library call using the 'si'
11882 // version. If we left it to default legalization we'd end up with 'di'. If
11883 // the FP type doesn't need to be softened just let generic type
11884 // legalization promote the result type.
11885 RTLIB::Libcall LC;
11886 if (IsSigned)
11887 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11888 else
11889 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11890 MakeLibCallOptions CallOptions;
11891 EVT OpVT = Op0.getValueType();
11892 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11893 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11894 SDValue Result;
11895 std::tie(Result, Chain) =
11896 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11897 Results.push_back(Result);
11898 if (IsStrict)
11899 Results.push_back(Chain);
11900 break;
11901 }
11902 case ISD::LROUND: {
11903 SDValue Op0 = N->getOperand(0);
11904 EVT Op0VT = Op0.getValueType();
11905 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11906 TargetLowering::TypeSoftenFloat) {
11907 if (!isTypeLegal(Op0VT))
11908 return;
11909
11910 // In the absence of Zfh, promote f16 to f32, then convert.
11911 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11912 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11913
11914 SDValue Res =
11915 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11916 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11917 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11918 return;
11919 }
11920 // If the FP type needs to be softened, emit a library call to lround. We'll
11921 // need to truncate the result. We assume any value that doesn't fit in i32
11922 // is allowed to return an unspecified value.
11923 RTLIB::Libcall LC =
11924 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11925 MakeLibCallOptions CallOptions;
11926 EVT OpVT = Op0.getValueType();
11927 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11928 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11929 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11930 Results.push_back(Result);
11931 break;
11932 }
11933 case ISD::READCYCLECOUNTER:
11934 case ISD::READSTEADYCOUNTER: {
11935 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11936 "has custom type legalization on riscv32");
11937
11938 SDValue LoCounter, HiCounter;
11939 MVT XLenVT = Subtarget.getXLenVT();
11940 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11941 LoCounter = DAG.getTargetConstant(
11942 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11943 HiCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11945 } else {
11946 LoCounter = DAG.getTargetConstant(
11947 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11948 HiCounter = DAG.getTargetConstant(
11949 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11950 }
11951 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11952 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11953 N->getOperand(0), LoCounter, HiCounter);
11954
11955 Results.push_back(
11956 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11957 Results.push_back(RCW.getValue(2));
11958 break;
11959 }
11960 case ISD::LOAD: {
11961 if (!ISD::isNON_EXTLoad(N))
11962 return;
11963
11964 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11965 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11966 LoadSDNode *Ld = cast<LoadSDNode>(N);
11967
11968 SDLoc dl(N);
11969 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11970 Ld->getBasePtr(), Ld->getMemoryVT(),
11971 Ld->getMemOperand());
11972 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11973 Results.push_back(Res.getValue(1));
11974 return;
11975 }
11976 case ISD::MUL: {
11977 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11978 unsigned XLen = Subtarget.getXLen();
11979 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
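// If exactly one operand is known unsigned (its high XLEN bits are zero), the
// 2*XLEN-bit product is {mulhsu(S, U), mul(S, U)}: mul provides the low XLEN
// bits and mulhsu the high XLEN bits of a signed-by-unsigned multiply.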
11980 if (Size > XLen) {
11981 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11982 SDValue LHS = N->getOperand(0);
11983 SDValue RHS = N->getOperand(1);
11984 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11985
11986 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11987 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11988 // We need exactly one side to be unsigned.
11989 if (LHSIsU == RHSIsU)
11990 return;
11991
11992 auto MakeMULPair = [&](SDValue S, SDValue U) {
11993 MVT XLenVT = Subtarget.getXLenVT();
11994 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11995 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11996 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11997 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11998 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11999 };
12000
12001 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12002 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12003
12004 // The other operand should be signed, but still prefer MULH when
12005 // possible.
12006 if (RHSIsU && LHSIsS && !RHSIsS)
12007 Results.push_back(MakeMULPair(LHS, RHS));
12008 else if (LHSIsU && RHSIsS && !LHSIsS)
12009 Results.push_back(MakeMULPair(RHS, LHS));
12010
12011 return;
12012 }
12013 [[fallthrough]];
12014 }
12015 case ISD::ADD:
12016 case ISD::SUB:
12017 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12018 "Unexpected custom legalisation");
12019 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12020 break;
12021 case ISD::SHL:
12022 case ISD::SRA:
12023 case ISD::SRL:
12024 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12025 "Unexpected custom legalisation");
12026 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12027 // If we can use a BSET instruction, allow default promotion to apply.
12028 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12029 isOneConstant(N->getOperand(0)))
12030 break;
12031 Results.push_back(customLegalizeToWOp(N, DAG));
12032 break;
12033 }
12034
12035 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12036 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12037 // shift amount.
12038 if (N->getOpcode() == ISD::SHL) {
12039 SDLoc DL(N);
12040 SDValue NewOp0 =
12041 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12042 SDValue NewOp1 =
12043 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12044 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12045 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12046 DAG.getValueType(MVT::i32));
12047 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12048 }
12049
12050 break;
12051 case ISD::ROTL:
12052 case ISD::ROTR:
12053 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12054 "Unexpected custom legalisation");
12055 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12056 Subtarget.hasVendorXTHeadBb()) &&
12057 "Unexpected custom legalization");
12058 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12059 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12060 return;
12061 Results.push_back(customLegalizeToWOp(N, DAG));
12062 break;
12063 case ISD::CTTZ:
12064 case ISD::CTTZ_ZERO_UNDEF:
12065 case ISD::CTLZ:
12066 case ISD::CTLZ_ZERO_UNDEF: {
12067 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12068 "Unexpected custom legalisation");
12069
12070 SDValue NewOp0 =
12071 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12072 bool IsCTZ =
12073 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12074 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12075 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12076 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12077 return;
12078 }
12079 case ISD::SDIV:
12080 case ISD::UDIV:
12081 case ISD::UREM: {
12082 MVT VT = N->getSimpleValueType(0);
12083 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12084 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12085 "Unexpected custom legalisation");
12086 // Don't promote division/remainder by constant since we should expand those
12087 // to multiply by magic constant.
12088 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12089 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12090 !isIntDivCheap(N->getValueType(0), Attr))
12091 return;
12092
12093 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12094 // the upper 32 bits. For other types we need to sign or zero extend
12095 // based on the opcode.
12096 unsigned ExtOpc = ISD::ANY_EXTEND;
12097 if (VT != MVT::i32)
12098 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12099 : ISD::ZERO_EXTEND;
12100
12101 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12102 break;
12103 }
12104 case ISD::SADDO: {
12105 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12106 "Unexpected custom legalisation");
12107
12108 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12109 // use the default legalization.
12110 if (!isa<ConstantSDNode>(N->getOperand(1)))
12111 return;
12112
12113 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12114 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12115 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12116 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12117 DAG.getValueType(MVT::i32));
12118
12119 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12120
12121 // For an addition, the result should be less than one of the operands (LHS)
12122 // if and only if the other operand (RHS) is negative, otherwise there will
12123 // be overflow.
12124 // For a subtraction, the result should be less than one of the operands
12125 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12126 // otherwise there will be overflow.
12127 EVT OType = N->getValueType(1);
12128 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12129 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12130
12131 SDValue Overflow =
12132 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12133 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12134 Results.push_back(Overflow);
12135 return;
12136 }
12137 case ISD::UADDO:
12138 case ISD::USUBO: {
12139 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12140 "Unexpected custom legalisation");
12141 bool IsAdd = N->getOpcode() == ISD::UADDO;
12142 // Create an ADDW or SUBW.
12143 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12144 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12145 SDValue Res =
12146 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12147 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12148 DAG.getValueType(MVT::i32));
12149
12150 SDValue Overflow;
12151 if (IsAdd && isOneConstant(RHS)) {
12152 // Special case uaddo X, 1 overflowed if the addition result is 0.
12153 // The general case (X + C) < C is not necessarily beneficial. Although we
12154 // reduce the live range of X, we may introduce the materialization of
12155 // constant C, especially when the setcc result is used by a branch. We have
12156 // no compare-with-constant-and-branch instructions.
12157 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12158 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12159 } else if (IsAdd && isAllOnesConstant(RHS)) {
12160 // Special case uaddo X, -1 overflowed if X != 0.
12161 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12162 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12163 } else {
12164 // Sign extend the LHS and perform an unsigned compare with the ADDW
12165 // result. Since the inputs are sign extended from i32, this is equivalent
12166 // to comparing the lower 32 bits.
12167 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12168 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12169 IsAdd ? ISD::SETULT : ISD::SETUGT);
12170 }
12171
12172 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12173 Results.push_back(Overflow);
12174 return;
12175 }
12176 case ISD::UADDSAT:
12177 case ISD::USUBSAT: {
12178 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12179 "Unexpected custom legalisation");
12180 if (Subtarget.hasStdExtZbb()) {
12181 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12182 // sign extend allows overflow of the lower 32 bits to be detected on
12183 // the promoted size.
12184 SDValue LHS =
12185 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12186 SDValue RHS =
12187 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12188 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12189 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12190 return;
12191 }
12192
12193 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12194 // promotion for UADDO/USUBO.
12195 Results.push_back(expandAddSubSat(N, DAG));
12196 return;
12197 }
12198 case ISD::SADDSAT:
12199 case ISD::SSUBSAT: {
12200 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12201 "Unexpected custom legalisation");
12202 Results.push_back(expandAddSubSat(N, DAG));
12203 return;
12204 }
12205 case ISD::ABS: {
12206 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12207 "Unexpected custom legalisation");
12208
12209 if (Subtarget.hasStdExtZbb()) {
12210 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12211 // This allows us to remember that the result is sign extended. Expanding
12212 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12213 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12214 N->getOperand(0));
12215 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12216 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12217 return;
12218 }
12219
12220 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12221 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12222
12223 // Freeze the source so we can increase its use count.
12224 Src = DAG.getFreeze(Src);
12225
12226 // Copy sign bit to all bits using the sraiw pattern.
12227 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12228 DAG.getValueType(MVT::i32));
12229 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12230 DAG.getConstant(31, DL, MVT::i64));
12231
12232 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12233 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12234
12235 // NOTE: The result is only required to be anyextended, but sext is
12236 // consistent with type legalization of sub.
12237 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12238 DAG.getValueType(MVT::i32));
12239 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12240 return;
12241 }
12242 case ISD::BITCAST: {
12243 EVT VT = N->getValueType(0);
12244 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12245 SDValue Op0 = N->getOperand(0);
12246 EVT Op0VT = Op0.getValueType();
12247 MVT XLenVT = Subtarget.getXLenVT();
12248 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12249 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12250 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12251 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12252 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12253 Subtarget.hasStdExtZfbfmin()) {
12254 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12255 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12256 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12257 Subtarget.hasStdExtFOrZfinx()) {
12258 SDValue FPConv =
12259 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12260 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12261 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12262 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12263 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12264 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12265 NewReg.getValue(0), NewReg.getValue(1));
12266 Results.push_back(RetReg);
12267 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12268 isTypeLegal(Op0VT)) {
12269 // Custom-legalize bitcasts from fixed-length vector types to illegal
12270 // scalar types in order to improve codegen. Bitcast the vector to a
12271 // one-element vector type whose element type is the same as the result
12272 // type, and extract the first element.
12273 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12274 if (isTypeLegal(BVT)) {
12275 SDValue BVec = DAG.getBitcast(BVT, Op0);
12276 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12277 DAG.getVectorIdxConstant(0, DL)));
12278 }
12279 }
12280 break;
12281 }
12282 case RISCVISD::BREV8: {
12283 MVT VT = N->getSimpleValueType(0);
12284 MVT XLenVT = Subtarget.getXLenVT();
12285 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12286 "Unexpected custom legalisation");
12287 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12288 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12289 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12290 // ReplaceNodeResults requires we maintain the same type for the return
12291 // value.
12292 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12293 break;
12294 }
12295 case ISD::EXTRACT_VECTOR_ELT: {
12296 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12297 // type is illegal (currently only vXi64 RV32).
12298 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12299 // transferred to the destination register. We issue two of these from the
12300 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12301 // first element.
12302 SDValue Vec = N->getOperand(0);
12303 SDValue Idx = N->getOperand(1);
12304
12305 // The vector type hasn't been legalized yet so we can't issue target
12306 // specific nodes if it needs legalization.
12307 // FIXME: We would manually legalize if it's important.
12308 if (!isTypeLegal(Vec.getValueType()))
12309 return;
12310
12311 MVT VecVT = Vec.getSimpleValueType();
12312
12313 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12314 VecVT.getVectorElementType() == MVT::i64 &&
12315 "Unexpected EXTRACT_VECTOR_ELT legalization");
12316
12317 // If this is a fixed vector, we need to convert it to a scalable vector.
12318 MVT ContainerVT = VecVT;
12319 if (VecVT.isFixedLengthVector()) {
12320 ContainerVT = getContainerForFixedLengthVector(VecVT);
12321 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12322 }
12323
12324 MVT XLenVT = Subtarget.getXLenVT();
12325
12326 // Use a VL of 1 to avoid processing more elements than we need.
12327 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12328
12329 // Unless the index is known to be 0, we must slide the vector down to get
12330 // the desired element into index 0.
12331 if (!isNullConstant(Idx)) {
12332 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12333 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12334 }
12335
12336 // Extract the lower XLEN bits of the correct vector element.
12337 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12338
12339 // To extract the upper XLEN bits of the vector element, shift the first
12340 // element right by 32 bits and re-extract the lower XLEN bits.
12341 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12342 DAG.getUNDEF(ContainerVT),
12343 DAG.getConstant(32, DL, XLenVT), VL);
12344 SDValue LShr32 =
12345 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12346 DAG.getUNDEF(ContainerVT), Mask, VL);
12347
12348 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12349
12350 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12351 break;
12352 }
12353 case ISD::INTRINSIC_WO_CHAIN: {
12354 unsigned IntNo = N->getConstantOperandVal(0);
12355 switch (IntNo) {
12356 default:
12358 "Don't know how to custom type legalize this intrinsic!");
12359 case Intrinsic::experimental_get_vector_length: {
12360 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12361 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12362 return;
12363 }
12364 case Intrinsic::experimental_cttz_elts: {
12365 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12366 Results.push_back(
12367 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12368 return;
12369 }
12370 case Intrinsic::riscv_orc_b:
12371 case Intrinsic::riscv_brev8:
12372 case Intrinsic::riscv_sha256sig0:
12373 case Intrinsic::riscv_sha256sig1:
12374 case Intrinsic::riscv_sha256sum0:
12375 case Intrinsic::riscv_sha256sum1:
12376 case Intrinsic::riscv_sm3p0:
12377 case Intrinsic::riscv_sm3p1: {
12378 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12379 return;
12380 unsigned Opc;
12381 switch (IntNo) {
12382 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12383 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12384 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12385 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12386 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12387 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12388 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12389 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12390 }
12391
12392 SDValue NewOp =
12393 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12394 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12395 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12396 return;
12397 }
12398 case Intrinsic::riscv_sm4ks:
12399 case Intrinsic::riscv_sm4ed: {
12400 unsigned Opc =
12401 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12402 SDValue NewOp0 =
12403 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12404 SDValue NewOp1 =
12405 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12406 SDValue Res =
12407 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12408 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12409 return;
12410 }
12411 case Intrinsic::riscv_mopr: {
12412 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12413 return;
12414 SDValue NewOp =
12415 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12416 SDValue Res = DAG.getNode(
12417 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12418 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12419 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12420 return;
12421 }
12422 case Intrinsic::riscv_moprr: {
12423 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12424 return;
12425 SDValue NewOp0 =
12426 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12427 SDValue NewOp1 =
12428 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12429 SDValue Res = DAG.getNode(
12430 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12431 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12432 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12433 return;
12434 }
12435 case Intrinsic::riscv_clmul: {
12436 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12437 return;
12438
12439 SDValue NewOp0 =
12440 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12441 SDValue NewOp1 =
12442 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12443 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12444 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12445 return;
12446 }
12447 case Intrinsic::riscv_clmulh:
12448 case Intrinsic::riscv_clmulr: {
12449 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12450 return;
12451
12452 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12453 // to the full 128-bit clmul result of multiplying two xlen values.
12454 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12455 // upper 32 bits.
12456 //
12457 // The alternative is to mask the inputs to 32 bits and use clmul, but
12458 // that requires two shifts to mask each input without zext.w.
12459 // FIXME: If the inputs are known zero extended or could be freely
12460 // zero extended, the mask form would be better.
12461 SDValue NewOp0 =
12462 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12463 SDValue NewOp1 =
12464 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12465 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12466 DAG.getConstant(32, DL, MVT::i64));
12467 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12468 DAG.getConstant(32, DL, MVT::i64));
12469 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12470 : RISCVISD::CLMULR;
12471 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12472 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12473 DAG.getConstant(32, DL, MVT::i64));
12474 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12475 return;
12476 }
12477 case Intrinsic::riscv_vmv_x_s: {
12478 EVT VT = N->getValueType(0);
12479 MVT XLenVT = Subtarget.getXLenVT();
12480 if (VT.bitsLT(XLenVT)) {
12481 // Simple case just extract using vmv.x.s and truncate.
12482 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12483 Subtarget.getXLenVT(), N->getOperand(1));
12484 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12485 return;
12486 }
12487
12488 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12489 "Unexpected custom legalization");
12490
12491 // We need to do the move in two steps.
12492 SDValue Vec = N->getOperand(1);
12493 MVT VecVT = Vec.getSimpleValueType();
12494
12495 // First extract the lower XLEN bits of the element.
12496 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12497
12498 // To extract the upper XLEN bits of the vector element, shift the first
12499 // element right by 32 bits and re-extract the lower XLEN bits.
12500 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12501
12502 SDValue ThirtyTwoV =
12503 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12504 DAG.getConstant(32, DL, XLenVT), VL);
12505 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12506 DAG.getUNDEF(VecVT), Mask, VL);
12507 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12508
12509 Results.push_back(
12510 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12511 break;
12512 }
12513 }
12514 break;
12515 }
12516 case ISD::VECREDUCE_ADD:
12517 case ISD::VECREDUCE_AND:
12518 case ISD::VECREDUCE_OR:
12519 case ISD::VECREDUCE_XOR:
12520 case ISD::VECREDUCE_SMAX:
12521 case ISD::VECREDUCE_UMAX:
12522 case ISD::VECREDUCE_SMIN:
12523 case ISD::VECREDUCE_UMIN:
12524 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12525 Results.push_back(V);
12526 break;
12527 case ISD::VP_REDUCE_ADD:
12528 case ISD::VP_REDUCE_AND:
12529 case ISD::VP_REDUCE_OR:
12530 case ISD::VP_REDUCE_XOR:
12531 case ISD::VP_REDUCE_SMAX:
12532 case ISD::VP_REDUCE_UMAX:
12533 case ISD::VP_REDUCE_SMIN:
12534 case ISD::VP_REDUCE_UMIN:
12535 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12536 Results.push_back(V);
12537 break;
12538 case ISD::GET_ROUNDING: {
12539 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12540 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12541 Results.push_back(Res.getValue(0));
12542 Results.push_back(Res.getValue(1));
12543 break;
12544 }
12545 }
12546}
12547
12548/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12549/// which corresponds to it.
12550static unsigned getVecReduceOpcode(unsigned Opc) {
12551 switch (Opc) {
12552 default:
12553 llvm_unreachable("Unhandled binary to transform reduction");
12554 case ISD::ADD:
12555 return ISD::VECREDUCE_ADD;
12556 case ISD::UMAX:
12557 return ISD::VECREDUCE_UMAX;
12558 case ISD::SMAX:
12559 return ISD::VECREDUCE_SMAX;
12560 case ISD::UMIN:
12561 return ISD::VECREDUCE_UMIN;
12562 case ISD::SMIN:
12563 return ISD::VECREDUCE_SMIN;
12564 case ISD::AND:
12565 return ISD::VECREDUCE_AND;
12566 case ISD::OR:
12567 return ISD::VECREDUCE_OR;
12568 case ISD::XOR:
12569 return ISD::VECREDUCE_XOR;
12570 case ISD::FADD:
12571 // Note: This is the associative form of the generic reduction opcode.
12572 return ISD::VECREDUCE_FADD;
12573 }
12574}
12575
12576/// Perform two related transforms whose purpose is to incrementally recognize
12577/// an explode_vector followed by scalar reduction as a vector reduction node.
12578/// This exists to recover from a deficiency in SLP which can't handle
12579/// forests with multiple roots sharing common nodes. In some cases, one
12580/// of the trees will be vectorized, and the other will remain (unprofitably)
12581/// scalarized.
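///
/// For example, starting from
///   (add (add (extract_vector_elt V, 0), (extract_vector_elt V, 1)),
///        (extract_vector_elt V, 2))
/// the inner add is first rewritten as a 2-element vector reduction
///   (vecreduce_add (extract_subvector V, 0..1))
/// and the outer add then grows that reduction to cover elements 0..2.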
12582static SDValue
12583 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12584 const RISCVSubtarget &Subtarget) {
12585
12586 // This transform needs to run before all integer types have been legalized
12587 // to i64 (so that the vector element type matches the add type), and while
12588 // it's safe to introduce odd sized vector types.
12589 if (DAG.NewNodesMustHaveLegalTypes)
12590 return SDValue();
12591
12592 // Without V, this transform isn't useful. We could form the (illegal)
12593 // operations and let them be scalarized again, but there's really no point.
12594 if (!Subtarget.hasVInstructions())
12595 return SDValue();
12596
12597 const SDLoc DL(N);
12598 const EVT VT = N->getValueType(0);
12599 const unsigned Opc = N->getOpcode();
12600
12601 // For FADD, we only handle the case with reassociation allowed. We
12602 // could handle strict reduction order, but at the moment, there's no
12603 // known reason to, and the complexity isn't worth it.
12604 // TODO: Handle fminnum and fmaxnum here
12605 if (!VT.isInteger() &&
12606 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12607 return SDValue();
12608
12609 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12610 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12611 "Inconsistent mappings");
12612 SDValue LHS = N->getOperand(0);
12613 SDValue RHS = N->getOperand(1);
12614
12615 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12616 return SDValue();
12617
12618 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12619 std::swap(LHS, RHS);
12620
12621 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12622 !isa<ConstantSDNode>(RHS.getOperand(1)))
12623 return SDValue();
12624
12625 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12626 SDValue SrcVec = RHS.getOperand(0);
12627 EVT SrcVecVT = SrcVec.getValueType();
12628 assert(SrcVecVT.getVectorElementType() == VT);
12629 if (SrcVecVT.isScalableVector())
12630 return SDValue();
12631
12632 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12633 return SDValue();
12634
12635 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12636 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12637 // root of our reduction tree. TODO: We could extend this to any two
12638 // adjacent aligned constant indices if desired.
12639 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12640 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12641 uint64_t LHSIdx =
12642 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12643 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12644 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12645 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12646 DAG.getVectorIdxConstant(0, DL));
12647 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12648 }
12649 }
12650
12651 // Match (binop (reduce (extract_subvector V, 0)),
12652 // (extract_vector_elt V, sizeof(SubVec)))
12653 // into a reduction of one more element from the original vector V.
12654 if (LHS.getOpcode() != ReduceOpc)
12655 return SDValue();
12656
12657 SDValue ReduceVec = LHS.getOperand(0);
12658 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12659 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12660 isNullConstant(ReduceVec.getOperand(1)) &&
12661 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12662 // For illegal types (e.g. 3xi32), most will be combined again into a
12663 // wider (hopefully legal) type. If this is a terminal state, we are
12664 // relying on type legalization here to produce something reasonable
12665 // and this lowering quality could probably be improved. (TODO)
12666 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12667 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12668 DAG.getVectorIdxConstant(0, DL));
12669 auto Flags = ReduceVec->getFlags();
12670 Flags.intersectWith(N->getFlags());
12671 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12672 }
12673
12674 return SDValue();
12675}
12676
12677
12678// Try to fold (<bop> x, (reduction.<bop> vec, start))
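// Conceptually, when the reduction's scalar start value is the neutral
// element (e.g. 0 for add), x can be folded into that start value instead,
// roughly: (add x, (extract_elt (reduce vec, (splat 0)), 0))
//          -> (extract_elt (reduce vec, (splat x)), 0)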
12679 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12680 const RISCVSubtarget &Subtarget) {
12681 auto BinOpToRVVReduce = [](unsigned Opc) {
12682 switch (Opc) {
12683 default:
12684 llvm_unreachable("Unhandled binary to transform reduction");
12685 case ISD::ADD:
12686 return RISCVISD::VECREDUCE_ADD_VL;
12687 case ISD::UMAX:
12688 return RISCVISD::VECREDUCE_UMAX_VL;
12689 case ISD::SMAX:
12690 return RISCVISD::VECREDUCE_SMAX_VL;
12691 case ISD::UMIN:
12692 return RISCVISD::VECREDUCE_UMIN_VL;
12693 case ISD::SMIN:
12694 return RISCVISD::VECREDUCE_SMIN_VL;
12695 case ISD::AND:
12696 return RISCVISD::VECREDUCE_AND_VL;
12697 case ISD::OR:
12698 return RISCVISD::VECREDUCE_OR_VL;
12699 case ISD::XOR:
12700 return RISCVISD::VECREDUCE_XOR_VL;
12701 case ISD::FADD:
12702 return RISCVISD::VECREDUCE_FADD_VL;
12703 case ISD::FMAXNUM:
12704 return RISCVISD::VECREDUCE_FMAX_VL;
12705 case ISD::FMINNUM:
12706 return RISCVISD::VECREDUCE_FMIN_VL;
12707 }
12708 };
12709
12710 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12711 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12712 isNullConstant(V.getOperand(1)) &&
12713 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12714 };
12715
12716 unsigned Opc = N->getOpcode();
12717 unsigned ReduceIdx;
12718 if (IsReduction(N->getOperand(0), Opc))
12719 ReduceIdx = 0;
12720 else if (IsReduction(N->getOperand(1), Opc))
12721 ReduceIdx = 1;
12722 else
12723 return SDValue();
12724
12725 // Skip if FADD disallows reassociation but the combiner needs it.
12726 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12727 return SDValue();
12728
12729 SDValue Extract = N->getOperand(ReduceIdx);
12730 SDValue Reduce = Extract.getOperand(0);
12731 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12732 return SDValue();
12733
12734 SDValue ScalarV = Reduce.getOperand(2);
12735 EVT ScalarVT = ScalarV.getValueType();
12736 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12737 ScalarV.getOperand(0)->isUndef() &&
12738 isNullConstant(ScalarV.getOperand(2)))
12739 ScalarV = ScalarV.getOperand(1);
12740
12741 // Make sure that ScalarV is a splat with VL=1.
12742 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12743 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12744 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12745 return SDValue();
12746
12747 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12748 return SDValue();
12749
12750 // Check that the scalar of ScalarV is the neutral element.
12751 // TODO: Deal with value other than neutral element.
12752 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12753 0))
12754 return SDValue();
12755
12756 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12757 // FIXME: We might be able to improve this if operand 0 is undef.
12758 if (!isNonZeroAVL(Reduce.getOperand(5)))
12759 return SDValue();
12760
12761 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12762
12763 SDLoc DL(N);
12764 SDValue NewScalarV =
12765 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12766 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12767
12768 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12769 if (ScalarVT != ScalarV.getValueType())
12770 NewScalarV =
12771 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12772 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12773
12774 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12775 NewScalarV, Reduce.getOperand(3),
12776 Reduce.getOperand(4), Reduce.getOperand(5)};
12777 SDValue NewReduce =
12778 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12779 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12780 Extract.getOperand(1));
12781}
12782
12783// Optimize (add (shl x, c0), (shl y, c1)) ->
12784// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
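// For example, with c0 = 2 and c1 = 4 (so c1 - c0 = 2):
//   (add (shl x, 2), (shl y, 4)) == ((x + (y << 2)) << 2)
// which becomes (SLLI (SH2ADD y, x), 2).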
12785 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12786 const RISCVSubtarget &Subtarget) {
12787 // Perform this optimization only in the zba extension.
12788 if (!Subtarget.hasStdExtZba())
12789 return SDValue();
12790
12791 // Skip for vector types and larger types.
12792 EVT VT = N->getValueType(0);
12793 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12794 return SDValue();
12795
12796 // The two operand nodes must be SHL and have no other use.
12797 SDValue N0 = N->getOperand(0);
12798 SDValue N1 = N->getOperand(1);
12799 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12800 !N0->hasOneUse() || !N1->hasOneUse())
12801 return SDValue();
12802
12803 // Check c0 and c1.
12804 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12805 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12806 if (!N0C || !N1C)
12807 return SDValue();
12808 int64_t C0 = N0C->getSExtValue();
12809 int64_t C1 = N1C->getSExtValue();
12810 if (C0 <= 0 || C1 <= 0)
12811 return SDValue();
12812
12813 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12814 int64_t Bits = std::min(C0, C1);
12815 int64_t Diff = std::abs(C0 - C1);
12816 if (Diff != 1 && Diff != 2 && Diff != 3)
12817 return SDValue();
12818
12819 // Build nodes.
12820 SDLoc DL(N);
12821 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12822 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12823 SDValue NA0 =
12824 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12825 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12826 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12827}
12828
12829// Combine a constant select operand into its use:
12830//
12831// (and (select cond, -1, c), x)
12832// -> (select cond, x, (and x, c)) [AllOnes=1]
12833// (or (select cond, 0, c), x)
12834// -> (select cond, x, (or x, c)) [AllOnes=0]
12835// (xor (select cond, 0, c), x)
12836// -> (select cond, x, (xor x, c)) [AllOnes=0]
12837// (add (select cond, 0, c), x)
12838// -> (select cond, x, (add x, c)) [AllOnes=0]
12839// (sub x, (select cond, 0, c))
12840// -> (select cond, x, (sub x, c)) [AllOnes=0]
12841 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12842 SelectionDAG &DAG, bool AllOnes,
12843 const RISCVSubtarget &Subtarget) {
12844 EVT VT = N->getValueType(0);
12845
12846 // Skip vectors.
12847 if (VT.isVector())
12848 return SDValue();
12849
12850 if (!Subtarget.hasConditionalMoveFusion()) {
12851 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12852 if ((!Subtarget.hasStdExtZicond() &&
12853 !Subtarget.hasVendorXVentanaCondOps()) ||
12854 N->getOpcode() != ISD::AND)
12855 return SDValue();
12856
12857 // Maybe harmful when the condition code has multiple uses.
12858 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12859 return SDValue();
12860
12861 // Maybe harmful when VT is wider than XLen.
12862 if (VT.getSizeInBits() > Subtarget.getXLen())
12863 return SDValue();
12864 }
12865
12866 if ((Slct.getOpcode() != ISD::SELECT &&
12867 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12868 !Slct.hasOneUse())
12869 return SDValue();
12870
12871 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12872 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12873 };
12874
12875 bool SwapSelectOps;
12876 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12877 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12878 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12879 SDValue NonConstantVal;
12880 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12881 SwapSelectOps = false;
12882 NonConstantVal = FalseVal;
12883 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12884 SwapSelectOps = true;
12885 NonConstantVal = TrueVal;
12886 } else
12887 return SDValue();
12888
12889 // Slct is now known to be the desired identity constant when CC is true.
12890 TrueVal = OtherOp;
12891 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12892 // Unless SwapSelectOps says the condition should be false.
12893 if (SwapSelectOps)
12894 std::swap(TrueVal, FalseVal);
12895
12896 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12897 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12898 {Slct.getOperand(0), Slct.getOperand(1),
12899 Slct.getOperand(2), TrueVal, FalseVal});
12900
12901 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12902 {Slct.getOperand(0), TrueVal, FalseVal});
12903}
12904
12905// Attempt combineSelectAndUse on each operand of a commutative operator N.
12906 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12907 bool AllOnes,
12908 const RISCVSubtarget &Subtarget) {
12909 SDValue N0 = N->getOperand(0);
12910 SDValue N1 = N->getOperand(1);
12911 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12912 return Result;
12913 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12914 return Result;
12915 return SDValue();
12916}
12917
12918// Transform (add (mul x, c0), c1) ->
12919// (add (mul (add x, c1/c0), c0), c1%c0).
12920// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12921// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12922// to an infinite loop in DAGCombine if transformed.
12923// Or transform (add (mul x, c0), c1) ->
12924// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12925// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12926// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12927// lead to an infinite loop in DAGCombine if transformed.
12928// Or transform (add (mul x, c0), c1) ->
12929// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12930// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12931// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12932// lead to an infinite loop in DAGCombine if transformed.
12933// Or transform (add (mul x, c0), c1) ->
12934// (mul (add x, c1/c0), c0).
12935// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
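// For example, with c0 = 100 and c1 = 4099 (not a simm12): c1/c0 = 40 and
// c1%c0 = 99 are both simm12, and c0*(c1/c0) = 4000 is not, so
// (add (mul x, 100), 4099) becomes (add (mul (add x, 40), 100), 99).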
12936 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12937 const RISCVSubtarget &Subtarget) {
12938 // Skip for vector types and larger types.
12939 EVT VT = N->getValueType(0);
12940 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12941 return SDValue();
12942 // The first operand node must be a MUL and has no other use.
12943 SDValue N0 = N->getOperand(0);
12944 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12945 return SDValue();
12946 // Check if c0 and c1 match above conditions.
12947 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12948 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12949 if (!N0C || !N1C)
12950 return SDValue();
12951 // If N0C has multiple uses it's possible one of the cases in
12952 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12953 // in an infinite loop.
12954 if (!N0C->hasOneUse())
12955 return SDValue();
12956 int64_t C0 = N0C->getSExtValue();
12957 int64_t C1 = N1C->getSExtValue();
12958 int64_t CA, CB;
12959 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12960 return SDValue();
12961 // Search for proper CA (non-zero) and CB that both are simm12.
12962 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12963 !isInt<12>(C0 * (C1 / C0))) {
12964 CA = C1 / C0;
12965 CB = C1 % C0;
12966 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12967 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12968 CA = C1 / C0 + 1;
12969 CB = C1 % C0 - C0;
12970 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12971 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12972 CA = C1 / C0 - 1;
12973 CB = C1 % C0 + C0;
12974 } else
12975 return SDValue();
12976 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12977 SDLoc DL(N);
12978 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12979 DAG.getConstant(CA, DL, VT));
12980 SDValue New1 =
12981 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12982 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12983}
12984
12985// add (zext, zext) -> zext (add (zext, zext))
12986// sub (zext, zext) -> sext (sub (zext, zext))
12987// mul (zext, zext) -> zext (mul (zext, zext))
12988// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12989// udiv (zext, zext) -> zext (udiv (zext, zext))
12990// srem (zext, zext) -> zext (srem (zext, zext))
12991// urem (zext, zext) -> zext (urem (zext, zext))
12992//
12993 // where the sum of the extend widths match, and the range of the bin op
12994// fits inside the width of the narrower bin op. (For profitability on rvv, we
12995// use a power of two for both inner and outer extend.)
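// For example, (add (zext nxv2i8 a to nxv2i32), (zext nxv2i8 b to nxv2i32))
// becomes (zext (add (zext a to nxv2i16), (zext b to nxv2i16)) to nxv2i32),
// since an 8-bit + 8-bit add always fits in 16 bits.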
12996 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12997
12998 EVT VT = N->getValueType(0);
12999 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13000 return SDValue();
13001
13002 SDValue N0 = N->getOperand(0);
13003 SDValue N1 = N->getOperand(1);
13004 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13005 return SDValue();
13006 if (!N0.hasOneUse() || !N1.hasOneUse())
13007 return SDValue();
13008
13009 SDValue Src0 = N0.getOperand(0);
13010 SDValue Src1 = N1.getOperand(0);
13011 EVT SrcVT = Src0.getValueType();
13012 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13013 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13014 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13015 return SDValue();
13016
13017 LLVMContext &C = *DAG.getContext();
13018 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13019 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13020
13021 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13022 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13023
13024 // Src0 and Src1 are zero extended, so they're always positive if signed.
13025 //
13026 // sub can produce a negative from two positive operands, so it needs sign
13027 // extended. Other nodes produce a positive from two positive operands, so
13028 // zero extend instead.
13029 unsigned OuterExtend =
13030 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13031
13032 return DAG.getNode(
13033 OuterExtend, SDLoc(N), VT,
13034 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13035}
13036
13037 // Try to turn (add (xor bool, 1), -1) into (neg bool).
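// (For bool == 0: (0 ^ 1) - 1 == 0; for bool == 1: (1 ^ 1) - 1 == -1.)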
13038 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13039 SDValue N0 = N->getOperand(0);
13040 SDValue N1 = N->getOperand(1);
13041 EVT VT = N->getValueType(0);
13042 SDLoc DL(N);
13043
13044 // RHS should be -1.
13045 if (!isAllOnesConstant(N1))
13046 return SDValue();
13047
13048 // Look for (xor X, 1).
13049 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13050 return SDValue();
13051
13052 // First xor input should be 0 or 1.
13053 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13054 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13055 return SDValue();
13056
13057 // Emit a negate of the setcc.
13058 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13059 N0.getOperand(0));
13060}
13061
13062 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13063 const RISCVSubtarget &Subtarget) {
13064 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13065 return V;
13066 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13067 return V;
13068 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13069 return V;
13070 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13071 return V;
13072 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13073 return V;
13074 if (SDValue V = combineBinOpOfZExt(N, DAG))
13075 return V;
13076
13077 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13078 // (select lhs, rhs, cc, x, (add x, y))
13079 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13080}
13081
13082 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
13083 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13084 SDValue N0 = N->getOperand(0);
13085 SDValue N1 = N->getOperand(1);
13086 EVT VT = N->getValueType(0);
13087 SDLoc DL(N);
13088
13089 // Require a constant LHS.
13090 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13091 if (!N0C)
13092 return SDValue();
13093
13094 // All our optimizations involve subtracting 1 from the immediate and forming
13095 // an ADDI. Make sure the new immediate is valid for an ADDI.
13096 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13097 if (!ImmValMinus1.isSignedIntN(12))
13098 return SDValue();
13099
13100 SDValue NewLHS;
13101 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13102 // (sub constant, (setcc x, y, eq/neq)) ->
13103 // (add (setcc x, y, neq/eq), constant - 1)
13104 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13105 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13106 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13107 return SDValue();
13108 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13109 NewLHS =
13110 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13111 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13112 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13113 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13114 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13115 NewLHS = N1.getOperand(0);
13116 } else
13117 return SDValue();
13118
13119 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13120 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13121}
13122
13123 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13124 const RISCVSubtarget &Subtarget) {
13125 if (SDValue V = combineSubOfBoolean(N, DAG))
13126 return V;
13127
13128 EVT VT = N->getValueType(0);
13129 SDValue N0 = N->getOperand(0);
13130 SDValue N1 = N->getOperand(1);
13131 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13132 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13133 isNullConstant(N1.getOperand(1))) {
13134 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13135 if (CCVal == ISD::SETLT) {
13136 SDLoc DL(N);
13137 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13138 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13139 DAG.getConstant(ShAmt, DL, VT));
13140 }
13141 }
13142
13143 if (SDValue V = combineBinOpOfZExt(N, DAG))
13144 return V;
13145
13146 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13147 // (select lhs, rhs, cc, x, (sub x, y))
13148 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13149}
13150
13151// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13152// Legalizing setcc can introduce xors like this. Doing this transform reduces
13153// the number of xors and may allow the xor to fold into a branch condition.
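// For example, with X and Y known to be 0/1,
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
// replacing two xors with one.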
13154 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13155 SDValue N0 = N->getOperand(0);
13156 SDValue N1 = N->getOperand(1);
13157 bool IsAnd = N->getOpcode() == ISD::AND;
13158
13159 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13160 return SDValue();
13161
13162 if (!N0.hasOneUse() || !N1.hasOneUse())
13163 return SDValue();
13164
13165 SDValue N01 = N0.getOperand(1);
13166 SDValue N11 = N1.getOperand(1);
13167
13168 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13169 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13170 // operation is And, allow one of the Xors to use -1.
13171 if (isOneConstant(N01)) {
13172 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13173 return SDValue();
13174 } else if (isOneConstant(N11)) {
13175 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13176 if (!(IsAnd && isAllOnesConstant(N01)))
13177 return SDValue();
13178 } else
13179 return SDValue();
13180
13181 EVT VT = N->getValueType(0);
13182
13183 SDValue N00 = N0.getOperand(0);
13184 SDValue N10 = N1.getOperand(0);
13185
13186 // The LHS of the xors needs to be 0/1.
13187 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13188 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13189 return SDValue();
13190
13191 // Invert the opcode and insert a new xor.
13192 SDLoc DL(N);
13193 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13194 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13195 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13196}
13197
13198 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13199 const RISCVSubtarget &Subtarget) {
13200 SDValue N0 = N->getOperand(0);
13201 EVT VT = N->getValueType(0);
13202
13203 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13204 // extending X. This is safe since we only need the LSB after the shift and
13205 // shift amounts larger than 31 would produce poison. If we wait until
13206 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13207 // to use a BEXT instruction.
13208 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13209 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13210 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13211 SDLoc DL(N0);
13212 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13213 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13214 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13215 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13216 }
13217
13218 return SDValue();
13219}
13220
13221 // Combines two comparison operations and a logic operation into one
13222 // selection operation (min, max) and a logic operation. Returns the new
13223 // node if the conditions for the optimization are satisfied.
13224 static SDValue performANDCombine(SDNode *N,
13225 TargetLowering::DAGCombinerInfo &DCI,
13226 const RISCVSubtarget &Subtarget) {
13227 SelectionDAG &DAG = DCI.DAG;
13228
13229 SDValue N0 = N->getOperand(0);
13230 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13231 // extending X. This is safe since we only need the LSB after the shift and
13232 // shift amounts larger than 31 would produce poison. If we wait until
13233 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13234 // to use a BEXT instruction.
13235 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13236 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13237 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13238 N0.hasOneUse()) {
13239 SDLoc DL(N);
13240 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13241 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13242 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13243 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13244 DAG.getConstant(1, DL, MVT::i64));
13245 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13246 }
13247
13248 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13249 return V;
13250 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13251 return V;
13252
13253 if (DCI.isAfterLegalizeDAG())
13254 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13255 return V;
13256
13257 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13258 // (select lhs, rhs, cc, x, (and x, y))
13259 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13260}
13261
13262// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13263// FIXME: Generalize to other binary operators with same operand.
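// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)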
13264 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13265 SelectionDAG &DAG) {
13266 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13267
13268 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13269 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13270 !N0.hasOneUse() || !N1.hasOneUse())
13271 return SDValue();
13272
13273 // Should have the same condition.
13274 SDValue Cond = N0.getOperand(1);
13275 if (Cond != N1.getOperand(1))
13276 return SDValue();
13277
13278 SDValue TrueV = N0.getOperand(0);
13279 SDValue FalseV = N1.getOperand(0);
13280
13281 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13282 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13283 !isOneConstant(TrueV.getOperand(1)) ||
13284 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13285 return SDValue();
13286
13287 EVT VT = N->getValueType(0);
13288 SDLoc DL(N);
13289
13290 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13291 Cond);
13292 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13293 Cond);
13294 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13295 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13296}
13297
13298 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13299 const RISCVSubtarget &Subtarget) {
13300 SelectionDAG &DAG = DCI.DAG;
13301
13302 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13303 return V;
13304 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13305 return V;
13306
13307 if (DCI.isAfterLegalizeDAG())
13308 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13309 return V;
13310
13311 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13312 // We may be able to pull a common operation out of the true and false value.
13313 SDValue N0 = N->getOperand(0);
13314 SDValue N1 = N->getOperand(1);
13315 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13316 return V;
13317 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13318 return V;
13319
13320 // fold (or (select cond, 0, y), x) ->
13321 // (select cond, x, (or x, y))
13322 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13323}
13324
13325 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13326 const RISCVSubtarget &Subtarget) {
13327 SDValue N0 = N->getOperand(0);
13328 SDValue N1 = N->getOperand(1);
13329
13330 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13331 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13332 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13333 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13334 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13335 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13336 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13337 SDLoc DL(N);
13338 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13339 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13340 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13341 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13342 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13343 }
13344
13345 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13346 // NOTE: Assumes ROL being legal means ROLW is legal.
13347 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13348 if (N0.getOpcode() == RISCVISD::SLLW &&
13349 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13350 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13351 SDLoc DL(N);
13352 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13353 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13354 }
13355
13356 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13357 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13358 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13359 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13360 if (ConstN00 && CC == ISD::SETLT) {
13361 EVT VT = N0.getValueType();
13362 SDLoc DL(N0);
13363 const APInt &Imm = ConstN00->getAPIntValue();
13364 if ((Imm + 1).isSignedIntN(12))
13365 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13366 DAG.getConstant(Imm + 1, DL, VT), CC);
13367 }
13368 }
13369
13370 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13371 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13372 // would have been promoted to i32, but the setcc would have i64 result.
13373 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13374 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13375 SDValue N00 = N0.getOperand(0);
13376 SDLoc DL(N);
13377 SDValue LHS = N00.getOperand(0);
13378 SDValue RHS = N00.getOperand(1);
13379 SDValue CC = N00.getOperand(2);
13380 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13381 LHS.getValueType());
13382 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13383 LHS, RHS, NotCC);
13384 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13385 }
13386
13387 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13388 return V;
13389 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13390 return V;
13391
13392 // fold (xor (select cond, 0, y), x) ->
13393 // (select cond, x, (xor x, y))
13394 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13395}
13396
13397// Try to expand a scalar multiply to a faster sequence.
13398 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13399 TargetLowering::DAGCombinerInfo &DCI,
13400 const RISCVSubtarget &Subtarget) {
13401
13402 EVT VT = N->getValueType(0);
13403
13404 // LI + MUL is usually smaller than the alternative sequence.
13405 if (DAG.getMachineFunction().getFunction().hasMinSize())
13406 return SDValue();
13407
13408 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13409 return SDValue();
13410
13411 if (VT != Subtarget.getXLenVT())
13412 return SDValue();
13413
13414 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13415 return SDValue();
13416
13417 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13418 if (!CNode)
13419 return SDValue();
13420 uint64_t MulAmt = CNode->getZExtValue();
13421
13422 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13423 // We're adding additional uses of X here, and in principle, we should be freezing
13424 // X before doing so. However, adding freeze here causes real regressions, and no
13425 // other target properly freezes X in these cases either.
13426 SDValue X = N->getOperand(0);
13427
13428 for (uint64_t Divisor : {3, 5, 9}) {
13429 if (MulAmt % Divisor != 0)
13430 continue;
13431 uint64_t MulAmt2 = MulAmt / Divisor;
13432 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13433 // Matched in tablegen, avoid perturbing patterns.
13434 if (isPowerOf2_64(MulAmt2))
13435 return SDValue();
13436
13437 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13438 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13439 SDLoc DL(N);
13440 SDValue Mul359 =
13441 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13442 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13443 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13444 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13445 Mul359);
13446 }
13447 }
13448
13449 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13450 // shXadd. First check if this is a sum of two powers of 2 because that's
13451 // easy. Then count the trailing zeros to get the scale for the shXadd.
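// e.g. MulAmt == 34 (0b100010): ScaleShift == 1, ShiftAmt == 5, giving
// (SHL_ADD X, 1, (shl X, 5)), i.e. 2*X + 32*X.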
13452 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13453 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13454 if (ScaleShift >= 1 && ScaleShift < 4) {
13455 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13456 SDLoc DL(N);
13457 SDValue Shift1 =
13458 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13459 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13460 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13461 }
13462 }
13463
13464 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13465 // This is the two instruction form, there are also three instruction
13466 // variants we could implement. e.g.
13467 // (2^(1,2,3) * 3,5,9 + 1) << C2
13468 // 2^(C1>3) * 3,5,9 +/- 1
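// e.g. MulAmt == 19 == 2 * 9 + 1: C == 18, TZ == 1, giving
// (SHL_ADD (SHL_ADD X, 3, X), 1, X), i.e. (9*X)*2 + X.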
13469 for (uint64_t Divisor : {3, 5, 9}) {
13470 uint64_t C = MulAmt - 1;
13471 if (C <= Divisor)
13472 continue;
13473 unsigned TZ = llvm::countr_zero(C);
13474 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13475 SDLoc DL(N);
13476 SDValue Mul359 =
13477 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13478 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13479 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13480 DAG.getConstant(TZ, DL, VT), X);
13481 }
13482 }
13483
13484 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
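// e.g. MulAmt == 35 == 32 + 2 + 1: ScaleShift == 1, ShiftAmt == 5, giving
// (add (shl X, 5), (SHL_ADD X, 1, X)), i.e. 32*X + 3*X.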
13485 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13486 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13487 if (ScaleShift >= 1 && ScaleShift < 4) {
13488 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13489 SDLoc DL(N);
13490 SDValue Shift1 =
13491 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13492 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13493 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13494 DAG.getConstant(ScaleShift, DL, VT), X));
13495 }
13496 }
13497
13498 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
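// e.g. MulAmt == 29 == 32 - 3: (sub (shl X, 5), (SHL_ADD X, 1, X)),
// i.e. 32*X - 3*X.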
13499 for (uint64_t Offset : {3, 5, 9}) {
13500 if (isPowerOf2_64(MulAmt + Offset)) {
13501 SDLoc DL(N);
13502 SDValue Shift1 =
13503 DAG.getNode(ISD::SHL, DL, VT, X,
13504 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13505 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13506 DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13507 X);
13508 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13509 }
13510 }
13511
13512 return SDValue();
13513}
13514
13515
13516 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13517 TargetLowering::DAGCombinerInfo &DCI,
13518 const RISCVSubtarget &Subtarget) {
13519 EVT VT = N->getValueType(0);
13520 if (!VT.isVector())
13521 return expandMul(N, DAG, DCI, Subtarget);
13522
13523 SDLoc DL(N);
13524 SDValue N0 = N->getOperand(0);
13525 SDValue N1 = N->getOperand(1);
13526 SDValue MulOper;
13527 unsigned AddSubOpc;
13528
13529 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13530 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13531 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13532 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13533 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13534 AddSubOpc = V->getOpcode();
13535 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13536 SDValue Opnd = V->getOperand(1);
13537 MulOper = V->getOperand(0);
13538 if (AddSubOpc == ISD::SUB)
13539 std::swap(Opnd, MulOper);
13540 if (isOneOrOneSplat(Opnd))
13541 return true;
13542 }
13543 return false;
13544 };
13545
13546 if (IsAddSubWith1(N0)) {
13547 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13548 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13549 }
13550
13551 if (IsAddSubWith1(N1)) {
13552 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13553 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13554 }
13555
13556 if (SDValue V = combineBinOpOfZExt(N, DAG))
13557 return V;
13558
13559 return SDValue();
13560}
13561
13562/// According to the property that indexed load/store instructions zero-extend
13563 /// their indices, try to narrow the type of the index operand.
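/// For example, an index of (shl (zext nxv1i8 X to nxv1i64), (splat 2)) only
/// needs bits(X) + 2 == 10 bits, so it can be rebuilt as
/// (shl (zext X to nxv1i16), (splat 2)).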
13564static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13565 if (isIndexTypeSigned(IndexType))
13566 return false;
13567
13568 if (!N->hasOneUse())
13569 return false;
13570
13571 EVT VT = N.getValueType();
13572 SDLoc DL(N);
13573
13574 // In general, what we're doing here is seeing if we can sink a truncate to
13575 // a smaller element type into the expression tree building our index.
13576 // TODO: We can generalize this and handle a bunch more cases if useful.
13577
13578 // Narrow a buildvector to the narrowest element type. This requires less
13579 // work and less register pressure at high LMUL, and creates smaller constants
13580 // which may be cheaper to materialize.
13581 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13582 KnownBits Known = DAG.computeKnownBits(N);
13583 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13584 LLVMContext &C = *DAG.getContext();
13585 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13586 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13587 N = DAG.getNode(ISD::TRUNCATE, DL,
13588 VT.changeVectorElementType(ResultVT), N);
13589 return true;
13590 }
13591 }
13592
13593 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13594 if (N.getOpcode() != ISD::SHL)
13595 return false;
13596
13597 SDValue N0 = N.getOperand(0);
13598 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13599 N0.getOpcode() != RISCVISD::VZEXT_VL)
13600 return false;
13601 if (!N0->hasOneUse())
13602 return false;
13603
13604 APInt ShAmt;
13605 SDValue N1 = N.getOperand(1);
13606 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13607 return false;
13608
13609 SDValue Src = N0.getOperand(0);
13610 EVT SrcVT = Src.getValueType();
13611 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13612 unsigned ShAmtV = ShAmt.getZExtValue();
13613 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13614 NewElen = std::max(NewElen, 8U);
13615
13616 // Skip if NewElen is not narrower than the original extended type.
13617 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13618 return false;
13619
13620 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13621 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13622
13623 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13624 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13625 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13626 return true;
13627}
13628
13629// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13630// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13631// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13632// can become a sext.w instead of a shift pair.
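// For example, (seteq (and X, 0xffffffff), 0x80001234) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80001234); the sign-extended
// constant is cheaper to materialize than the zero-extended original.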
13633 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13634 const RISCVSubtarget &Subtarget) {
13635 SDValue N0 = N->getOperand(0);
13636 SDValue N1 = N->getOperand(1);
13637 EVT VT = N->getValueType(0);
13638 EVT OpVT = N0.getValueType();
13639
13640 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13641 return SDValue();
13642
13643 // RHS needs to be a constant.
13644 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13645 if (!N1C)
13646 return SDValue();
13647
13648 // LHS needs to be (and X, 0xffffffff).
13649 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13650 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13651 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13652 return SDValue();
13653
13654 // Looking for an equality compare.
13655 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13656 if (!isIntEqualitySetCC(Cond))
13657 return SDValue();
13658
13659 // Don't do this if the sign bit is provably zero, it will be turned back into
13660 // an AND.
13661 APInt SignMask = APInt::getOneBitSet(64, 31);
13662 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13663 return SDValue();
13664
13665 const APInt &C1 = N1C->getAPIntValue();
13666
13667 SDLoc dl(N);
13668 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13669 // to be equal.
13670 if (C1.getActiveBits() > 32)
13671 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13672
13673 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13674 N0.getOperand(0), DAG.getValueType(MVT::i32));
13675 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13676 dl, OpVT), Cond);
13677}
13678
13679static SDValue
13680 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13681 const RISCVSubtarget &Subtarget) {
13682 SDValue Src = N->getOperand(0);
13683 EVT VT = N->getValueType(0);
13684
13685 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13686 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13687 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13688 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13689 Src.getOperand(0));
13690
13691 return SDValue();
13692}
13693
13694namespace {
13695// Forward declaration of the structure holding the necessary information to
13696// apply a combine.
13697struct CombineResult;
13698
13699enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13700/// Helper class for folding sign/zero extensions.
13701/// In particular, this class is used for the following combines:
13702/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13703/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13704/// mul | mul_vl -> vwmul(u) | vwmul_su
13705/// shl | shl_vl -> vwsll
13706/// fadd -> vfwadd | vfwadd_w
13707/// fsub -> vfwsub | vfwsub_w
13708/// fmul -> vfwmul
13709/// An object of this class represents an operand of the operation we want to
13710/// combine.
13711/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13712/// NodeExtensionHelper for `a` and one for `b`.
13713///
13714/// This class abstracts away how the extension is materialized and
13715/// how its number of users affect the combines.
13716///
13717/// In particular:
13718/// - VWADD_W is conceptually == add(op0, sext(op1))
13719/// - VWADDU_W == add(op0, zext(op1))
13720/// - VWSUB_W == sub(op0, sext(op1))
13721/// - VWSUBU_W == sub(op0, zext(op1))
13722/// - VFWADD_W == fadd(op0, fpext(op1))
13723/// - VFWSUB_W == fsub(op0, fpext(op1))
13724/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13725/// zext|sext(smaller_value).
13726struct NodeExtensionHelper {
13727 /// Records if this operand is like being zero extended.
13728 bool SupportsZExt;
13729 /// Records if this operand is like being sign extended.
13730 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13731 /// instance, a splat constant (e.g., 3), would support being both sign and
13732 /// zero extended.
13733 bool SupportsSExt;
13734 /// Records if this operand is like being floating-Point extended.
13735 bool SupportsFPExt;
13736 /// This boolean captures whether we care if this operand would still be
13737 /// around after the folding happens.
13738 bool EnforceOneUse;
13739 /// Original value that this NodeExtensionHelper represents.
13740 SDValue OrigOperand;
13741
13742 /// Get the value feeding the extension or the value itself.
13743 /// E.g., for zext(a), this would return a.
13744 SDValue getSource() const {
13745 switch (OrigOperand.getOpcode()) {
13746 case ISD::ZERO_EXTEND:
13747 case ISD::SIGN_EXTEND:
13748 case RISCVISD::VSEXT_VL:
13749 case RISCVISD::VZEXT_VL:
13750 case RISCVISD::FP_EXTEND_VL:
13751 return OrigOperand.getOperand(0);
13752 default:
13753 return OrigOperand;
13754 }
13755 }
13756
13757 /// Check if this instance represents a splat.
13758 bool isSplat() const {
13759 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13760 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13761 }
13762
13763 /// Get the extended opcode.
13764 unsigned getExtOpc(ExtKind SupportsExt) const {
13765 switch (SupportsExt) {
13766 case ExtKind::SExt:
13767 return RISCVISD::VSEXT_VL;
13768 case ExtKind::ZExt:
13769 return RISCVISD::VZEXT_VL;
13770 case ExtKind::FPExt:
13771 return RISCVISD::FP_EXTEND_VL;
13772 }
13773 llvm_unreachable("Unknown ExtKind enum");
13774 }
13775
13776 /// Get or create a value that can feed \p Root with the given extension \p
13777 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
13778 /// operand. \see ::getSource().
13779 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13780 const RISCVSubtarget &Subtarget,
13781 std::optional<ExtKind> SupportsExt) const {
13782 if (!SupportsExt.has_value())
13783 return OrigOperand;
13784
13785 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13786
13787 SDValue Source = getSource();
13788 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13789 if (Source.getValueType() == NarrowVT)
13790 return Source;
13791
13792 unsigned ExtOpc = getExtOpc(*SupportsExt);
13793
13794 // If we need an extension, we should be changing the type.
13795 SDLoc DL(OrigOperand);
13796 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13797 switch (OrigOperand.getOpcode()) {
13798 case ISD::ZERO_EXTEND:
13799 case ISD::SIGN_EXTEND:
13800 case RISCVISD::VSEXT_VL:
13801 case RISCVISD::VZEXT_VL:
13802 case RISCVISD::FP_EXTEND_VL:
13803 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13804 case ISD::SPLAT_VECTOR:
13805 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13806 case RISCVISD::VMV_V_X_VL:
13807 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13808 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13809 default:
13810 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13811 // and that operand should already have the right NarrowVT so no
13812 // extension should be required at this point.
13813 llvm_unreachable("Unsupported opcode");
13814 }
13815 }
13816
13817 /// Helper function to get the narrow type for \p Root.
13818 /// The narrow type is the type of \p Root where we divided the size of each
13819 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13820 /// \pre Both the narrow type and the original type should be legal.
13821 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13822 MVT VT = Root->getSimpleValueType(0);
13823
13824 // Determine the narrow size.
13825 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13826
13827 MVT EltVT = SupportsExt == ExtKind::FPExt
13828 ? MVT::getFloatingPointVT(NarrowSize)
13829 : MVT::getIntegerVT(NarrowSize);
13830
13831 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13832 "Trying to extend something we can't represent");
13833 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13834 return NarrowVT;
13835 }
13836
13837 /// Get the opcode to materialize:
13838 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13839 static unsigned getSExtOpcode(unsigned Opcode) {
13840 switch (Opcode) {
13841 case ISD::ADD:
13842 case RISCVISD::ADD_VL:
13843 case RISCVISD::VWADD_W_VL:
13844 case RISCVISD::VWADDU_W_VL:
13845 case ISD::OR:
13846 return RISCVISD::VWADD_VL;
13847 case ISD::SUB:
13848 case RISCVISD::SUB_VL:
13849 case RISCVISD::VWSUB_W_VL:
13850 case RISCVISD::VWSUBU_W_VL:
13851 return RISCVISD::VWSUB_VL;
13852 case ISD::MUL:
13853 case RISCVISD::MUL_VL:
13854 return RISCVISD::VWMUL_VL;
13855 default:
13856 llvm_unreachable("Unexpected opcode");
13857 }
13858 }
13859
13860 /// Get the opcode to materialize:
13861 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13862 static unsigned getZExtOpcode(unsigned Opcode) {
13863 switch (Opcode) {
13864 case ISD::ADD:
13865 case RISCVISD::ADD_VL:
13866 case RISCVISD::VWADDU_W_VL:
13867 case RISCVISD::VWADD_W_VL:
13868 case ISD::OR:
13869 return RISCVISD::VWADDU_VL;
13870 case ISD::SUB:
13871 case RISCVISD::SUB_VL:
13872 case RISCVISD::VWSUBU_W_VL:
13873 case RISCVISD::VWSUB_W_VL:
13874 return RISCVISD::VWSUBU_VL;
13875 case ISD::MUL:
13876 case RISCVISD::MUL_VL:
13877 return RISCVISD::VWMULU_VL;
13878 case ISD::SHL:
13879 case RISCVISD::SHL_VL:
13880 return RISCVISD::VWSLL_VL;
13881 default:
13882 llvm_unreachable("Unexpected opcode");
13883 }
13884 }
13885
13886 /// Get the opcode to materialize:
13887 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13888 static unsigned getFPExtOpcode(unsigned Opcode) {
13889 switch (Opcode) {
13890 case RISCVISD::FADD_VL:
13891 case RISCVISD::VFWADD_W_VL:
13892 return RISCVISD::VFWADD_VL;
13893 case RISCVISD::FSUB_VL:
13894 case RISCVISD::VFWSUB_W_VL:
13895 return RISCVISD::VFWSUB_VL;
13896 case RISCVISD::FMUL_VL:
13897 return RISCVISD::VFWMUL_VL;
13898 default:
13899 llvm_unreachable("Unexpected opcode");
13900 }
13901 }
13902
13903 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13904 /// newOpcode(a, b).
13905 static unsigned getSUOpcode(unsigned Opcode) {
13906 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13907 "SU is only supported for MUL");
13908 return RISCVISD::VWMULSU_VL;
13909 }
13910
13911 /// Get the opcode to materialize
13912 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13913 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13914 switch (Opcode) {
13915 case ISD::ADD:
13916 case RISCVISD::ADD_VL:
13917 case ISD::OR:
13918 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13919 : RISCVISD::VWADDU_W_VL;
13920 case ISD::SUB:
13921 case RISCVISD::SUB_VL:
13922 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13923 : RISCVISD::VWSUBU_W_VL;
13924 case RISCVISD::FADD_VL:
13925 return RISCVISD::VFWADD_W_VL;
13926 case RISCVISD::FSUB_VL:
13927 return RISCVISD::VFWSUB_W_VL;
13928 default:
13929 llvm_unreachable("Unexpected opcode");
13930 }
13931 }
13932
13933 using CombineToTry = std::function<std::optional<CombineResult>(
13934 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13935 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13936 const RISCVSubtarget &)>;
13937
13938 /// Check if this node needs to be fully folded or extended for all users.
13939 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13940
13941 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13942 const RISCVSubtarget &Subtarget) {
13943 unsigned Opc = OrigOperand.getOpcode();
13944 MVT VT = OrigOperand.getSimpleValueType();
13945
13946 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13947 "Unexpected Opcode");
13948
13949 // The passthru must be undef for tail agnostic.
13950 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
13951 return;
13952
13953 // Get the scalar value.
13954 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
13955 : OrigOperand.getOperand(1);
13956
13957 // See if we have enough sign bits or zero bits in the scalar to use a
13958 // widening opcode by splatting to smaller element size.
13959 unsigned EltBits = VT.getScalarSizeInBits();
13960 unsigned ScalarBits = Op.getValueSizeInBits();
13961 // Make sure we're getting all element bits from the scalar register.
13962 // FIXME: Support implicit sign extension of vmv.v.x?
13963 if (ScalarBits < EltBits)
13964 return;
13965
13966 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13967 // If the narrow type cannot be expressed with a legal VMV,
13968 // this is not a valid candidate.
13969 if (NarrowSize < 8)
13970 return;
13971
13972 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13973 SupportsSExt = true;
13974
13975 if (DAG.MaskedValueIsZero(Op,
13976 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13977 SupportsZExt = true;
13978
13979 EnforceOneUse = false;
13980 }
13981
13982 /// Helper method to set the various fields of this struct based on the
13983 /// type of \p Root.
13984 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13985 const RISCVSubtarget &Subtarget) {
13986 SupportsZExt = false;
13987 SupportsSExt = false;
13988 SupportsFPExt = false;
13989 EnforceOneUse = true;
13990 unsigned Opc = OrigOperand.getOpcode();
13991 // For the nodes we handle below, we end up using their inputs directly: see
13992 // getSource(). However since they either don't have a passthru or we check
13993 // that their passthru is undef, we can safely ignore their mask and VL.
13994 switch (Opc) {
13995 case ISD::ZERO_EXTEND:
13996 case ISD::SIGN_EXTEND: {
13997 MVT VT = OrigOperand.getSimpleValueType();
13998 if (!VT.isVector())
13999 break;
14000
14001 SDValue NarrowElt = OrigOperand.getOperand(0);
14002 MVT NarrowVT = NarrowElt.getSimpleValueType();
14003 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14004 if (NarrowVT.getVectorElementType() == MVT::i1)
14005 break;
14006
14007 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14008 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14009 break;
14010 }
14011 case RISCVISD::VZEXT_VL:
14012 SupportsZExt = true;
14013 break;
14014 case RISCVISD::VSEXT_VL:
14015 SupportsSExt = true;
14016 break;
14017 case RISCVISD::FP_EXTEND_VL:
14018 SupportsFPExt = true;
14019 break;
14020 case ISD::SPLAT_VECTOR:
14021 case RISCVISD::VMV_V_X_VL:
14022 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14023 break;
14024 default:
14025 break;
14026 }
14027 }
14028
14029 /// Check if \p Root supports any extension folding combines.
14030 static bool isSupportedRoot(const SDNode *Root,
14031 const RISCVSubtarget &Subtarget) {
14032 switch (Root->getOpcode()) {
14033 case ISD::ADD:
14034 case ISD::SUB:
14035 case ISD::MUL: {
14036 return Root->getValueType(0).isScalableVector();
14037 }
14038 case ISD::OR: {
14039 return Root->getValueType(0).isScalableVector() &&
14040 Root->getFlags().hasDisjoint();
14041 }
14042 // Vector Widening Integer Add/Sub/Mul Instructions
14043 case RISCVISD::ADD_VL:
14044 case RISCVISD::MUL_VL:
14045 case RISCVISD::VWADD_W_VL:
14046 case RISCVISD::VWADDU_W_VL:
14047 case RISCVISD::SUB_VL:
14050 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14051 case RISCVISD::FADD_VL:
14052 case RISCVISD::FSUB_VL:
14053 case RISCVISD::FMUL_VL:
14056 return true;
14057 case ISD::SHL:
14058 return Root->getValueType(0).isScalableVector() &&
14059 Subtarget.hasStdExtZvbb();
14060 case RISCVISD::SHL_VL:
14061 return Subtarget.hasStdExtZvbb();
14062 default:
14063 return false;
14064 }
14065 }
14066
14067 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14068 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14069 const RISCVSubtarget &Subtarget) {
14070 assert(isSupportedRoot(Root, Subtarget) &&
14071 "Trying to build an helper with an "
14072 "unsupported root");
14073 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14075 OrigOperand = Root->getOperand(OperandIdx);
14076
14077 unsigned Opc = Root->getOpcode();
14078 switch (Opc) {
14079 // We consider
14080 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14081 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14082 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14083 case RISCVISD::VWADD_W_VL:
14084 case RISCVISD::VWADDU_W_VL:
14085 case RISCVISD::VWSUB_W_VL:
14086 case RISCVISD::VWSUBU_W_VL:
14087 case RISCVISD::VFWADD_W_VL:
14088 case RISCVISD::VFWSUB_W_VL:
14089 if (OperandIdx == 1) {
14090 SupportsZExt =
14091 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14092 SupportsSExt =
14093 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14094 SupportsFPExt =
14095 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14096 // There's no existing extension here, so we don't have to worry about
14097 // making sure it gets removed.
14098 EnforceOneUse = false;
14099 break;
14100 }
14101 [[fallthrough]];
14102 default:
14103 fillUpExtensionSupport(Root, DAG, Subtarget);
14104 break;
14105 }
14106 }
14107
14108 /// Helper function to get the Mask and VL from \p Root.
14109 static std::pair<SDValue, SDValue>
14110 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14111 const RISCVSubtarget &Subtarget) {
14112 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14113 switch (Root->getOpcode()) {
14114 case ISD::ADD:
14115 case ISD::SUB:
14116 case ISD::MUL:
14117 case ISD::OR:
14118 case ISD::SHL: {
14119 SDLoc DL(Root);
14120 MVT VT = Root->getSimpleValueType(0);
14121 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14122 }
14123 default:
14124 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14125 }
14126 }
14127
14128 /// Helper function to check if \p N is commutative with respect to the
14129 /// foldings that are supported by this class.
14130 static bool isCommutative(const SDNode *N) {
14131 switch (N->getOpcode()) {
14132 case ISD::ADD:
14133 case ISD::MUL:
14134 case ISD::OR:
14135 case RISCVISD::ADD_VL:
14136 case RISCVISD::MUL_VL:
14137 case RISCVISD::VWADD_W_VL:
14138 case RISCVISD::VWADDU_W_VL:
14139 case RISCVISD::FADD_VL:
14140 case RISCVISD::FMUL_VL:
14141 case RISCVISD::VFWADD_W_VL:
14142 return true;
14143 case ISD::SUB:
14144 case RISCVISD::SUB_VL:
14145 case RISCVISD::VWSUB_W_VL:
14146 case RISCVISD::VWSUBU_W_VL:
14147 case RISCVISD::FSUB_VL:
14148 case RISCVISD::VFWSUB_W_VL:
14149 case ISD::SHL:
14150 case RISCVISD::SHL_VL:
14151 return false;
14152 default:
14153 llvm_unreachable("Unexpected opcode");
14154 }
14155 }
14156
14157 /// Get a list of combines to try for folding extensions in \p Root.
14158 /// Note that each returned CombineToTry function doesn't actually modify
14159 /// anything. Instead they produce an optional CombineResult that, if not
14160 /// None, needs to be materialized for the combine to be applied.
14161 /// \see CombineResult::materialize.
14162 /// If the related CombineToTry function returns std::nullopt, that means the
14163 /// combine didn't match.
14164 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14165};
14166
14167/// Helper structure that holds all the necessary information to materialize a
14168/// combine that does some extension folding.
14169struct CombineResult {
14170 /// Opcode to be generated when materializing the combine.
14171 unsigned TargetOpcode;
14172 // No value means no extension is needed.
14173 std::optional<ExtKind> LHSExt;
14174 std::optional<ExtKind> RHSExt;
14175 /// Root of the combine.
14176 SDNode *Root;
14177 /// LHS of the TargetOpcode.
14178 NodeExtensionHelper LHS;
14179 /// RHS of the TargetOpcode.
14180 NodeExtensionHelper RHS;
14181
14182 CombineResult(unsigned TargetOpcode, SDNode *Root,
14183 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14184 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14185 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14186 LHS(LHS), RHS(RHS) {}
14187
14188 /// Return a value that uses TargetOpcode and that can be used to replace
14189 /// Root.
14190 /// The actual replacement is *not* done in that method.
14191 SDValue materialize(SelectionDAG &DAG,
14192 const RISCVSubtarget &Subtarget) const {
14193 SDValue Mask, VL, Merge;
14194 std::tie(Mask, VL) =
14195 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14196 switch (Root->getOpcode()) {
14197 default:
14198 Merge = Root->getOperand(2);
14199 break;
14200 case ISD::ADD:
14201 case ISD::SUB:
14202 case ISD::MUL:
14203 case ISD::OR:
14204 case ISD::SHL:
14205 Merge = DAG.getUNDEF(Root->getValueType(0));
14206 break;
14207 }
14208 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14209 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14210 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14211 Merge, Mask, VL);
14212 }
14213};
14214
14215/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14216/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14217/// are zext) and LHS and RHS can be folded into Root.
14218 /// AllowExtMask defines which forms `ext` can take in this pattern.
14219///
14220/// \note If the pattern can match with both zext and sext, the returned
14221/// CombineResult will feature the zext result.
14222///
14223/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14224/// can be used to apply the pattern.
14225static std::optional<CombineResult>
14226canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14227 const NodeExtensionHelper &RHS,
14228 uint8_t AllowExtMask, SelectionDAG &DAG,
14229 const RISCVSubtarget &Subtarget) {
14230 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14231 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14232 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14233 /*RHSExt=*/{ExtKind::ZExt});
14234 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14235 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14236 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14237 /*RHSExt=*/{ExtKind::SExt});
14238 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14239 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14240 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14241 /*RHSExt=*/{ExtKind::FPExt});
14242 return std::nullopt;
14243}
14244
14245/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14246/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14247/// are zext) and LHS and RHS can be folded into Root.
14248///
14249/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14250/// can be used to apply the pattern.
14251static std::optional<CombineResult>
14252canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14253 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14254 const RISCVSubtarget &Subtarget) {
14255 return canFoldToVWWithSameExtensionImpl(
14256 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14257 Subtarget);
14258}
14259
14260/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14261///
14262/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14263/// can be used to apply the pattern.
14264static std::optional<CombineResult>
14265canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14266 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14267 const RISCVSubtarget &Subtarget) {
14268 if (RHS.SupportsFPExt)
14269 return CombineResult(
14270 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14271 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14272
14273 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14274 // sext/zext?
14275 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14276 // purposes.
14277 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14278 return CombineResult(
14279 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14280 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14281 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14282 return CombineResult(
14283 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14284 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14285 return std::nullopt;
14286}
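// Illustrative example (added note, not from the upstream source): with
// AllowSplatInVW_W disabled, add(X:nxv2i64, zext(Y:nxv2i32 to nxv2i64)) matches
// this helper and is rewritten to a vwaddu.wv-style node: X stays at the wide
// type and only the extension on the RHS is folded away.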
14287
14288/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14289///
14290/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14291/// can be used to apply the pattern.
14292static std::optional<CombineResult>
14293canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14294 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14295 const RISCVSubtarget &Subtarget) {
14296 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14297 Subtarget);
14298}
14299
14300/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14301///
14302/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14303/// can be used to apply the pattern.
14304static std::optional<CombineResult>
14305canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14306 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14307 const RISCVSubtarget &Subtarget) {
14308 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14309 Subtarget);
14310}
14311
14312/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14313///
14314/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14315/// can be used to apply the pattern.
14316static std::optional<CombineResult>
14317canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14318 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14319 const RISCVSubtarget &Subtarget) {
14320 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14321 Subtarget);
14322}
14323
14324/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14325///
14326/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14327/// can be used to apply the pattern.
14328static std::optional<CombineResult>
14329canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14330 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14331 const RISCVSubtarget &Subtarget) {
14332
14333 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14334 return std::nullopt;
14335 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14336 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14337 /*RHSExt=*/{ExtKind::ZExt});
14338}
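// Illustrative example (added note, not from the upstream source):
// mul(sext(A:nxv2i32 to nxv2i64), zext(B:nxv2i32 to nxv2i64)) is matched here
// and can be emitted as vwmulsu.vv A, B, i.e. a signed-times-unsigned widening
// multiply on the narrow operands.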
14339
14340 SmallVector<NodeExtensionHelper::CombineToTry>
14341 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14342 SmallVector<CombineToTry> Strategies;
14343 switch (Root->getOpcode()) {
14344 case ISD::ADD:
14345 case ISD::SUB:
14346 case ISD::OR:
14347 case RISCVISD::ADD_VL:
14348 case RISCVISD::SUB_VL:
14349 case RISCVISD::FADD_VL:
14350 case RISCVISD::FSUB_VL:
14351 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14352 Strategies.push_back(canFoldToVWWithSameExtension);
14353 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14354 Strategies.push_back(canFoldToVW_W);
14355 break;
14356 case RISCVISD::FMUL_VL:
14357 Strategies.push_back(canFoldToVWWithSameExtension);
14358 break;
14359 case ISD::MUL:
14360 case RISCVISD::MUL_VL:
14361 // mul -> vwmul(u)
14362 Strategies.push_back(canFoldToVWWithSameExtension);
14363 // mul -> vwmulsu
14364 Strategies.push_back(canFoldToVW_SU);
14365 break;
14366 case ISD::SHL:
14367 case RISCVISD::SHL_VL:
14368 // shl -> vwsll
14369 Strategies.push_back(canFoldToVWWithZEXT);
14370 break;
14371 case RISCVISD::VWADD_W_VL:
14372 case RISCVISD::VWSUB_W_VL:
14373 // vwadd_w|vwsub_w -> vwadd|vwsub
14374 Strategies.push_back(canFoldToVWWithSEXT);
14375 break;
14376 case RISCVISD::VWADDU_W_VL:
14377 case RISCVISD::VWSUBU_W_VL:
14378 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14379 Strategies.push_back(canFoldToVWWithZEXT);
14380 break;
14381 case RISCVISD::VFWADD_W_VL:
14382 case RISCVISD::VFWSUB_W_VL:
14383 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14384 Strategies.push_back(canFoldToVWWithFPEXT);
14385 break;
14386 default:
14387 llvm_unreachable("Unexpected opcode");
14388 }
14389 return Strategies;
14390}
14391} // End anonymous namespace.
14392
14393/// Combine a binary operation to its equivalent VW or VW_W form.
14394/// The supported combines are:
14395/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14396/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14397/// mul | mul_vl -> vwmul(u) | vwmul_su
14398/// shl | shl_vl -> vwsll
14399/// fadd_vl -> vfwadd | vfwadd_w
14400/// fsub_vl -> vfwsub | vfwsub_w
14401/// fmul_vl -> vfwmul
14402/// vwadd_w(u) -> vwadd(u)
14403/// vwsub_w(u) -> vwsub(u)
14404/// vfwadd_w -> vfwadd
14405/// vfwsub_w -> vfwsub
14406 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14407 TargetLowering::DAGCombinerInfo &DCI,
14408 const RISCVSubtarget &Subtarget) {
14409 SelectionDAG &DAG = DCI.DAG;
14410 if (DCI.isBeforeLegalize())
14411 return SDValue();
14412
14413 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14414 return SDValue();
14415
14416 SmallVector<SDNode *> Worklist;
14417 SmallSet<SDNode *, 8> Inserted;
14418 Worklist.push_back(N);
14419 Inserted.insert(N);
14420 SmallVector<CombineResult> CombinesToApply;
14421
14422 while (!Worklist.empty()) {
14423 SDNode *Root = Worklist.pop_back_val();
14424 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14425 return SDValue();
14426
14427 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14428 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14429 auto AppendUsersIfNeeded = [&Worklist,
14430 &Inserted](const NodeExtensionHelper &Op) {
14431 if (Op.needToPromoteOtherUsers()) {
14432 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14433 if (Inserted.insert(TheUse).second)
14434 Worklist.push_back(TheUse);
14435 }
14436 }
14437 };
14438
14439 // Control the compile time by limiting the number of node we look at in
14440 // total.
14441 if (Inserted.size() > ExtensionMaxWebSize)
14442 return SDValue();
14443
14444 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14445 NodeExtensionHelper::getSupportedFoldings(N);
14446
14447 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14448 bool Matched = false;
14449 for (int Attempt = 0;
14450 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14451 ++Attempt) {
14452
14453 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14454 FoldingStrategies) {
14455 std::optional<CombineResult> Res =
14456 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14457 if (Res) {
14458 Matched = true;
14459 CombinesToApply.push_back(*Res);
14460 // All the inputs that are extended need to be folded, otherwise
14461 // we would be leaving the old input (since it may still be used),
14462 // and the new one.
14463 if (Res->LHSExt.has_value())
14464 AppendUsersIfNeeded(LHS);
14465 if (Res->RHSExt.has_value())
14466 AppendUsersIfNeeded(RHS);
14467 break;
14468 }
14469 }
14470 std::swap(LHS, RHS);
14471 }
14472 // Right now we do an all or nothing approach.
14473 if (!Matched)
14474 return SDValue();
14475 }
14476 // Store the value for the replacement of the input node separately.
14477 SDValue InputRootReplacement;
14478 // We do the RAUW after we materialize all the combines, because some replaced
14479 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14480 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14481 // yet-to-be-visited CombinesToApply roots.
14482 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14483 ValuesToReplace.reserve(CombinesToApply.size());
14484 for (CombineResult Res : CombinesToApply) {
14485 SDValue NewValue = Res.materialize(DAG, Subtarget);
14486 if (!InputRootReplacement) {
14487 assert(Res.Root == N &&
14488 "First element is expected to be the current node");
14489 InputRootReplacement = NewValue;
14490 } else {
14491 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14492 }
14493 }
14494 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14495 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14496 DCI.AddToWorklist(OldNewValues.second.getNode());
14497 }
14498 return InputRootReplacement;
14499}
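// Illustrative walk-through (added note, not from the upstream source): given
// add(sext(A), sext(B)) where sext(A) is also used by add(sext(A), sext(C)), the
// second add is appended to the worklist via AppendUsersIfNeeded. The combines
// are only materialized if every root in that web can be rewritten (here both
// adds become widening adds); otherwise, per the all-or-nothing policy above,
// nothing is changed.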
14500
14501// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14502// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14503// y will be the Passthru and cond will be the Mask.
14504 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14505 unsigned Opc = N->getOpcode();
14506 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14507 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14508
14509 SDValue Y = N->getOperand(0);
14510 SDValue MergeOp = N->getOperand(1);
14511 unsigned MergeOpc = MergeOp.getOpcode();
14512
14513 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14514 return SDValue();
14515
14516 SDValue X = MergeOp->getOperand(1);
14517
14518 if (!MergeOp.hasOneUse())
14519 return SDValue();
14520
14521 // Passthru should be undef
14522 SDValue Passthru = N->getOperand(2);
14523 if (!Passthru.isUndef())
14524 return SDValue();
14525
14526 // Mask should be all ones
14527 SDValue Mask = N->getOperand(3);
14528 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14529 return SDValue();
14530
14531 // False value of MergeOp should be all zeros
14532 SDValue Z = MergeOp->getOperand(2);
14533
14534 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14535 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14536 Z = Z.getOperand(1);
14537
14538 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14539 return SDValue();
14540
14541 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14542 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14543 N->getFlags());
14544}
14545
14546 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14547 TargetLowering::DAGCombinerInfo &DCI,
14548 const RISCVSubtarget &Subtarget) {
14549 [[maybe_unused]] unsigned Opc = N->getOpcode();
14550 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14551 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14552
14553 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14554 return V;
14555
14556 return combineVWADDSUBWSelect(N, DCI.DAG);
14557}
14558
14559// Helper function for performMemPairCombine.
14560// Try to combine the memory loads/stores LSNode1 and LSNode2
14561// into a single memory pair operation.
14562 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14563 LSBaseSDNode *LSNode2, SDValue BasePtr,
14564 uint64_t Imm) {
14565 SmallPtrSet<const SDNode *, 32> Visited;
14566 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14567
14568 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14569 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14570 return SDValue();
14571
14572 MachineFunction &MF = DAG.getMachineFunction();
14573 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14574
14575 // The new operation has twice the width.
14576 MVT XLenVT = Subtarget.getXLenVT();
14577 EVT MemVT = LSNode1->getMemoryVT();
14578 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14579 MachineMemOperand *MMO = LSNode1->getMemOperand();
14580 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14581 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14582
14583 if (LSNode1->getOpcode() == ISD::LOAD) {
14584 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14585 unsigned Opcode;
14586 if (MemVT == MVT::i32)
14587 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14588 else
14589 Opcode = RISCVISD::TH_LDD;
14590
14591 SDValue Res = DAG.getMemIntrinsicNode(
14592 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14593 {LSNode1->getChain(), BasePtr,
14594 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14595 NewMemVT, NewMMO);
14596
14597 SDValue Node1 =
14598 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14599 SDValue Node2 =
14600 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14601
14602 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14603 return Node1;
14604 } else {
14605 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14606
14607 SDValue Res = DAG.getMemIntrinsicNode(
14608 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14609 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14610 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14611 NewMemVT, NewMMO);
14612
14613 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14614 return Res;
14615 }
14616}
14617
14618// Try to combine two adjacent loads/stores to a single pair instruction from
14619// the XTHeadMemPair vendor extension.
14620 static SDValue performMemPairCombine(SDNode *N,
14621 TargetLowering::DAGCombinerInfo &DCI) {
14622 SelectionDAG &DAG = DCI.DAG;
14623 MachineFunction &MF = DAG.getMachineFunction();
14624 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14625
14626 // Target does not support load/store pair.
14627 if (!Subtarget.hasVendorXTHeadMemPair())
14628 return SDValue();
14629
14630 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14631 EVT MemVT = LSNode1->getMemoryVT();
14632 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14633
14634 // No volatile, indexed or atomic loads/stores.
14635 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14636 return SDValue();
14637
14638 // Function to get a base + constant representation from a memory value.
14639 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14640 if (Ptr->getOpcode() == ISD::ADD)
14641 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14642 return {Ptr->getOperand(0), C1->getZExtValue()};
14643 return {Ptr, 0};
14644 };
14645
14646 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14647
14648 SDValue Chain = N->getOperand(0);
14649 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14650 UI != UE; ++UI) {
14651 SDUse &Use = UI.getUse();
14652 if (Use.getUser() != N && Use.getResNo() == 0 &&
14653 Use.getUser()->getOpcode() == N->getOpcode()) {
14654 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14655
14656 // No volatile, indexed or atomic loads/stores.
14657 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14658 continue;
14659
14660 // Check if LSNode1 and LSNode2 have the same type and extension.
14661 if (LSNode1->getOpcode() == ISD::LOAD)
14662 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14663 cast<LoadSDNode>(LSNode1)->getExtensionType())
14664 continue;
14665
14666 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14667 continue;
14668
14669 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14670
14671 // Check if the base pointer is the same for both instructions.
14672 if (Base1 != Base2)
14673 continue;
14674
14675 // Check if the offsets match the XTHeadMemPair encoding constraints.
14676 bool Valid = false;
14677 if (MemVT == MVT::i32) {
14678 // Check for adjacent i32 values and a 2-bit index.
14679 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14680 Valid = true;
14681 } else if (MemVT == MVT::i64) {
14682 // Check for adjacent i64 values and a 2-bit index.
14683 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14684 Valid = true;
14685 }
14686
14687 if (!Valid)
14688 continue;
14689
14690 // Try to combine.
14691 if (SDValue Res =
14692 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14693 return Res;
14694 }
14695 }
14696
14697 return SDValue();
14698}
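// Illustrative example (added note, not from the upstream source): two simple
// i64 loads from Base+16 and Base+24 share Base, satisfy
// Offset1 + 8 == Offset2 and isShiftedUInt<2, 4>(16), and can therefore be
// merged into one th.ldd whose 2-bit index encodes 16 >> 4 == 1.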
14699
14700// Fold
14701// (fp_to_int (froundeven X)) -> fcvt X, rne
14702// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14703// (fp_to_int (ffloor X)) -> fcvt X, rdn
14704// (fp_to_int (fceil X)) -> fcvt X, rup
14705// (fp_to_int (fround X)) -> fcvt X, rmm
14706// (fp_to_int (frint X)) -> fcvt X
14707 static SDValue performFP_TO_INTCombine(SDNode *N,
14708 TargetLowering::DAGCombinerInfo &DCI,
14709 const RISCVSubtarget &Subtarget) {
14710 SelectionDAG &DAG = DCI.DAG;
14711 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14712 MVT XLenVT = Subtarget.getXLenVT();
14713
14714 SDValue Src = N->getOperand(0);
14715
14716 // Don't do this for strict-fp Src.
14717 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14718 return SDValue();
14719
14720 // Ensure the FP type is legal.
14721 if (!TLI.isTypeLegal(Src.getValueType()))
14722 return SDValue();
14723
14724 // Don't do this for f16 with Zfhmin and not Zfh.
14725 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14726 return SDValue();
14727
14728 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14729 // If the result is invalid, we didn't find a foldable instruction.
14730 if (FRM == RISCVFPRndMode::Invalid)
14731 return SDValue();
14732
14733 SDLoc DL(N);
14734 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14735 EVT VT = N->getValueType(0);
14736
14737 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14738 MVT SrcVT = Src.getSimpleValueType();
14739 MVT SrcContainerVT = SrcVT;
14740 MVT ContainerVT = VT.getSimpleVT();
14741 SDValue XVal = Src.getOperand(0);
14742
14743 // For widening and narrowing conversions we just combine it into a
14744 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14745 // end up getting lowered to their appropriate pseudo instructions based on
14746 // their operand types
14747 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14748 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14749 return SDValue();
14750
14751 // Make fixed-length vectors scalable first
14752 if (SrcVT.isFixedLengthVector()) {
14753 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14754 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14755 ContainerVT =
14756 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14757 }
14758
14759 auto [Mask, VL] =
14760 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14761
14762 SDValue FpToInt;
14763 if (FRM == RISCVFPRndMode::RTZ) {
14764 // Use the dedicated trunc static rounding mode if we're truncating so we
14765 // don't need to generate calls to fsrmi/fsrm
14766 unsigned Opc =
14767 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14768 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14769 } else if (FRM == RISCVFPRndMode::DYN) {
14770 unsigned Opc =
14771 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14772 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14773 } else {
14774 unsigned Opc =
14775 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14776 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14777 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14778 }
14779
14780 // If converted from fixed-length to scalable, convert back
14781 if (VT.isFixedLengthVector())
14782 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14783
14784 return FpToInt;
14785 }
14786
14787 // Only handle XLen or i32 types. Other types narrower than XLen will
14788 // eventually be legalized to XLenVT.
14789 if (VT != MVT::i32 && VT != XLenVT)
14790 return SDValue();
14791
14792 unsigned Opc;
14793 if (VT == XLenVT)
14794 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14795 else
14796 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14797
14798 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14799 DAG.getTargetConstant(FRM, DL, XLenVT));
14800 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14801}
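// Illustrative example (added note, not from the upstream source): on RV64 with
// F, (i32 (fp_to_sint (ffloor f32:X))) is folded to FCVT_W_RV64 X with the
// static rdn rounding mode, i.e. a single fcvt.w.s, instead of first producing
// the rounded FP value and converting it in a separate step.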
14802
14803// Fold
14804// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14805// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14806// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14807// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14808// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14809// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14810 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14811 TargetLowering::DAGCombinerInfo &DCI,
14812 const RISCVSubtarget &Subtarget) {
14813 SelectionDAG &DAG = DCI.DAG;
14814 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14815 MVT XLenVT = Subtarget.getXLenVT();
14816
14817 // Only handle XLen types. Other types narrower than XLen will eventually be
14818 // legalized to XLenVT.
14819 EVT DstVT = N->getValueType(0);
14820 if (DstVT != XLenVT)
14821 return SDValue();
14822
14823 SDValue Src = N->getOperand(0);
14824
14825 // Don't do this for strict-fp Src.
14826 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14827 return SDValue();
14828
14829 // Ensure the FP type is also legal.
14830 if (!TLI.isTypeLegal(Src.getValueType()))
14831 return SDValue();
14832
14833 // Don't do this for f16 with Zfhmin and not Zfh.
14834 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14835 return SDValue();
14836
14837 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14838
14839 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14840 if (FRM == RISCVFPRndMode::Invalid)
14841 return SDValue();
14842
14843 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14844
14845 unsigned Opc;
14846 if (SatVT == DstVT)
14847 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14848 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14849 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14850 else
14851 return SDValue();
14852 // FIXME: Support other SatVTs by clamping before or after the conversion.
14853
14854 Src = Src.getOperand(0);
14855
14856 SDLoc DL(N);
14857 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
14858 DAG.getTargetConstant(FRM, DL, XLenVT));
14859
14860 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14861 // extend.
14862 if (Opc == RISCVISD::FCVT_WU_RV64)
14863 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14864
14865 // RISC-V FP-to-int conversions saturate to the destination register size, but
14866 // don't produce 0 for nan.
14867 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14868 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14869}
14870
14871// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14872// smaller than XLenVT.
14873 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14874 const RISCVSubtarget &Subtarget) {
14875 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14876
14877 SDValue Src = N->getOperand(0);
14878 if (Src.getOpcode() != ISD::BSWAP)
14879 return SDValue();
14880
14881 EVT VT = N->getValueType(0);
14882 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14883 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14884 return SDValue();
14885
14886 SDLoc DL(N);
14887 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14888}
14889
14890// Convert from one FMA opcode to another based on whether we are negating the
14891// multiply result and/or the accumulator.
14892// NOTE: Only supports RVV operations with VL.
14893static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14894 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14895 if (NegMul) {
14896 // clang-format off
14897 switch (Opcode) {
14898 default: llvm_unreachable("Unexpected opcode");
14899 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14900 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14901 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14902 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14903 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14904 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14905 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14906 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14907 }
14908 // clang-format on
14909 }
14910
14911 // Negating the accumulator changes ADD<->SUB.
14912 if (NegAcc) {
14913 // clang-format off
14914 switch (Opcode) {
14915 default: llvm_unreachable("Unexpected opcode");
14916 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14917 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14918 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14919 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14920 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14921 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14922 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14923 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14924 }
14925 // clang-format on
14926 }
14927
14928 return Opcode;
14929}
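// Illustrative example (added note, not from the upstream source):
// negateFMAOpcode(RISCVISD::VFMADD_VL, /*NegMul=*/true, /*NegAcc=*/true) first
// maps VFMADD_VL to VFNMSUB_VL and then to VFNMADD_VL, matching the identity
// -(a * b) - c == -(a * b + c).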
14930
14931 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14932 // Fold FNEG_VL into FMA opcodes.
14933 // The first operand of strict-fp is chain.
14934 unsigned Offset = N->isTargetStrictFPOpcode();
14935 SDValue A = N->getOperand(0 + Offset);
14936 SDValue B = N->getOperand(1 + Offset);
14937 SDValue C = N->getOperand(2 + Offset);
14938 SDValue Mask = N->getOperand(3 + Offset);
14939 SDValue VL = N->getOperand(4 + Offset);
14940
14941 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14942 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
14943 V.getOperand(2) == VL) {
14944 // Return the negated input.
14945 V = V.getOperand(0);
14946 return true;
14947 }
14948
14949 return false;
14950 };
14951
14952 bool NegA = invertIfNegative(A);
14953 bool NegB = invertIfNegative(B);
14954 bool NegC = invertIfNegative(C);
14955
14956 // If no operands are negated, we're done.
14957 if (!NegA && !NegB && !NegC)
14958 return SDValue();
14959
14960 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
14961 if (N->isTargetStrictFPOpcode())
14962 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
14963 {N->getOperand(0), A, B, C, Mask, VL});
14964 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
14965 VL);
14966}
14967
14968 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14969 const RISCVSubtarget &Subtarget) {
14970 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14971 return V;
14972
14973 if (N->getValueType(0).isScalableVector() &&
14974 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14975 (Subtarget.hasVInstructionsF16Minimal() &&
14976 !Subtarget.hasVInstructionsF16())) {
14977 return SDValue();
14978 }
14979
14980 // FIXME: Ignore strict opcodes for now.
14981 if (N->isTargetStrictFPOpcode())
14982 return SDValue();
14983
14984 // Try to form widening FMA.
14985 SDValue Op0 = N->getOperand(0);
14986 SDValue Op1 = N->getOperand(1);
14987 SDValue Mask = N->getOperand(3);
14988 SDValue VL = N->getOperand(4);
14989
14990 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14991 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14992 return SDValue();
14993
14994 // TODO: Refactor to handle more complex cases similar to
14995 // combineBinOp_VLToVWBinOp_VL.
14996 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14997 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14998 return SDValue();
14999
15000 // Check the mask and VL are the same.
15001 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15002 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15003 return SDValue();
15004
15005 unsigned NewOpc;
15006 switch (N->getOpcode()) {
15007 default:
15008 llvm_unreachable("Unexpected opcode");
15009 case RISCVISD::VFMADD_VL:
15010 NewOpc = RISCVISD::VFWMADD_VL;
15011 break;
15012 case RISCVISD::VFNMSUB_VL:
15013 NewOpc = RISCVISD::VFWNMSUB_VL;
15014 break;
15015 case RISCVISD::VFNMADD_VL:
15016 NewOpc = RISCVISD::VFWNMADD_VL;
15017 break;
15018 case RISCVISD::VFMSUB_VL:
15019 NewOpc = RISCVISD::VFWMSUB_VL;
15020 break;
15021 }
15022
15023 Op0 = Op0.getOperand(0);
15024 Op1 = Op1.getOperand(0);
15025
15026 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15027 N->getOperand(2), Mask, VL);
15028}
15029
15030 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15031 const RISCVSubtarget &Subtarget) {
15032 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15033
15034 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15035 return SDValue();
15036
15037 if (!isa<ConstantSDNode>(N->getOperand(1)))
15038 return SDValue();
15039 uint64_t ShAmt = N->getConstantOperandVal(1);
15040 if (ShAmt > 32)
15041 return SDValue();
15042
15043 SDValue N0 = N->getOperand(0);
15044
15045 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15046 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15047 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15048 if (ShAmt < 32 &&
15049 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15050 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15051 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15052 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15053 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15054 if (LShAmt < 32) {
15055 SDLoc ShlDL(N0.getOperand(0));
15056 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15057 N0.getOperand(0).getOperand(0),
15058 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15059 SDLoc DL(N);
15060 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15061 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15062 }
15063 }
15064
15065 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15066 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15067 //
15068 // Also try these folds where an add or sub is in the middle.
15069 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15070 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
15071 SDValue Shl;
15072 ConstantSDNode *AddC = nullptr;
15073
15074 // We might have an ADD or SUB between the SRA and SHL.
15075 bool IsAdd = N0.getOpcode() == ISD::ADD;
15076 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15077 // Other operand needs to be a constant we can modify.
15078 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15079 if (!AddC)
15080 return SDValue();
15081
15082 // AddC needs to have at least 32 trailing zeros.
15083 if (AddC->getAPIntValue().countr_zero() < 32)
15084 return SDValue();
15085
15086 // All users should be a shift by constant less than or equal to 32. This
15087 // ensures we'll do this optimization for each of them to produce an
15088 // add/sub+sext_inreg they can all share.
15089 for (SDNode *U : N0->uses()) {
15090 if (U->getOpcode() != ISD::SRA ||
15091 !isa<ConstantSDNode>(U->getOperand(1)) ||
15092 U->getConstantOperandVal(1) > 32)
15093 return SDValue();
15094 }
15095
15096 Shl = N0.getOperand(IsAdd ? 0 : 1);
15097 } else {
15098 // Not an ADD or SUB.
15099 Shl = N0;
15100 }
15101
15102 // Look for a shift left by 32.
15103 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15104 Shl.getConstantOperandVal(1) != 32)
15105 return SDValue();
15106
15107 // If we didn't look through an add/sub, then the shl should have one use.
15108 // If we did look through an add/sub, the sext_inreg we create is free so
15109 // we're only creating 2 new instructions. It's enough to only remove the
15110 // original sra+add/sub.
15111 if (!AddC && !Shl.hasOneUse())
15112 return SDValue();
15113
15114 SDLoc DL(N);
15115 SDValue In = Shl.getOperand(0);
15116
15117 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15118 // constant.
15119 if (AddC) {
15120 SDValue ShiftedAddC =
15121 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15122 if (IsAdd)
15123 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15124 else
15125 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15126 }
15127
15128 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15129 DAG.getValueType(MVT::i32));
15130 if (ShAmt == 32)
15131 return SExt;
15132
15133 return DAG.getNode(
15134 ISD::SHL, DL, MVT::i64, SExt,
15135 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15136}
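// Illustrative example (added note, not from the upstream source): for an i64
// value X, (sra (shl X, 32), 27) has ShAmt == 27, so the fold above produces
// (shl (sext_inreg X, i32), 5), which can be selected as sext.w followed by slli.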
15137
15138 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15139 // the result is used as the condition of a br_cc or select_cc we can invert,
15140// inverting the setcc is free, and Z is 0/1. Caller will invert the
15141// br_cc/select_cc.
15142 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15143 bool IsAnd = Cond.getOpcode() == ISD::AND;
15144 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15145 return SDValue();
15146
15147 if (!Cond.hasOneUse())
15148 return SDValue();
15149
15150 SDValue Setcc = Cond.getOperand(0);
15151 SDValue Xor = Cond.getOperand(1);
15152 // Canonicalize setcc to LHS.
15153 if (Setcc.getOpcode() != ISD::SETCC)
15154 std::swap(Setcc, Xor);
15155 // LHS should be a setcc and RHS should be an xor.
15156 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15157 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15158 return SDValue();
15159
15160 // If the condition is an And, SimplifyDemandedBits may have changed
15161 // (xor Z, 1) to (not Z).
15162 SDValue Xor1 = Xor.getOperand(1);
15163 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15164 return SDValue();
15165
15166 EVT VT = Cond.getValueType();
15167 SDValue Xor0 = Xor.getOperand(0);
15168
15169 // The LHS of the xor needs to be 0/1.
15170 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15171 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15172 return SDValue();
15173
15174 // We can only invert integer setccs.
15175 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15176 if (!SetCCOpVT.isScalarInteger())
15177 return SDValue();
15178
15179 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15180 if (ISD::isIntEqualitySetCC(CCVal)) {
15181 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15182 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15183 Setcc.getOperand(1), CCVal);
15184 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15185 // Invert (setlt 0, X) by converting to (setlt X, 1).
15186 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15187 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15188 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15189 // Invert (setlt X, 1) by converting to (setlt 0, X).
15190 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15191 DAG.getConstant(0, SDLoc(Setcc), VT),
15192 Setcc.getOperand(0), CCVal);
15193 } else
15194 return SDValue();
15195
15196 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15197 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15198}
15199
15200 // Perform common combines for BR_CC and SELECT_CC conditions.
15201static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15202 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15203 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15204
15205 // Since an arithmetic right shift always preserves the sign bit,
15206 // the shift can be omitted.
15207 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15208 // setge (sra X, N), 0 -> setge X, 0
15209 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15210 LHS.getOpcode() == ISD::SRA) {
15211 LHS = LHS.getOperand(0);
15212 return true;
15213 }
15214
15215 if (!ISD::isIntEqualitySetCC(CCVal))
15216 return false;
15217
15218 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15219 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15220 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15221 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15222 // If we're looking for eq 0 instead of ne 0, we need to invert the
15223 // condition.
15224 bool Invert = CCVal == ISD::SETEQ;
15225 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15226 if (Invert)
15227 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15228
15229 RHS = LHS.getOperand(1);
15230 LHS = LHS.getOperand(0);
15231 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15232
15233 CC = DAG.getCondCode(CCVal);
15234 return true;
15235 }
15236
15237 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15238 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15239 RHS = LHS.getOperand(1);
15240 LHS = LHS.getOperand(0);
15241 return true;
15242 }
15243
15244 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15245 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15246 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15247 SDValue LHS0 = LHS.getOperand(0);
15248 if (LHS0.getOpcode() == ISD::AND &&
15249 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15250 uint64_t Mask = LHS0.getConstantOperandVal(1);
15251 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15252 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15253 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15254 CC = DAG.getCondCode(CCVal);
15255
15256 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15257 LHS = LHS0.getOperand(0);
15258 if (ShAmt != 0)
15259 LHS =
15260 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15261 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15262 return true;
15263 }
15264 }
15265 }
15266
15267 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15268 // This can occur when legalizing some floating point comparisons.
15269 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15270 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15271 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15272 CC = DAG.getCondCode(CCVal);
15273 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15274 return true;
15275 }
15276
15277 if (isNullConstant(RHS)) {
15278 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15279 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15280 CC = DAG.getCondCode(CCVal);
15281 LHS = NewCond;
15282 return true;
15283 }
15284 }
15285
15286 return false;
15287}
15288
15289// Fold
15290// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15291// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15292// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15293// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15294 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15295 SDValue TrueVal, SDValue FalseVal,
15296 bool Swapped) {
15297 bool Commutative = true;
15298 unsigned Opc = TrueVal.getOpcode();
15299 switch (Opc) {
15300 default:
15301 return SDValue();
15302 case ISD::SHL:
15303 case ISD::SRA:
15304 case ISD::SRL:
15305 case ISD::SUB:
15306 Commutative = false;
15307 break;
15308 case ISD::ADD:
15309 case ISD::OR:
15310 case ISD::XOR:
15311 break;
15312 }
15313
15314 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15315 return SDValue();
15316
15317 unsigned OpToFold;
15318 if (FalseVal == TrueVal.getOperand(0))
15319 OpToFold = 0;
15320 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15321 OpToFold = 1;
15322 else
15323 return SDValue();
15324
15325 EVT VT = N->getValueType(0);
15326 SDLoc DL(N);
15327 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15328 EVT OtherOpVT = OtherOp->getValueType(0);
15329 SDValue IdentityOperand =
15330 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15331 if (!Commutative)
15332 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15333 assert(IdentityOperand && "No identity operand!");
15334
15335 if (Swapped)
15336 std::swap(OtherOp, IdentityOperand);
15337 SDValue NewSel =
15338 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15339 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15340}
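// Illustrative example (added note, not from the upstream source):
// (select C, (add Y, X), Y) becomes (add Y, (select C, X, 0)); the add now runs
// unconditionally and only X is gated by C, with 0 acting as the identity value
// when the condition is false.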
15341
15342// This tries to get rid of `select` and `icmp` that are being used to handle
15343// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
15344 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15345 SDValue Cond = N->getOperand(0);
15346
15347 // This represents either CTTZ or CTLZ instruction.
15348 SDValue CountZeroes;
15349
15350 SDValue ValOnZero;
15351
15352 if (Cond.getOpcode() != ISD::SETCC)
15353 return SDValue();
15354
15355 if (!isNullConstant(Cond->getOperand(1)))
15356 return SDValue();
15357
15358 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15359 if (CCVal == ISD::CondCode::SETEQ) {
15360 CountZeroes = N->getOperand(2);
15361 ValOnZero = N->getOperand(1);
15362 } else if (CCVal == ISD::CondCode::SETNE) {
15363 CountZeroes = N->getOperand(1);
15364 ValOnZero = N->getOperand(2);
15365 } else {
15366 return SDValue();
15367 }
15368
15369 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15370 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15371 CountZeroes = CountZeroes.getOperand(0);
15372
15373 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15374 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15375 CountZeroes.getOpcode() != ISD::CTLZ &&
15376 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15377 return SDValue();
15378
15379 if (!isNullConstant(ValOnZero))
15380 return SDValue();
15381
15382 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15383 if (Cond->getOperand(0) != CountZeroesArgument)
15384 return SDValue();
15385
15386 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15387 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15388 CountZeroes.getValueType(), CountZeroesArgument);
15389 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15390 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15391 CountZeroes.getValueType(), CountZeroesArgument);
15392 }
15393
15394 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15395 SDValue BitWidthMinusOne =
15396 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15397
15398 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15399 CountZeroes, BitWidthMinusOne);
15400 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15401}
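// Illustrative example (added note, not from the upstream source): for an i32
// value X, (select (setcc X, 0, eq), 0, (cttz_zero_undef X)) is rewritten to
// (and (cttz X), 31); since cttz(0) is 32, the mask turns the zero-input case
// into the required 0 without a branch or select.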
15402
15403 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15404 const RISCVSubtarget &Subtarget) {
15405 SDValue Cond = N->getOperand(0);
15406 SDValue True = N->getOperand(1);
15407 SDValue False = N->getOperand(2);
15408 SDLoc DL(N);
15409 EVT VT = N->getValueType(0);
15410 EVT CondVT = Cond.getValueType();
15411
15412 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15413 return SDValue();
15414
15415 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15416 // BEXTI, where C is power of 2.
15417 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15418 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15419 SDValue LHS = Cond.getOperand(0);
15420 SDValue RHS = Cond.getOperand(1);
15421 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15422 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15423 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15424 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15425 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15426 return DAG.getSelect(DL, VT,
15427 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15428 False, True);
15429 }
15430 }
15431 return SDValue();
15432}
15433
15434 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15435 const RISCVSubtarget &Subtarget) {
15436 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15437 return Folded;
15438
15439 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15440 return V;
15441
15442 if (Subtarget.hasConditionalMoveFusion())
15443 return SDValue();
15444
15445 SDValue TrueVal = N->getOperand(1);
15446 SDValue FalseVal = N->getOperand(2);
15447 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15448 return V;
15449 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15450}
15451
15452/// If we have a build_vector where each lane is binop X, C, where C
15453/// is a constant (but not necessarily the same constant on all lanes),
15454/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15455/// We assume that materializing a constant build vector will be no more
15456 /// expensive than performing O(n) binops.
15457 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15458 const RISCVSubtarget &Subtarget,
15459 const RISCVTargetLowering &TLI) {
15460 SDLoc DL(N);
15461 EVT VT = N->getValueType(0);
15462
15463 assert(!VT.isScalableVector() && "unexpected build vector");
15464
15465 if (VT.getVectorNumElements() == 1)
15466 return SDValue();
15467
15468 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15469 if (!TLI.isBinOp(Opcode))
15470 return SDValue();
15471
15472 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15473 return SDValue();
15474
15475 // This BUILD_VECTOR involves an implicit truncation, and sinking
15476 // truncates through binops is non-trivial.
15477 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15478 return SDValue();
15479
15480 SmallVector<SDValue> LHSOps;
15481 SmallVector<SDValue> RHSOps;
15482 for (SDValue Op : N->ops()) {
15483 if (Op.isUndef()) {
15484 // We can't form a divide or remainder from undef.
15485 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15486 return SDValue();
15487
15488 LHSOps.push_back(Op);
15489 RHSOps.push_back(Op);
15490 continue;
15491 }
15492
15493 // TODO: We can handle operations which have a neutral rhs value
15494 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15495 // of profit in a more explicit manner.
15496 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15497 return SDValue();
15498
15499 LHSOps.push_back(Op.getOperand(0));
15500 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15501 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15502 return SDValue();
15503 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15504 // have different LHS and RHS types.
15505 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15506 return SDValue();
15507
15508 RHSOps.push_back(Op.getOperand(1));
15509 }
15510
15511 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15512 DAG.getBuildVector(VT, DL, RHSOps));
15513}
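// Illustrative example (added note, not from the upstream source): a v4i32
// build_vector whose lanes are (add a, 1), (add b, 2), (add c, 3), (add d, 4) is
// rebuilt as (add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4)), so the
// constants are materialized as one vector operand instead of four scalar adds.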
15514
15515 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15516 const RISCVSubtarget &Subtarget,
15517 const RISCVTargetLowering &TLI) {
15518 SDValue InVec = N->getOperand(0);
15519 SDValue InVal = N->getOperand(1);
15520 SDValue EltNo = N->getOperand(2);
15521 SDLoc DL(N);
15522
15523 EVT VT = InVec.getValueType();
15524 if (VT.isScalableVector())
15525 return SDValue();
15526
15527 if (!InVec.hasOneUse())
15528 return SDValue();
15529
15530 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15531 // move the insert_vector_elts into the arms of the binop. Note that
15532 // the new RHS must be a constant.
15533 const unsigned InVecOpcode = InVec->getOpcode();
15534 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15535 InVal.hasOneUse()) {
15536 SDValue InVecLHS = InVec->getOperand(0);
15537 SDValue InVecRHS = InVec->getOperand(1);
15538 SDValue InValLHS = InVal->getOperand(0);
15539 SDValue InValRHS = InVal->getOperand(1);
15540
15541 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15542 return SDValue();
15543 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15544 return SDValue();
15545 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15546 // have different LHS and RHS types.
15547 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15548 return SDValue();
15549 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15550 InVecLHS, InValLHS, EltNo);
15551 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15552 InVecRHS, InValRHS, EltNo);
15553 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15554 }
15555
15556 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15557 // move the insert_vector_elt to the source operand of the concat_vector.
15558 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15559 return SDValue();
15560
15561 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15562 if (!IndexC)
15563 return SDValue();
15564 unsigned Elt = IndexC->getZExtValue();
15565
15566 EVT ConcatVT = InVec.getOperand(0).getValueType();
15567 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15568 return SDValue();
15569 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15570 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15571
15572 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15573 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15574 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15575 ConcatOp, InVal, NewIdx);
15576
15577 SmallVector<SDValue> ConcatOps;
15578 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15579 ConcatOps[ConcatOpIdx] = ConcatOp;
15580 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15581}
15582
15583// If we're concatenating a series of vector loads like
15584// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15585// Then we can turn this into a strided load by widening the vector elements
15586// vlse32 p, stride=n
15587 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15588 const RISCVSubtarget &Subtarget,
15589 const RISCVTargetLowering &TLI) {
15590 SDLoc DL(N);
15591 EVT VT = N->getValueType(0);
15592
15593 // Only perform this combine on legal MVTs.
15594 if (!TLI.isTypeLegal(VT))
15595 return SDValue();
15596
15597 // TODO: Potentially extend this to scalable vectors
15598 if (VT.isScalableVector())
15599 return SDValue();
15600
15601 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15602 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15603 !SDValue(BaseLd, 0).hasOneUse())
15604 return SDValue();
15605
15606 EVT BaseLdVT = BaseLd->getValueType(0);
15607
15608 // Go through the loads and check that they're strided
15609 SmallVector<LoadSDNode *> Lds;
15610 Lds.push_back(BaseLd);
15611 Align Align = BaseLd->getAlign();
15612 for (SDValue Op : N->ops().drop_front()) {
15613 auto *Ld = dyn_cast<LoadSDNode>(Op);
15614 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15615 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15616 Ld->getValueType(0) != BaseLdVT)
15617 return SDValue();
15618
15619 Lds.push_back(Ld);
15620
15621 // The common alignment is the most restrictive (smallest) of all the loads
15622 Align = std::min(Align, Ld->getAlign());
15623 }
15624
15625 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15626 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15627 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15628 // If the load ptrs can be decomposed into a common (Base + Index) with a
15629 // common constant stride, then return the constant stride.
15630 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15631 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15632 if (BIO1.equalBaseIndex(BIO2, DAG))
15633 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15634
15635 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15636 SDValue P1 = Ld1->getBasePtr();
15637 SDValue P2 = Ld2->getBasePtr();
15638 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15639 return {{P2.getOperand(1), false}};
15640 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15641 return {{P1.getOperand(1), true}};
15642
15643 return std::nullopt;
15644 };
15645
15646 // Get the distance between the first and second loads
15647 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15648 if (!BaseDiff)
15649 return SDValue();
15650
15651 // Check all the loads are the same distance apart
15652 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15653 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15654 return SDValue();
15655
15656 // TODO: At this point, we've successfully matched a generalized gather
15657 // load. Maybe we should emit that, and then move the specialized
15658 // matchers above and below into a DAG combine?
15659
15660 // Get the widened scalar type, e.g. v4i8 -> i32
15661 unsigned WideScalarBitWidth =
15662 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15663 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15664
15665 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15666 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15667 if (!TLI.isTypeLegal(WideVecVT))
15668 return SDValue();
15669
15670 // Check that the operation is legal
15671 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15672 return SDValue();
15673
15674 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15675 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15676 ? std::get<SDValue>(StrideVariant)
15677 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15678 Lds[0]->getOffset().getValueType());
15679 if (MustNegateStride)
15680 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15681
15682 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15683 SDValue IntID =
15684 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15685 Subtarget.getXLenVT());
15686
15687 SDValue AllOneMask =
15688 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15689 DAG.getConstant(1, DL, MVT::i1));
15690
15691 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15692 BaseLd->getBasePtr(), Stride, AllOneMask};
15693
15694 uint64_t MemSize;
15695 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15696 ConstStride && ConstStride->getSExtValue() >= 0)
15697 // total size = (elsize * n) + (stride - elsize) * (n-1)
15698 // = elsize + stride * (n-1)
15699 MemSize = WideScalarVT.getSizeInBits() +
15700 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15701 else
15702 // If Stride isn't constant, then we can't know how much it will load
15703 MemSize = MemoryLocation::UnknownSize;
15704
15705 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15706 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15707 Align);
15708
15709 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15710 Ops, WideVecVT, MMO);
15711 for (SDValue Ld : N->ops())
15712 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15713
15714 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15715}
15716
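// Editorial note (not in the original source): the combine below folds
//   add (vwmul[u|su]_vl a, b), addend
// (or the equivalent ADD_VL form) into the fused widening multiply-accumulate
// nodes VWMACC_VL / VWMACCU_VL / VWMACCSU_VL, provided the mask and VL of the
// add and of the multiply agree and the merge operands are undef.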
15717 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15718 const RISCVSubtarget &Subtarget) {
15719
15720 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15721
15722 if (N->getValueType(0).isFixedLengthVector())
15723 return SDValue();
15724
15725 SDValue Addend = N->getOperand(0);
15726 SDValue MulOp = N->getOperand(1);
15727
15728 if (N->getOpcode() == RISCVISD::ADD_VL) {
15729 SDValue AddMergeOp = N->getOperand(2);
15730 if (!AddMergeOp.isUndef())
15731 return SDValue();
15732 }
15733
15734 auto IsVWMulOpc = [](unsigned Opc) {
15735 switch (Opc) {
15736 case RISCVISD::VWMUL_VL:
15737 case RISCVISD::VWMULU_VL:
15738 case RISCVISD::VWMULSU_VL:
15739 return true;
15740 default:
15741 return false;
15742 }
15743 };
15744
15745 if (!IsVWMulOpc(MulOp.getOpcode()))
15746 std::swap(Addend, MulOp);
15747
15748 if (!IsVWMulOpc(MulOp.getOpcode()))
15749 return SDValue();
15750
15751 SDValue MulMergeOp = MulOp.getOperand(2);
15752
15753 if (!MulMergeOp.isUndef())
15754 return SDValue();
15755
15756 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15757 const RISCVSubtarget &Subtarget) {
15758 if (N->getOpcode() == ISD::ADD) {
15759 SDLoc DL(N);
15760 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15761 Subtarget);
15762 }
15763 return std::make_pair(N->getOperand(3), N->getOperand(4));
15764 }(N, DAG, Subtarget);
15765
15766 SDValue MulMask = MulOp.getOperand(3);
15767 SDValue MulVL = MulOp.getOperand(4);
15768
15769 if (AddMask != MulMask || AddVL != MulVL)
15770 return SDValue();
15771
15772 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15773 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15774 "Unexpected opcode after VWMACC_VL");
15775 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15776 "Unexpected opcode after VWMACC_VL!");
15777 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15778 "Unexpected opcode after VWMUL_VL!");
15779 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15780 "Unexpected opcode after VWMUL_VL!");
15781
15782 SDLoc DL(N);
15783 EVT VT = N->getValueType(0);
15784 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15785 AddVL};
15786 return DAG.getNode(Opc, DL, VT, Ops);
15787}
15788
15789 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15790 ISD::MemIndexType &IndexType,
15791 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15792 if (!DCI.isBeforeLegalize())
15793 return false;
15794
15795 SelectionDAG &DAG = DCI.DAG;
15796 const MVT XLenVT =
15797 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15798
15799 const EVT IndexVT = Index.getValueType();
15800
15801 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15802 // mode, so anything else must be manually legalized.
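// For example (editorial illustration): a gather whose v4i16 signed index
// vector is narrower than XLEN is sign-extended to v4iXLen here, after which
// the index can be treated as unsigned unscaled.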
15803 if (!isIndexTypeSigned(IndexType))
15804 return false;
15805
15806 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15807 // Any index legalization should first promote to XLenVT, so we don't lose
15808 // bits when scaling. This may create an illegal index type so we let
15809 // LLVM's legalization take care of the splitting.
15810 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15811 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15812 IndexVT.changeVectorElementType(XLenVT), Index);
15813 }
15814 IndexType = ISD::UNSIGNED_SCALED;
15815 return true;
15816}
15817
15818/// Match the index vector of a scatter or gather node as the shuffle mask
15819/// which performs the rearrangement if possible. Will only match if
15820/// all lanes are touched, and thus replacing the scatter or gather with
15821/// a unit strided access and shuffle is legal.
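// Editorial example (not from the original source): a v4i32 gather with an
// all-ones mask and constant byte offsets <8, 0, 12, 4> touches elements
// <2, 0, 3, 1> exactly once each, so it can become a unit-strided masked load
// followed by a vector shuffle with that mask.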
15822 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15823 SmallVector<int> &ShuffleMask) {
15824 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15825 return false;
15826 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15827 return false;
15828
15829 const unsigned ElementSize = VT.getScalarStoreSize();
15830 const unsigned NumElems = VT.getVectorNumElements();
15831
15832 // Create the shuffle mask and check all bits active
15833 assert(ShuffleMask.empty());
15834 BitVector ActiveLanes(NumElems);
15835 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15836 // TODO: We've found an active bit of UB, and could be
15837 // more aggressive here if desired.
15838 if (Index->getOperand(i)->isUndef())
15839 return false;
15840 uint64_t C = Index->getConstantOperandVal(i);
15841 if (C % ElementSize != 0)
15842 return false;
15843 C = C / ElementSize;
15844 if (C >= NumElems)
15845 return false;
15846 ShuffleMask.push_back(C);
15847 ActiveLanes.set(C);
15848 }
15849 return ActiveLanes.all();
15850}
15851
15852/// Match the index of a gather or scatter operation as an operation
15853/// with twice the element width and half the number of elements. This is
15854/// generally profitable (if legal) because these operations are linear
15855 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15856/// come out ahead.
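// Editorial example (not from the original source): with i32 elements and
// constant byte offsets <0, 4, 16, 20>, consecutive pairs form two contiguous
// 8-byte accesses at offsets 0 and 16, so the gather can instead use half as
// many i64 elements (subject to the alignment check below).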
15857 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15858 Align BaseAlign, const RISCVSubtarget &ST) {
15859 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15860 return false;
15861 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15862 return false;
15863
15864 // Attempt a doubling. If we can use an element type 4x or 8x in
15865 // size, this will happen via multiple iterations of the transform.
15866 const unsigned NumElems = VT.getVectorNumElements();
15867 if (NumElems % 2 != 0)
15868 return false;
15869
15870 const unsigned ElementSize = VT.getScalarStoreSize();
15871 const unsigned WiderElementSize = ElementSize * 2;
15872 if (WiderElementSize > ST.getELen()/8)
15873 return false;
15874
15875 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15876 return false;
15877
15878 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15879 // TODO: We've found an active bit of UB, and could be
15880 // more aggressive here if desired.
15881 if (Index->getOperand(i)->isUndef())
15882 return false;
15883 // TODO: This offset check is too strict if we support fully
15884 // misaligned memory operations.
15885 uint64_t C = Index->getConstantOperandVal(i);
15886 if (i % 2 == 0) {
15887 if (C % WiderElementSize != 0)
15888 return false;
15889 continue;
15890 }
15891 uint64_t Last = Index->getConstantOperandVal(i-1);
15892 if (C != Last + ElementSize)
15893 return false;
15894 }
15895 return true;
15896}
15897
15898
15899 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15900 DAGCombinerInfo &DCI) const {
15901 SelectionDAG &DAG = DCI.DAG;
15902 const MVT XLenVT = Subtarget.getXLenVT();
15903 SDLoc DL(N);
15904
15905 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15906 // bits are demanded. N will be added to the Worklist if it was not deleted.
15907 // Caller should return SDValue(N, 0) if this returns true.
15908 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15909 SDValue Op = N->getOperand(OpNo);
15910 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15911 if (!SimplifyDemandedBits(Op, Mask, DCI))
15912 return false;
15913
15914 if (N->getOpcode() != ISD::DELETED_NODE)
15915 DCI.AddToWorklist(N);
15916 return true;
15917 };
15918
15919 switch (N->getOpcode()) {
15920 default:
15921 break;
15922 case RISCVISD::SplitF64: {
15923 SDValue Op0 = N->getOperand(0);
15924 // If the input to SplitF64 is just BuildPairF64 then the operation is
15925 // redundant. Instead, use BuildPairF64's operands directly.
15926 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15927 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15928
15929 if (Op0->isUndef()) {
15930 SDValue Lo = DAG.getUNDEF(MVT::i32);
15931 SDValue Hi = DAG.getUNDEF(MVT::i32);
15932 return DCI.CombineTo(N, Lo, Hi);
15933 }
15934
15935 // It's cheaper to materialise two 32-bit integers than to load a double
15936 // from the constant pool and transfer it to integer registers through the
15937 // stack.
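// For example (editorial illustration): the f64 constant 1.0 has the bit
// pattern 0x3FF0000000000000, so Lo becomes 0x0 and Hi becomes 0x3FF00000,
// both of which are trivially materialised in integer registers.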
15938 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
15939 APInt V = C->getValueAPF().bitcastToAPInt();
15940 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15941 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15942 return DCI.CombineTo(N, Lo, Hi);
15943 }
15944
15945 // This is a target-specific version of a DAGCombine performed in
15946 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15947 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15948 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15949 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15950 !Op0.getNode()->hasOneUse())
15951 break;
15952 SDValue NewSplitF64 =
15953 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15954 Op0.getOperand(0));
15955 SDValue Lo = NewSplitF64.getValue(0);
15956 SDValue Hi = NewSplitF64.getValue(1);
15957 APInt SignBit = APInt::getSignMask(32);
15958 if (Op0.getOpcode() == ISD::FNEG) {
15959 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15960 DAG.getConstant(SignBit, DL, MVT::i32));
15961 return DCI.CombineTo(N, Lo, NewHi);
15962 }
15963 assert(Op0.getOpcode() == ISD::FABS);
15964 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15965 DAG.getConstant(~SignBit, DL, MVT::i32));
15966 return DCI.CombineTo(N, Lo, NewHi);
15967 }
15968 case RISCVISD::SLLW:
15969 case RISCVISD::SRAW:
15970 case RISCVISD::SRLW:
15971 case RISCVISD::RORW:
15972 case RISCVISD::ROLW: {
15973 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15974 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15975 SimplifyDemandedLowBitsHelper(1, 5))
15976 return SDValue(N, 0);
15977
15978 break;
15979 }
15980 case RISCVISD::CLZW:
15981 case RISCVISD::CTZW: {
15982 // Only the lower 32 bits of the first operand are read
15983 if (SimplifyDemandedLowBitsHelper(0, 32))
15984 return SDValue(N, 0);
15985 break;
15986 }
15987 case RISCVISD::FMV_W_X_RV64: {
15988 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15989 // conversion is unnecessary and can be replaced with the
15990 // FMV_X_ANYEXTW_RV64 operand.
15991 SDValue Op0 = N->getOperand(0);
15992 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15993 return Op0.getOperand(0);
15994 break;
15995 }
15996 case RISCVISD::FMV_X_ANYEXTH:
15997 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15998 SDLoc DL(N);
15999 SDValue Op0 = N->getOperand(0);
16000 MVT VT = N->getSimpleValueType(0);
16001 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16002 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16003 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16004 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16005 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16006 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16007 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16008 assert(Op0.getOperand(0).getValueType() == VT &&
16009 "Unexpected value type!");
16010 return Op0.getOperand(0);
16011 }
16012
16013 // This is a target-specific version of a DAGCombine performed in
16014 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16015 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16016 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16017 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16018 !Op0.getNode()->hasOneUse())
16019 break;
16020 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16021 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16022 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16023 if (Op0.getOpcode() == ISD::FNEG)
16024 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16025 DAG.getConstant(SignBit, DL, VT));
16026
16027 assert(Op0.getOpcode() == ISD::FABS);
16028 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16029 DAG.getConstant(~SignBit, DL, VT));
16030 }
16031 case ISD::ABS: {
16032 EVT VT = N->getValueType(0);
16033 SDValue N0 = N->getOperand(0);
16034 // abs (sext) -> zext (abs)
16035 // abs (zext) -> zext (handled elsewhere)
16036 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16037 SDValue Src = N0.getOperand(0);
16038 SDLoc DL(N);
16039 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16040 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16041 }
16042 break;
16043 }
16044 case ISD::ADD: {
16045 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16046 return V;
16047 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16048 return V;
16049 return performADDCombine(N, DAG, Subtarget);
16050 }
16051 case ISD::SUB: {
16052 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16053 return V;
16054 return performSUBCombine(N, DAG, Subtarget);
16055 }
16056 case ISD::AND:
16057 return performANDCombine(N, DCI, Subtarget);
16058 case ISD::OR: {
16059 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16060 return V;
16061 return performORCombine(N, DCI, Subtarget);
16062 }
16063 case ISD::XOR:
16064 return performXORCombine(N, DAG, Subtarget);
16065 case ISD::MUL:
16066 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16067 return V;
16068 return performMULCombine(N, DAG, DCI, Subtarget);
16069 case ISD::SDIV:
16070 case ISD::UDIV:
16071 case ISD::SREM:
16072 case ISD::UREM:
16073 if (SDValue V = combineBinOpOfZExt(N, DAG))
16074 return V;
16075 break;
16076 case ISD::FADD:
16077 case ISD::UMAX:
16078 case ISD::UMIN:
16079 case ISD::SMAX:
16080 case ISD::SMIN:
16081 case ISD::FMAXNUM:
16082 case ISD::FMINNUM: {
16083 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16084 return V;
16085 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16086 return V;
16087 return SDValue();
16088 }
16089 case ISD::SETCC:
16090 return performSETCCCombine(N, DAG, Subtarget);
16091 case ISD::SIGN_EXTEND_INREG:
16092 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16093 case ISD::ZERO_EXTEND:
16094 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16095 // type legalization. This is safe because fp_to_uint produces poison if
16096 // it overflows.
16097 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16098 SDValue Src = N->getOperand(0);
16099 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16100 isTypeLegal(Src.getOperand(0).getValueType()))
16101 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16102 Src.getOperand(0));
16103 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16104 isTypeLegal(Src.getOperand(1).getValueType())) {
16105 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16106 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16107 Src.getOperand(0), Src.getOperand(1));
16108 DCI.CombineTo(N, Res);
16109 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16110 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16111 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16112 }
16113 }
16114 return SDValue();
16115 case RISCVISD::TRUNCATE_VECTOR_VL: {
16116 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16117 // This benefits the cases where X and Y are both the same low-precision
16118 // vector value type. Since the truncate would be lowered into n levels of
16119 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
16120 // a pattern would be expanded into a series of "vsetvli" and "vnsrl"
16121 // instructions later to reach this point.
16122 auto IsTruncNode = [](SDValue V) {
16123 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16124 return false;
16125 SDValue VL = V.getOperand(2);
16126 auto *C = dyn_cast<ConstantSDNode>(VL);
16127 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16128 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16129 (isa<RegisterSDNode>(VL) &&
16130 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16131 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16132 IsVLMAXForVMSET;
16133 };
16134
16135 SDValue Op = N->getOperand(0);
16136
16137 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16138 // to distinguish such pattern.
16139 while (IsTruncNode(Op)) {
16140 if (!Op.hasOneUse())
16141 return SDValue();
16142 Op = Op.getOperand(0);
16143 }
16144
16145 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16146 SDValue N0 = Op.getOperand(0);
16147 SDValue N1 = Op.getOperand(1);
16148 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16149 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16150 SDValue N00 = N0.getOperand(0);
16151 SDValue N10 = N1.getOperand(0);
16152 if (N00.getValueType().isVector() &&
16153 N00.getValueType() == N10.getValueType() &&
16154 N->getValueType(0) == N10.getValueType()) {
16155 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16156 SDValue SMin = DAG.getNode(
16157 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16158 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16159 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16160 }
16161 }
16162 }
16163 break;
16164 }
16165 case ISD::TRUNCATE:
16166 return performTRUNCATECombine(N, DAG, Subtarget);
16167 case ISD::SELECT:
16168 return performSELECTCombine(N, DAG, Subtarget);
16169 case RISCVISD::CZERO_EQZ:
16170 case RISCVISD::CZERO_NEZ: {
16171 SDValue Val = N->getOperand(0);
16172 SDValue Cond = N->getOperand(1);
16173
16174 unsigned Opc = N->getOpcode();
16175
16176 // czero_eqz x, x -> x
16177 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16178 return Val;
16179
16180 unsigned InvOpc =
16181 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16182
16183 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16184 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16185 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16186 SDValue NewCond = Cond.getOperand(0);
16187 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16188 if (DAG.MaskedValueIsZero(NewCond, Mask))
16189 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16190 }
16191 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16192 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16193 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16194 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16195 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16196 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16197 if (ISD::isIntEqualitySetCC(CCVal))
16198 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16199 N->getValueType(0), Val, Cond.getOperand(0));
16200 }
16201 return SDValue();
16202 }
16203 case RISCVISD::SELECT_CC: {
16204 // Transform
16205 SDValue LHS = N->getOperand(0);
16206 SDValue RHS = N->getOperand(1);
16207 SDValue CC = N->getOperand(2);
16208 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16209 SDValue TrueV = N->getOperand(3);
16210 SDValue FalseV = N->getOperand(4);
16211 SDLoc DL(N);
16212 EVT VT = N->getValueType(0);
16213
16214 // If the True and False values are the same, we don't need a select_cc.
16215 if (TrueV == FalseV)
16216 return TrueV;
16217
16218 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16219 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
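// e.g. (editorial illustration) on RV64: select (x < 0), 3, 1 becomes
//   srai t, x, 63 ; andi t, t, 2 ; addi res, t, 1
// since TrueV - FalseV = 2 and the sign-fill mask picks between the two
// immediates.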
16220 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16221 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16222 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16223 if (CCVal == ISD::CondCode::SETGE)
16224 std::swap(TrueV, FalseV);
16225
16226 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16227 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16228 // Only handle simm12; if it is not in this range, it can be considered as a
16229 // register.
16230 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16231 isInt<12>(TrueSImm - FalseSImm)) {
16232 SDValue SRA =
16233 DAG.getNode(ISD::SRA, DL, VT, LHS,
16234 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16235 SDValue AND =
16236 DAG.getNode(ISD::AND, DL, VT, SRA,
16237 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16238 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16239 }
16240
16241 if (CCVal == ISD::CondCode::SETGE)
16242 std::swap(TrueV, FalseV);
16243 }
16244
16245 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16246 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16247 {LHS, RHS, CC, TrueV, FalseV});
16248
16249 if (!Subtarget.hasConditionalMoveFusion()) {
16250 // (select c, -1, y) -> -c | y
16251 if (isAllOnesConstant(TrueV)) {
16252 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16253 SDValue Neg = DAG.getNegative(C, DL, VT);
16254 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16255 }
16256 // (select c, y, -1) -> -!c | y
16257 if (isAllOnesConstant(FalseV)) {
16258 SDValue C =
16259 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16260 SDValue Neg = DAG.getNegative(C, DL, VT);
16261 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16262 }
16263
16264 // (select c, 0, y) -> -!c & y
16265 if (isNullConstant(TrueV)) {
16266 SDValue C =
16267 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16268 SDValue Neg = DAG.getNegative(C, DL, VT);
16269 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16270 }
16271 // (select c, y, 0) -> -c & y
16272 if (isNullConstant(FalseV)) {
16273 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16274 SDValue Neg = DAG.getNegative(C, DL, VT);
16275 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16276 }
16277 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16278 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16279 if (((isOneConstant(FalseV) && LHS == TrueV &&
16280 CCVal == ISD::CondCode::SETNE) ||
16281 (isOneConstant(TrueV) && LHS == FalseV &&
16282 CCVal == ISD::CondCode::SETEQ)) &&
16283 isNullConstant(RHS)) {
16284 // freeze it to be safe.
16285 LHS = DAG.getFreeze(LHS);
16286 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16287 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16288 }
16289 }
16290
16291 // If both true/false are an xor with 1, pull through the select.
16292 // This can occur after op legalization if both operands are setccs that
16293 // require an xor to invert.
16294 // FIXME: Generalize to other binary ops with identical operand?
16295 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16296 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16297 isOneConstant(TrueV.getOperand(1)) &&
16298 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16299 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16300 TrueV.getOperand(0), FalseV.getOperand(0));
16301 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16302 }
16303
16304 return SDValue();
16305 }
16306 case RISCVISD::BR_CC: {
16307 SDValue LHS = N->getOperand(1);
16308 SDValue RHS = N->getOperand(2);
16309 SDValue CC = N->getOperand(3);
16310 SDLoc DL(N);
16311
16312 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16313 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16314 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16315
16316 return SDValue();
16317 }
16318 case ISD::BITREVERSE:
16319 return performBITREVERSECombine(N, DAG, Subtarget);
16320 case ISD::FP_TO_SINT:
16321 case ISD::FP_TO_UINT:
16322 return performFP_TO_INTCombine(N, DCI, Subtarget);
16323 case ISD::FP_TO_SINT_SAT:
16324 case ISD::FP_TO_UINT_SAT:
16325 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16326 case ISD::FCOPYSIGN: {
16327 EVT VT = N->getValueType(0);
16328 if (!VT.isVector())
16329 break;
16330 // There is a form of VFSGNJ which injects the negated sign of its second
16331 // operand. Try and bubble any FNEG up after the extend/round to produce
16332 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16333 // TRUNC=1.
16334 SDValue In2 = N->getOperand(1);
16335 // Avoid cases where the extend/round has multiple uses, as duplicating
16336 // those is typically more expensive than removing a fneg.
16337 if (!In2.hasOneUse())
16338 break;
16339 if (In2.getOpcode() != ISD::FP_EXTEND &&
16340 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16341 break;
16342 In2 = In2.getOperand(0);
16343 if (In2.getOpcode() != ISD::FNEG)
16344 break;
16345 SDLoc DL(N);
16346 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16347 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16348 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16349 }
16350 case ISD::MGATHER: {
16351 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16352 const EVT VT = N->getValueType(0);
16353 SDValue Index = MGN->getIndex();
16354 SDValue ScaleOp = MGN->getScale();
16355 ISD::MemIndexType IndexType = MGN->getIndexType();
16356 assert(!MGN->isIndexScaled() &&
16357 "Scaled gather/scatter should not be formed");
16358
16359 SDLoc DL(N);
16360 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16361 return DAG.getMaskedGather(
16362 N->getVTList(), MGN->getMemoryVT(), DL,
16363 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16364 MGN->getBasePtr(), Index, ScaleOp},
16365 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16366
16367 if (narrowIndex(Index, IndexType, DAG))
16368 return DAG.getMaskedGather(
16369 N->getVTList(), MGN->getMemoryVT(), DL,
16370 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16371 MGN->getBasePtr(), Index, ScaleOp},
16372 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16373
16374 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16375 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16376 // The sequence will be XLenVT, not the type of Index. Tell
16377 // isSimpleVIDSequence this so we avoid overflow.
16378 if (std::optional<VIDSequence> SimpleVID =
16379 isSimpleVIDSequence(Index, Subtarget.getXLen());
16380 SimpleVID && SimpleVID->StepDenominator == 1) {
16381 const int64_t StepNumerator = SimpleVID->StepNumerator;
16382 const int64_t Addend = SimpleVID->Addend;
16383
16384 // Note: We don't need to check alignment here since (by assumption
16385 // from the existence of the gather), our offsets must be sufficiently
16386 // aligned.
16387
16388 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16389 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16390 assert(IndexType == ISD::UNSIGNED_SCALED);
16391 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16392 DAG.getConstant(Addend, DL, PtrVT));
16393
16394 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16395 SDValue IntID =
16396 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16397 XLenVT);
16398 SDValue Ops[] =
16399 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16400 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16401 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16402 Ops, VT, MGN->getMemOperand());
16403 }
16404 }
16405
16406 SmallVector<int> ShuffleMask;
16407 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16408 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16409 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16410 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16411 MGN->getMask(), DAG.getUNDEF(VT),
16412 MGN->getMemoryVT(), MGN->getMemOperand(),
16413 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16414 SDValue Shuffle =
16415 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16416 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16417 }
16418
16419 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16420 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16421 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16422 SmallVector<SDValue> NewIndices;
16423 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16424 NewIndices.push_back(Index.getOperand(i));
16425 EVT IndexVT = Index.getValueType()
16426 .getHalfNumVectorElementsVT(*DAG.getContext());
16427 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16428
16429 unsigned ElementSize = VT.getScalarStoreSize();
16430 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16431 auto EltCnt = VT.getVectorElementCount();
16432 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16433 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16434 EltCnt.divideCoefficientBy(2));
16435 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16436 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16437 EltCnt.divideCoefficientBy(2));
16438 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16439
16440 SDValue Gather =
16441 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16442 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16443 Index, ScaleOp},
16444 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16445 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16446 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16447 }
16448 break;
16449 }
16450 case ISD::MSCATTER:{
16451 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16452 SDValue Index = MSN->getIndex();
16453 SDValue ScaleOp = MSN->getScale();
16454 ISD::MemIndexType IndexType = MSN->getIndexType();
16455 assert(!MSN->isIndexScaled() &&
16456 "Scaled gather/scatter should not be formed");
16457
16458 SDLoc DL(N);
16459 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16460 return DAG.getMaskedScatter(
16461 N->getVTList(), MSN->getMemoryVT(), DL,
16462 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16463 Index, ScaleOp},
16464 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16465
16466 if (narrowIndex(Index, IndexType, DAG))
16467 return DAG.getMaskedScatter(
16468 N->getVTList(), MSN->getMemoryVT(), DL,
16469 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16470 Index, ScaleOp},
16471 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16472
16473 EVT VT = MSN->getValue()->getValueType(0);
16474 SmallVector<int> ShuffleMask;
16475 if (!MSN->isTruncatingStore() &&
16476 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16477 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16478 DAG.getUNDEF(VT), ShuffleMask);
16479 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16480 DAG.getUNDEF(XLenVT), MSN->getMask(),
16481 MSN->getMemoryVT(), MSN->getMemOperand(),
16482 ISD::UNINDEXED, false);
16483 }
16484 break;
16485 }
16486 case ISD::VP_GATHER: {
16487 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16488 SDValue Index = VPGN->getIndex();
16489 SDValue ScaleOp = VPGN->getScale();
16490 ISD::MemIndexType IndexType = VPGN->getIndexType();
16491 assert(!VPGN->isIndexScaled() &&
16492 "Scaled gather/scatter should not be formed");
16493
16494 SDLoc DL(N);
16495 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16496 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16497 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16498 ScaleOp, VPGN->getMask(),
16499 VPGN->getVectorLength()},
16500 VPGN->getMemOperand(), IndexType);
16501
16502 if (narrowIndex(Index, IndexType, DAG))
16503 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16504 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16505 ScaleOp, VPGN->getMask(),
16506 VPGN->getVectorLength()},
16507 VPGN->getMemOperand(), IndexType);
16508
16509 break;
16510 }
16511 case ISD::VP_SCATTER: {
16512 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16513 SDValue Index = VPSN->getIndex();
16514 SDValue ScaleOp = VPSN->getScale();
16515 ISD::MemIndexType IndexType = VPSN->getIndexType();
16516 assert(!VPSN->isIndexScaled() &&
16517 "Scaled gather/scatter should not be formed");
16518
16519 SDLoc DL(N);
16520 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16521 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16522 {VPSN->getChain(), VPSN->getValue(),
16523 VPSN->getBasePtr(), Index, ScaleOp,
16524 VPSN->getMask(), VPSN->getVectorLength()},
16525 VPSN->getMemOperand(), IndexType);
16526
16527 if (narrowIndex(Index, IndexType, DAG))
16528 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16529 {VPSN->getChain(), VPSN->getValue(),
16530 VPSN->getBasePtr(), Index, ScaleOp,
16531 VPSN->getMask(), VPSN->getVectorLength()},
16532 VPSN->getMemOperand(), IndexType);
16533 break;
16534 }
16535 case RISCVISD::SHL_VL:
16536 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16537 return V;
16538 [[fallthrough]];
16539 case RISCVISD::SRA_VL:
16540 case RISCVISD::SRL_VL: {
16541 SDValue ShAmt = N->getOperand(1);
16542 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16543 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16544 SDLoc DL(N);
16545 SDValue VL = N->getOperand(4);
16546 EVT VT = N->getValueType(0);
16547 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16548 ShAmt.getOperand(1), VL);
16549 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16550 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16551 }
16552 break;
16553 }
16554 case ISD::SRA:
16555 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16556 return V;
16557 [[fallthrough]];
16558 case ISD::SRL:
16559 case ISD::SHL: {
16560 if (N->getOpcode() == ISD::SHL) {
16561 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16562 return V;
16563 }
16564 SDValue ShAmt = N->getOperand(1);
16565 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16566 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16567 SDLoc DL(N);
16568 EVT VT = N->getValueType(0);
16569 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16570 ShAmt.getOperand(1),
16571 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16572 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16573 }
16574 break;
16575 }
16576 case RISCVISD::ADD_VL:
16577 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16578 return V;
16579 return combineToVWMACC(N, DAG, Subtarget);
16580 case RISCVISD::VWADD_W_VL:
16581 case RISCVISD::VWADDU_W_VL:
16582 case RISCVISD::VWSUB_W_VL:
16583 case RISCVISD::VWSUBU_W_VL:
16584 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16585 case RISCVISD::SUB_VL:
16586 case RISCVISD::MUL_VL:
16587 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16588 case RISCVISD::VFMADD_VL:
16589 case RISCVISD::VFNMADD_VL:
16590 case RISCVISD::VFMSUB_VL:
16591 case RISCVISD::VFNMSUB_VL:
16592 case RISCVISD::STRICT_VFMADD_VL:
16593 case RISCVISD::STRICT_VFNMADD_VL:
16594 case RISCVISD::STRICT_VFMSUB_VL:
16595 case RISCVISD::STRICT_VFNMSUB_VL:
16596 return performVFMADD_VLCombine(N, DAG, Subtarget);
16597 case RISCVISD::FADD_VL:
16598 case RISCVISD::FSUB_VL:
16599 case RISCVISD::FMUL_VL:
16600 case RISCVISD::VFWADD_W_VL:
16601 case RISCVISD::VFWSUB_W_VL: {
16602 if (N->getValueType(0).isScalableVector() &&
16603 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16604 (Subtarget.hasVInstructionsF16Minimal() &&
16605 !Subtarget.hasVInstructionsF16()))
16606 return SDValue();
16607 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16608 }
16609 case ISD::LOAD:
16610 case ISD::STORE: {
16611 if (DCI.isAfterLegalizeDAG())
16612 if (SDValue V = performMemPairCombine(N, DCI))
16613 return V;
16614
16615 if (N->getOpcode() != ISD::STORE)
16616 break;
16617
16618 auto *Store = cast<StoreSDNode>(N);
16619 SDValue Chain = Store->getChain();
16620 EVT MemVT = Store->getMemoryVT();
16621 SDValue Val = Store->getValue();
16622 SDLoc DL(N);
16623
16624 bool IsScalarizable =
16625 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16626 Store->isSimple() &&
16627 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16628 isPowerOf2_64(MemVT.getSizeInBits()) &&
16629 MemVT.getSizeInBits() <= Subtarget.getXLen();
16630
16631 // If sufficiently aligned we can scalarize stores of constant vectors of
16632 // any power-of-two size up to XLen bits, provided that they aren't too
16633 // expensive to materialize.
16634 // vsetivli zero, 2, e8, m1, ta, ma
16635 // vmv.v.i v8, 4
16636 // vse64.v v8, (a0)
16637 // ->
16638 // li a1, 1028
16639 // sh a1, 0(a0)
16640 if (DCI.isBeforeLegalize() && IsScalarizable &&
16641 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16642 // Get the constant vector bits
16643 APInt NewC(Val.getValueSizeInBits(), 0);
16644 uint64_t EltSize = Val.getScalarValueSizeInBits();
16645 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16646 if (Val.getOperand(i).isUndef())
16647 continue;
16648 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16649 i * EltSize);
16650 }
16651 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16652
16653 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16654 true) <= 2 &&
16655 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16656 NewVT, *Store->getMemOperand())) {
16657 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16658 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16659 Store->getPointerInfo(), Store->getOriginalAlign(),
16660 Store->getMemOperand()->getFlags());
16661 }
16662 }
16663
16664 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16665 // vsetivli zero, 2, e16, m1, ta, ma
16666 // vle16.v v8, (a0)
16667 // vse16.v v8, (a1)
16668 if (auto *L = dyn_cast<LoadSDNode>(Val);
16669 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16670 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16671 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16672 L->getMemoryVT() == MemVT) {
16673 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16674 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16675 NewVT, *Store->getMemOperand()) &&
16676 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16677 NewVT, *L->getMemOperand())) {
16678 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16679 L->getPointerInfo(), L->getOriginalAlign(),
16680 L->getMemOperand()->getFlags());
16681 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16682 Store->getPointerInfo(), Store->getOriginalAlign(),
16683 Store->getMemOperand()->getFlags());
16684 }
16685 }
16686
16687 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16688 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16689 // any illegal types.
16690 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16691 (DCI.isAfterLegalizeDAG() &&
16692 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16693 isNullConstant(Val.getOperand(1)))) {
16694 SDValue Src = Val.getOperand(0);
16695 MVT VecVT = Src.getSimpleValueType();
16696 // VecVT should be scalable and memory VT should match the element type.
16697 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16698 MemVT == VecVT.getVectorElementType()) {
16699 SDLoc DL(N);
16700 MVT MaskVT = getMaskTypeFor(VecVT);
16701 return DAG.getStoreVP(
16702 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16703 DAG.getConstant(1, DL, MaskVT),
16704 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16705 Store->getMemOperand(), Store->getAddressingMode(),
16706 Store->isTruncatingStore(), /*IsCompress*/ false);
16707 }
16708 }
16709
16710 break;
16711 }
16712 case ISD::SPLAT_VECTOR: {
16713 EVT VT = N->getValueType(0);
16714 // Only perform this combine on legal MVT types.
16715 if (!isTypeLegal(VT))
16716 break;
16717 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16718 DAG, Subtarget))
16719 return Gather;
16720 break;
16721 }
16722 case ISD::BUILD_VECTOR:
16723 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16724 return V;
16725 break;
16726 case ISD::CONCAT_VECTORS:
16727 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16728 return V;
16729 break;
16730 case ISD::INSERT_VECTOR_ELT:
16731 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16732 return V;
16733 break;
16734 case RISCVISD::VFMV_V_F_VL: {
16735 const MVT VT = N->getSimpleValueType(0);
16736 SDValue Passthru = N->getOperand(0);
16737 SDValue Scalar = N->getOperand(1);
16738 SDValue VL = N->getOperand(2);
16739
16740 // If VL is 1, we can use vfmv.s.f.
16741 if (isOneConstant(VL))
16742 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16743 break;
16744 }
16745 case RISCVISD::VMV_V_X_VL: {
16746 const MVT VT = N->getSimpleValueType(0);
16747 SDValue Passthru = N->getOperand(0);
16748 SDValue Scalar = N->getOperand(1);
16749 SDValue VL = N->getOperand(2);
16750
16751 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16752 // scalar input.
16753 unsigned ScalarSize = Scalar.getValueSizeInBits();
16754 unsigned EltWidth = VT.getScalarSizeInBits();
16755 if (ScalarSize > EltWidth && Passthru.isUndef())
16756 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16757 return SDValue(N, 0);
16758
16759 // If VL is 1 and the scalar value won't benefit from immediate, we can
16760 // use vmv.s.x.
16761 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16762 if (isOneConstant(VL) &&
16763 (!Const || Const->isZero() ||
16764 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16765 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16766
16767 break;
16768 }
16769 case RISCVISD::VFMV_S_F_VL: {
16770 SDValue Src = N->getOperand(1);
16771 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16772 // into an undef vector.
16773 // TODO: Could use a vslide or vmv.v.v for non-undef.
16774 if (N->getOperand(0).isUndef() &&
16775 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16776 isNullConstant(Src.getOperand(1)) &&
16777 Src.getOperand(0).getValueType().isScalableVector()) {
16778 EVT VT = N->getValueType(0);
16779 EVT SrcVT = Src.getOperand(0).getValueType();
16781 // Widths match, just return the original vector.
16782 if (SrcVT == VT)
16783 return Src.getOperand(0);
16784 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16785 }
16786 [[fallthrough]];
16787 }
16788 case RISCVISD::VMV_S_X_VL: {
16789 const MVT VT = N->getSimpleValueType(0);
16790 SDValue Passthru = N->getOperand(0);
16791 SDValue Scalar = N->getOperand(1);
16792 SDValue VL = N->getOperand(2);
16793
16794 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
16795 Scalar.getOperand(0).getValueType() == N->getValueType(0))
16796 return Scalar.getOperand(0);
16797
16798 // Use M1 or smaller to avoid over constraining register allocation
16799 const MVT M1VT = getLMUL1VT(VT);
16800 if (M1VT.bitsLT(VT)) {
16801 SDValue M1Passthru =
16802 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16803 DAG.getVectorIdxConstant(0, DL));
16804 SDValue Result =
16805 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16806 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16807 DAG.getVectorIdxConstant(0, DL));
16808 return Result;
16809 }
16810
16811 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16812 // higher would involve overly constraining the register allocator for
16813 // no purpose.
16814 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16815 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
16816 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
16817 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
16818
16819 break;
16820 }
16821 case RISCVISD::VMV_X_S: {
16822 SDValue Vec = N->getOperand(0);
16823 MVT VecVT = N->getOperand(0).getSimpleValueType();
16824 const MVT M1VT = getLMUL1VT(VecVT);
16825 if (M1VT.bitsLT(VecVT)) {
16826 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
16827 DAG.getVectorIdxConstant(0, DL));
16828 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16829 }
16830 break;
16831 }
16832 case ISD::INTRINSIC_VOID:
16833 case ISD::INTRINSIC_W_CHAIN:
16834 case ISD::INTRINSIC_WO_CHAIN: {
16835 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16836 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16837 switch (IntNo) {
16838 // By default we do not combine any intrinsic.
16839 default:
16840 return SDValue();
16841 case Intrinsic::riscv_masked_strided_load: {
16842 MVT VT = N->getSimpleValueType(0);
16843 auto *Load = cast<MemIntrinsicSDNode>(N);
16844 SDValue PassThru = N->getOperand(2);
16845 SDValue Base = N->getOperand(3);
16846 SDValue Stride = N->getOperand(4);
16847 SDValue Mask = N->getOperand(5);
16848
16849 // If the stride is equal to the element size in bytes, we can use
16850 // a masked.load.
16851 const unsigned ElementSize = VT.getScalarStoreSize();
16852 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16853 StrideC && StrideC->getZExtValue() == ElementSize)
16854 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16855 DAG.getUNDEF(XLenVT), Mask, PassThru,
16856 Load->getMemoryVT(), Load->getMemOperand(),
16857 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16858 return SDValue();
16859 }
16860 case Intrinsic::riscv_masked_strided_store: {
16861 auto *Store = cast<MemIntrinsicSDNode>(N);
16862 SDValue Value = N->getOperand(2);
16863 SDValue Base = N->getOperand(3);
16864 SDValue Stride = N->getOperand(4);
16865 SDValue Mask = N->getOperand(5);
16866
16867 // If the stride is equal to the element size in bytes, we can use
16868 // a masked.store.
16869 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16870 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16871 StrideC && StrideC->getZExtValue() == ElementSize)
16872 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16873 DAG.getUNDEF(XLenVT), Mask,
16874 Value.getValueType(), Store->getMemOperand(),
16875 ISD::UNINDEXED, false);
16876 return SDValue();
16877 }
16878 case Intrinsic::riscv_vcpop:
16879 case Intrinsic::riscv_vcpop_mask:
16880 case Intrinsic::riscv_vfirst:
16881 case Intrinsic::riscv_vfirst_mask: {
16882 SDValue VL = N->getOperand(2);
16883 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16884 IntNo == Intrinsic::riscv_vfirst_mask)
16885 VL = N->getOperand(3);
16886 if (!isNullConstant(VL))
16887 return SDValue();
16888 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16889 SDLoc DL(N);
16890 EVT VT = N->getValueType(0);
16891 if (IntNo == Intrinsic::riscv_vfirst ||
16892 IntNo == Intrinsic::riscv_vfirst_mask)
16893 return DAG.getConstant(-1, DL, VT);
16894 return DAG.getConstant(0, DL, VT);
16895 }
16896 }
16897 }
16898 case ISD::BITCAST: {
16900 SDValue N0 = N->getOperand(0);
16901 EVT VT = N->getValueType(0);
16902 EVT SrcVT = N0.getValueType();
16903 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16904 // type, widen both sides to avoid a trip through memory.
16905 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16906 VT.isScalarInteger()) {
16907 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16908 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
16909 Ops[0] = N0;
16910 SDLoc DL(N);
16911 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16912 N0 = DAG.getBitcast(MVT::i8, N0);
16913 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
16914 }
16915
16916 return SDValue();
16917 }
16918 }
16919
16920 return SDValue();
16921}
16922
16923 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16924 EVT XVT, unsigned KeptBits) const {
16925 // For vectors, we don't have a preference.
16926 if (XVT.isVector())
16927 return false;
16928
16929 if (XVT != MVT::i32 && XVT != MVT::i64)
16930 return false;
16931
16932 // We can use sext.w for RV64 or an srai 31 on RV32.
16933 if (KeptBits == 32 || KeptBits == 64)
16934 return true;
16935
16936 // With Zbb we can use sext.h/sext.b.
16937 return Subtarget.hasStdExtZbb() &&
16938 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16939 KeptBits == 16);
16940}
16941
16942 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16943 const SDNode *N, CombineLevel Level) const {
16944 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16945 N->getOpcode() == ISD::SRL) &&
16946 "Expected shift op");
16947
16948 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16949 // materialised in fewer instructions than `(OP _, c1)`:
16950 //
16951 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16952 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
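// For instance (editorial illustration): with c1 = 0x7ff and c2 = 4, c1 fits
// in a 12-bit add immediate but c1 << c2 = 0x7ff0 does not, so the code below
// reports the commute as undesirable rather than trade an addi for a longer
// constant materialisation.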
16953 SDValue N0 = N->getOperand(0);
16954 EVT Ty = N0.getValueType();
16955 if (Ty.isScalarInteger() &&
16956 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16957 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16958 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16959 if (C1 && C2) {
16960 const APInt &C1Int = C1->getAPIntValue();
16961 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16962
16963 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16964 // and the combine should happen, to potentially allow further combines
16965 // later.
16966 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16967 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
16968 return true;
16969
16970 // We can materialise `c1` in an add immediate, so it's "free", and the
16971 // combine should be prevented.
16972 if (C1Int.getSignificantBits() <= 64 &&
16973 isLegalAddImmediate(C1Int.getSExtValue()))
16974 return false;
16975
16976 // Neither constant will fit into an immediate, so find materialisation
16977 // costs.
16978 int C1Cost =
16979 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16980 /*CompressionCost*/ true);
16981 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16982 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16983 /*CompressionCost*/ true);
16984
16985 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16986 // combine should be prevented.
16987 if (C1Cost < ShiftedC1Cost)
16988 return false;
16989 }
16990 }
16991 return true;
16992}
16993
16994 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16995 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16996 TargetLoweringOpt &TLO) const {
16997 // Delay this optimization as late as possible.
16998 if (!TLO.LegalOps)
16999 return false;
17000
17001 EVT VT = Op.getValueType();
17002 if (VT.isVector())
17003 return false;
17004
17005 unsigned Opcode = Op.getOpcode();
17006 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17007 return false;
17008
17009 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17010 if (!C)
17011 return false;
17012
17013 const APInt &Mask = C->getAPIntValue();
17014
17015 // Clear all non-demanded bits initially.
17016 APInt ShrunkMask = Mask & DemandedBits;
17017
17018 // Try to make a smaller immediate by setting undemanded bits.
17019
17020 APInt ExpandedMask = Mask | ~DemandedBits;
17021
17022 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17023 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17024 };
17025 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17026 if (NewMask == Mask)
17027 return true;
17028 SDLoc DL(Op);
17029 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17030 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17031 Op.getOperand(0), NewC);
17032 return TLO.CombineTo(Op, NewOp);
17033 };
17034
17035 // If the shrunk mask fits in sign extended 12 bits, let the target
17036 // independent code apply it.
17037 if (ShrunkMask.isSignedIntN(12))
17038 return false;
17039
17040 // And has a few special cases for zext.
17041 if (Opcode == ISD::AND) {
17042 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17043 // otherwise use SLLI + SRLI.
17044 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17045 if (IsLegalMask(NewMask))
17046 return UseMask(NewMask);
17047
17048 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17049 if (VT == MVT::i64) {
17050 APInt NewMask = APInt(64, 0xffffffff);
17051 if (IsLegalMask(NewMask))
17052 return UseMask(NewMask);
17053 }
17054 }
17055
17056 // For the remaining optimizations, we need to be able to make a negative
17057 // number through a combination of mask and undemanded bits.
17058 if (!ExpandedMask.isNegative())
17059 return false;
17060
17061 // What is the fewest number of bits we need to represent the negative number.
17062 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17063
17064 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17065 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17066 // If we can't create a simm12, we shouldn't change opaque constants.
17067 APInt NewMask = ShrunkMask;
17068 if (MinSignedBits <= 12)
17069 NewMask.setBitsFrom(11);
17070 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17071 NewMask.setBitsFrom(31);
17072 else
17073 return false;
17074
17075 // Check that our new mask is a subset of the demanded mask.
17076 assert(IsLegalMask(NewMask));
17077 return UseMask(NewMask);
17078}
17079
17080static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17081 static const uint64_t GREVMasks[] = {
17082 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17083 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17084
17085 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17086 unsigned Shift = 1 << Stage;
17087 if (ShAmt & Shift) {
17088 uint64_t Mask = GREVMasks[Stage];
17089 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17090 if (IsGORC)
17091 Res |= x;
17092 x = Res;
17093 }
17094 }
17095
17096 return x;
17097}
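// For example (editorial note): with ShAmt == 7 this routine swaps bits at
// distances 1, 2 and 4 within every byte, i.e. it models brev8 (bit-reverse
// within bytes) when IsGORC is false, and orc.b (any set bit fills its byte)
// when IsGORC is true, matching the uses below.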
17098
17099 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17100 KnownBits &Known,
17101 const APInt &DemandedElts,
17102 const SelectionDAG &DAG,
17103 unsigned Depth) const {
17104 unsigned BitWidth = Known.getBitWidth();
17105 unsigned Opc = Op.getOpcode();
17106 assert((Opc >= ISD::BUILTIN_OP_END ||
17107 Opc == ISD::INTRINSIC_WO_CHAIN ||
17108 Opc == ISD::INTRINSIC_W_CHAIN ||
17109 Opc == ISD::INTRINSIC_VOID) &&
17110 "Should use MaskedValueIsZero if you don't know whether Op"
17111 " is a target node!");
17112
17113 Known.resetAll();
17114 switch (Opc) {
17115 default: break;
17116 case RISCVISD::SELECT_CC: {
17117 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17118 // If we don't know any bits, early out.
17119 if (Known.isUnknown())
17120 break;
17121 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17122
17123 // Only known if known in both the LHS and RHS.
17124 Known = Known.intersectWith(Known2);
17125 break;
17126 }
17127 case RISCVISD::CZERO_EQZ:
17128 case RISCVISD::CZERO_NEZ:
17129 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17130 // Result is either all zero or operand 0. We can propagate zeros, but not
17131 // ones.
17132 Known.One.clearAllBits();
17133 break;
17134 case RISCVISD::REMUW: {
17135 KnownBits Known2;
17136 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17137 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17138 // We only care about the lower 32 bits.
17139 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17140 // Restore the original width by sign extending.
17141 Known = Known.sext(BitWidth);
17142 break;
17143 }
17144 case RISCVISD::DIVUW: {
17145 KnownBits Known2;
17146 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17147 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17148 // We only care about the lower 32 bits.
17149 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17150 // Restore the original width by sign extending.
17151 Known = Known.sext(BitWidth);
17152 break;
17153 }
17154 case RISCVISD::SLLW: {
17155 KnownBits Known2;
17156 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17157 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17158 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17159 // Restore the original width by sign extending.
17160 Known = Known.sext(BitWidth);
17161 break;
17162 }
17163 case RISCVISD::CTZW: {
17164 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17165 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17166 unsigned LowBits = llvm::bit_width(PossibleTZ);
17167 Known.Zero.setBitsFrom(LowBits);
17168 break;
17169 }
17170 case RISCVISD::CLZW: {
17171 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17172 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17173 unsigned LowBits = llvm::bit_width(PossibleLZ);
17174 Known.Zero.setBitsFrom(LowBits);
17175 break;
17176 }
17177 case RISCVISD::BREV8:
17178 case RISCVISD::ORC_B: {
17179 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17180 // control value of 7 is equivalent to brev8 and orc.b.
17181 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17182 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17183 // To compute zeros, we need to invert the value and invert it back after.
17184 Known.Zero =
17185 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17186 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17187 break;
17188 }
17189 case RISCVISD::READ_VLENB: {
17190 // We can use the minimum and maximum VLEN values to bound VLENB. We
17191 // know VLEN must be a power of two.
17192 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17193 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17194 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17195 Known.Zero.setLowBits(Log2_32(MinVLenB));
17196 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17197 if (MaxVLenB == MinVLenB)
17198 Known.One.setBit(Log2_32(MinVLenB));
17199 break;
17200 }
17201 case RISCVISD::FCLASS: {
17202 // fclass will only set one of the low 10 bits.
17203 Known.Zero.setBitsFrom(10);
17204 break;
17205 }
17206   case ISD::INTRINSIC_W_CHAIN:
17207   case ISD::INTRINSIC_WO_CHAIN: {
17208     unsigned IntNo =
17209 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17210 switch (IntNo) {
17211 default:
17212 // We can't do anything for most intrinsics.
17213 break;
17214 case Intrinsic::riscv_vsetvli:
17215 case Intrinsic::riscv_vsetvlimax: {
17216 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17217 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17218 RISCVII::VLMUL VLMUL =
17219 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17220 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17221 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17222 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17223 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17224
17225 // Result of vsetvli must be not larger than AVL.
17226 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17227 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17228
17229 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17230 if (BitWidth > KnownZeroFirstBit)
17231 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17232 break;
17233 }
17234 }
17235 break;
17236 }
17237 }
17238}
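// Worked example for the vsetvli/vsetvlimax case above, using an assumed
// configuration rather than real subtarget data: for a maximum VLEN of 512
// bits, SEW=32 and LMUL=2, VLMAX is (512 / 32) * 2 = 32, so the returned VL
// needs at most 6 bits and bits [6, XLEN) are known zero. A constant AVL
// smaller than VLMAX tightens the bound further.
namespace {
constexpr uint64_t ExampleMaxVLen = 512; // hypothetical implementation limit
constexpr uint64_t ExampleVLMax = (ExampleMaxVLen / 32) * 2; // e32, m2
static_assert(ExampleVLMax == 32, "VLMAX for VLEN=512, e32, m2");
static_assert(ExampleVLMax < (1u << 6),
              "the vsetvli result fits in 6 bits, so bit 6 and up are zero");
} // namespace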
17239
17240 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17241     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17242 unsigned Depth) const {
17243 switch (Op.getOpcode()) {
17244 default:
17245 break;
17246 case RISCVISD::SELECT_CC: {
17247 unsigned Tmp =
17248 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17249 if (Tmp == 1) return 1; // Early out.
17250 unsigned Tmp2 =
17251 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17252 return std::min(Tmp, Tmp2);
17253 }
17254   case RISCVISD::CZERO_EQZ:
17255   case RISCVISD::CZERO_NEZ:
17256     // Output is either all zero or operand 0. We can propagate sign bit count
17257 // from operand 0.
17258 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17259 case RISCVISD::ABSW: {
17260 // We expand this at isel to negw+max. The result will have 33 sign bits
17261 // if the input has at least 33 sign bits.
17262 unsigned Tmp =
17263 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17264 if (Tmp < 33) return 1;
17265 return 33;
17266 }
17267 case RISCVISD::SLLW:
17268 case RISCVISD::SRAW:
17269 case RISCVISD::SRLW:
17270 case RISCVISD::DIVW:
17271 case RISCVISD::DIVUW:
17272 case RISCVISD::REMUW:
17273 case RISCVISD::ROLW:
17274 case RISCVISD::RORW:
17275   case RISCVISD::FCVT_W_RV64:
17276   case RISCVISD::FCVT_WU_RV64:
17277   case RISCVISD::STRICT_FCVT_W_RV64:
17278   case RISCVISD::STRICT_FCVT_WU_RV64:
17279     // TODO: As the result is sign-extended, this is conservatively correct. A
17280 // more precise answer could be calculated for SRAW depending on known
17281 // bits in the shift amount.
17282 return 33;
17283 case RISCVISD::VMV_X_S: {
17284 // The number of sign bits of the scalar result is computed by obtaining the
17285 // element type of the input vector operand, subtracting its width from the
17286 // XLEN, and then adding one (sign bit within the element type). If the
17287 // element type is wider than XLen, the least-significant XLEN bits are
17288 // taken.
17289 unsigned XLen = Subtarget.getXLen();
17290 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17291 if (EltBits <= XLen)
17292 return XLen - EltBits + 1;
17293 break;
17294 }
17295   case ISD::INTRINSIC_W_CHAIN: {
17296     unsigned IntNo = Op.getConstantOperandVal(1);
17297 switch (IntNo) {
17298 default:
17299 break;
17300 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17301 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17302 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17303 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17304 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17305 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17306 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17307 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17308 case Intrinsic::riscv_masked_cmpxchg_i64:
17309 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17310 // narrow atomic operation. These are implemented using atomic
17311 // operations at the minimum supported atomicrmw/cmpxchg width whose
17312 // result is then sign extended to XLEN. With +A, the minimum width is
17313       // 32 for both RV64 and RV32.
17314       assert(Subtarget.getXLen() == 64);
17315       assert(getMinCmpXchgSizeInBits() == 32);
17316       assert(Subtarget.hasStdExtA());
17317 return 33;
17318 }
17319 break;
17320 }
17321 }
17322
17323 return 1;
17324}
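// Worked example for the cases above (illustrative): the W-form nodes are
// sign-extended from 32 bits, giving 64 - 32 + 1 = 33 known sign bits on
// RV64, and a vmv.x.s from an e8 vector gives 64 - 8 + 1 = 57.
static_assert(64 - 32 + 1 == 33, "sign bits of a sign-extended 32-bit result");
static_assert(64 - 8 + 1 == 57, "sign bits of vmv.x.s from an e8 vector on RV64");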
17325
17326 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17327     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17328 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17329
17330 // TODO: Add more target nodes.
17331 switch (Op.getOpcode()) {
17332   case RISCVISD::SELECT_CC:
17333     // Integer select_cc cannot create poison.
17334 // TODO: What are the FP poison semantics?
17335 // TODO: This instruction blocks poison from the unselected operand, can
17336 // we do anything with that?
17337 return !Op.getValueType().isInteger();
17338 }
17339   return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17340       Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17341}
17342
17343const Constant *
17344 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17345   assert(Ld && "Unexpected null LoadSDNode");
17346 if (!ISD::isNormalLoad(Ld))
17347 return nullptr;
17348
17349 SDValue Ptr = Ld->getBasePtr();
17350
17351 // Only constant pools with no offset are supported.
17352 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17353 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17354 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17355 CNode->getOffset() != 0)
17356 return nullptr;
17357
17358 return CNode;
17359 };
17360
17361 // Simple case, LLA.
17362 if (Ptr.getOpcode() == RISCVISD::LLA) {
17363 auto *CNode = GetSupportedConstantPool(Ptr);
17364 if (!CNode || CNode->getTargetFlags() != 0)
17365 return nullptr;
17366
17367 return CNode->getConstVal();
17368 }
17369
17370 // Look for a HI and ADD_LO pair.
17371 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17372 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17373 return nullptr;
17374
17375 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17376 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17377
17378 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17379 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17380 return nullptr;
17381
17382 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17383 return nullptr;
17384
17385 return CNodeLo->getConstVal();
17386}
17387
17388 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17389                                                     MachineBasicBlock *BB) {
17390 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17391
17392 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17393 // Should the count have wrapped while it was being read, we need to try
17394 // again.
17395 // For example:
17396 // ```
17397 // read:
17398 // csrrs x3, counterh # load high word of counter
17399 // csrrs x2, counter # load low word of counter
17400 // csrrs x4, counterh # load high word of counter
17401 // bne x3, x4, read # check if high word reads match, otherwise try again
17402 // ```
17403
17404 MachineFunction &MF = *BB->getParent();
17405 const BasicBlock *LLVMBB = BB->getBasicBlock();
17406   MachineFunction::iterator It = ++BB->getIterator();
17407
17408 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17409 MF.insert(It, LoopMBB);
17410
17411 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17412 MF.insert(It, DoneMBB);
17413
17414 // Transfer the remainder of BB and its successor edges to DoneMBB.
17415 DoneMBB->splice(DoneMBB->begin(), BB,
17416 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17417   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17418
17419 BB->addSuccessor(LoopMBB);
17420
17421   MachineRegisterInfo &RegInfo = MF.getRegInfo();
17422   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17423 Register LoReg = MI.getOperand(0).getReg();
17424 Register HiReg = MI.getOperand(1).getReg();
17425 int64_t LoCounter = MI.getOperand(2).getImm();
17426 int64_t HiCounter = MI.getOperand(3).getImm();
17427 DebugLoc DL = MI.getDebugLoc();
17428
17429   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17430   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17431 .addImm(HiCounter)
17432 .addReg(RISCV::X0);
17433 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17434 .addImm(LoCounter)
17435 .addReg(RISCV::X0);
17436 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17437 .addImm(HiCounter)
17438 .addReg(RISCV::X0);
17439
17440 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17441 .addReg(HiReg)
17442 .addReg(ReadAgainReg)
17443 .addMBB(LoopMBB);
17444
17445 LoopMBB->addSuccessor(LoopMBB);
17446 LoopMBB->addSuccessor(DoneMBB);
17447
17448 MI.eraseFromParent();
17449
17450 return DoneMBB;
17451}
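// A C-level sketch of the loop built above (illustrative only; ReadLo and
// ReadHi are hypothetical stand-ins for the csrrs reads of the low and high
// halves of the counter CSR).
static inline uint64_t readCounter64Sketch(uint32_t (*ReadLo)(),
                                           uint32_t (*ReadHi)()) {
  uint32_t Hi, Lo;
  do {
    Hi = ReadHi();
    Lo = ReadLo();
  } while (ReadHi() != Hi); // high half changed: the low half wrapped, retry
  return ((uint64_t)Hi << 32) | Lo;
}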
17452
17453 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17454                                              MachineBasicBlock *BB,
17455                                              const RISCVSubtarget &Subtarget) {
17456 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17457
17458 MachineFunction &MF = *BB->getParent();
17459 DebugLoc DL = MI.getDebugLoc();
17460   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17461   const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17462   Register LoReg = MI.getOperand(0).getReg();
17463 Register HiReg = MI.getOperand(1).getReg();
17464 Register SrcReg = MI.getOperand(2).getReg();
17465
17466 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17467 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17468
17469 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17470 RI, Register());
17471   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17472   MachineMemOperand *MMOLo =
17473       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
17474   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17475       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(4));
17476 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17477 .addFrameIndex(FI)
17478 .addImm(0)
17479 .addMemOperand(MMOLo);
17480 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17481 .addFrameIndex(FI)
17482 .addImm(4)
17483 .addMemOperand(MMOHi);
17484 MI.eraseFromParent(); // The pseudo instruction is gone now.
17485 return BB;
17486}
17487
17488 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17489                                                  MachineBasicBlock *BB,
17490                                                  const RISCVSubtarget &Subtarget) {
17491 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17492 "Unexpected instruction");
17493
17494 MachineFunction &MF = *BB->getParent();
17495 DebugLoc DL = MI.getDebugLoc();
17496   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17497   const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17498   Register DstReg = MI.getOperand(0).getReg();
17499 Register LoReg = MI.getOperand(1).getReg();
17500 Register HiReg = MI.getOperand(2).getReg();
17501
17502 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17503 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17504
17506 MachineMemOperand *MMOLo =
17510 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17511 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17512 .addFrameIndex(FI)
17513 .addImm(0)
17514 .addMemOperand(MMOLo);
17515 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17516 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17517 .addFrameIndex(FI)
17518 .addImm(4)
17519 .addMemOperand(MMOHi);
17520 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17521 MI.eraseFromParent(); // The pseudo instruction is gone now.
17522 return BB;
17523}
17524
17526 switch (MI.getOpcode()) {
17527 default:
17528 return false;
17529 case RISCV::Select_GPR_Using_CC_GPR:
17530 case RISCV::Select_FPR16_Using_CC_GPR:
17531 case RISCV::Select_FPR16INX_Using_CC_GPR:
17532 case RISCV::Select_FPR32_Using_CC_GPR:
17533 case RISCV::Select_FPR32INX_Using_CC_GPR:
17534 case RISCV::Select_FPR64_Using_CC_GPR:
17535 case RISCV::Select_FPR64INX_Using_CC_GPR:
17536 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17537 return true;
17538 }
17539}
17540
17541 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17542                                         unsigned RelOpcode, unsigned EqOpcode,
17543 const RISCVSubtarget &Subtarget) {
17544 DebugLoc DL = MI.getDebugLoc();
17545 Register DstReg = MI.getOperand(0).getReg();
17546 Register Src1Reg = MI.getOperand(1).getReg();
17547 Register Src2Reg = MI.getOperand(2).getReg();
17548   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17549   Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17550   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17551
17552 // Save the current FFLAGS.
17553 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17554
17555 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17556 .addReg(Src1Reg)
17557 .addReg(Src2Reg);
17558   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17559     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17560
17561 // Restore the FFLAGS.
17562 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17563 .addReg(SavedFFlags, RegState::Kill);
17564
17565 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17566 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17567 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17568 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17569   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17570     MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17571
17572 // Erase the pseudoinstruction.
17573 MI.eraseFromParent();
17574 return BB;
17575}
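// For example, PseudoQuietFLT_S (a quiet 'less than' that must raise the
// invalid flag only for signaling NaNs) expands to roughly the following
// sequence; register names are illustrative stand-ins for the virtual
// registers created above:
//
//   csrrs t0, fflags, x0   # ReadFFLAGS: save the current flags
//   flt.s a0, fa0, fa1     # signaling compare; may spuriously set NV for qNaNs
//   csrw  fflags, t0       # WriteFFLAGS: drop the spurious NV
//   feq.s x0, fa0, fa1     # quiet compare; raises NV only for signaling NaNs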
17576
17577static MachineBasicBlock *
17578 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17579                           MachineBasicBlock *ThisMBB,
17580 const RISCVSubtarget &Subtarget) {
17581 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
17582 // Without this, custom-inserter would have generated:
17583 //
17584 // A
17585 // | \
17586 // | B
17587 // | /
17588 // C
17589 // | \
17590 // | D
17591 // | /
17592 // E
17593 //
17594 // A: X = ...; Y = ...
17595 // B: empty
17596 // C: Z = PHI [X, A], [Y, B]
17597 // D: empty
17598 // E: PHI [X, C], [Z, D]
17599 //
17600 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17601 //
17602 // A
17603 // | \
17604 // | C
17605 // | /|
17606 // |/ |
17607 // | |
17608 // | D
17609 // | /
17610 // E
17611 //
17612 // A: X = ...; Y = ...
17613 // D: empty
17614 // E: PHI [X, A], [X, C], [Y, D]
17615
17616 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17617 const DebugLoc &DL = First.getDebugLoc();
17618 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17619 MachineFunction *F = ThisMBB->getParent();
17620 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17621 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17622 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17623 MachineFunction::iterator It = ++ThisMBB->getIterator();
17624 F->insert(It, FirstMBB);
17625 F->insert(It, SecondMBB);
17626 F->insert(It, SinkMBB);
17627
17628 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17629 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17630                   std::next(MachineBasicBlock::iterator(First)),
17631                   ThisMBB->end());
17632 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17633
17634 // Fallthrough block for ThisMBB.
17635 ThisMBB->addSuccessor(FirstMBB);
17636 // Fallthrough block for FirstMBB.
17637 FirstMBB->addSuccessor(SecondMBB);
17638 ThisMBB->addSuccessor(SinkMBB);
17639 FirstMBB->addSuccessor(SinkMBB);
17640 // This is fallthrough.
17641 SecondMBB->addSuccessor(SinkMBB);
17642
17643 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17644 Register FLHS = First.getOperand(1).getReg();
17645 Register FRHS = First.getOperand(2).getReg();
17646 // Insert appropriate branch.
17647 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17648 .addReg(FLHS)
17649 .addReg(FRHS)
17650 .addMBB(SinkMBB);
17651
17652 Register SLHS = Second.getOperand(1).getReg();
17653 Register SRHS = Second.getOperand(2).getReg();
17654 Register Op1Reg4 = First.getOperand(4).getReg();
17655 Register Op1Reg5 = First.getOperand(5).getReg();
17656
17657 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17658 // Insert appropriate branch.
17659 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17660 .addReg(SLHS)
17661 .addReg(SRHS)
17662 .addMBB(SinkMBB);
17663
17664 Register DestReg = Second.getOperand(0).getReg();
17665 Register Op2Reg4 = Second.getOperand(4).getReg();
17666 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17667 .addReg(Op2Reg4)
17668 .addMBB(ThisMBB)
17669 .addReg(Op1Reg4)
17670 .addMBB(FirstMBB)
17671 .addReg(Op1Reg5)
17672 .addMBB(SecondMBB);
17673
17674 // Now remove the Select_FPRX_s.
17675 First.eraseFromParent();
17676 Second.eraseFromParent();
17677 return SinkMBB;
17678}
17679
17680 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17681                                            MachineBasicBlock *BB,
17682                                            const RISCVSubtarget &Subtarget) {
17683 // To "insert" Select_* instructions, we actually have to insert the triangle
17684 // control-flow pattern. The incoming instructions know the destination vreg
17685 // to set, the condition code register to branch on, the true/false values to
17686 // select between, and the condcode to use to select the appropriate branch.
17687 //
17688 // We produce the following control flow:
17689 // HeadMBB
17690 // | \
17691 // | IfFalseMBB
17692 // | /
17693 // TailMBB
17694 //
17695 // When we find a sequence of selects we attempt to optimize their emission
17696 // by sharing the control flow. Currently we only handle cases where we have
17697 // multiple selects with the exact same condition (same LHS, RHS and CC).
17698 // The selects may be interleaved with other instructions if the other
17699 // instructions meet some requirements we deem safe:
17700 // - They are not pseudo instructions.
17701 // - They are debug instructions. Otherwise,
17702 // - They do not have side-effects, do not access memory and their inputs do
17703 // not depend on the results of the select pseudo-instructions.
17704 // The TrueV/FalseV operands of the selects cannot depend on the result of
17705 // previous selects in the sequence.
17706 // These conditions could be further relaxed. See the X86 target for a
17707 // related approach and more information.
17708 //
17709 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17710 // is checked here and handled by a separate function -
17711 // EmitLoweredCascadedSelect.
17712 Register LHS = MI.getOperand(1).getReg();
17713 Register RHS = MI.getOperand(2).getReg();
17714 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17715
17716 SmallVector<MachineInstr *, 4> SelectDebugValues;
17717 SmallSet<Register, 4> SelectDests;
17718 SelectDests.insert(MI.getOperand(0).getReg());
17719
17720 MachineInstr *LastSelectPseudo = &MI;
17721 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17722 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17723 Next->getOpcode() == MI.getOpcode() &&
17724 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17725 Next->getOperand(5).isKill()) {
17726 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17727 }
17728
17729 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17730 SequenceMBBI != E; ++SequenceMBBI) {
17731 if (SequenceMBBI->isDebugInstr())
17732 continue;
17733 if (isSelectPseudo(*SequenceMBBI)) {
17734 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17735 SequenceMBBI->getOperand(2).getReg() != RHS ||
17736 SequenceMBBI->getOperand(3).getImm() != CC ||
17737 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17738 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17739 break;
17740 LastSelectPseudo = &*SequenceMBBI;
17741 SequenceMBBI->collectDebugValues(SelectDebugValues);
17742 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17743 continue;
17744 }
17745 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17746 SequenceMBBI->mayLoadOrStore() ||
17747 SequenceMBBI->usesCustomInsertionHook())
17748 break;
17749 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17750 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17751 }))
17752 break;
17753 }
17754
17755 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17756 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17757 DebugLoc DL = MI.getDebugLoc();
17758   MachineFunction::iterator I = ++BB->getIterator();
17759
17760 MachineBasicBlock *HeadMBB = BB;
17761 MachineFunction *F = BB->getParent();
17762 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17763 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17764
17765 F->insert(I, IfFalseMBB);
17766 F->insert(I, TailMBB);
17767
17768 // Transfer debug instructions associated with the selects to TailMBB.
17769 for (MachineInstr *DebugInstr : SelectDebugValues) {
17770 TailMBB->push_back(DebugInstr->removeFromParent());
17771 }
17772
17773 // Move all instructions after the sequence to TailMBB.
17774 TailMBB->splice(TailMBB->end(), HeadMBB,
17775 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17776 // Update machine-CFG edges by transferring all successors of the current
17777 // block to the new block which will contain the Phi nodes for the selects.
17778 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17779 // Set the successors for HeadMBB.
17780 HeadMBB->addSuccessor(IfFalseMBB);
17781 HeadMBB->addSuccessor(TailMBB);
17782
17783 // Insert appropriate branch.
17784 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17785 .addReg(LHS)
17786 .addReg(RHS)
17787 .addMBB(TailMBB);
17788
17789 // IfFalseMBB just falls through to TailMBB.
17790 IfFalseMBB->addSuccessor(TailMBB);
17791
17792 // Create PHIs for all of the select pseudo-instructions.
17793 auto SelectMBBI = MI.getIterator();
17794 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17795 auto InsertionPoint = TailMBB->begin();
17796 while (SelectMBBI != SelectEnd) {
17797 auto Next = std::next(SelectMBBI);
17798 if (isSelectPseudo(*SelectMBBI)) {
17799 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17800 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17801 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17802 .addReg(SelectMBBI->getOperand(4).getReg())
17803 .addMBB(HeadMBB)
17804 .addReg(SelectMBBI->getOperand(5).getReg())
17805 .addMBB(IfFalseMBB);
17806 SelectMBBI->eraseFromParent();
17807 }
17808 SelectMBBI = Next;
17809 }
17810
17811 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
17812 return TailMBB;
17813}
17814
17815// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
17816static const RISCV::RISCVMaskedPseudoInfo *
17817lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
17818   const RISCVVInversePseudosTable::PseudoInfo *Inverse =
17819       RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
17820 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
17821 const RISCV::RISCVMaskedPseudoInfo *Masked =
17822 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
17823 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
17824 return Masked;
17825}
17826
17827 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17828                                                     MachineBasicBlock *BB,
17829                                                     unsigned CVTXOpc) {
17830 DebugLoc DL = MI.getDebugLoc();
17831
17832   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17833
17834   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17835 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17836
17837 // Save the old value of FFLAGS.
17838 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17839
17840 assert(MI.getNumOperands() == 7);
17841
17842 // Emit a VFCVT_X_F
17843 const TargetRegisterInfo *TRI =
17844       BB->getParent()->getSubtarget().getRegisterInfo();
17845   const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
17846 Register Tmp = MRI.createVirtualRegister(RC);
17847 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17848 .add(MI.getOperand(1))
17849 .add(MI.getOperand(2))
17850 .add(MI.getOperand(3))
17851 .add(MachineOperand::CreateImm(7)) // frm = DYN
17852 .add(MI.getOperand(4))
17853 .add(MI.getOperand(5))
17854 .add(MI.getOperand(6))
17855 .add(MachineOperand::CreateReg(RISCV::FRM,
17856 /*IsDef*/ false,
17857 /*IsImp*/ true));
17858
17859 // Emit a VFCVT_F_X
17860 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
17861 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
17862 // There is no E8 variant for VFCVT_F_X.
17863 assert(Log2SEW >= 4);
17864 unsigned CVTFOpc =
17865 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
17866 ->MaskedPseudo;
17867
17868 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17869 .add(MI.getOperand(0))
17870 .add(MI.getOperand(1))
17871 .addReg(Tmp)
17872 .add(MI.getOperand(3))
17873 .add(MachineOperand::CreateImm(7)) // frm = DYN
17874 .add(MI.getOperand(4))
17875 .add(MI.getOperand(5))
17876 .add(MI.getOperand(6))
17877 .add(MachineOperand::CreateReg(RISCV::FRM,
17878 /*IsDef*/ false,
17879 /*IsImp*/ true));
17880
17881 // Restore FFLAGS.
17882 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17883 .addReg(SavedFFLAGS, RegState::Kill);
17884
17885 // Erase the pseudoinstruction.
17886 MI.eraseFromParent();
17887 return BB;
17888}
17889
17890 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17891                                      const RISCVSubtarget &Subtarget) {
17892 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17893 const TargetRegisterClass *RC;
17894 switch (MI.getOpcode()) {
17895 default:
17896 llvm_unreachable("Unexpected opcode");
17897 case RISCV::PseudoFROUND_H:
17898 CmpOpc = RISCV::FLT_H;
17899 F2IOpc = RISCV::FCVT_W_H;
17900 I2FOpc = RISCV::FCVT_H_W;
17901 FSGNJOpc = RISCV::FSGNJ_H;
17902 FSGNJXOpc = RISCV::FSGNJX_H;
17903 RC = &RISCV::FPR16RegClass;
17904 break;
17905 case RISCV::PseudoFROUND_H_INX:
17906 CmpOpc = RISCV::FLT_H_INX;
17907 F2IOpc = RISCV::FCVT_W_H_INX;
17908 I2FOpc = RISCV::FCVT_H_W_INX;
17909 FSGNJOpc = RISCV::FSGNJ_H_INX;
17910 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17911 RC = &RISCV::GPRF16RegClass;
17912 break;
17913 case RISCV::PseudoFROUND_S:
17914 CmpOpc = RISCV::FLT_S;
17915 F2IOpc = RISCV::FCVT_W_S;
17916 I2FOpc = RISCV::FCVT_S_W;
17917 FSGNJOpc = RISCV::FSGNJ_S;
17918 FSGNJXOpc = RISCV::FSGNJX_S;
17919 RC = &RISCV::FPR32RegClass;
17920 break;
17921 case RISCV::PseudoFROUND_S_INX:
17922 CmpOpc = RISCV::FLT_S_INX;
17923 F2IOpc = RISCV::FCVT_W_S_INX;
17924 I2FOpc = RISCV::FCVT_S_W_INX;
17925 FSGNJOpc = RISCV::FSGNJ_S_INX;
17926 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17927 RC = &RISCV::GPRF32RegClass;
17928 break;
17929 case RISCV::PseudoFROUND_D:
17930 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17931 CmpOpc = RISCV::FLT_D;
17932 F2IOpc = RISCV::FCVT_L_D;
17933 I2FOpc = RISCV::FCVT_D_L;
17934 FSGNJOpc = RISCV::FSGNJ_D;
17935 FSGNJXOpc = RISCV::FSGNJX_D;
17936 RC = &RISCV::FPR64RegClass;
17937 break;
17938 case RISCV::PseudoFROUND_D_INX:
17939 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17940 CmpOpc = RISCV::FLT_D_INX;
17941 F2IOpc = RISCV::FCVT_L_D_INX;
17942 I2FOpc = RISCV::FCVT_D_L_INX;
17943 FSGNJOpc = RISCV::FSGNJ_D_INX;
17944 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17945 RC = &RISCV::GPRRegClass;
17946 break;
17947 }
17948
17949 const BasicBlock *BB = MBB->getBasicBlock();
17950 DebugLoc DL = MI.getDebugLoc();
17951   MachineFunction::iterator I = ++MBB->getIterator();
17952
17953   MachineFunction *F = MBB->getParent();
17954 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17955 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17956
17957 F->insert(I, CvtMBB);
17958 F->insert(I, DoneMBB);
17959 // Move all instructions after the sequence to DoneMBB.
17960 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
17961 MBB->end());
17962 // Update machine-CFG edges by transferring all successors of the current
17963 // block to the new block which will contain the Phi nodes for the selects.
17964   DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
17965   // Set the successors for MBB.
17966 MBB->addSuccessor(CvtMBB);
17967 MBB->addSuccessor(DoneMBB);
17968
17969 Register DstReg = MI.getOperand(0).getReg();
17970 Register SrcReg = MI.getOperand(1).getReg();
17971 Register MaxReg = MI.getOperand(2).getReg();
17972 int64_t FRM = MI.getOperand(3).getImm();
17973
17974 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17975   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
17976
17977 Register FabsReg = MRI.createVirtualRegister(RC);
17978 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17979
17980 // Compare the FP value to the max value.
17981 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17982 auto MIB =
17983 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17984   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17985     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17986
17987 // Insert branch.
17988 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17989 .addReg(CmpReg)
17990 .addReg(RISCV::X0)
17991 .addMBB(DoneMBB);
17992
17993 CvtMBB->addSuccessor(DoneMBB);
17994
17995 // Convert to integer.
17996 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17997 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17998   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17999     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18000
18001 // Convert back to FP.
18002 Register I2FReg = MRI.createVirtualRegister(RC);
18003 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18004   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18005     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18006
18007 // Restore the sign bit.
18008 Register CvtReg = MRI.createVirtualRegister(RC);
18009 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18010
18011 // Merge the results.
18012 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18013 .addReg(SrcReg)
18014 .addMBB(MBB)
18015 .addReg(CvtReg)
18016 .addMBB(CvtMBB);
18017
18018 MI.eraseFromParent();
18019 return DoneMBB;
18020}
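// A scalar C++ sketch of the expansion above for the f32 case (illustrative
// only; the real code parameterizes the rounding mode through FRM, which this
// sketch approximates with the current dynamic mode via std::lrintf).
#include <cmath>
static inline float froundSketchF32(float X) {
  const float MaxIntegralF32 = 0x1p23f; // 2^23: any larger magnitude is integral
  // |X| >= 2^23, NaN and infinity skip the conversion (the BEQ to DoneMBB).
  if (!(std::fabs(X) < MaxIntegralF32))
    return X;
  float Rounded = (float)std::lrintf(X); // FCVT_W_S followed by FCVT_S_W
  return std::copysign(Rounded, X);      // FSGNJ: restore the sign, e.g. -0.0
}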
18021
18022 MachineBasicBlock *
18023 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18024                                                  MachineBasicBlock *BB) const {
18025 switch (MI.getOpcode()) {
18026 default:
18027 llvm_unreachable("Unexpected instr type to insert");
18028 case RISCV::ReadCounterWide:
18029 assert(!Subtarget.is64Bit() &&
18030 "ReadCounterWide is only to be used on riscv32");
18031 return emitReadCounterWidePseudo(MI, BB);
18032 case RISCV::Select_GPR_Using_CC_GPR:
18033 case RISCV::Select_FPR16_Using_CC_GPR:
18034 case RISCV::Select_FPR16INX_Using_CC_GPR:
18035 case RISCV::Select_FPR32_Using_CC_GPR:
18036 case RISCV::Select_FPR32INX_Using_CC_GPR:
18037 case RISCV::Select_FPR64_Using_CC_GPR:
18038 case RISCV::Select_FPR64INX_Using_CC_GPR:
18039 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18040 return emitSelectPseudo(MI, BB, Subtarget);
18041 case RISCV::BuildPairF64Pseudo:
18042 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18043 case RISCV::SplitF64Pseudo:
18044 return emitSplitF64Pseudo(MI, BB, Subtarget);
18045 case RISCV::PseudoQuietFLE_H:
18046 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18047 case RISCV::PseudoQuietFLE_H_INX:
18048 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18049 case RISCV::PseudoQuietFLT_H:
18050 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18051 case RISCV::PseudoQuietFLT_H_INX:
18052 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18053 case RISCV::PseudoQuietFLE_S:
18054 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18055 case RISCV::PseudoQuietFLE_S_INX:
18056 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18057 case RISCV::PseudoQuietFLT_S:
18058 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18059 case RISCV::PseudoQuietFLT_S_INX:
18060 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18061 case RISCV::PseudoQuietFLE_D:
18062 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18063 case RISCV::PseudoQuietFLE_D_INX:
18064 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18065 case RISCV::PseudoQuietFLE_D_IN32X:
18066 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18067 Subtarget);
18068 case RISCV::PseudoQuietFLT_D:
18069 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18070 case RISCV::PseudoQuietFLT_D_INX:
18071 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18072 case RISCV::PseudoQuietFLT_D_IN32X:
18073 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18074 Subtarget);
18075
18076 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18077 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18078 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18079 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18080 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18081 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18082 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18083 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18084 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18085 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18086 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18087 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18088 case RISCV::PseudoFROUND_H:
18089 case RISCV::PseudoFROUND_H_INX:
18090 case RISCV::PseudoFROUND_S:
18091 case RISCV::PseudoFROUND_S_INX:
18092 case RISCV::PseudoFROUND_D:
18093 case RISCV::PseudoFROUND_D_INX:
18094 case RISCV::PseudoFROUND_D_IN32X:
18095 return emitFROUND(MI, BB, Subtarget);
18096 case TargetOpcode::STATEPOINT:
18097 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18098 // while jal call instruction (where statepoint will be lowered at the end)
18099 // has implicit def. This def is early-clobber as it will be set at
18100 // the moment of the call and earlier than any use is read.
18101 // Add this implicit dead def here as a workaround.
18102 MI.addOperand(*MI.getMF(),
18103                   MachineOperand::CreateReg(
18104                       RISCV::X1, /*isDef*/ true,
18105 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18106 /*isUndef*/ false, /*isEarlyClobber*/ true));
18107 [[fallthrough]];
18108 case TargetOpcode::STACKMAP:
18109 case TargetOpcode::PATCHPOINT:
18110 if (!Subtarget.is64Bit())
18111 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18112 "supported on 64-bit targets");
18113 return emitPatchPoint(MI, BB);
18114 }
18115}
18116
18117 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18118                                                         SDNode *Node) const {
18119 // Add FRM dependency to any instructions with dynamic rounding mode.
18120 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18121 if (Idx < 0) {
18122 // Vector pseudos have FRM index indicated by TSFlags.
18123 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18124 if (Idx < 0)
18125 return;
18126 }
18127 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18128 return;
18129 // If the instruction already reads FRM, don't add another read.
18130 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18131 return;
18132 MI.addOperand(
18133 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18134}
18135
18136// Calling Convention Implementation.
18137// The expectations for frontend ABI lowering vary from target to target.
18138// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18139// details, but this is a longer term goal. For now, we simply try to keep the
18140// role of the frontend as simple and well-defined as possible. The rules can
18141// be summarised as:
18142// * Never split up large scalar arguments. We handle them here.
18143// * If a hardfloat calling convention is being used, and the struct may be
18144// passed in a pair of registers (fp+fp, int+fp), and both registers are
18145// available, then pass as two separate arguments. If either the GPRs or FPRs
18146// are exhausted, then pass according to the rule below.
18147// * If a struct could never be passed in registers or directly in a stack
18148// slot (as it is larger than 2*XLEN and the floating point rules don't
18149// apply), then pass it using a pointer with the byval attribute.
18150// * If a struct is less than 2*XLEN, then coerce to either a two-element
18151// word-sized array or a 2*XLEN scalar (depending on alignment).
18152// * The frontend can determine whether a struct is returned by reference or
18153// not based on its size and fields. If it will be returned by reference, the
18154// frontend must modify the prototype so a pointer with the sret annotation is
18155// passed as the first argument. This is not necessary for large scalar
18156// returns.
18157// * Struct return values and varargs should be coerced to structs containing
18158// register-size fields in the same situations they would be for fixed
18159// arguments.
18160
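// For illustration, a sketch of how the rules above play out (not part of the
// lowering code; the type and callee below are hypothetical): with a
// hard-float ABI and two FPRs still free, a small struct of two floats may be
// split and passed as two separate FP arguments; once FPRs or GPRs run out it
// falls back to the integer rules (coercion to a 2*XLEN scalar or two-element
// array, or byval if it is larger than 2*XLEN).
namespace {
struct TwoFloatsExample {
  float A;
  float B;
};
// Under ILP32D/LP64D a frontend may lower this call so A and B arrive in fa0
// and fa1 rather than through memory.
void takesTwoFloats(TwoFloatsExample TF);
} // namespace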
18161static const MCPhysReg ArgFPR16s[] = {
18162 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18163 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18164};
18165static const MCPhysReg ArgFPR32s[] = {
18166 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18167 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18168};
18169static const MCPhysReg ArgFPR64s[] = {
18170 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18171 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18172};
18173// This is an interim calling convention and it may be changed in the future.
18174static const MCPhysReg ArgVRs[] = {
18175 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18176 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18177 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18178static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18179 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18180 RISCV::V20M2, RISCV::V22M2};
18181static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18182 RISCV::V20M4};
18183static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18184
18185 static ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI) {
18186   // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18187 // the ILP32E ABI.
18188 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18189 RISCV::X13, RISCV::X14, RISCV::X15,
18190 RISCV::X16, RISCV::X17};
18191 // The GPRs used for passing arguments in the ILP32E/ILP64E ABI.
18192 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18193 RISCV::X13, RISCV::X14, RISCV::X15};
18194
18195 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18196 return ArrayRef(ArgEGPRs);
18197
18198 return ArrayRef(ArgIGPRs);
18199}
18200
18201 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18202   // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
18203 // for save-restore libcall, so we don't use them.
18204 static const MCPhysReg FastCCIGPRs[] = {
18205 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18206 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18207 RISCV::X29, RISCV::X30, RISCV::X31};
18208
18209 // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E.
18210 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18211 RISCV::X13, RISCV::X14, RISCV::X15,
18212 RISCV::X7};
18213
18214 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18215 return ArrayRef(FastCCEGPRs);
18216
18217 return ArrayRef(FastCCIGPRs);
18218}
18219
18220// Pass a 2*XLEN argument that has been split into two XLEN values through
18221// registers or the stack as necessary.
18222static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18223 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18224 MVT ValVT2, MVT LocVT2,
18225 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18226 unsigned XLenInBytes = XLen / 8;
18227 const RISCVSubtarget &STI =
18228       State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18229   ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(STI.getTargetABI());
18230
18231 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18232 // At least one half can be passed via register.
18233 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18234 VA1.getLocVT(), CCValAssign::Full));
18235 } else {
18236 // Both halves must be passed on the stack, with proper alignment.
18237 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18238 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18239 Align StackAlign(XLenInBytes);
18240 if (!EABI || XLen != 32)
18241 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18242 State.addLoc(
18243         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18244                             State.AllocateStack(XLenInBytes, StackAlign),
18245 VA1.getLocVT(), CCValAssign::Full));
18246     State.addLoc(CCValAssign::getMem(
18247         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18248 LocVT2, CCValAssign::Full));
18249 return false;
18250 }
18251
18252 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18253 // The second half can also be passed via register.
18254 State.addLoc(
18255 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18256 } else {
18257 // The second half is passed via the stack, without additional alignment.
18258     State.addLoc(CCValAssign::getMem(
18259         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18260 LocVT2, CCValAssign::Full));
18261 }
18262
18263 return false;
18264}
18265
18266// Implements the RISC-V calling convention. Returns true upon failure.
18267bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18268 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18269 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18270 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18271 RVVArgDispatcher &RVVDispatcher) {
18272 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18273 assert(XLen == 32 || XLen == 64);
18274 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18275
18276 // Static chain parameter must not be passed in normal argument registers,
18277 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18278 if (ArgFlags.isNest()) {
18279 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18280 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18281 return false;
18282 }
18283 }
18284
18285 // Any return value split in to more than two values can't be returned
18286 // directly. Vectors are returned via the available vector registers.
18287 if (!LocVT.isVector() && IsRet && ValNo > 1)
18288 return true;
18289
18290 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18291 // variadic argument, or if no F16/F32 argument registers are available.
18292 bool UseGPRForF16_F32 = true;
18293 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18294 // variadic argument, or if no F64 argument registers are available.
18295 bool UseGPRForF64 = true;
18296
18297 switch (ABI) {
18298 default:
18299 llvm_unreachable("Unexpected ABI");
18300   case RISCVABI::ABI_ILP32:
18301   case RISCVABI::ABI_ILP32E:
18302   case RISCVABI::ABI_LP64:
18303   case RISCVABI::ABI_LP64E:
18304     break;
18305   case RISCVABI::ABI_ILP32F:
18306   case RISCVABI::ABI_LP64F:
18307     UseGPRForF16_F32 = !IsFixed;
18308     break;
18309   case RISCVABI::ABI_ILP32D:
18310   case RISCVABI::ABI_LP64D:
18311     UseGPRForF16_F32 = !IsFixed;
18312 UseGPRForF64 = !IsFixed;
18313 break;
18314 }
18315
18316 // FPR16, FPR32, and FPR64 alias each other.
18317 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18318 UseGPRForF16_F32 = true;
18319 UseGPRForF64 = true;
18320 }
18321
18322 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18323 // similar local variables rather than directly checking against the target
18324 // ABI.
18325
18326 if (UseGPRForF16_F32 &&
18327 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18328 LocVT = XLenVT;
18329 LocInfo = CCValAssign::BCvt;
18330 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18331 LocVT = MVT::i64;
18332 LocInfo = CCValAssign::BCvt;
18333 }
18334
18335   ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(ABI);
18336
18337 // If this is a variadic argument, the RISC-V calling convention requires
18338 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18339 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18340 // be used regardless of whether the original argument was split during
18341 // legalisation or not. The argument will not be passed by registers if the
18342 // original type is larger than 2*XLEN, so the register alignment rule does
18343 // not apply.
18344 // TODO: To be compatible with GCC's behaviors, we don't align registers
18345 // currently if we are using ILP32E calling convention. This behavior may be
18346 // changed when RV32E/ILP32E is ratified.
18347 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18348 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18349 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18350 ABI != RISCVABI::ABI_ILP32E) {
18351 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18352 // Skip 'odd' register if necessary.
18353 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18354 State.AllocateReg(ArgGPRs);
18355 }
18356
18357 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18358 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18359 State.getPendingArgFlags();
18360
18361 assert(PendingLocs.size() == PendingArgFlags.size() &&
18362 "PendingLocs and PendingArgFlags out of sync");
18363
18364 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18365 // registers are exhausted.
18366 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18367 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18368 // Depending on available argument GPRS, f64 may be passed in a pair of
18369 // GPRs, split between a GPR and the stack, or passed completely on the
18370 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18371 // cases.
18372 Register Reg = State.AllocateReg(ArgGPRs);
18373 if (!Reg) {
18374 unsigned StackOffset = State.AllocateStack(8, Align(8));
18375 State.addLoc(
18376 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18377 return false;
18378 }
18379 LocVT = MVT::i32;
18380 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18381 Register HiReg = State.AllocateReg(ArgGPRs);
18382 if (HiReg) {
18383 State.addLoc(
18384 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18385 } else {
18386 unsigned StackOffset = State.AllocateStack(4, Align(4));
18387 State.addLoc(
18388 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18389 }
18390 return false;
18391 }
18392
18393 // Fixed-length vectors are located in the corresponding scalable-vector
18394 // container types.
18395 if (ValVT.isFixedLengthVector())
18396 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18397
18398 // Split arguments might be passed indirectly, so keep track of the pending
18399 // values. Split vectors are passed via a mix of registers and indirectly, so
18400 // treat them as we would any other argument.
18401 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18402 LocVT = XLenVT;
18403 LocInfo = CCValAssign::Indirect;
18404 PendingLocs.push_back(
18405 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18406 PendingArgFlags.push_back(ArgFlags);
18407 if (!ArgFlags.isSplitEnd()) {
18408 return false;
18409 }
18410 }
18411
18412 // If the split argument only had two elements, it should be passed directly
18413 // in registers or on the stack.
18414 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18415 PendingLocs.size() <= 2) {
18416 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18417 // Apply the normal calling convention rules to the first half of the
18418 // split argument.
18419 CCValAssign VA = PendingLocs[0];
18420 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18421 PendingLocs.clear();
18422 PendingArgFlags.clear();
18423 return CC_RISCVAssign2XLen(
18424 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18425 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18426 }
18427
18428 // Allocate to a register if possible, or else a stack slot.
18429 Register Reg;
18430 unsigned StoreSizeBytes = XLen / 8;
18431 Align StackAlign = Align(XLen / 8);
18432
18433 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18434 Reg = State.AllocateReg(ArgFPR16s);
18435 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18436 Reg = State.AllocateReg(ArgFPR32s);
18437 else if (ValVT == MVT::f64 && !UseGPRForF64)
18438 Reg = State.AllocateReg(ArgFPR64s);
18439 else if (ValVT.isVector()) {
18440 Reg = RVVDispatcher.getNextPhysReg();
18441 if (!Reg) {
18442 // For return values, the vector must be passed fully via registers or
18443 // via the stack.
18444 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18445 // but we're using all of them.
18446 if (IsRet)
18447 return true;
18448 // Try using a GPR to pass the address
18449 if ((Reg = State.AllocateReg(ArgGPRs))) {
18450 LocVT = XLenVT;
18451 LocInfo = CCValAssign::Indirect;
18452 } else if (ValVT.isScalableVector()) {
18453 LocVT = XLenVT;
18454 LocInfo = CCValAssign::Indirect;
18455 } else {
18456 // Pass fixed-length vectors on the stack.
18457 LocVT = ValVT;
18458 StoreSizeBytes = ValVT.getStoreSize();
18459 // Align vectors to their element sizes, being careful for vXi1
18460 // vectors.
18461 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18462 }
18463 }
18464 } else {
18465 Reg = State.AllocateReg(ArgGPRs);
18466 }
18467
18468 unsigned StackOffset =
18469 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18470
18471 // If we reach this point and PendingLocs is non-empty, we must be at the
18472 // end of a split argument that must be passed indirectly.
18473 if (!PendingLocs.empty()) {
18474 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18475 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18476
18477 for (auto &It : PendingLocs) {
18478 if (Reg)
18479 It.convertToReg(Reg);
18480 else
18481 It.convertToMem(StackOffset);
18482 State.addLoc(It);
18483 }
18484 PendingLocs.clear();
18485 PendingArgFlags.clear();
18486 return false;
18487 }
18488
18489 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18490 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18491 "Expected an XLenVT or vector types at this stage");
18492
18493 if (Reg) {
18494 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18495 return false;
18496 }
18497
18498 // When a scalar floating-point value is passed on the stack, no
18499 // bit-conversion is needed.
18500 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18501 assert(!ValVT.isVector());
18502 LocVT = ValVT;
18503 LocInfo = CCValAssign::Full;
18504 }
18505 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18506 return false;
18507}
18508
18509template <typename ArgTy>
18510static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18511 for (const auto &ArgIdx : enumerate(Args)) {
18512 MVT ArgVT = ArgIdx.value().VT;
18513 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18514 return ArgIdx.index();
18515 }
18516 return std::nullopt;
18517}
18518
18519void RISCVTargetLowering::analyzeInputArgs(
18520 MachineFunction &MF, CCState &CCInfo,
18521 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18522 RISCVCCAssignFn Fn) const {
18523 unsigned NumArgs = Ins.size();
18524   FunctionType *FType = MF.getFunction().getFunctionType();
18525
18526 RVVArgDispatcher Dispatcher;
18527 if (IsRet) {
18528 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18529 } else {
18530 SmallVector<Type *, 4> TypeList;
18531 for (const Argument &Arg : MF.getFunction().args())
18532 TypeList.push_back(Arg.getType());
18533 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18534 }
18535
18536 for (unsigned i = 0; i != NumArgs; ++i) {
18537 MVT ArgVT = Ins[i].VT;
18538 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18539
18540 Type *ArgTy = nullptr;
18541 if (IsRet)
18542 ArgTy = FType->getReturnType();
18543 else if (Ins[i].isOrigArg())
18544 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18545
18546     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18547 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18548 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18549 Dispatcher)) {
18550 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18551 << ArgVT << '\n');
18552 llvm_unreachable(nullptr);
18553 }
18554 }
18555}
18556
18557void RISCVTargetLowering::analyzeOutputArgs(
18558 MachineFunction &MF, CCState &CCInfo,
18559 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18560 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18561 unsigned NumArgs = Outs.size();
18562
18563 SmallVector<Type *, 4> TypeList;
18564 if (IsRet)
18565 TypeList.push_back(MF.getFunction().getReturnType());
18566 else if (CLI)
18567 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18568 TypeList.push_back(Arg.Ty);
18569 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18570
18571 for (unsigned i = 0; i != NumArgs; i++) {
18572 MVT ArgVT = Outs[i].VT;
18573 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18574 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18575
18576     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18577 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18578 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18579 Dispatcher)) {
18580 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18581 << ArgVT << "\n");
18582 llvm_unreachable(nullptr);
18583 }
18584 }
18585}
18586
18587// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18588// values.
18589 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18590                                    const CCValAssign &VA, const SDLoc &DL,
18591 const RISCVSubtarget &Subtarget) {
18592 switch (VA.getLocInfo()) {
18593 default:
18594 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18595 case CCValAssign::Full:
18596     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18597       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18598 break;
18599 case CCValAssign::BCvt:
18600 if (VA.getLocVT().isInteger() &&
18601 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18602 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18603 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18604 if (RV64LegalI32) {
18605 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18606 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18607 } else {
18608 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18609 }
18610 } else {
18611 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18612 }
18613 break;
18614 }
18615 return Val;
18616}
18617
18618// The caller is responsible for loading the full value if the argument is
18619// passed with CCValAssign::Indirect.
18620 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18621                                 const CCValAssign &VA, const SDLoc &DL,
18622 const ISD::InputArg &In,
18623 const RISCVTargetLowering &TLI) {
18624   MachineFunction &MF = DAG.getMachineFunction();
18625   MachineRegisterInfo &RegInfo = MF.getRegInfo();
18626   EVT LocVT = VA.getLocVT();
18627 SDValue Val;
18628 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18629 Register VReg = RegInfo.createVirtualRegister(RC);
18630 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18631 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18632
18633 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18634 if (In.isOrigArg()) {
18635 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18636 if (OrigArg->getType()->isIntegerTy()) {
18637 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18638 // An input zero extended from i31 can also be considered sign extended.
18639 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18640 (BitWidth < 32 && In.Flags.isZExt())) {
18641         RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18642         RVFI->addSExt32Register(VReg);
18643 }
18644 }
18645 }
18646
18647   if (VA.getLocInfo() == CCValAssign::Indirect)
18648     return Val;
18649
18650 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18651}
18652
18653 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18654                                    const CCValAssign &VA, const SDLoc &DL,
18655 const RISCVSubtarget &Subtarget) {
18656 EVT LocVT = VA.getLocVT();
18657
18658 switch (VA.getLocInfo()) {
18659 default:
18660 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18661 case CCValAssign::Full:
18662 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18663 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18664 break;
18665 case CCValAssign::BCvt:
18666 if (LocVT.isInteger() &&
18667 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18668 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18669 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18670 if (RV64LegalI32) {
18671 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18672 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18673 } else {
18674 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18675 }
18676 } else {
18677 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18678 }
18679 break;
18680 }
18681 return Val;
18682}
18683
18684// The caller is responsible for loading the full value if the argument is
18685// passed with CCValAssign::Indirect.
18686 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18687                                 const CCValAssign &VA, const SDLoc &DL) {
18688   MachineFunction &MF = DAG.getMachineFunction();
18689   MachineFrameInfo &MFI = MF.getFrameInfo();
18690 EVT LocVT = VA.getLocVT();
18691 EVT ValVT = VA.getValVT();
18692   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
18693   if (ValVT.isScalableVector()) {
18694 // When the value is a scalable vector, we save the pointer which points to
18695 // the scalable vector value in the stack. The ValVT will be the pointer
18696 // type, instead of the scalable vector type.
18697 ValVT = LocVT;
18698 }
18699 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18700 /*IsImmutable=*/true);
18701 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18702 SDValue Val;
18703
18704 ISD::LoadExtType ExtType;
18705 switch (VA.getLocInfo()) {
18706 default:
18707 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18708 case CCValAssign::Full:
18710 case CCValAssign::BCvt:
18711 ExtType = ISD::NON_EXTLOAD;
18712 break;
18713 }
18714 Val = DAG.getExtLoad(
18715 ExtType, DL, LocVT, Chain, FIN,
18717 return Val;
18718}
18719
18721 const CCValAssign &VA,
18722 const CCValAssign &HiVA,
18723 const SDLoc &DL) {
18724 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18725 "Unexpected VA");
18727 MachineFrameInfo &MFI = MF.getFrameInfo();
18729
18730 assert(VA.isRegLoc() && "Expected register VA assignment");
18731
18732 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18733 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18734 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18735 SDValue Hi;
18736 if (HiVA.isMemLoc()) {
18737 // Second half of f64 is passed on the stack.
18738 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18739 /*IsImmutable=*/true);
18740 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18741 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18743 } else {
18744 // Second half of f64 is passed in another GPR.
18745 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18746 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18747 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18748 }
18749 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18750}
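// Illustration: with an RV32 ABI that passes f64 in integer registers (e.g.
// ilp32 or ilp32f while the D extension is available), the low 32 bits of a
// double argument typically arrive in one argument GPR and the high 32 bits
// in the next one; if only a single argument GPR remains, the high half is
// passed on the stack instead, which is the HiVA.isMemLoc() case above. The
// two halves are then recombined into an FPR via RISCVISD::BuildPairF64.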
18751
18752// FastCC gives less than a 1% performance improvement for some particular
18753// benchmarks, but it may theoretically benefit some cases.
18755 unsigned ValNo, MVT ValVT, MVT LocVT,
18756 CCValAssign::LocInfo LocInfo,
18757 ISD::ArgFlagsTy ArgFlags, CCState &State,
18758 bool IsFixed, bool IsRet, Type *OrigTy,
18759 const RISCVTargetLowering &TLI,
18760 RVVArgDispatcher &RVVDispatcher) {
18761 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18762 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18763 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18764 return false;
18765 }
18766 }
18767
18768 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18769
18770 if (LocVT == MVT::f16 &&
18771 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18772 static const MCPhysReg FPR16List[] = {
18773 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18774 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18775 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18776 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18777 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18778 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18779 return false;
18780 }
18781 }
18782
18783 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18784 static const MCPhysReg FPR32List[] = {
18785 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18786 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18787 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18788 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18789 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18790 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18791 return false;
18792 }
18793 }
18794
18795 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18796 static const MCPhysReg FPR64List[] = {
18797 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18798 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18799 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18800 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18801 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18802 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18803 return false;
18804 }
18805 }
18806
18807 // Check if there is an available GPR before hitting the stack.
18808 if ((LocVT == MVT::f16 &&
18809 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18810 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18811 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18812 Subtarget.hasStdExtZdinx())) {
18813 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18814 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18815 return false;
18816 }
18817 }
18818
18819 if (LocVT == MVT::f16) {
18820 unsigned Offset2 = State.AllocateStack(2, Align(2));
18821 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
18822 return false;
18823 }
18824
18825 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18826 unsigned Offset4 = State.AllocateStack(4, Align(4));
18827 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
18828 return false;
18829 }
18830
18831 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18832 unsigned Offset5 = State.AllocateStack(8, Align(8));
18833 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
18834 return false;
18835 }
18836
18837 if (LocVT.isVector()) {
18838 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18839 if (AllocatedVReg) {
18840 // Fixed-length vectors are passed in their corresponding scalable-vector
18841 // container types.
18842 if (ValVT.isFixedLengthVector())
18843 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18844 State.addLoc(
18845 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
18846 } else {
18847 // Try to pass the address via a "fast" GPR.
18848 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18849 LocInfo = CCValAssign::Indirect;
18850 LocVT = TLI.getSubtarget().getXLenVT();
18851 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
18852 } else if (ValVT.isFixedLengthVector()) {
18853 auto StackAlign =
18855 unsigned StackOffset =
18856 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18857 State.addLoc(
18858 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18859 } else {
18860 // Can't pass scalable vectors on the stack.
18861 return true;
18862 }
18863 }
18864
18865 return false;
18866 }
18867
18868 return true; // CC didn't match.
18869}
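// Summary of the FastCC allocation order implemented above: integer values
// take a register from the FastCC GPR list (getFastCCArgGPRs) first;
// f16/f32/f64 values use an extended FPR list (the argument FPRs fa0-fa7
// followed by the temporaries ft0-ft7 and ft8-ft11) when the matching
// Zfh/Zfhmin, F, or D extension is present, fall back to the same GPR list
// for the Zhinx/Zfinx/Zdinx variants, and finally spill to the stack. Vector
// values are assigned by RVVDispatcher or passed indirectly through a GPR;
// fixed-length vectors may also be placed on the stack, while scalable
// vectors cannot be.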
18870
18871bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18872 CCValAssign::LocInfo LocInfo,
18873 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18874 if (ArgFlags.isNest()) {
18876 "Attribute 'nest' is not supported in GHC calling convention");
18877 }
18878
18879 static const MCPhysReg GPRList[] = {
18880 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18881 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18882
18883 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18884 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18885 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18886 if (unsigned Reg = State.AllocateReg(GPRList)) {
18887 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18888 return false;
18889 }
18890 }
18891
18892 const RISCVSubtarget &Subtarget =
18894
18895 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18896 // Pass in STG registers: F1, ..., F6
18897 // fs0 ... fs5
18898 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18899 RISCV::F18_F, RISCV::F19_F,
18900 RISCV::F20_F, RISCV::F21_F};
18901 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18902 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18903 return false;
18904 }
18905 }
18906
18907 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18908 // Pass in STG registers: D1, ..., D6
18909 // fs6 ... fs11
18910 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18911 RISCV::F24_D, RISCV::F25_D,
18912 RISCV::F26_D, RISCV::F27_D};
18913 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18914 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18915 return false;
18916 }
18917 }
18918
18919 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18920 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18921 Subtarget.is64Bit())) {
18922 if (unsigned Reg = State.AllocateReg(GPRList)) {
18923 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18924 return false;
18925 }
18926 }
18927
18928 report_fatal_error("No registers left in GHC calling convention");
18929 return true;
18930}
18931
18932// Transform physical registers into virtual registers.
18934 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18935 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18936 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18937
18939
18940 switch (CallConv) {
18941 default:
18942 report_fatal_error("Unsupported calling convention");
18943 case CallingConv::C:
18944 case CallingConv::Fast:
18946 case CallingConv::GRAAL:
18948 break;
18949 case CallingConv::GHC:
18950 if (Subtarget.hasStdExtE())
18951 report_fatal_error("GHC calling convention is not supported on RVE!");
18952 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18953 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
18954 "(Zdinx/D) instruction set extensions");
18955 }
18956
18957 const Function &Func = MF.getFunction();
18958 if (Func.hasFnAttribute("interrupt")) {
18959 if (!Func.arg_empty())
18961 "Functions with the interrupt attribute cannot have arguments!");
18962
18963 StringRef Kind =
18964 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18965
18966 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18968 "Function interrupt attribute argument not supported!");
18969 }
18970
18971 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18972 MVT XLenVT = Subtarget.getXLenVT();
18973 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18974 // Used with varargs to accumulate store chains.
18975 std::vector<SDValue> OutChains;
18976
18977 // Assign locations to all of the incoming arguments.
18979 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18980
18981 if (CallConv == CallingConv::GHC)
18983 else
18984 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18986 : RISCV::CC_RISCV);
18987
18988 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18989 CCValAssign &VA = ArgLocs[i];
18990 SDValue ArgValue;
18991 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18992 // case.
18993 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18994 assert(VA.needsCustom());
18995 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
18996 } else if (VA.isRegLoc())
18997 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
18998 else
18999 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19000
19001 if (VA.getLocInfo() == CCValAssign::Indirect) {
19002 // If the original argument was split and passed by reference (e.g. i128
19003 // on RV32), we need to load all parts of it here (using the same
19004 // address). Vectors may be partly split to registers and partly to the
19005 // stack, in which case the base address is partly offset and subsequent
19006 // stores are relative to that.
19007 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19009 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19010 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19011 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19012 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19013 CCValAssign &PartVA = ArgLocs[i + 1];
19014 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19015 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19016 if (PartVA.getValVT().isScalableVector())
19017 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19018 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19019 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19021 ++i;
19022 ++InsIdx;
19023 }
19024 continue;
19025 }
19026 InVals.push_back(ArgValue);
19027 }
19028
19029 if (any_of(ArgLocs,
19030 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19031 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19032
19033 if (IsVarArg) {
19034 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19035 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19036 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19037 MachineFrameInfo &MFI = MF.getFrameInfo();
19038 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19040
19041 // Size of the vararg save area. For now, the varargs save area is either
19042 // zero or large enough to hold a0-a7.
19043 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19044 int FI;
19045
19046 // If all registers are allocated, then all varargs must be passed on the
19047 // stack and we don't need to save any argregs.
19048 if (VarArgsSaveSize == 0) {
19049 int VaArgOffset = CCInfo.getStackSize();
19050 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19051 } else {
19052 int VaArgOffset = -VarArgsSaveSize;
19053 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19054
19055 // If saving an odd number of registers, create an extra stack slot to
19056 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19057 // that offsets to even-numbered registers remain 2*XLEN-aligned.
19058 if (Idx % 2) {
19060 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19061 VarArgsSaveSize += XLenInBytes;
19062 }
19063
19064 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19065
19066 // Copy the integer registers that may have been used for passing varargs
19067 // to the vararg save area.
19068 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19069 const Register Reg = RegInfo.createVirtualRegister(RC);
19070 RegInfo.addLiveIn(ArgRegs[I], Reg);
19071 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19072 SDValue Store = DAG.getStore(
19073 Chain, DL, ArgValue, FIN,
19074 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19075 OutChains.push_back(Store);
19076 FIN =
19077 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19078 }
19079 }
19080
19081 // Record the frame index of the first variable argument,
19082 // which is needed when lowering VASTART.
19083 RVFI->setVarArgsFrameIndex(FI);
19084 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19085 }
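// Illustration of the vararg save area created above (XLEN=32 with a0/a1
// consumed by named arguments): a2..a7 are spilled to fixed stack objects at
// offsets -24..-4 relative to the incoming stack pointer, and any varargs
// that overflow the registers are passed on the stack starting at offset 0
// in this example, so va_arg can walk the register-save area and the
// caller-provided stack area contiguously. When an odd number of registers
// is saved, the extra padding slot keeps the area 2*XLEN aligned.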
19086
19087 // All stores are grouped into one token factor node so that the sizes of
19088 // Ins and InVals still match. This only happens for vararg functions.
19089 if (!OutChains.empty()) {
19090 OutChains.push_back(Chain);
19091 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19092 }
19093
19094 return Chain;
19095}
19096
19097/// isEligibleForTailCallOptimization - Check whether the call is eligible
19098/// for tail call optimization.
19099/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19100bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19101 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19102 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19103
19104 auto CalleeCC = CLI.CallConv;
19105 auto &Outs = CLI.Outs;
19106 auto &Caller = MF.getFunction();
19107 auto CallerCC = Caller.getCallingConv();
19108
19109 // Exception-handling functions need a special set of instructions to
19110 // indicate a return to the hardware. Tail-calling another function would
19111 // probably break this.
19112 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19113 // should be expanded as new function attributes are introduced.
19114 if (Caller.hasFnAttribute("interrupt"))
19115 return false;
19116
19117 // Do not tail call opt if the stack is used to pass parameters.
19118 if (CCInfo.getStackSize() != 0)
19119 return false;
19120
19121 // Do not tail call opt if any parameters need to be passed indirectly.
19122 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19123 // passed indirectly: the address of the value is passed in a register or,
19124 // if no register is available, on the stack. Passing indirectly usually
19125 // also requires allocating stack space to store the value, so the
19126 // CCInfo.getStackSize() != 0 check above is not sufficient on its own; we
19127 // must also check whether any CCValAssign in ArgLocs is passed as
19128 // CCValAssign::Indirect.
19129 for (auto &VA : ArgLocs)
19130 if (VA.getLocInfo() == CCValAssign::Indirect)
19131 return false;
19132
19133 // Do not tail call opt if either caller or callee uses struct return
19134 // semantics.
19135 auto IsCallerStructRet = Caller.hasStructRetAttr();
19136 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19137 if (IsCallerStructRet || IsCalleeStructRet)
19138 return false;
19139
19140 // The callee has to preserve all registers the caller needs to preserve.
19141 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19142 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19143 if (CalleeCC != CallerCC) {
19144 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19145 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19146 return false;
19147 }
19148
19149 // Byval parameters hand the function a pointer directly into the stack area
19150 // we want to reuse during a tail call. Working around this *is* possible
19151 // but less efficient and uglier in LowerCall.
19152 for (auto &Arg : Outs)
19153 if (Arg.Flags.isByVal())
19154 return false;
19155
19156 return true;
19157}
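// In short, a call is considered tail-call eligible only when the caller is
// not an interrupt handler, no outgoing argument uses the stack or is passed
// CCValAssign::Indirect, neither caller nor callee uses struct-return
// semantics, the callee preserves at least the registers the caller needs
// preserved, and no argument is passed byval, as checked above.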
19158
19160 return DAG.getDataLayout().getPrefTypeAlign(
19161 VT.getTypeForEVT(*DAG.getContext()));
19162}
19163
19164// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19165// and output parameter nodes.
19167 SmallVectorImpl<SDValue> &InVals) const {
19168 SelectionDAG &DAG = CLI.DAG;
19169 SDLoc &DL = CLI.DL;
19171 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19173 SDValue Chain = CLI.Chain;
19174 SDValue Callee = CLI.Callee;
19175 bool &IsTailCall = CLI.IsTailCall;
19176 CallingConv::ID CallConv = CLI.CallConv;
19177 bool IsVarArg = CLI.IsVarArg;
19178 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19179 MVT XLenVT = Subtarget.getXLenVT();
19180
19182
19183 // Analyze the operands of the call, assigning locations to each operand.
19185 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19186
19187 if (CallConv == CallingConv::GHC) {
19188 if (Subtarget.hasStdExtE())
19189 report_fatal_error("GHC calling convention is not supported on RVE!");
19191 } else
19192 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19194 : RISCV::CC_RISCV);
19195
19196 // Check if it's really possible to do a tail call.
19197 if (IsTailCall)
19198 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19199
19200 if (IsTailCall)
19201 ++NumTailCalls;
19202 else if (CLI.CB && CLI.CB->isMustTailCall())
19203 report_fatal_error("failed to perform tail call elimination on a call "
19204 "site marked musttail");
19205
19206 // Get a count of how many bytes are to be pushed on the stack.
19207 unsigned NumBytes = ArgCCInfo.getStackSize();
19208
19209 // Create local copies for byval args
19210 SmallVector<SDValue, 8> ByValArgs;
19211 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19212 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19213 if (!Flags.isByVal())
19214 continue;
19215
19216 SDValue Arg = OutVals[i];
19217 unsigned Size = Flags.getByValSize();
19218 Align Alignment = Flags.getNonZeroByValAlign();
19219
19220 int FI =
19221 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19222 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19223 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19224
19225 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19226 /*IsVolatile=*/false,
19227 /*AlwaysInline=*/false, IsTailCall,
19229 ByValArgs.push_back(FIPtr);
19230 }
19231
19232 if (!IsTailCall)
19233 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19234
19235 // Copy argument values to their designated locations.
19237 SmallVector<SDValue, 8> MemOpChains;
19238 SDValue StackPtr;
19239 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19240 ++i, ++OutIdx) {
19241 CCValAssign &VA = ArgLocs[i];
19242 SDValue ArgValue = OutVals[OutIdx];
19243 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19244
19245 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19246 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19247 assert(VA.isRegLoc() && "Expected register VA assignment");
19248 assert(VA.needsCustom());
19249 SDValue SplitF64 = DAG.getNode(
19250 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19251 SDValue Lo = SplitF64.getValue(0);
19252 SDValue Hi = SplitF64.getValue(1);
19253
19254 Register RegLo = VA.getLocReg();
19255 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19256
19257 // Get the CCValAssign for the Hi part.
19258 CCValAssign &HiVA = ArgLocs[++i];
19259
19260 if (HiVA.isMemLoc()) {
19261 // Second half of f64 is passed on the stack.
19262 if (!StackPtr.getNode())
19263 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19265 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19266 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19267 // Emit the store.
19268 MemOpChains.push_back(
19269 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19270 } else {
19271 // Second half of f64 is passed in another GPR.
19272 Register RegHigh = HiVA.getLocReg();
19273 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19274 }
19275 continue;
19276 }
19277
19278 // Promote the value if needed.
19279 // For now, only handle fully promoted and indirect arguments.
19280 if (VA.getLocInfo() == CCValAssign::Indirect) {
19281 // Store the argument in a stack slot and pass its address.
19282 Align StackAlign =
19283 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19284 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19285 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19286 // If the original argument was split (e.g. i128), we need
19287 // to store the required parts of it here (and pass just one address).
19288 // Vectors may be partly split to registers and partly to the stack, in
19289 // which case the base address is partly offset and subsequent stores are
19290 // relative to that.
19291 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19292 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19293 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19294 // Calculate the total size to store. The only way to know what we are
19295 // actually storing is to walk over all of the parts and accumulate their
19296 // sizes and alignments.
19298 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19299 SDValue PartValue = OutVals[OutIdx + 1];
19300 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19301 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19302 EVT PartVT = PartValue.getValueType();
19303 if (PartVT.isScalableVector())
19304 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19305 StoredSize += PartVT.getStoreSize();
19306 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19307 Parts.push_back(std::make_pair(PartValue, Offset));
19308 ++i;
19309 ++OutIdx;
19310 }
19311 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19312 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19313 MemOpChains.push_back(
19314 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19316 for (const auto &Part : Parts) {
19317 SDValue PartValue = Part.first;
19318 SDValue PartOffset = Part.second;
19320 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19321 MemOpChains.push_back(
19322 DAG.getStore(Chain, DL, PartValue, Address,
19324 }
19325 ArgValue = SpillSlot;
19326 } else {
19327 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19328 }
19329
19330 // Use local copy if it is a byval arg.
19331 if (Flags.isByVal())
19332 ArgValue = ByValArgs[j++];
19333
19334 if (VA.isRegLoc()) {
19335 // Queue up the argument copies and emit them at the end.
19336 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19337 } else {
19338 assert(VA.isMemLoc() && "Argument not register or memory");
19339 assert(!IsTailCall && "Tail call not allowed if stack is used "
19340 "for passing parameters");
19341
19342 // Work out the address of the stack slot.
19343 if (!StackPtr.getNode())
19344 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19346 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19348
19349 // Emit the store.
19350 MemOpChains.push_back(
19351 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19352 }
19353 }
19354
19355 // Join the stores, which are independent of one another.
19356 if (!MemOpChains.empty())
19357 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19358
19359 SDValue Glue;
19360
19361 // Build a sequence of copy-to-reg nodes, chained and glued together.
19362 for (auto &Reg : RegsToPass) {
19363 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19364 Glue = Chain.getValue(1);
19365 }
19366
19367 // Validate that none of the argument registers have been marked as
19368 // reserved; if any have been, report an error. Do the same for the return
19369 // address register if this is not a tail call.
19370 validateCCReservedRegs(RegsToPass, MF);
19371 if (!IsTailCall &&
19374 MF.getFunction(),
19375 "Return address register required, but has been reserved."});
19376
19377 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19378 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19379 // split it, and so that the direct call can be matched by PseudoCALL.
19380 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19381 const GlobalValue *GV = S->getGlobal();
19382 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19383 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19384 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19385 }
19386
19387 // The first call operand is the chain and the second is the target address.
19389 Ops.push_back(Chain);
19390 Ops.push_back(Callee);
19391
19392 // Add argument registers to the end of the list so that they are
19393 // known live into the call.
19394 for (auto &Reg : RegsToPass)
19395 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19396
19397 if (!IsTailCall) {
19398 // Add a register mask operand representing the call-preserved registers.
19399 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19400 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19401 assert(Mask && "Missing call preserved mask for calling convention");
19402 Ops.push_back(DAG.getRegisterMask(Mask));
19403 }
19404
19405 // Glue the call to the argument copies, if any.
19406 if (Glue.getNode())
19407 Ops.push_back(Glue);
19408
19409 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19410 "Unexpected CFI type for a direct call");
19411
19412 // Emit the call.
19413 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19414
19415 if (IsTailCall) {
19417 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19418 if (CLI.CFIType)
19419 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19420 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19421 return Ret;
19422 }
19423
19424 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19425 if (CLI.CFIType)
19426 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19427 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19428 Glue = Chain.getValue(1);
19429
19430 // Mark the end of the call, which is glued to the call itself.
19431 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19432 Glue = Chain.getValue(1);
19433
19434 // Assign locations to each value returned by this call.
19436 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19437 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19438
19439 // Copy all of the result registers out of their specified physreg.
19440 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19441 auto &VA = RVLocs[i];
19442 // Copy the value out
19443 SDValue RetValue =
19444 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19445 // Glue the RetValue to the end of the call sequence
19446 Chain = RetValue.getValue(1);
19447 Glue = RetValue.getValue(2);
19448
19449 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19450 assert(VA.needsCustom());
19451 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19452 MVT::i32, Glue);
19453 Chain = RetValue2.getValue(1);
19454 Glue = RetValue2.getValue(2);
19455 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19456 RetValue2);
19457 }
19458
19459 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19460
19461 InVals.push_back(RetValue);
19462 }
19463
19464 return Chain;
19465}
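// Overall shape of the lowering above: byval arguments are first copied to
// local stack objects; a CALLSEQ_START/CALLSEQ_END pair brackets the call
// (omitted for tail calls); register arguments become glued CopyToReg nodes
// while stack arguments are token-factored stores relative to sp (X2); the
// callee is wrapped as a target global/external symbol with MO_CALL so that
// PseudoCALL can match it; the call itself is a RISCVISD::CALL node (or
// RISCVISD::TAIL for tail calls) whose operands include the argument
// registers and, for non-tail calls, the call-preserved register mask; and
// finally the results are copied out of their physical registers and
// converted back from their location types.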
19466
19468 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19469 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19471 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19472
19473 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19474
19475 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19476 MVT VT = Outs[i].VT;
19477 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19478 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19479 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19480 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19481 nullptr, *this, Dispatcher))
19482 return false;
19483 }
19484 return true;
19485}
19486
19487SDValue
19489 bool IsVarArg,
19491 const SmallVectorImpl<SDValue> &OutVals,
19492 const SDLoc &DL, SelectionDAG &DAG) const {
19494 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19495
19496 // Stores the assignment of the return value to a location.
19498
19499 // Info about the registers and stack slot.
19500 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19501 *DAG.getContext());
19502
19503 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19504 nullptr, RISCV::CC_RISCV);
19505
19506 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19507 report_fatal_error("GHC functions return void only");
19508
19509 SDValue Glue;
19510 SmallVector<SDValue, 4> RetOps(1, Chain);
19511
19512 // Copy the result values into the output registers.
19513 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19514 SDValue Val = OutVals[OutIdx];
19515 CCValAssign &VA = RVLocs[i];
19516 assert(VA.isRegLoc() && "Can only return in registers!");
19517
19518 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19519 // Handle returning f64 on RV32D with a soft float ABI.
19520 assert(VA.isRegLoc() && "Expected return via registers");
19521 assert(VA.needsCustom());
19522 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19523 DAG.getVTList(MVT::i32, MVT::i32), Val);
19524 SDValue Lo = SplitF64.getValue(0);
19525 SDValue Hi = SplitF64.getValue(1);
19526 Register RegLo = VA.getLocReg();
19527 Register RegHi = RVLocs[++i].getLocReg();
19528
19529 if (STI.isRegisterReservedByUser(RegLo) ||
19530 STI.isRegisterReservedByUser(RegHi))
19532 MF.getFunction(),
19533 "Return value register required, but has been reserved."});
19534
19535 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19536 Glue = Chain.getValue(1);
19537 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19538 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19539 Glue = Chain.getValue(1);
19540 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19541 } else {
19542 // Handle a 'normal' return.
19543 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19544 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19545
19546 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19548 MF.getFunction(),
19549 "Return value register required, but has been reserved."});
19550
19551 // Guarantee that all emitted copies are stuck together.
19552 Glue = Chain.getValue(1);
19553 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19554 }
19555 }
19556
19557 RetOps[0] = Chain; // Update chain.
19558
19559 // Add the glue node if we have it.
19560 if (Glue.getNode()) {
19561 RetOps.push_back(Glue);
19562 }
19563
19564 if (any_of(RVLocs,
19565 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19566 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19567
19568 unsigned RetOpc = RISCVISD::RET_GLUE;
19569 // Interrupt service routines use different return instructions.
19570 const Function &Func = DAG.getMachineFunction().getFunction();
19571 if (Func.hasFnAttribute("interrupt")) {
19572 if (!Func.getReturnType()->isVoidTy())
19574 "Functions with the interrupt attribute must have void return type!");
19575
19577 StringRef Kind =
19578 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19579
19580 if (Kind == "supervisor")
19581 RetOpc = RISCVISD::SRET_GLUE;
19582 else
19583 RetOpc = RISCVISD::MRET_GLUE;
19584 }
19585
19586 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19587}
19588
19589void RISCVTargetLowering::validateCCReservedRegs(
19590 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19591 MachineFunction &MF) const {
19592 const Function &F = MF.getFunction();
19593 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19594
19595 if (llvm::any_of(Regs, [&STI](auto Reg) {
19596 return STI.isRegisterReservedByUser(Reg.first);
19597 }))
19598 F.getContext().diagnose(DiagnosticInfoUnsupported{
19599 F, "Argument register required, but has been reserved."});
19600}
19601
19602// Check if the result of the node is only used as a return value, as
19603// otherwise we can't perform a tail-call.
19605 if (N->getNumValues() != 1)
19606 return false;
19607 if (!N->hasNUsesOfValue(1, 0))
19608 return false;
19609
19610 SDNode *Copy = *N->use_begin();
19611
19612 if (Copy->getOpcode() == ISD::BITCAST) {
19613 return isUsedByReturnOnly(Copy, Chain);
19614 }
19615
19616 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19617 // with soft float ABIs.
19618 if (Copy->getOpcode() != ISD::CopyToReg) {
19619 return false;
19620 }
19621
19622 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19623 // isn't safe to perform a tail call.
19624 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19625 return false;
19626
19627 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19628 bool HasRet = false;
19629 for (SDNode *Node : Copy->uses()) {
19630 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19631 return false;
19632 HasRet = true;
19633 }
19634 if (!HasRet)
19635 return false;
19636
19637 Chain = Copy->getOperand(0);
19638 return true;
19639}
19640
19642 return CI->isTailCall();
19643}
19644
19645const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19646#define NODE_NAME_CASE(NODE) \
19647 case RISCVISD::NODE: \
19648 return "RISCVISD::" #NODE;
19649 // clang-format off
19650 switch ((RISCVISD::NodeType)Opcode) {
19652 break;
19653 NODE_NAME_CASE(RET_GLUE)
19654 NODE_NAME_CASE(SRET_GLUE)
19655 NODE_NAME_CASE(MRET_GLUE)
19656 NODE_NAME_CASE(CALL)
19657 NODE_NAME_CASE(SELECT_CC)
19658 NODE_NAME_CASE(BR_CC)
19659 NODE_NAME_CASE(BuildPairF64)
19660 NODE_NAME_CASE(SplitF64)
19661 NODE_NAME_CASE(TAIL)
19662 NODE_NAME_CASE(ADD_LO)
19663 NODE_NAME_CASE(HI)
19664 NODE_NAME_CASE(LLA)
19665 NODE_NAME_CASE(ADD_TPREL)
19666 NODE_NAME_CASE(MULHSU)
19667 NODE_NAME_CASE(SHL_ADD)
19668 NODE_NAME_CASE(SLLW)
19669 NODE_NAME_CASE(SRAW)
19670 NODE_NAME_CASE(SRLW)
19671 NODE_NAME_CASE(DIVW)
19672 NODE_NAME_CASE(DIVUW)
19673 NODE_NAME_CASE(REMUW)
19674 NODE_NAME_CASE(ROLW)
19675 NODE_NAME_CASE(RORW)
19676 NODE_NAME_CASE(CLZW)
19677 NODE_NAME_CASE(CTZW)
19678 NODE_NAME_CASE(ABSW)
19679 NODE_NAME_CASE(FMV_H_X)
19680 NODE_NAME_CASE(FMV_X_ANYEXTH)
19681 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19682 NODE_NAME_CASE(FMV_W_X_RV64)
19683 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19684 NODE_NAME_CASE(FCVT_X)
19685 NODE_NAME_CASE(FCVT_XU)
19686 NODE_NAME_CASE(FCVT_W_RV64)
19687 NODE_NAME_CASE(FCVT_WU_RV64)
19688 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19689 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19690 NODE_NAME_CASE(FP_ROUND_BF16)
19691 NODE_NAME_CASE(FP_EXTEND_BF16)
19692 NODE_NAME_CASE(FROUND)
19693 NODE_NAME_CASE(FCLASS)
19694 NODE_NAME_CASE(FMAX)
19695 NODE_NAME_CASE(FMIN)
19696 NODE_NAME_CASE(READ_COUNTER_WIDE)
19697 NODE_NAME_CASE(BREV8)
19698 NODE_NAME_CASE(ORC_B)
19699 NODE_NAME_CASE(ZIP)
19700 NODE_NAME_CASE(UNZIP)
19701 NODE_NAME_CASE(CLMUL)
19702 NODE_NAME_CASE(CLMULH)
19703 NODE_NAME_CASE(CLMULR)
19704 NODE_NAME_CASE(MOPR)
19705 NODE_NAME_CASE(MOPRR)
19706 NODE_NAME_CASE(SHA256SIG0)
19707 NODE_NAME_CASE(SHA256SIG1)
19708 NODE_NAME_CASE(SHA256SUM0)
19709 NODE_NAME_CASE(SHA256SUM1)
19710 NODE_NAME_CASE(SM4KS)
19711 NODE_NAME_CASE(SM4ED)
19712 NODE_NAME_CASE(SM3P0)
19713 NODE_NAME_CASE(SM3P1)
19714 NODE_NAME_CASE(TH_LWD)
19715 NODE_NAME_CASE(TH_LWUD)
19716 NODE_NAME_CASE(TH_LDD)
19717 NODE_NAME_CASE(TH_SWD)
19718 NODE_NAME_CASE(TH_SDD)
19719 NODE_NAME_CASE(VMV_V_V_VL)
19720 NODE_NAME_CASE(VMV_V_X_VL)
19721 NODE_NAME_CASE(VFMV_V_F_VL)
19722 NODE_NAME_CASE(VMV_X_S)
19723 NODE_NAME_CASE(VMV_S_X_VL)
19724 NODE_NAME_CASE(VFMV_S_F_VL)
19725 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19726 NODE_NAME_CASE(READ_VLENB)
19727 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19728 NODE_NAME_CASE(VSLIDEUP_VL)
19729 NODE_NAME_CASE(VSLIDE1UP_VL)
19730 NODE_NAME_CASE(VSLIDEDOWN_VL)
19731 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19732 NODE_NAME_CASE(VFSLIDE1UP_VL)
19733 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19734 NODE_NAME_CASE(VID_VL)
19735 NODE_NAME_CASE(VFNCVT_ROD_VL)
19736 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19737 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19738 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19739 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19740 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19741 NODE_NAME_CASE(VECREDUCE_AND_VL)
19742 NODE_NAME_CASE(VECREDUCE_OR_VL)
19743 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19744 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19745 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19746 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19747 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19748 NODE_NAME_CASE(ADD_VL)
19749 NODE_NAME_CASE(AND_VL)
19750 NODE_NAME_CASE(MUL_VL)
19751 NODE_NAME_CASE(OR_VL)
19752 NODE_NAME_CASE(SDIV_VL)
19753 NODE_NAME_CASE(SHL_VL)
19754 NODE_NAME_CASE(SREM_VL)
19755 NODE_NAME_CASE(SRA_VL)
19756 NODE_NAME_CASE(SRL_VL)
19757 NODE_NAME_CASE(ROTL_VL)
19758 NODE_NAME_CASE(ROTR_VL)
19759 NODE_NAME_CASE(SUB_VL)
19760 NODE_NAME_CASE(UDIV_VL)
19761 NODE_NAME_CASE(UREM_VL)
19762 NODE_NAME_CASE(XOR_VL)
19763 NODE_NAME_CASE(AVGFLOORU_VL)
19764 NODE_NAME_CASE(AVGCEILU_VL)
19765 NODE_NAME_CASE(SADDSAT_VL)
19766 NODE_NAME_CASE(UADDSAT_VL)
19767 NODE_NAME_CASE(SSUBSAT_VL)
19768 NODE_NAME_CASE(USUBSAT_VL)
19769 NODE_NAME_CASE(FADD_VL)
19770 NODE_NAME_CASE(FSUB_VL)
19771 NODE_NAME_CASE(FMUL_VL)
19772 NODE_NAME_CASE(FDIV_VL)
19773 NODE_NAME_CASE(FNEG_VL)
19774 NODE_NAME_CASE(FABS_VL)
19775 NODE_NAME_CASE(FSQRT_VL)
19776 NODE_NAME_CASE(FCLASS_VL)
19777 NODE_NAME_CASE(VFMADD_VL)
19778 NODE_NAME_CASE(VFNMADD_VL)
19779 NODE_NAME_CASE(VFMSUB_VL)
19780 NODE_NAME_CASE(VFNMSUB_VL)
19781 NODE_NAME_CASE(VFWMADD_VL)
19782 NODE_NAME_CASE(VFWNMADD_VL)
19783 NODE_NAME_CASE(VFWMSUB_VL)
19784 NODE_NAME_CASE(VFWNMSUB_VL)
19785 NODE_NAME_CASE(FCOPYSIGN_VL)
19786 NODE_NAME_CASE(SMIN_VL)
19787 NODE_NAME_CASE(SMAX_VL)
19788 NODE_NAME_CASE(UMIN_VL)
19789 NODE_NAME_CASE(UMAX_VL)
19790 NODE_NAME_CASE(BITREVERSE_VL)
19791 NODE_NAME_CASE(BSWAP_VL)
19792 NODE_NAME_CASE(CTLZ_VL)
19793 NODE_NAME_CASE(CTTZ_VL)
19794 NODE_NAME_CASE(CTPOP_VL)
19795 NODE_NAME_CASE(VFMIN_VL)
19796 NODE_NAME_CASE(VFMAX_VL)
19797 NODE_NAME_CASE(MULHS_VL)
19798 NODE_NAME_CASE(MULHU_VL)
19799 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19800 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19801 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19802 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19803 NODE_NAME_CASE(VFCVT_X_F_VL)
19804 NODE_NAME_CASE(VFCVT_XU_F_VL)
19805 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19806 NODE_NAME_CASE(SINT_TO_FP_VL)
19807 NODE_NAME_CASE(UINT_TO_FP_VL)
19808 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19809 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19810 NODE_NAME_CASE(FP_EXTEND_VL)
19811 NODE_NAME_CASE(FP_ROUND_VL)
19812 NODE_NAME_CASE(STRICT_FADD_VL)
19813 NODE_NAME_CASE(STRICT_FSUB_VL)
19814 NODE_NAME_CASE(STRICT_FMUL_VL)
19815 NODE_NAME_CASE(STRICT_FDIV_VL)
19816 NODE_NAME_CASE(STRICT_FSQRT_VL)
19817 NODE_NAME_CASE(STRICT_VFMADD_VL)
19818 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19819 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19820 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19821 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19822 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19823 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19824 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19825 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19826 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19827 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19828 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19829 NODE_NAME_CASE(STRICT_FSETCC_VL)
19830 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19831 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19832 NODE_NAME_CASE(VWMUL_VL)
19833 NODE_NAME_CASE(VWMULU_VL)
19834 NODE_NAME_CASE(VWMULSU_VL)
19835 NODE_NAME_CASE(VWADD_VL)
19836 NODE_NAME_CASE(VWADDU_VL)
19837 NODE_NAME_CASE(VWSUB_VL)
19838 NODE_NAME_CASE(VWSUBU_VL)
19839 NODE_NAME_CASE(VWADD_W_VL)
19840 NODE_NAME_CASE(VWADDU_W_VL)
19841 NODE_NAME_CASE(VWSUB_W_VL)
19842 NODE_NAME_CASE(VWSUBU_W_VL)
19843 NODE_NAME_CASE(VWSLL_VL)
19844 NODE_NAME_CASE(VFWMUL_VL)
19845 NODE_NAME_CASE(VFWADD_VL)
19846 NODE_NAME_CASE(VFWSUB_VL)
19847 NODE_NAME_CASE(VFWADD_W_VL)
19848 NODE_NAME_CASE(VFWSUB_W_VL)
19849 NODE_NAME_CASE(VWMACC_VL)
19850 NODE_NAME_CASE(VWMACCU_VL)
19851 NODE_NAME_CASE(VWMACCSU_VL)
19852 NODE_NAME_CASE(VNSRL_VL)
19853 NODE_NAME_CASE(SETCC_VL)
19854 NODE_NAME_CASE(VMERGE_VL)
19855 NODE_NAME_CASE(VMAND_VL)
19856 NODE_NAME_CASE(VMOR_VL)
19857 NODE_NAME_CASE(VMXOR_VL)
19858 NODE_NAME_CASE(VMCLR_VL)
19859 NODE_NAME_CASE(VMSET_VL)
19860 NODE_NAME_CASE(VRGATHER_VX_VL)
19861 NODE_NAME_CASE(VRGATHER_VV_VL)
19862 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19863 NODE_NAME_CASE(VSEXT_VL)
19864 NODE_NAME_CASE(VZEXT_VL)
19865 NODE_NAME_CASE(VCPOP_VL)
19866 NODE_NAME_CASE(VFIRST_VL)
19867 NODE_NAME_CASE(READ_CSR)
19868 NODE_NAME_CASE(WRITE_CSR)
19869 NODE_NAME_CASE(SWAP_CSR)
19870 NODE_NAME_CASE(CZERO_EQZ)
19871 NODE_NAME_CASE(CZERO_NEZ)
19872 NODE_NAME_CASE(SF_VC_XV_SE)
19873 NODE_NAME_CASE(SF_VC_IV_SE)
19874 NODE_NAME_CASE(SF_VC_VV_SE)
19875 NODE_NAME_CASE(SF_VC_FV_SE)
19876 NODE_NAME_CASE(SF_VC_XVV_SE)
19877 NODE_NAME_CASE(SF_VC_IVV_SE)
19878 NODE_NAME_CASE(SF_VC_VVV_SE)
19879 NODE_NAME_CASE(SF_VC_FVV_SE)
19880 NODE_NAME_CASE(SF_VC_XVW_SE)
19881 NODE_NAME_CASE(SF_VC_IVW_SE)
19882 NODE_NAME_CASE(SF_VC_VVW_SE)
19883 NODE_NAME_CASE(SF_VC_FVW_SE)
19884 NODE_NAME_CASE(SF_VC_V_X_SE)
19885 NODE_NAME_CASE(SF_VC_V_I_SE)
19886 NODE_NAME_CASE(SF_VC_V_XV_SE)
19887 NODE_NAME_CASE(SF_VC_V_IV_SE)
19888 NODE_NAME_CASE(SF_VC_V_VV_SE)
19889 NODE_NAME_CASE(SF_VC_V_FV_SE)
19890 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19891 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19892 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19893 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19894 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19895 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19896 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19897 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19898 }
19899 // clang-format on
19900 return nullptr;
19901#undef NODE_NAME_CASE
19902}
19903
19904/// getConstraintType - Given a constraint letter, return the type of
19905/// constraint it is for this target.
19908 if (Constraint.size() == 1) {
19909 switch (Constraint[0]) {
19910 default:
19911 break;
19912 case 'f':
19913 return C_RegisterClass;
19914 case 'I':
19915 case 'J':
19916 case 'K':
19917 return C_Immediate;
19918 case 'A':
19919 return C_Memory;
19920 case 's':
19921 case 'S': // A symbolic address
19922 return C_Other;
19923 }
19924 } else {
19925 if (Constraint == "vr" || Constraint == "vm")
19926 return C_RegisterClass;
19927 }
19928 return TargetLowering::getConstraintType(Constraint);
19929}
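// Example uses of these constraints in GNU extended inline assembly (the
// operand values here are illustrative only):
//   asm volatile("addi %0, %1, %2" : "=r"(out) : "r"(in), "I"(2047));
//   asm volatile("fadd.s %0, %1, %2" : "=f"(z) : "f"(x), "f"(y)); // needs F
// 'I' accepts a 12-bit signed immediate, 'J' the integer zero, 'K' a 5-bit
// unsigned immediate, 'A' an address held in a general-purpose register,
// 's'/'S' a symbolic address, and "vr"/"vm" select RVV vector and
// vector-mask registers, respectively.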
19930
19931std::pair<unsigned, const TargetRegisterClass *>
19933 StringRef Constraint,
19934 MVT VT) const {
19935 // First, see if this is a constraint that directly corresponds to a RISC-V
19936 // register class.
19937 if (Constraint.size() == 1) {
19938 switch (Constraint[0]) {
19939 case 'r':
19940 // TODO: Support fixed vectors up to XLen for P extension?
19941 if (VT.isVector())
19942 break;
19943 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19944 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19945 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19946 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19947 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19948 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19949 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19950 case 'f':
19951 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19952 return std::make_pair(0U, &RISCV::FPR16RegClass);
19953 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19954 return std::make_pair(0U, &RISCV::FPR32RegClass);
19955 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19956 return std::make_pair(0U, &RISCV::FPR64RegClass);
19957 break;
19958 default:
19959 break;
19960 }
19961 } else if (Constraint == "vr") {
19962 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19963 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19964 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19965 return std::make_pair(0U, RC);
19966 }
19967 } else if (Constraint == "vm") {
19968 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19969 return std::make_pair(0U, &RISCV::VMV0RegClass);
19970 }
19971
19972 // Clang will correctly decode the usage of register name aliases into their
19973 // official names. However, other frontends like `rustc` do not. This allows
19974 // users of these frontends to use the ABI names for registers in LLVM-style
19975 // register constraints.
19976 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19977 .Case("{zero}", RISCV::X0)
19978 .Case("{ra}", RISCV::X1)
19979 .Case("{sp}", RISCV::X2)
19980 .Case("{gp}", RISCV::X3)
19981 .Case("{tp}", RISCV::X4)
19982 .Case("{t0}", RISCV::X5)
19983 .Case("{t1}", RISCV::X6)
19984 .Case("{t2}", RISCV::X7)
19985 .Cases("{s0}", "{fp}", RISCV::X8)
19986 .Case("{s1}", RISCV::X9)
19987 .Case("{a0}", RISCV::X10)
19988 .Case("{a1}", RISCV::X11)
19989 .Case("{a2}", RISCV::X12)
19990 .Case("{a3}", RISCV::X13)
19991 .Case("{a4}", RISCV::X14)
19992 .Case("{a5}", RISCV::X15)
19993 .Case("{a6}", RISCV::X16)
19994 .Case("{a7}", RISCV::X17)
19995 .Case("{s2}", RISCV::X18)
19996 .Case("{s3}", RISCV::X19)
19997 .Case("{s4}", RISCV::X20)
19998 .Case("{s5}", RISCV::X21)
19999 .Case("{s6}", RISCV::X22)
20000 .Case("{s7}", RISCV::X23)
20001 .Case("{s8}", RISCV::X24)
20002 .Case("{s9}", RISCV::X25)
20003 .Case("{s10}", RISCV::X26)
20004 .Case("{s11}", RISCV::X27)
20005 .Case("{t3}", RISCV::X28)
20006 .Case("{t4}", RISCV::X29)
20007 .Case("{t5}", RISCV::X30)
20008 .Case("{t6}", RISCV::X31)
20009 .Default(RISCV::NoRegister);
20010 if (XRegFromAlias != RISCV::NoRegister)
20011 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
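// For example, an LLVM IR inline-asm constraint string such as
// "={a0},{a1},~{t0}" resolves {a0}/{a1}/{t0} to X10/X11/X5 via the table
// above, matching what a frontend emits for explicit-register operands.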
20012
20013 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
20014 // record rather than the AsmName to choose registers for InlineAsm
20015 // constraints, and we want to match those names to the widest floating-point
20016 // register type available, so manually select floating point registers here.
20017 //
20018 // The second case is the ABI name of the register, so that frontends can also
20019 // use the ABI names in register constraint lists.
20020 if (Subtarget.hasStdExtF()) {
20021 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20022 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20023 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20024 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20025 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20026 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20027 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20028 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20029 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20030 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20031 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20032 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20033 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20034 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20035 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20036 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20037 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20038 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20039 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20040 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20041 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20042 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20043 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20044 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20045 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20046 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20047 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20048 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20049 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20050 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20051 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20052 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20053 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20054 .Default(RISCV::NoRegister);
20055 if (FReg != RISCV::NoRegister) {
20056 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20057 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20058 unsigned RegNo = FReg - RISCV::F0_F;
20059 unsigned DReg = RISCV::F0_D + RegNo;
20060 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20061 }
20062 if (VT == MVT::f32 || VT == MVT::Other)
20063 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20064 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20065 unsigned RegNo = FReg - RISCV::F0_F;
20066 unsigned HReg = RISCV::F0_H + RegNo;
20067 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20068 }
20069 }
20070 }
20071
20072 if (Subtarget.hasVInstructions()) {
20073 Register VReg = StringSwitch<Register>(Constraint.lower())
20074 .Case("{v0}", RISCV::V0)
20075 .Case("{v1}", RISCV::V1)
20076 .Case("{v2}", RISCV::V2)
20077 .Case("{v3}", RISCV::V3)
20078 .Case("{v4}", RISCV::V4)
20079 .Case("{v5}", RISCV::V5)
20080 .Case("{v6}", RISCV::V6)
20081 .Case("{v7}", RISCV::V7)
20082 .Case("{v8}", RISCV::V8)
20083 .Case("{v9}", RISCV::V9)
20084 .Case("{v10}", RISCV::V10)
20085 .Case("{v11}", RISCV::V11)
20086 .Case("{v12}", RISCV::V12)
20087 .Case("{v13}", RISCV::V13)
20088 .Case("{v14}", RISCV::V14)
20089 .Case("{v15}", RISCV::V15)
20090 .Case("{v16}", RISCV::V16)
20091 .Case("{v17}", RISCV::V17)
20092 .Case("{v18}", RISCV::V18)
20093 .Case("{v19}", RISCV::V19)
20094 .Case("{v20}", RISCV::V20)
20095 .Case("{v21}", RISCV::V21)
20096 .Case("{v22}", RISCV::V22)
20097 .Case("{v23}", RISCV::V23)
20098 .Case("{v24}", RISCV::V24)
20099 .Case("{v25}", RISCV::V25)
20100 .Case("{v26}", RISCV::V26)
20101 .Case("{v27}", RISCV::V27)
20102 .Case("{v28}", RISCV::V28)
20103 .Case("{v29}", RISCV::V29)
20104 .Case("{v30}", RISCV::V30)
20105 .Case("{v31}", RISCV::V31)
20106 .Default(RISCV::NoRegister);
20107 if (VReg != RISCV::NoRegister) {
20108 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20109 return std::make_pair(VReg, &RISCV::VMRegClass);
20110 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20111 return std::make_pair(VReg, &RISCV::VRRegClass);
20112 for (const auto *RC :
20113 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20114 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20115 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20116 return std::make_pair(VReg, RC);
20117 }
20118 }
20119 }
20120 }
20121
20122 std::pair<Register, const TargetRegisterClass *> Res =
20124
20125 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20126 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20127 // Subtarget into account.
20128 if (Res.second == &RISCV::GPRF16RegClass ||
20129 Res.second == &RISCV::GPRF32RegClass ||
20130 Res.second == &RISCV::GPRPairRegClass)
20131 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20132
20133 return Res;
20134}
20135
20138 // Currently only support length 1 constraints.
20139 if (ConstraintCode.size() == 1) {
20140 switch (ConstraintCode[0]) {
20141 case 'A':
20143 default:
20144 break;
20145 }
20146 }
20147
20148 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20149}
20150
20152 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20153 SelectionDAG &DAG) const {
20154 // Currently only support length 1 constraints.
20155 if (Constraint.size() == 1) {
20156 switch (Constraint[0]) {
20157 case 'I':
20158 // Validate & create a 12-bit signed immediate operand.
20159 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20160 uint64_t CVal = C->getSExtValue();
20161 if (isInt<12>(CVal))
20162 Ops.push_back(
20163 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20164 }
20165 return;
20166 case 'J':
20167 // Validate & create an integer zero operand.
20168 if (isNullConstant(Op))
20169 Ops.push_back(
20170 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20171 return;
20172 case 'K':
20173 // Validate & create a 5-bit unsigned immediate operand.
20174 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20175 uint64_t CVal = C->getZExtValue();
20176 if (isUInt<5>(CVal))
20177 Ops.push_back(
20178 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20179 }
20180 return;
20181 case 'S':
20183 return;
20184 default:
20185 break;
20186 }
20187 }
20188 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20189}
20190
20192 Instruction *Inst,
20193 AtomicOrdering Ord) const {
20194 if (Subtarget.hasStdExtZtso()) {
20195 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20196 return Builder.CreateFence(Ord);
20197 return nullptr;
20198 }
20199
20200 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20201 return Builder.CreateFence(Ord);
20202 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20203 return Builder.CreateFence(AtomicOrdering::Release);
20204 return nullptr;
20205}
20206
20208 Instruction *Inst,
20209 AtomicOrdering Ord) const {
20210 if (Subtarget.hasStdExtZtso()) {
20211 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20212 return Builder.CreateFence(Ord);
20213 return nullptr;
20214 }
20215
20216 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20217 return Builder.CreateFence(AtomicOrdering::Acquire);
20218 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20219 Ord == AtomicOrdering::SequentiallyConsistent)
20220 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20221 return nullptr;
20222}
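// Taken together, these two hooks place the fences required by the RVWMO
// atomics mapping: a seq_cst load gets a leading fence rw,rw, an acquire (or
// stronger) load gets a trailing fence r,rw, a release (or stronger) store
// gets a leading fence rw,w, and a seq_cst store additionally gets a
// trailing fence when the seq-cst trailing-fence tuning is enabled. Under
// Ztso the hardware already orders ordinary loads and stores, so only the
// seq_cst cases above still emit fences.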
20223
20226 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20227 // point operations can't be used in an lr/sc sequence without breaking the
20228 // forward-progress guarantee.
20229 if (AI->isFloatingPointOperation() ||
20233
20234 // Don't expand forced atomics; we want to have __sync libcalls instead.
20235 if (Subtarget.hasForcedAtomics())
20237
20238 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20239 if (AI->getOperation() == AtomicRMWInst::Nand) {
20240 if (Subtarget.hasStdExtZacas() &&
20241 (Size >= 32 || Subtarget.hasStdExtZabha()))
20243 if (Size < 32)
20245 }
20246
20247 if (Size < 32 && !Subtarget.hasStdExtZabha())
20249
20251}
20252
20253static Intrinsic::ID
20255 if (XLen == 32) {
20256 switch (BinOp) {
20257 default:
20258 llvm_unreachable("Unexpected AtomicRMW BinOp");
20260 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20261 case AtomicRMWInst::Add:
20262 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20263 case AtomicRMWInst::Sub:
20264 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20266 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20267 case AtomicRMWInst::Max:
20268 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20269 case AtomicRMWInst::Min:
20270 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20272 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20274 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20275 }
20276 }
20277
20278 if (XLen == 64) {
20279 switch (BinOp) {
20280 default:
20281 llvm_unreachable("Unexpected AtomicRMW BinOp");
20283 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20284 case AtomicRMWInst::Add:
20285 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20286 case AtomicRMWInst::Sub:
20287 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20288 case AtomicRMWInst::Nand:
20289 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20290 case AtomicRMWInst::Max:
20291 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20292 case AtomicRMWInst::Min:
20293 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20294 case AtomicRMWInst::UMax:
20295 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20296 case AtomicRMWInst::UMin:
20297 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20298 }
20299 }
20300
20301 llvm_unreachable("Unexpected XLen\n");
20302}
20303
20304Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20305 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20306 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20307 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20308 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20309 // mask, as this produces better code than the LR/SC loop emitted by
20310 // int_riscv_masked_atomicrmw_xchg.
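 // Illustrative example: within a 32-bit word, "atomicrmw xchg ptr %p, i8 0"
 // can be lowered to "atomicrmw and" with ~Mask (clearing just the addressed
 // byte), and an xchg with -1 to "atomicrmw or" with Mask (setting it).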
20311 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20312 isa<ConstantInt>(AI->getValOperand())) {
20313 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20314 if (CVal->isZero())
20315 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20316 Builder.CreateNot(Mask, "Inv_Mask"),
20317 AI->getAlign(), Ord);
20318 if (CVal->isMinusOne())
20319 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20320 AI->getAlign(), Ord);
20321 }
20322
20323 unsigned XLen = Subtarget.getXLen();
20324 Value *Ordering =
20325 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20326 Type *Tys[] = {AlignedAddr->getType()};
20327 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20328 AI->getModule(),
20329 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20330
20331 if (XLen == 64) {
20332 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20333 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20334 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20335 }
20336
20337 Value *Result;
20338
20339 // Must pass the shift amount needed to sign extend the loaded value prior
20340 // to performing a signed comparison for min/max. ShiftAmt is the number of
20341 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20342 // is the number of bits to left+right shift the value in order to
20343 // sign-extend.
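 // Illustrative example: on RV64 (XLen = 64) with an i16 field whose
 // ShiftAmt is 16, ValWidth is 16, so SextShamt = 64 - 16 - 16 = 32; the
 // LR/SC loop shifts left and then arithmetic-right by 32 to sign-extend the
 // loaded halfword before the signed min/max comparison.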
20344 if (AI->getOperation() == AtomicRMWInst::Min ||
20345 AI->getOperation() == AtomicRMWInst::Max) {
20346 const DataLayout &DL = AI->getModule()->getDataLayout();
20347 unsigned ValWidth =
20348 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20349 Value *SextShamt =
20350 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20351 Result = Builder.CreateCall(LrwOpScwLoop,
20352 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20353 } else {
20354 Result =
20355 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20356 }
20357
20358 if (XLen == 64)
20359 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20360 return Result;
20361}
20362
20363TargetLowering::AtomicExpansionKind
20364RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20365 AtomicCmpXchgInst *CI) const {
20366 // Don't expand forced atomics, we want to have __sync libcalls instead.
20367 if (Subtarget.hasForcedAtomics())
20368 return AtomicExpansionKind::None;
20369
20370 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20371 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20372 (Size == 8 || Size == 16))
20373 return AtomicExpansionKind::MaskedIntrinsic;
20374 return AtomicExpansionKind::None;
20375}
20376
20377Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20378 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20379 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20380 unsigned XLen = Subtarget.getXLen();
20381 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20382 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20383 if (XLen == 64) {
20384 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20385 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20386 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20387 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20388 }
20389 Type *Tys[] = {AlignedAddr->getType()};
20390 Function *MaskedCmpXchg =
20391 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20392 Value *Result = Builder.CreateCall(
20393 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20394 if (XLen == 64)
20395 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20396 return Result;
20397}
20398
20399bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20400 EVT DataVT) const {
20401 // We have indexed loads for all supported EEW types. Indices are always
20402 // zero extended.
20403 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20404 isTypeLegal(Extend.getValueType()) &&
20405 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20406 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20407}
20408
20409bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20410 EVT VT) const {
20411 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20412 return false;
20413
20414 switch (FPVT.getSimpleVT().SimpleTy) {
20415 case MVT::f16:
20416 return Subtarget.hasStdExtZfhmin();
20417 case MVT::f32:
20418 return Subtarget.hasStdExtF();
20419 case MVT::f64:
20420 return Subtarget.hasStdExtD();
20421 default:
20422 return false;
20423 }
20424}
20425
20426unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20427 // If we are using the small code model, we can reduce size of jump table
20428 // entry to 4 bytes.
20429 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20430 getTargetMachine().getCodeModel() == CodeModel::Small) {
20431 return MachineJumpTableInfo::EK_Custom32;
20432 }
20433 return TargetLowering::getJumpTableEncoding();
20434}
20435
20436const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20437 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20438 unsigned uid, MCContext &Ctx) const {
20439 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20440 getTargetMachine().getCodeModel() == CodeModel::Small);
20441 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20442}
20443
20444bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20445 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20446 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20447 // a power of two as well.
20448 // FIXME: This doesn't work for zve32, but that's already broken
20449 // elsewhere for the same reason.
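 // Illustrative example: VLEN = 128 gives vscale = 128 / 64 = 2 and
 // VLEN = 256 gives vscale = 4; both are powers of two.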
20450 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20451 static_assert(RISCV::RVVBitsPerBlock == 64,
20452 "RVVBitsPerBlock changed, audit needed");
20453 return true;
20454}
20455
20456bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20457 SDValue &Offset,
20458 ISD::MemIndexedMode &AM,
20459 SelectionDAG &DAG) const {
20460 // Target does not support indexed loads.
20461 if (!Subtarget.hasVendorXTHeadMemIdx())
20462 return false;
20463
20464 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20465 return false;
20466
20467 Base = Op->getOperand(0);
20468 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20469 int64_t RHSC = RHS->getSExtValue();
20470 if (Op->getOpcode() == ISD::SUB)
20471 RHSC = -(uint64_t)RHSC;
20472
20473 // The constants that can be encoded in the THeadMemIdx instructions
20474 // are of the form (sign_extend(imm5) << imm2).
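 // Illustrative example: an offset of 96 (12 << 3) is encodable, while 100
 // is not, since no shift amount in [0, 3] leaves a quotient that both fits
 // in a signed 5-bit immediate and divides the offset evenly.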
20475 bool isLegalIndexedOffset = false;
20476 for (unsigned i = 0; i < 4; i++)
20477 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20478 isLegalIndexedOffset = true;
20479 break;
20480 }
20481
20482 if (!isLegalIndexedOffset)
20483 return false;
20484
20485 Offset = Op->getOperand(1);
20486 return true;
20487 }
20488
20489 return false;
20490}
20491
20492bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20493 SDValue &Offset,
20494 ISD::MemIndexedMode &AM,
20495 SelectionDAG &DAG) const {
20496 EVT VT;
20497 SDValue Ptr;
20498 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20499 VT = LD->getMemoryVT();
20500 Ptr = LD->getBasePtr();
20501 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20502 VT = ST->getMemoryVT();
20503 Ptr = ST->getBasePtr();
20504 } else
20505 return false;
20506
20507 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20508 return false;
20509
20510 AM = ISD::PRE_INC;
20511 return true;
20512}
20513
20514bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
20515 SDValue &Base,
20516 SDValue &Offset,
20517 ISD::MemIndexedMode &AM,
20518 SelectionDAG &DAG) const {
20519 EVT VT;
20520 SDValue Ptr;
20521 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20522 VT = LD->getMemoryVT();
20523 Ptr = LD->getBasePtr();
20524 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20525 VT = ST->getMemoryVT();
20526 Ptr = ST->getBasePtr();
20527 } else
20528 return false;
20529
20530 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20531 return false;
20532 // Post-indexing updates the base, so it's not a valid transform
20533 // if that's not the same as the load's pointer.
20534 if (Ptr != Base)
20535 return false;
20536
20537 AM = ISD::POST_INC;
20538 return true;
20539}
20540
20541bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20542 EVT VT) const {
20543 EVT SVT = VT.getScalarType();
20544
20545 if (!SVT.isSimple())
20546 return false;
20547
20548 switch (SVT.getSimpleVT().SimpleTy) {
20549 case MVT::f16:
20550 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20551 : Subtarget.hasStdExtZfhOrZhinx();
20552 case MVT::f32:
20553 return Subtarget.hasStdExtFOrZfinx();
20554 case MVT::f64:
20555 return Subtarget.hasStdExtDOrZdinx();
20556 default:
20557 break;
20558 }
20559
20560 return false;
20561}
20562
20563ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20564 // Zacas will use amocas.w which does not require extension.
20565 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20566}
20567
20568Register RISCVTargetLowering::getExceptionPointerRegister(
20569 const Constant *PersonalityFn) const {
20570 return RISCV::X10;
20571}
20572
20573Register RISCVTargetLowering::getExceptionSelectorRegister(
20574 const Constant *PersonalityFn) const {
20575 return RISCV::X11;
20576}
20577
20578bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20579 // Return false to suppress the unnecessary extensions if the LibCall
20580 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20581 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20582 Type.getSizeInBits() < Subtarget.getXLen()))
20583 return false;
20584
20585 return true;
20586}
20587
20588bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
20589 if (Subtarget.is64Bit() && Type == MVT::i32)
20590 return true;
20591
20592 return IsSigned;
20593}
20594
20595bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20596 SDValue C) const {
20597 // Check integral scalar types.
20598 const bool HasExtMOrZmmul =
20599 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20600 if (!VT.isScalarInteger())
20601 return false;
20602
20603 // Omit the optimization if the subtarget has the M (or Zmmul) extension and
20604 // the data size exceeds XLen.
20605 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20606 return false;
20607
20608 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20609 // Break the MUL to a SLLI and an ADD/SUB.
20610 const APInt &Imm = ConstNode->getAPIntValue();
20611 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20612 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20613 return true;
20614
20615 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20616 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20617 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20618 (Imm - 8).isPowerOf2()))
20619 return true;
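 // Illustrative example: Imm = 4100 is not simm12, but 4100 - 4 = 4096 is a
 // power of two, so x * 4100 can become sh2add(x, slli(x, 12)).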
20620
20621 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20622 // a pair of LUI/ADDI.
20623 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20624 ConstNode->hasOneUse()) {
20625 APInt ImmS = Imm.ashr(Imm.countr_zero());
20626 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20627 (1 - ImmS).isPowerOf2())
20628 return true;
20629 }
20630 }
20631
20632 return false;
20633}
20634
20635bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20636 SDValue ConstNode) const {
20637 // Let the DAGCombiner decide for vectors.
20638 EVT VT = AddNode.getValueType();
20639 if (VT.isVector())
20640 return true;
20641
20642 // Let the DAGCombiner decide for larger types.
20643 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20644 return true;
20645
20646 // It is worse if c1 is simm12 while c1*c2 is not.
20647 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20648 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20649 const APInt &C1 = C1Node->getAPIntValue();
20650 const APInt &C2 = C2Node->getAPIntValue();
20651 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20652 return false;
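 // Illustrative example: for (x + 2000) * 100, c1 = 2000 fits in simm12 but
 // c1 * c2 = 200000 does not, so folding the add into the multiply would
 // require an extra LUI/ADDI pair to materialize the product.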
20653
20654 // Default to true and let the DAGCombiner decide.
20655 return true;
20656}
20657
20658bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20659 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20660 unsigned *Fast) const {
20661 if (!VT.isVector()) {
20662 if (Fast)
20663 *Fast = Subtarget.enableUnalignedScalarMem();
20664 return Subtarget.enableUnalignedScalarMem();
20665 }
20666
20667 // All vector implementations must support element alignment
20668 EVT ElemVT = VT.getVectorElementType();
20669 if (Alignment >= ElemVT.getStoreSize()) {
20670 if (Fast)
20671 *Fast = 1;
20672 return true;
20673 }
20674
20675 // Note: We lower an unmasked unaligned vector access to an equally sized
20676 // e8 element type access. Given this, we effectively support all unmasked
20677 // misaligned accesses. TODO: Work through the codegen implications of
20678 // allowing such accesses to be formed and of considering them fast.
20679 if (Fast)
20680 *Fast = Subtarget.enableUnalignedVectorMem();
20681 return Subtarget.enableUnalignedVectorMem();
20682}
20683
20684
20685EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20686 const AttributeList &FuncAttributes) const {
20687 if (!Subtarget.hasVInstructions())
20688 return MVT::Other;
20689
20690 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20691 return MVT::Other;
20692
20693 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20694 // has an expansion threshold, and we want the number of hardware memory
20695 // operations to correspond roughly to that threshold. LMUL>1 operations
20696 // are typically expanded linearly internally, and thus correspond to more
20697 // than one actual memory operation. Note that store merging and load
20698 // combining will typically form larger LMUL operations from the LMUL1
20699 // operations emitted here, and that's okay because combining isn't
20700 // introducing new memory operations; it's just merging existing ones.
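 // Illustrative example: with VLEN = 128, MinVLenInBytes is 16, so a 64-byte
 // memcpy is reported with a 16-byte vector type and expands into four LMUL1
 // memory operations, which later store merging may widen.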
20701 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20702 if (Op.size() < MinVLenInBytes)
20703 // TODO: Figure out short memops. For the moment, do the default thing
20704 // which ends up using scalar sequences.
20705 return MVT::Other;
20706
20707 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20708 // a large scalar constant and instead use vmv.v.x/i to do the
20709 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20710 // maximize the chance we can encode the size in the vsetvli.
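 // Illustrative example: memset(p, 0x2a, n) can splat the single byte 0x2a
 // with vmv.v.x, whereas an i64 element type would first need the repeated
 // pattern 0x2a2a2a2a2a2a2a2a materialized in a scalar register.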
20711 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20712 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20713
20714 // Do we have sufficient alignment for our preferred VT? If not, revert
20715 // to largest size allowed by our alignment criteria.
20716 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20717 Align RequiredAlign(PreferredVT.getStoreSize());
20718 if (Op.isFixedDstAlign())
20719 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20720 if (Op.isMemcpy())
20721 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20722 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20723 }
20724 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20725}
20726
20727bool RISCVTargetLowering::splitValueIntoRegisterParts(
20728 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20729 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20730 bool IsABIRegCopy = CC.has_value();
20731 EVT ValueVT = Val.getValueType();
20732 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20733 PartVT == MVT::f32) {
20734 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20735 // nan, and cast to f32.
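 // Illustrative example: the f16 value 1.0 (bits 0x3C00) is passed as the
 // f32 bit pattern 0xFFFF3C00, i.e. NaN-boxed in the upper bits of the FPR.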
20736 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20737 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20738 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20739 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20740 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20741 Parts[0] = Val;
20742 return true;
20743 }
20744
20745 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20746 LLVMContext &Context = *DAG.getContext();
20747 EVT ValueEltVT = ValueVT.getVectorElementType();
20748 EVT PartEltVT = PartVT.getVectorElementType();
20749 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20750 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20751 if (PartVTBitSize % ValueVTBitSize == 0) {
20752 assert(PartVTBitSize >= ValueVTBitSize);
20753 // If the element types are different, bitcast to the same element type of
20754 // PartVT first.
20755 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
20756 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
20757 // INSERT_SUBVECTOR, and can then bitcast the result to
20758 // <vscale x 4 x i16>.
20759 if (ValueEltVT != PartEltVT) {
20760 if (PartVTBitSize > ValueVTBitSize) {
20761 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20762 assert(Count != 0 && "The number of element should not be zero.");
20763 EVT SameEltTypeVT =
20764 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20765 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20766 DAG.getUNDEF(SameEltTypeVT), Val,
20767 DAG.getVectorIdxConstant(0, DL));
20768 }
20769 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20770 } else {
20771 Val =
20772 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20773 Val, DAG.getVectorIdxConstant(0, DL));
20774 }
20775 Parts[0] = Val;
20776 return true;
20777 }
20778 }
20779 return false;
20780}
20781
20782SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20783 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20784 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20785 bool IsABIRegCopy = CC.has_value();
20786 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20787 PartVT == MVT::f32) {
20788 SDValue Val = Parts[0];
20789
20790 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20791 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20792 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20793 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20794 return Val;
20795 }
20796
20797 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20798 LLVMContext &Context = *DAG.getContext();
20799 SDValue Val = Parts[0];
20800 EVT ValueEltVT = ValueVT.getVectorElementType();
20801 EVT PartEltVT = PartVT.getVectorElementType();
20802 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20803 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20804 if (PartVTBitSize % ValueVTBitSize == 0) {
20805 assert(PartVTBitSize >= ValueVTBitSize);
20806 EVT SameEltTypeVT = ValueVT;
20807 // If the element types are different, convert it to the same element type
20808 // of PartVT.
20809 // For example, to copy a <vscale x 1 x i8> value out of <vscale x 4 x i16>,
20810 // we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>,
20811 // and can then extract the desired <vscale x 1 x i8> with an
20812 // EXTRACT_SUBVECTOR.
20813 if (ValueEltVT != PartEltVT) {
20814 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20815 assert(Count != 0 && "The number of element should not be zero.");
20816 SameEltTypeVT =
20817 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20818 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
20819 }
20820 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
20821 DAG.getVectorIdxConstant(0, DL));
20822 return Val;
20823 }
20824 }
20825 return SDValue();
20826}
20827
20828bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20829 // When aggressively optimizing for code size, we prefer to use a div
20830 // instruction, as it is usually smaller than the alternative sequence.
20831 // TODO: Add vector division?
20832 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20833 return OptSize && !VT.isVector();
20834}
20835
20836bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
20837 // Scalarizing zero_ext and sign_ext might stop us from matching widening
20838 // instructions in some situations.
20839 unsigned Opc = N->getOpcode();
20840 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20841 return false;
20842 return true;
20843}
20844
20845static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20846 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20847 Function *ThreadPointerFunc =
20848 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20849 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
20850 IRB.CreateCall(ThreadPointerFunc), Offset);
20851}
20852
20853Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20854 // Fuchsia provides a fixed TLS slot for the stack cookie.
20855 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20856 if (Subtarget.isTargetFuchsia())
20857 return useTpOffset(IRB, -0x10);
20858
20859 return TargetLowering::getIRStackGuard(IRB);
20860}
20861
20862bool RISCVTargetLowering::isLegalInterleavedAccessType(
20863 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20864 const DataLayout &DL) const {
20865 EVT VT = getValueType(DL, VTy);
20866 // Don't lower vlseg/vsseg for vector types that can't be split.
20867 if (!isTypeLegal(VT))
20868 return false;
20869
20870 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
20871 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
20872 Alignment))
20873 return false;
20874
20875 MVT ContainerVT = VT.getSimpleVT();
20876
20877 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20878 if (!Subtarget.useRVVForFixedLengthVectors())
20879 return false;
20880 // Sometimes the interleaved access pass picks up splats as interleaves of
20881 // one element. Don't lower these.
20882 if (FVTy->getNumElements() < 2)
20883 return false;
20884
20885 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
20886 }
20887
20888 // Need to make sure that EMUL * NFIELDS ≤ 8
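 // Illustrative example: a <vscale x 2 x i64> field occupies LMUL = 2, so a
 // factor-4 segment access needs EMUL * NFIELDS = 2 * 4 = 8 (allowed), while
 // factor 5 would need 10 registers and is rejected.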
20889 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
20890 if (Fractional)
20891 return true;
20892 return Factor * LMUL <= 8;
20893}
20894
20895bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20896 Align Alignment) const {
20897 if (!Subtarget.hasVInstructions())
20898 return false;
20899
20900 // Only support fixed vectors if we know the minimum vector size.
20901 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20902 return false;
20903
20904 EVT ScalarType = DataType.getScalarType();
20905 if (!isLegalElementTypeForRVV(ScalarType))
20906 return false;
20907
20908 if (!Subtarget.enableUnalignedVectorMem() &&
20909 Alignment < ScalarType.getStoreSize())
20910 return false;
20911
20912 return true;
20913}
20914
20915static const Intrinsic::ID FixedVlsegIntrIds[] = {
20916 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20917 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20918 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20919 Intrinsic::riscv_seg8_load};
20920
20921/// Lower an interleaved load into a vlsegN intrinsic.
20922///
20923/// E.g. Lower an interleaved load (Factor = 2):
20924/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20925/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20926/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20927///
20928/// Into:
20929/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20930/// %ptr, i64 4)
20931/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
20932/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
20933bool RISCVTargetLowering::lowerInterleavedLoad(
20934 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20935 ArrayRef<unsigned> Indices, unsigned Factor) const {
20936 IRBuilder<> Builder(LI);
20937
20938 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
20939 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
20940 LI->getPointerAddressSpace(),
20941 LI->getModule()->getDataLayout()))
20942 return false;
20943
20944 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20945
20946 Function *VlsegNFunc =
20947 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20948 {VTy, LI->getPointerOperandType(), XLenTy});
20949
20950 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20951
20952 CallInst *VlsegN =
20953 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
20954
20955 for (unsigned i = 0; i < Shuffles.size(); i++) {
20956 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
20957 Shuffles[i]->replaceAllUsesWith(SubVec);
20958 }
20959
20960 return true;
20961}
20962
20963static const Intrinsic::ID FixedVssegIntrIds[] = {
20964 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20965 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20966 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20967 Intrinsic::riscv_seg8_store};
20968
20969/// Lower an interleaved store into a vssegN intrinsic.
20970///
20971/// E.g. Lower an interleaved store (Factor = 3):
20972/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20973/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20974/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20975///
20976/// Into:
20977/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20978/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20979/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20980/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20981/// %ptr, i32 4)
20982///
20983/// Note that the new shufflevectors will be removed and we'll only generate one
20984/// vsseg3 instruction in CodeGen.
20985bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20986 ShuffleVectorInst *SVI,
20987 unsigned Factor) const {
20988 IRBuilder<> Builder(SI);
20989 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
20990 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20991 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
20992 ShuffleVTy->getNumElements() / Factor);
20993 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
20994 SI->getPointerAddressSpace(),
20995 SI->getModule()->getDataLayout()))
20996 return false;
20997
20998 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20999
21000 Function *VssegNFunc =
21001 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21002 {VTy, SI->getPointerOperandType(), XLenTy});
21003
21004 auto Mask = SVI->getShuffleMask();
21005 SmallVector<Value *, 10> Ops;
21006
21007 for (unsigned i = 0; i < Factor; i++) {
21008 Value *Shuffle = Builder.CreateShuffleVector(
21009 SVI->getOperand(0), SVI->getOperand(1),
21010 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21011 Ops.push_back(Shuffle);
21012 }
21013 // This VL should be OK (should be executable in one vsseg instruction,
21014 // potentially under larger LMULs) because we checked that the fixed vector
21015 // type fits in isLegalInterleavedAccessType
21016 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21017 Ops.append({SI->getPointerOperand(), VL});
21018
21019 Builder.CreateCall(VssegNFunc, Ops);
21020
21021 return true;
21022}
21023
21024bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21025 LoadInst *LI) const {
21026 assert(LI->isSimple());
21027 IRBuilder<> Builder(LI);
21028
21029 // Only deinterleave2 supported at present.
21030 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21031 return false;
21032
21033 unsigned Factor = 2;
21034
21035 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21036 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21037
21038 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21040 LI->getModule()->getDataLayout()))
21041 return false;
21042
21043 Function *VlsegNFunc;
21044 Value *VL;
21045 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21046 SmallVector<Value *, 10> Ops;
21047
21048 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21049 VlsegNFunc = Intrinsic::getDeclaration(
21050 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21051 {ResVTy, LI->getPointerOperandType(), XLenTy});
21052 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21053 } else {
21054 static const Intrinsic::ID IntrIds[] = {
21055 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21056 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21057 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21058 Intrinsic::riscv_vlseg8};
21059
21060 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21061 {ResVTy, XLenTy});
21062 VL = Constant::getAllOnesValue(XLenTy);
21063 Ops.append(Factor, PoisonValue::get(ResVTy));
21064 }
21065
21066 Ops.append({LI->getPointerOperand(), VL});
21067
21068 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21069 DI->replaceAllUsesWith(Vlseg);
21070
21071 return true;
21072}
21073
21074bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21075 StoreInst *SI) const {
21076 assert(SI->isSimple());
21077 IRBuilder<> Builder(SI);
21078
21079 // Only interleave2 supported at present.
21080 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21081 return false;
21082
21083 unsigned Factor = 2;
21084
21085 VectorType *VTy = cast<VectorType>(II->getType());
21086 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21087
21088 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21089 SI->getPointerAddressSpace(),
21090 SI->getModule()->getDataLayout()))
21091 return false;
21092
21093 Function *VssegNFunc;
21094 Value *VL;
21095 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21096
21097 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21098 VssegNFunc = Intrinsic::getDeclaration(
21099 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21100 {InVTy, SI->getPointerOperandType(), XLenTy});
21101 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21102 } else {
21103 static const Intrinsic::ID IntrIds[] = {
21104 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21105 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21106 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21107 Intrinsic::riscv_vsseg8};
21108
21109 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21110 {InVTy, XLenTy});
21111 VL = Constant::getAllOnesValue(XLenTy);
21112 }
21113
21114 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21115 SI->getPointerOperand(), VL});
21116
21117 return true;
21118}
21119
21120MachineInstr *
21121RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21122 MachineBasicBlock::instr_iterator &MBBI,
21123 const TargetInstrInfo *TII) const {
21124 assert(MBBI->isCall() && MBBI->getCFIType() &&
21125 "Invalid call instruction for a KCFI check");
21126 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21127 MBBI->getOpcode()));
21128
21129 MachineOperand &Target = MBBI->getOperand(0);
21130 Target.setIsRenamable(false);
21131
21132 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21133 .addReg(Target.getReg())
21134 .addImm(MBBI->getCFIType())
21135 .getInstr();
21136}
21137
21138#define GET_REGISTER_MATCHER
21139#include "RISCVGenAsmMatcher.inc"
21140
21141Register
21142RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21143 const MachineFunction &MF) const {
21144 Register Reg = MatchRegisterAltName(RegName);
21145 if (Reg == RISCV::NoRegister)
21146 Reg = MatchRegisterName(RegName);
21147 if (Reg == RISCV::NoRegister)
21148 report_fatal_error(
21149 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21150 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21151 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21152 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21153 StringRef(RegName) + "\"."));
21154 return Reg;
21155}
21156
21157MachineMemOperand::Flags
21158RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21159 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21160
21161 if (NontemporalInfo == nullptr)
21163
21164 // 1 for default value work as __RISCV_NTLH_ALL
21165 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21166 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21167 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21168 // 5 -> __RISCV_NTLH_ALL
21169 int NontemporalLevel = 5;
21170 const MDNode *RISCVNontemporalInfo =
21171 I.getMetadata("riscv-nontemporal-domain");
21172 if (RISCVNontemporalInfo != nullptr)
21173 NontemporalLevel =
21174 cast<ConstantInt>(
21175 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21176 ->getValue())
21177 ->getZExtValue();
21178
21179 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21180 "RISC-V target doesn't support this non-temporal domain.");
21181
21182 NontemporalLevel -= 2;
21183 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21184 if (NontemporalLevel & 0b1)
21185 Flags |= MONontemporalBit0;
21186 if (NontemporalLevel & 0b10)
21187 Flags |= MONontemporalBit1;
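 // Illustrative example: __RISCV_NTLH_ALL (5) becomes 3 and sets both
 // MONontemporalBit0 and MONontemporalBit1, while __RISCV_NTLH_INNERMOST_PRIVATE
 // (2) becomes 0 and sets neither bit.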
21188
21189 return Flags;
21190}
21191
21192MachineMemOperand::Flags
21193RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21194
21195 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21196 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21197 TargetFlags |= (NodeFlags & MONontemporalBit0);
21198 TargetFlags |= (NodeFlags & MONontemporalBit1);
21199 return TargetFlags;
21200}
21201
21203 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21204 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21205}
21206
21207bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21208 if (VT.isScalableVector())
21209 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21210 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21211 return true;
21212 return Subtarget.hasStdExtZbb() &&
21213 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21214}
21215
21216unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21217 ISD::CondCode Cond) const {
21218 return isCtpopFast(VT) ? 0 : 1;
21219}
21220
21221bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21222
21223 // GISel support is in progress or complete for these opcodes.
21224 unsigned Op = Inst.getOpcode();
21225 if (Op == Instruction::Add || Op == Instruction::Sub ||
21226 Op == Instruction::And || Op == Instruction::Or ||
21227 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21228 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21229 return false;
21230
21231 if (Inst.getType()->isScalableTy())
21232 return true;
21233
21234 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21235 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21236 !isa<ReturnInst>(&Inst))
21237 return true;
21238
21239 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21240 if (AI->getAllocatedType()->isScalableTy())
21241 return true;
21242 }
21243
21244 return false;
21245}
21246
21247SDValue
21248RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21249 SelectionDAG &DAG,
21250 SmallVectorImpl<SDNode *> &Created) const {
21251 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21252 if (isIntDivCheap(N->getValueType(0), Attr))
21253 return SDValue(N, 0); // Lower SDIV as SDIV
21254
21255 // Only perform this transform if short forward branch opt is supported.
21256 if (!Subtarget.hasShortForwardBranchOpt())
21257 return SDValue();
21258 EVT VT = N->getValueType(0);
21259 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21260 return SDValue();
21261
21262 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21263 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21264 return SDValue();
21265 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21266}
21267
21268bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21269 EVT VT, const APInt &AndMask) const {
21270 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21271 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21272 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21273}
21274
21275unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21276 return Subtarget.getMinimumJumpTableEntries();
21277}
21278
21279// Handle single arg such as return value.
21280template <typename Arg>
21281void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21282 // This lambda determines whether an argument list is made up of homogeneous
21283 // scalable vector types.
21284 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21285 // First, extract the first element in the argument type.
21286 auto It = ArgList.begin();
21287 MVT FirstArgRegType = It->VT;
21288
21289 // Return false if the list is empty or the type needs to be split.
21290 if (It == ArgList.end() || It->Flags.isSplit())
21291 return false;
21292
21293 ++It;
21294
21295 // Return false if this argument type contains only one element, or it is
21296 // not a scalable vector type.
21297 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21298 return false;
21299
21300 // Second, check if the following elements in this argument type are all the
21301 // same.
21302 for (; It != ArgList.end(); ++It)
21303 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21304 return false;
21305
21306 return true;
21307 };
21308
21309 if (isHomogeneousScalableVectorType(ArgList)) {
21310 // Handle as tuple type
21311 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21312 } else {
21313 // Handle as normal vector type
21314 bool FirstVMaskAssigned = false;
21315 for (const auto &OutArg : ArgList) {
21316 MVT RegisterVT = OutArg.VT;
21317
21318 // Skip non-RVV register type
21319 if (!RegisterVT.isVector())
21320 continue;
21321
21322 if (RegisterVT.isFixedLengthVector())
21323 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21324
21325 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21326 RVVArgInfos.push_back({1, RegisterVT, true});
21327 FirstVMaskAssigned = true;
21328 continue;
21329 }
21330
21331 RVVArgInfos.push_back({1, RegisterVT, false});
21332 }
21333 }
21334}
21335
21336// Handle multiple args.
21337template <>
21338void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21339 const DataLayout &DL = MF->getDataLayout();
21340 const Function &F = MF->getFunction();
21341 LLVMContext &Context = F.getContext();
21342
21343 bool FirstVMaskAssigned = false;
21344 for (Type *Ty : TypeList) {
21345 StructType *STy = dyn_cast<StructType>(Ty);
21346 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21347 Type *ElemTy = STy->getTypeAtIndex(0U);
21348 EVT VT = TLI->getValueType(DL, ElemTy);
21349 MVT RegisterVT =
21350 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21351 unsigned NumRegs =
21352 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21353
21354 RVVArgInfos.push_back(
21355 {NumRegs * STy->getNumElements(), RegisterVT, false});
21356 } else {
21357 SmallVector<EVT, 4> ValueVTs;
21358 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21359
21360 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21361 ++Value) {
21362 EVT VT = ValueVTs[Value];
21363 MVT RegisterVT =
21364 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21365 unsigned NumRegs =
21366 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21367
21368 // Skip non-RVV register type
21369 if (!RegisterVT.isVector())
21370 continue;
21371
21372 if (RegisterVT.isFixedLengthVector())
21373 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21374
21375 if (!FirstVMaskAssigned &&
21376 RegisterVT.getVectorElementType() == MVT::i1) {
21377 RVVArgInfos.push_back({1, RegisterVT, true});
21378 FirstVMaskAssigned = true;
21379 --NumRegs;
21380 }
21381
21382 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21383 }
21384 }
21385 }
21386}
21387
21388void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21389 unsigned StartReg) {
21390 assert((StartReg % LMul) == 0 &&
21391 "Start register number should be multiple of lmul");
21392 const MCPhysReg *VRArrays;
21393 switch (LMul) {
21394 default:
21395 report_fatal_error("Invalid lmul");
21396 case 1:
21397 VRArrays = ArgVRs;
21398 break;
21399 case 2:
21400 VRArrays = ArgVRM2s;
21401 break;
21402 case 4:
21403 VRArrays = ArgVRM4s;
21404 break;
21405 case 8:
21406 VRArrays = ArgVRM8s;
21407 break;
21408 }
21409
21410 for (unsigned i = 0; i < NF; ++i)
21411 if (StartReg)
21412 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21413 else
21414 AllocatedPhysRegs.push_back(MCPhysReg());
21415}
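// Illustrative example (assuming ArgVRM2s = {V8M2, V10M2, V12M2, ...}):
// allocatePhysReg(/*NF=*/2, /*LMul=*/2, /*StartReg=*/10) pushes V10M2 and
// V12M2, i.e. entries (10 - 8) / 2 + {0, 1} of ArgVRM2s.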
21416
21417/// This function determines if each RVV argument is passed by register, if the
21418/// argument can be assigned to a VR, then give it a specific register.
21419/// Otherwise, assign the argument to 0, which is an invalid MCPhysReg.
21420void RVVArgDispatcher::compute() {
21421 uint32_t AssignedMap = 0;
21422 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21423 // Allocate first vector mask argument to V0.
21424 if (ArgInfo.FirstVMask) {
21425 AllocatedPhysRegs.push_back(RISCV::V0);
21426 return;
21427 }
21428
21429 unsigned RegsNeeded = divideCeil(
21430 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21431 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21432 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21433 StartReg += RegsNeeded) {
21434 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21435 if ((AssignedMap & Map) == 0) {
21436 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21437 AssignedMap |= Map;
21438 return;
21439 }
21440 }
21441
21442 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21443 };
21444
21445 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21446 allocate(RVVArgInfos[i]);
21447}
21448
21449MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21450 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21451 return AllocatedPhysRegs[CurIdx++];
21452}
21453
21454namespace llvm::RISCVVIntrinsicsTable {
21455
21456#define GET_RISCVVIntrinsicsTable_IMPL
21457#include "RISCVGenSearchableTables.inc"
21458
21459} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large proportion...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
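Illustrative sketch only (not code from this file): the APFloat conversion entry points listed above are usually used to round-trip between APInt and a floating-point semantics. The helper name roundTripsExactly is hypothetical, and the APSInt convenience overload of convertToInteger is used for brevity.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
using namespace llvm;
// Round-trip a signed 64-bit value through f32 and report whether the
// conversion back to an integer was exact.
static bool roundTripsExactly(int64_t V) {
  APFloat F(APFloat::IEEEsingle());
  F.convertFromAPInt(APInt(64, V, /*isSigned=*/true), /*IsSigned=*/true,
                     APFloat::rmNearestTiesToEven);
  APSInt Result(64, /*isUnsigned=*/false);
  bool IsExact = false;
  F.convertToInteger(Result, APFloat::rmTowardZero, &IsExact);
  return IsExact && Result == V;
}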
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
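A short, illustrative use of the APInt factories and queries documented above; the function is a standalone sketch, not part of this file.
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;
static void apintExamples() {
  APInt Low = APInt::getLowBitsSet(32, 12);   // 0x00000FFF
  APInt High = APInt::getHighBitsSet(32, 4);  // 0xF0000000
  APInt Bit = APInt::getOneBitSet(32, 31);    // same value as getSignMask(32)
  assert(Bit == APInt::getSignMask(32));
  assert(Bit.isPowerOf2() && Bit.countr_zero() == 31);
  assert((Low & High).isZero());
  assert(Low.lshr(4).getZExtValue() == 0xFF);
  // Width changes: sign-extend an i8 value, then truncate back.
  APInt Wide = APInt(8, 0x80).sext(32);       // 0xFFFFFF80
  assert(Wide.trunc(8).getZExtValue() == 0x80);
}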
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
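A minimal sketch of the ArrayRef members documented above (size, begin/end, slice); names and data are illustrative.
#include "llvm/ADT/ArrayRef.h"
#include <cassert>
using namespace llvm;
static void arrayRefExamples() {
  int Storage[] = {1, 2, 3, 4, 5};
  ArrayRef<int> A(Storage);
  assert(A.size() == 5);
  assert(*A.begin() == 1 && A.end() == A.begin() + A.size());
  // slice(n, m): drop the first n elements, keep m.
  ArrayRef<int> Mid = A.slice(1, 3); // {2, 3, 4}
  assert(Mid.size() == 3 && Mid[0] == 2);
}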
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
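A sketch (assumed helper, not from this file) of inspecting an atomicrmw with the accessors listed above, in the shape a pass might use when deciding how to expand it.
#include "llvm/IR/Instructions.h"
using namespace llvm;
static bool isSimpleIntegerRMW(const AtomicRMWInst &RMW) {
  if (RMW.isFloatingPointOperation())
    return false;
  switch (RMW.getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
    // Only accept relaxed orderings in this illustrative check.
    return RMW.getOrdering() == AtomicOrdering::Monotonic;
  default:
    return false;
  }
}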
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
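A sketch of the usual CCState/CCValAssign pattern for formal-argument analysis; AssignFn stands in for whichever CCAssignFn the target supplies and is only a placeholder here.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include <cassert>
using namespace llvm;
static void analyzeFormals(MachineFunction &MF, CallingConv::ID CC,
                           bool IsVarArg,
                           const SmallVectorImpl<ISD::InputArg> &Ins,
                           CCAssignFn AssignFn) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
  CCInfo.AnalyzeFormalArguments(Ins, AssignFn);
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Argument arrives in VA.getLocReg(); a lowering would copy it out.
      (void)VA.getLocReg();
    } else {
      assert(VA.isMemLoc());
      // Argument lives on the stack at VA.getLocMemOffset().
      (void)VA.getLocMemOffset();
    }
  }
}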
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:299
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
iterator_range< arg_iterator > args()
Definition: Function.h:842
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:264
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Argument * getArg(unsigned i) const
Definition: Function.h:836
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
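A sketch of the IRBuilder calls listed above in the shape of a fence + atomicrmw + narrowing sequence; the helper name is hypothetical and the code assumes Val is an integer value suitable for an integer RMW.
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
static Value *emitFencedAddAndTruncate(IRBuilderBase &Builder, Value *Ptr,
                                       Value *Val) {
  // Leading fence, then a relaxed add that yields the previous value.
  Builder.CreateFence(AtomicOrdering::Release);
  AtomicRMWInst *Old = Builder.CreateAtomicRMW(
      AtomicRMWInst::Add, Ptr, Val, Align(4), AtomicOrdering::Monotonic);
  // Narrow the previous value to i8, then sign-extend it back to i64.
  Value *Narrow = Builder.CreateTrunc(Old, Builder.getInt8Ty());
  return Builder.CreateSExt(Narrow, Builder.getInt64Ty());
}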
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
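Illustrative-only checks of the MVT queries listed above, covering fixed and scalable vector types; the function is a standalone sketch.
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;
static void mvtExamples() {
  MVT V4I32 = MVT::getVectorVT(MVT::i32, 4);            // <4 x i32>
  MVT NxV2F64 = MVT::getScalableVectorVT(MVT::f64, 2);  // <vscale x 2 x f64>
  assert(V4I32.isFixedLengthVector() && !V4I32.isScalableVector());
  assert(NxV2F64.isScalableVector());
  assert(V4I32.getVectorElementType() == MVT::i32);
  assert(V4I32.getHalfNumVectorElementsVT() == MVT::getVectorVT(MVT::i32, 2));
  // Reinterpret lanes as same-width integers: <vscale x 2 x f64> -> <vscale x 2 x i64>.
  assert(NxV2F64.changeVectorElementTypeToInteger() ==
         MVT::getScalableVectorVT(MVT::i64, 2));
}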
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
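A sketch of the BuildMI/MachineInstrBuilder idiom that custom inserters use; the branch opcode and operand layout (two registers plus a target block, BEQ-style) are assumptions for illustration only.
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
static void emitConditionalBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                                  const TargetInstrInfo &TII,
                                  unsigned BranchOpc, Register LHS,
                                  Register RHS, MachineBasicBlock *Target) {
  // Append the branch to the end of MBB: opcode, two register operands, target.
  BuildMI(&MBB, DL, TII.get(BranchOpc))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(Target);
  // Keep the CFG consistent with the new terminator.
  MBB.addSuccessor(Target);
}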
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:398
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
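A simplified sketch, not the in-tree heuristic, of how the RISCVSubtarget queries listed above typically gate fixed-length-vector lowering; the helper name and the exact conditions are assumptions.
#include "RISCVSubtarget.h"
using namespace llvm;
static bool mayUseRVVForFixedVector(MVT VT, const RISCVSubtarget &Subtarget) {
  if (!Subtarget.hasVInstructions() ||
      !Subtarget.useRVVForFixedLengthVectors())
    return false;
  // Element width must fit the supported ELEN, and the whole vector must fit
  // within the configured LMUL budget for the known-minimum VLEN.
  if (VT.getScalarSizeInBits() > Subtarget.getELen())
    return false;
  unsigned MaxBits =
      Subtarget.getRealMinVLen() * Subtarget.getMaxLMULForFixedLengthVectors();
  return VT.getFixedSizeInBits() <= MaxBits;
}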
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
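A hedged sketch of the kind of check this hook performs on a target whose add-immediate instruction takes a signed 12-bit operand (as RISC-V's ADDI does); the free-standing helper below is illustrative and is not the in-tree override.
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;
static bool isLegalAddImmediateSketch(int64_t Imm) {
  // A signed 12-bit immediate fits directly; anything else would need to be
  // materialized into a register first.
  return isInt<12>(Imm);
}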
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
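A sketch of the SDValue/SDNode accessors listed above in the shape of a typical DAG-combine matcher: recognize (shl X, C) where C is a constant and the shift has a single user. The helper name is hypothetical.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
static bool matchSingleUseShlByConstant(SDValue V, SDValue &X, uint64_t &Amt) {
  if (V.getOpcode() != ISD::SHL || !V.hasOneUse())
    return false;
  auto *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!C)
    return false;
  X = V.getOperand(0);
  Amt = C->getZExtValue(); // ConstantSDNode::getZExtValue, documented above
  return true;
}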
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
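The SelectionDAG builder entries above (getSetCC, getConstant, getNode and friends) are the primitives that lowering code composes. Below is a minimal sketch of how they are typically combined; the helper name, the chosen opcodes, and the use of the operand type as the setcc result type are illustrative assumptions, not code taken from this file.

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Hypothetical helper: build the expression (A + 1) != 0.
  // For simplicity the setcc result reuses VT; a real target would usually
  // query getSetCCResultType() instead.
  static SDValue buildIsNotZeroPlusOne(SelectionDAG &DAG, const SDLoc &DL,
                                       SDValue A, EVT VT) {
    SDValue One = DAG.getConstant(1, DL, VT);            // ConstantSDNode
    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, A, One); // generic node
    SDValue Zero = DAG.getConstant(0, DL, VT);
    // getSetCC builds the SETCC node given only an ISD::CondCode.
    return DAG.getSetCC(DL, VT, Sum, Zero, ISD::SETNE);
  }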
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
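A small usage sketch for SmallVector; the function and values are purely illustrative.

  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  // Collect the first N even numbers; stays in inline storage while N <= 8.
  static SmallVector<int, 8> collectEvens(unsigned N) {
    SmallVector<int, 8> Evens; // inline storage for 8 elements
    Evens.reserve(N);          // optional: avoids regrowth for larger N
    for (unsigned I = 0; I < N; ++I)
      Evens.push_back(2 * I);
    return Evens;
  }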
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
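A usage sketch for StringSwitch; the strings and return values here are made up for illustration.

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"
  using namespace llvm;

  // Map a name onto a small integer code, with -1 as the fallback.
  static int classifyName(StringRef Name) {
    return StringSwitch<int>(Name)
        .Case("ilp32", 0)
        .Cases("lp64", "lp64f", 1) // several literals can share one value
        .Default(-1);
  }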
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
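The setXXXAction/addRegisterClass hooks listed above are normally called from a target's TargetLowering constructor to describe what the hardware supports. The sketch below shows the general shape using an invented ToyTargetLowering class; the register class, opcodes, and actions are placeholders, not a description of any real backend.

  #include "llvm/CodeGen/TargetLowering.h"
  using namespace llvm;

  namespace {
  // Hypothetical backend; only the configuration calls matter here.
  class ToyTargetLowering : public TargetLowering {
  public:
    ToyTargetLowering(const TargetMachine &TM, const TargetRegisterClass &GPR32,
                      const TargetRegisterInfo &TRI)
        : TargetLowering(TM) {
      addRegisterClass(MVT::i32, &GPR32);               // i32 lives in GPR32
      setOperationAction(ISD::SDIV, MVT::i32, Expand);  // no native divide
      setOperationAction(ISD::BR_CC, MVT::i32, Custom); // lowered by hand
      setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);
      setTruncStoreAction(MVT::i32, MVT::i8, Legal);
      setBooleanContents(ZeroOrOneBooleanContent);
      computeRegisterProperties(&TRI); // derive remaining type properties
    }
  };
  } // namespace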
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
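A sketch of how makeLibCall and the RTLIB helpers further down this page fit together; the function is a hypothetical standalone helper, not one defined in this file.

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLowering.h"
  using namespace llvm;

  // Expand an FP_TO_SINT node through the runtime library when no native
  // instruction exists. TLI and Op are assumed to come from the caller.
  static SDValue lowerFPToSIViaLibcall(const TargetLowering &TLI, SDValue Op,
                                       SelectionDAG &DAG) {
    SDLoc DL(Op);
    EVT RetVT = Op.getValueType();
    SDValue Src = Op.getOperand(0);
    // Pick the right __fixsfsi/__fixdfdi/... flavour for the two types.
    RTLIB::Libcall LC = RTLIB::getFPTOSINT(Src.getValueType(), RetVT);
    TargetLowering::MakeLibCallOptions CallOptions;
    // makeLibCall returns {result, chain}; the conversion needs the result.
    return TLI.makeLibCall(DAG, LC, RetVT, Src, CallOptions, DL).first;
  }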
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:243
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1346
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1235
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:886
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1195
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ STRICT_LROUND
Definition: ISDOpcodes.h:432
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:587
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1218
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ STRICT_LRINT
Definition: ISDOpcodes.h:434
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:592
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:430
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:637
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:435
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:613
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ STRICT_LLROUND
Definition: ISDOpcodes.h:433
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:581
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1413
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:560
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
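The PatternMatch combinators above compose into declarative IR matchers. A small sketch, recognising the common insertelement+shufflevector splat idiom; the helper name is invented for illustration.

  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  // True if V is shufflevector(insertelement(undef, X, 0), undef, mask),
  // i.e. the classic IR splat idiom (the mask itself is not inspected here).
  static bool looksLikeSplat(Value *V) {
    Value *Scalar;
    return match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar),
                                          m_ZeroInt()),
                              m_Undef()));
  }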
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1509
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
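Worked values for the MathExtras helpers referenced in this list (isPowerOf2_64, Log2_64, divideCeil, isMask_64, SignExtend64); the enclosing function is only a container for the examples.

  #include "llvm/Support/MathExtras.h"
  #include <cstdint>
  using namespace llvm;

  static void mathExtrasExamples() {
    bool P = isPowerOf2_64(4096);        // true
    unsigned L = Log2_64(4096);          // 12
    uint64_t C = divideCeil(10, 4);      // 3
    bool M = isMask_64(0x00FF);          // true: a contiguous low mask
    int64_t S = SignExtend64<12>(0xFFF); // -1: sign-extend a 12-bit field
    (void)P; (void)L; (void)C; (void)M; (void)S;
  }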
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
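A short sketch of the EVT queries listed above; Ctx stands in for whatever LLVMContext the caller already holds, and the concrete types are arbitrary.

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  static void evtExamples(LLVMContext &Ctx) {
    EVT I17 = EVT::getIntegerVT(Ctx, 17);       // no matching MVT exists
    EVT V4I32 = EVT::getVectorVT(Ctx, MVT::i32, 4);
    bool Simple = I17.isSimple();               // false: i17 is an extended EVT
    EVT Rounded = I17.getRoundIntegerType(Ctx); // i32
    unsigned NumElts = V4I32.getVectorNumElements(); // 4
    EVT Elt = V4I32.getVectorElementType();     // i32
    (void)Simple; (void)Rounded; (void)NumElts; (void)Elt;
  }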
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
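In this file KnownBits values normally come from SelectionDAG::computeKnownBits (listed earlier on this page); the sketch below builds one by hand just to show what the queries above report. The concrete bit patterns are arbitrary.

  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  static void knownBitsExample() {
    KnownBits Known(8);              // an 8-bit value, nothing known yet
    Known.Zero = APInt(8, 0xF0);     // the top nibble is known to be 0
    Known.One = APInt(8, 0x01);      // bit 0 is known to be 1
    unsigned MaxLZ = Known.countMaxLeadingZeros();   // 7: only bit 0 must be set
    unsigned MaxActive = Known.countMaxActiveBits(); // 4: fits in 4 bits
    KnownBits Wide = Known.zext(16); // the new high bits become known zero
    (void)MaxLZ; (void)MaxActive; (void)Wide;
  }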
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
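A tiny sketch for the Align/MaybeAlign helpers (see also Log2(Align) earlier in this list); the values are arbitrary.

  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  static void alignmentExample() {
    Align A(16);                       // must be a non-zero power of two
    unsigned Shift = Log2(A);          // 4
    MaybeAlign MA;                     // alignment not known
    Align Effective = MA.valueOrOne(); // treat "unknown" as 1
    (void)Shift; (void)Effective;
  }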
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)