1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
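// (Illustrative note: this is why ISD::BSWAP on XLenVT is marked Legal below
// when one of these extensions is present; the isel patterns then select rev8
// or the vendor equivalent.)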
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32-bit case is efficient on 64-bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (Subtarget.hasVInstructions()) {
667
669 if (RV64LegalI32 && Subtarget.is64Bit())
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
702
703 static const unsigned FloatingPointVPOps[] = {
704 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
705 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
706 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
707 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
708 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
709 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
710 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
711 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
712 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
713 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
717 ISD::VP_REDUCE_FMAXIMUM};
718
719 static const unsigned IntegerVecReduceOps[] = {
723
724 static const unsigned FloatingPointVecReduceOps[] = {
727
728 if (!Subtarget.is64Bit()) {
729 // We must custom-lower certain vXi64 operations on RV32 due to the vector
730 // element type being illegal.
732 MVT::i64, Custom);
733
734 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
735
736 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
737 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
738 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
739 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
740 MVT::i64, Custom);
741 }
742
743 for (MVT VT : BoolVecVTs) {
744 if (!isTypeLegal(VT))
745 continue;
746
748
749 // Mask VTs are custom-expanded into a series of standard nodes
753 VT, Custom);
754
756 Custom);
757
760 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
761 Expand);
762
763 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
764 Custom);
765
766 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
767
770 Custom);
771
773 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
774 Custom);
775
776 // RVV has native int->float & float->int conversions where the
777 // element type sizes are within one power-of-two of each other. Any
778 // wider distances between type sizes have to be lowered as sequences
779 // which progressively narrow the gap in stages.
784 VT, Custom);
786 Custom);
787
788 // Expand all extending loads to types larger than this, and truncating
789 // stores from types larger than this.
791 setTruncStoreAction(VT, OtherVT, Expand);
793 OtherVT, Expand);
794 }
795
796 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
797 ISD::VP_TRUNCATE, ISD::VP_SETCC},
798 VT, Custom);
799
802
804
805 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
806 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
807
810 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
811 }
812
813 for (MVT VT : IntVecVTs) {
814 if (!isTypeLegal(VT))
815 continue;
816
819
820 // Vectors implement MULHS/MULHU.
822
823 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
824 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
826
828 Legal);
829
831
832 // Custom-lower extensions and truncations from/to mask types.
834 VT, Custom);
835
836 // RVV has native int->float & float->int conversions where the
837 // element type sizes are within one power-of-two of each other. Any
838 // wider distances between type sizes have to be lowered as sequences
839 // which progressively narrow the gap in stages.
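// For example (illustrative): a nxv2i8 -> nxv2f64 conversion is more than one
// power-of-two apart in element size, so it is lowered in stages, e.g. first
// extending nxv2i8 to nxv2i32 and then converting nxv2i32 to nxv2f64.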
844 VT, Custom);
846 Custom);
849 VT, Legal);
850
851 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
852 // nodes which truncate by one power of two at a time.
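// For example (illustrative): truncating nxv2i32 to nxv2i8 becomes two such
// steps, nxv2i32 -> nxv2i16 -> nxv2i8, each typically selecting a vnsrl with a
// zero shift amount.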
854
855 // Custom-lower insert/extract operations to simplify patterns.
857 Custom);
858
859 // Custom-lower reduction operations to set up the corresponding custom
860 // nodes' operands.
861 setOperationAction(IntegerVecReduceOps, VT, Custom);
862
863 setOperationAction(IntegerVPOps, VT, Custom);
864
866
868 VT, Custom);
869
871 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
872 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
873 VT, Custom);
874
877 VT, Custom);
878
881
883
885 setTruncStoreAction(VT, OtherVT, Expand);
887 OtherVT, Expand);
888 }
889
892
893 // Splice
895
896 if (Subtarget.hasStdExtZvkb()) {
898 setOperationAction(ISD::VP_BSWAP, VT, Custom);
899 } else {
900 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
902 }
903
904 if (Subtarget.hasStdExtZvbb()) {
906 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
907 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
908 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
909 VT, Custom);
910 } else {
911 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Expand);
916
917 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in the
918 // range of f32.
919 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
920 if (isTypeLegal(FloatVT)) {
922 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
923 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
924 VT, Custom);
925 }
926 }
927 }
928
929 // Expand various CCs to best match the RVV ISA, which natively supports UNE
930 // but no other unordered comparisons, and supports all ordered comparisons
931 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
932 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
933 // and we pattern-match those back to the "original", swapping operands once
934 // more. This way we catch both operations and both "vf" and "fv" forms with
935 // fewer patterns.
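// For example (illustrative): (setcc x, y, setogt) is expanded to
// (setcc y, x, setolt), and the isel patterns recognize the swapped form, so a
// single set of LT/LE patterns covers GT/GE as well.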
936 static const ISD::CondCode VFPCCToExpand[] = {
940 };
941
942 // TODO: support more ops.
943 static const unsigned ZvfhminPromoteOps[] = {
951
952 // TODO: support more vp ops.
953 static const unsigned ZvfhminPromoteVPOps[] = {
954 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
955 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
956 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
957 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
958 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
959 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
960 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
961 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
962 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
963
964 // Sets common operation actions on RVV floating-point vector types.
965 const auto SetCommonVFPActions = [&](MVT VT) {
967 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
968 // sizes are within one power-of-two of each other. Therefore conversions
969 // between vXf16 and vXf64 must be lowered as sequences which convert via
970 // vXf32.
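// For example (illustrative): an fpext from nxv1f16 to nxv1f64 is lowered as
// nxv1f16 -> nxv1f32 -> nxv1f64 (two widening converts), and the corresponding
// fpround goes back through nxv1f32.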
973 // Custom-lower insert/extract operations to simplify patterns.
975 Custom);
976 // Expand various condition codes (explained above).
977 setCondCodeAction(VFPCCToExpand, VT, Expand);
978
981
985 VT, Custom);
986
987 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
988
989 // Expand FP operations that need libcalls.
1001
1003
1005
1007 VT, Custom);
1008
1010 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1011 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1012 VT, Custom);
1013
1016
1019 VT, Custom);
1020
1023
1025
1026 setOperationAction(FloatingPointVPOps, VT, Custom);
1027
1029 Custom);
1032 VT, Legal);
1037 VT, Custom);
1038 };
1039
1040 // Sets common extload/truncstore actions on RVV floating-point vector
1041 // types.
1042 const auto SetCommonVFPExtLoadTruncStoreActions =
1043 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1044 for (auto SmallVT : SmallerVTs) {
1045 setTruncStoreAction(VT, SmallVT, Expand);
1046 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1047 }
1048 };
1049
1050 if (Subtarget.hasVInstructionsF16()) {
1051 for (MVT VT : F16VecVTs) {
1052 if (!isTypeLegal(VT))
1053 continue;
1054 SetCommonVFPActions(VT);
1055 }
1056 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1057 for (MVT VT : F16VecVTs) {
1058 if (!isTypeLegal(VT))
1059 continue;
1062 Custom);
1063 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1064 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1065 Custom);
1068 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1069 VT, Custom);
1072 VT, Custom);
1073 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1075 // load/store
1077
1078 // Custom split nxv32f16 since nxv32f32 is not legal.
1079 if (VT == MVT::nxv32f16) {
1080 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1081 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1082 continue;
1083 }
1084 // Add more promote ops.
1085 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1086 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1087 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1088 }
1089 }
1090
1091 // TODO: Could we merge some code with zvfhmin?
1092 if (Subtarget.hasVInstructionsBF16()) {
1093 for (MVT VT : BF16VecVTs) {
1094 if (!isTypeLegal(VT))
1095 continue;
1097 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1099 Custom);
1102 VT, Custom);
1104 // TODO: Promote to fp32.
1105 }
1106 }
1107
1108 if (Subtarget.hasVInstructionsF32()) {
1109 for (MVT VT : F32VecVTs) {
1110 if (!isTypeLegal(VT))
1111 continue;
1112 SetCommonVFPActions(VT);
1113 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1114 }
1115 }
1116
1117 if (Subtarget.hasVInstructionsF64()) {
1118 for (MVT VT : F64VecVTs) {
1119 if (!isTypeLegal(VT))
1120 continue;
1121 SetCommonVFPActions(VT);
1122 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1123 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1124 }
1125 }
1126
1127 if (Subtarget.useRVVForFixedLengthVectors()) {
1129 if (!useRVVForFixedLengthVectorVT(VT))
1130 continue;
1131
1132 // By default everything must be expanded.
1133 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1136 setTruncStoreAction(VT, OtherVT, Expand);
1138 OtherVT, Expand);
1139 }
1140
1141 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1142 // expansion to a build_vector of 0s.
1144
1145 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1147 Custom);
1148
1150 Custom);
1151
1153 VT, Custom);
1154
1156
1158
1160
1162
1164
1166
1169 Custom);
1170
1172 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1173 Custom);
1174
1176 {
1185 },
1186 VT, Custom);
1188 Custom);
1189
1191
1192 // Operations below are different for between masks and other vectors.
1193 if (VT.getVectorElementType() == MVT::i1) {
1194 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1195 ISD::OR, ISD::XOR},
1196 VT, Custom);
1197
1198 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1199 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1200 VT, Custom);
1201
1202 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1203 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1204 continue;
1205 }
1206
1207 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1208 // it before type legalization for i64 vectors on RV32. It will then be
1209 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1210 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1211 // improvements first.
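// For example (illustrative): on RV32, splatting an i64 value into a v2i64
// fixed vector first becomes SPLAT_VECTOR here; type legalization then splits
// the scalar into two i32 halves as SPLAT_VECTOR_PARTS, which is handled by
// the custom lowering.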
1212 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1215 }
1216
1219
1220 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1221 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1222 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1223 ISD::VP_SCATTER},
1224 VT, Custom);
1225
1229 VT, Custom);
1230
1233
1235
1236 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1237 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1239
1242 VT, Custom);
1243
1246
1249
1250 // Custom-lower reduction operations to set up the corresponding custom
1251 // nodes' operands.
1255 VT, Custom);
1256
1257 setOperationAction(IntegerVPOps, VT, Custom);
1258
1259 if (Subtarget.hasStdExtZvkb())
1261
1262 if (Subtarget.hasStdExtZvbb()) {
1265 VT, Custom);
1266 } else {
1267 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in the
1268 // range of f32.
1269 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1270 if (isTypeLegal(FloatVT))
1273 Custom);
1274 }
1275 }
1276
1278 // There are no extending loads or truncating stores.
1279 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1280 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1281 setTruncStoreAction(VT, InnerVT, Expand);
1282 }
1283
1284 if (!useRVVForFixedLengthVectorVT(VT))
1285 continue;
1286
1287 // By default everything must be expanded.
1288 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1290
1291 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1292 // expansion to a build_vector of 0s.
1294
1295 if (VT.getVectorElementType() == MVT::f16 &&
1296 !Subtarget.hasVInstructionsF16()) {
1299 Custom);
1300 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1302 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1303 Custom);
1305 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1306 VT, Custom);
1309 VT, Custom);
1312 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313 // Don't promote f16 vector operations to f32 if f32 vector type is
1314 // not legal.
1315 // TODO: could split the f16 vector into two vectors and do promotion.
1316 if (!isTypeLegal(F32VecVT))
1317 continue;
1318 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1319 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1320 continue;
1321 }
1322
1323 if (VT.getVectorElementType() == MVT::bf16) {
1325 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1327 Custom);
1330 VT, Custom);
1332 // TODO: Promote to fp32.
1333 continue;
1334 }
1335
1336 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1338 Custom);
1339
1343 VT, Custom);
1344
1347 VT, Custom);
1348
1349 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1350 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1352 ISD::VP_SCATTER},
1353 VT, Custom);
1354
1359 VT, Custom);
1360
1362
1365 VT, Custom);
1366
1367 setCondCodeAction(VFPCCToExpand, VT, Expand);
1368
1372
1374
1375 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1376
1377 setOperationAction(FloatingPointVPOps, VT, Custom);
1378
1380 Custom);
1387 VT, Custom);
1388 }
1389
1390 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1391 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1392 Custom);
1393 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1395 if (Subtarget.hasStdExtFOrZfinx())
1397 if (Subtarget.hasStdExtDOrZdinx())
1399 }
1400 }
1401
1402 if (Subtarget.hasStdExtA()) {
1404 if (RV64LegalI32 && Subtarget.is64Bit())
1406 }
1407
1408 if (Subtarget.hasForcedAtomics()) {
1409 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1415 XLenVT, LibCall);
1416 }
1417
1418 if (Subtarget.hasVendorXTHeadMemIdx()) {
1419 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1420 setIndexedLoadAction(im, MVT::i8, Legal);
1421 setIndexedStoreAction(im, MVT::i8, Legal);
1422 setIndexedLoadAction(im, MVT::i16, Legal);
1423 setIndexedStoreAction(im, MVT::i16, Legal);
1424 setIndexedLoadAction(im, MVT::i32, Legal);
1425 setIndexedStoreAction(im, MVT::i32, Legal);
1426
1427 if (Subtarget.is64Bit()) {
1428 setIndexedLoadAction(im, MVT::i64, Legal);
1429 setIndexedStoreAction(im, MVT::i64, Legal);
1430 }
1431 }
1432 }
1433
1434 // Function alignments.
1435 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1436 setMinFunctionAlignment(FunctionAlignment);
1437 // Set preferred alignments.
1440
1444 if (Subtarget.is64Bit())
1446
1447 if (Subtarget.hasStdExtFOrZfinx())
1449
1450 if (Subtarget.hasStdExtZbb())
1452
1453 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1455
1456 if (Subtarget.hasStdExtZbkb())
1458 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1460 if (Subtarget.hasStdExtFOrZfinx())
1463 if (Subtarget.hasVInstructions())
1465 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1468 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1471 if (Subtarget.hasVendorXTHeadMemPair())
1473 if (Subtarget.useRVVForFixedLengthVectors())
1475
1476 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1477 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1478
1479 // Disable strict node mutation.
1480 IsStrictFPEnabled = true;
1481}
1482
1484 LLVMContext &Context,
1485 EVT VT) const {
1486 if (!VT.isVector())
1487 return getPointerTy(DL);
1488 if (Subtarget.hasVInstructions() &&
1489 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1490 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1492}
1493
1494MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1495 return Subtarget.getXLenVT();
1496}
1497
1498// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1499bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1500 unsigned VF,
1501 bool IsScalable) const {
1502 if (!Subtarget.hasVInstructions())
1503 return true;
1504
1505 if (!IsScalable)
1506 return true;
1507
1508 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1509 return true;
1510
1511 // Don't allow VF=1 if those types aren't legal.
1512 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1513 return true;
1514
1515 // VLEN=32 support is incomplete.
1516 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1517 return true;
1518
1519 // The maximum VF is for the smallest element width with LMUL=8.
1520 // VF must be a power of 2.
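// Illustrative arithmetic: with RVVBitsPerBlock = 64, the smallest element
// width (8 bits) gives 64 / 8 = 8 elements per register, so LMUL=8 yields
// MaxVF = 8 * 8 = 64.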
1521 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1522 return VF > MaxVF || !isPowerOf2_32(VF);
1523}
1524
1526 return !Subtarget.hasVInstructions() ||
1527 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1528}
1529
1531 const CallInst &I,
1532 MachineFunction &MF,
1533 unsigned Intrinsic) const {
1534 auto &DL = I.getModule()->getDataLayout();
1535
1536 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1537 bool IsUnitStrided, bool UsePtrVal = false) {
1539 // We can't use ptrVal if the intrinsic can access memory before the
1540 // pointer. This means we can't use it for strided or indexed intrinsics.
1541 if (UsePtrVal)
1542 Info.ptrVal = I.getArgOperand(PtrOp);
1543 else
1544 Info.fallbackAddressSpace =
1545 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1546 Type *MemTy;
1547 if (IsStore) {
1548 // Store value is the first operand.
1549 MemTy = I.getArgOperand(0)->getType();
1550 } else {
1551 // Use the return type. If it's a segment load, the return type is a struct.
1552 MemTy = I.getType();
1553 if (MemTy->isStructTy())
1554 MemTy = MemTy->getStructElementType(0);
1555 }
1556 if (!IsUnitStrided)
1557 MemTy = MemTy->getScalarType();
1558
1559 Info.memVT = getValueType(DL, MemTy);
1560 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1562 Info.flags |=
1564 return true;
1565 };
1566
1567 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1569
1571 switch (Intrinsic) {
1572 default:
1573 return false;
1574 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1575 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1576 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1577 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1578 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1579 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1580 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1581 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1582 case Intrinsic::riscv_masked_cmpxchg_i32:
1584 Info.memVT = MVT::i32;
1585 Info.ptrVal = I.getArgOperand(0);
1586 Info.offset = 0;
1587 Info.align = Align(4);
1590 return true;
1591 case Intrinsic::riscv_masked_strided_load:
1592 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1593 /*IsUnitStrided*/ false);
1594 case Intrinsic::riscv_masked_strided_store:
1595 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1596 /*IsUnitStrided*/ false);
1597 case Intrinsic::riscv_seg2_load:
1598 case Intrinsic::riscv_seg3_load:
1599 case Intrinsic::riscv_seg4_load:
1600 case Intrinsic::riscv_seg5_load:
1601 case Intrinsic::riscv_seg6_load:
1602 case Intrinsic::riscv_seg7_load:
1603 case Intrinsic::riscv_seg8_load:
1604 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1605 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1606 case Intrinsic::riscv_seg2_store:
1607 case Intrinsic::riscv_seg3_store:
1608 case Intrinsic::riscv_seg4_store:
1609 case Intrinsic::riscv_seg5_store:
1610 case Intrinsic::riscv_seg6_store:
1611 case Intrinsic::riscv_seg7_store:
1612 case Intrinsic::riscv_seg8_store:
1613 // Operands are (vec, ..., vec, ptr, vl)
1614 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1615 /*IsStore*/ true,
1616 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1617 case Intrinsic::riscv_vle:
1618 case Intrinsic::riscv_vle_mask:
1619 case Intrinsic::riscv_vleff:
1620 case Intrinsic::riscv_vleff_mask:
1621 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1622 /*IsStore*/ false,
1623 /*IsUnitStrided*/ true,
1624 /*UsePtrVal*/ true);
1625 case Intrinsic::riscv_vse:
1626 case Intrinsic::riscv_vse_mask:
1627 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1628 /*IsStore*/ true,
1629 /*IsUnitStrided*/ true,
1630 /*UsePtrVal*/ true);
1631 case Intrinsic::riscv_vlse:
1632 case Intrinsic::riscv_vlse_mask:
1633 case Intrinsic::riscv_vloxei:
1634 case Intrinsic::riscv_vloxei_mask:
1635 case Intrinsic::riscv_vluxei:
1636 case Intrinsic::riscv_vluxei_mask:
1637 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1638 /*IsStore*/ false,
1639 /*IsUnitStrided*/ false);
1640 case Intrinsic::riscv_vsse:
1641 case Intrinsic::riscv_vsse_mask:
1642 case Intrinsic::riscv_vsoxei:
1643 case Intrinsic::riscv_vsoxei_mask:
1644 case Intrinsic::riscv_vsuxei:
1645 case Intrinsic::riscv_vsuxei_mask:
1646 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1647 /*IsStore*/ true,
1648 /*IsUnitStrided*/ false);
1649 case Intrinsic::riscv_vlseg2:
1650 case Intrinsic::riscv_vlseg3:
1651 case Intrinsic::riscv_vlseg4:
1652 case Intrinsic::riscv_vlseg5:
1653 case Intrinsic::riscv_vlseg6:
1654 case Intrinsic::riscv_vlseg7:
1655 case Intrinsic::riscv_vlseg8:
1656 case Intrinsic::riscv_vlseg2ff:
1657 case Intrinsic::riscv_vlseg3ff:
1658 case Intrinsic::riscv_vlseg4ff:
1659 case Intrinsic::riscv_vlseg5ff:
1660 case Intrinsic::riscv_vlseg6ff:
1661 case Intrinsic::riscv_vlseg7ff:
1662 case Intrinsic::riscv_vlseg8ff:
1663 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1664 /*IsStore*/ false,
1665 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1666 case Intrinsic::riscv_vlseg2_mask:
1667 case Intrinsic::riscv_vlseg3_mask:
1668 case Intrinsic::riscv_vlseg4_mask:
1669 case Intrinsic::riscv_vlseg5_mask:
1670 case Intrinsic::riscv_vlseg6_mask:
1671 case Intrinsic::riscv_vlseg7_mask:
1672 case Intrinsic::riscv_vlseg8_mask:
1673 case Intrinsic::riscv_vlseg2ff_mask:
1674 case Intrinsic::riscv_vlseg3ff_mask:
1675 case Intrinsic::riscv_vlseg4ff_mask:
1676 case Intrinsic::riscv_vlseg5ff_mask:
1677 case Intrinsic::riscv_vlseg6ff_mask:
1678 case Intrinsic::riscv_vlseg7ff_mask:
1679 case Intrinsic::riscv_vlseg8ff_mask:
1680 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1681 /*IsStore*/ false,
1682 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1683 case Intrinsic::riscv_vlsseg2:
1684 case Intrinsic::riscv_vlsseg3:
1685 case Intrinsic::riscv_vlsseg4:
1686 case Intrinsic::riscv_vlsseg5:
1687 case Intrinsic::riscv_vlsseg6:
1688 case Intrinsic::riscv_vlsseg7:
1689 case Intrinsic::riscv_vlsseg8:
1690 case Intrinsic::riscv_vloxseg2:
1691 case Intrinsic::riscv_vloxseg3:
1692 case Intrinsic::riscv_vloxseg4:
1693 case Intrinsic::riscv_vloxseg5:
1694 case Intrinsic::riscv_vloxseg6:
1695 case Intrinsic::riscv_vloxseg7:
1696 case Intrinsic::riscv_vloxseg8:
1697 case Intrinsic::riscv_vluxseg2:
1698 case Intrinsic::riscv_vluxseg3:
1699 case Intrinsic::riscv_vluxseg4:
1700 case Intrinsic::riscv_vluxseg5:
1701 case Intrinsic::riscv_vluxseg6:
1702 case Intrinsic::riscv_vluxseg7:
1703 case Intrinsic::riscv_vluxseg8:
1704 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1705 /*IsStore*/ false,
1706 /*IsUnitStrided*/ false);
1707 case Intrinsic::riscv_vlsseg2_mask:
1708 case Intrinsic::riscv_vlsseg3_mask:
1709 case Intrinsic::riscv_vlsseg4_mask:
1710 case Intrinsic::riscv_vlsseg5_mask:
1711 case Intrinsic::riscv_vlsseg6_mask:
1712 case Intrinsic::riscv_vlsseg7_mask:
1713 case Intrinsic::riscv_vlsseg8_mask:
1714 case Intrinsic::riscv_vloxseg2_mask:
1715 case Intrinsic::riscv_vloxseg3_mask:
1716 case Intrinsic::riscv_vloxseg4_mask:
1717 case Intrinsic::riscv_vloxseg5_mask:
1718 case Intrinsic::riscv_vloxseg6_mask:
1719 case Intrinsic::riscv_vloxseg7_mask:
1720 case Intrinsic::riscv_vloxseg8_mask:
1721 case Intrinsic::riscv_vluxseg2_mask:
1722 case Intrinsic::riscv_vluxseg3_mask:
1723 case Intrinsic::riscv_vluxseg4_mask:
1724 case Intrinsic::riscv_vluxseg5_mask:
1725 case Intrinsic::riscv_vluxseg6_mask:
1726 case Intrinsic::riscv_vluxseg7_mask:
1727 case Intrinsic::riscv_vluxseg8_mask:
1728 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1729 /*IsStore*/ false,
1730 /*IsUnitStrided*/ false);
1731 case Intrinsic::riscv_vsseg2:
1732 case Intrinsic::riscv_vsseg3:
1733 case Intrinsic::riscv_vsseg4:
1734 case Intrinsic::riscv_vsseg5:
1735 case Intrinsic::riscv_vsseg6:
1736 case Intrinsic::riscv_vsseg7:
1737 case Intrinsic::riscv_vsseg8:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1739 /*IsStore*/ true,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vsseg2_mask:
1742 case Intrinsic::riscv_vsseg3_mask:
1743 case Intrinsic::riscv_vsseg4_mask:
1744 case Intrinsic::riscv_vsseg5_mask:
1745 case Intrinsic::riscv_vsseg6_mask:
1746 case Intrinsic::riscv_vsseg7_mask:
1747 case Intrinsic::riscv_vsseg8_mask:
1748 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1749 /*IsStore*/ true,
1750 /*IsUnitStrided*/ false);
1751 case Intrinsic::riscv_vssseg2:
1752 case Intrinsic::riscv_vssseg3:
1753 case Intrinsic::riscv_vssseg4:
1754 case Intrinsic::riscv_vssseg5:
1755 case Intrinsic::riscv_vssseg6:
1756 case Intrinsic::riscv_vssseg7:
1757 case Intrinsic::riscv_vssseg8:
1758 case Intrinsic::riscv_vsoxseg2:
1759 case Intrinsic::riscv_vsoxseg3:
1760 case Intrinsic::riscv_vsoxseg4:
1761 case Intrinsic::riscv_vsoxseg5:
1762 case Intrinsic::riscv_vsoxseg6:
1763 case Intrinsic::riscv_vsoxseg7:
1764 case Intrinsic::riscv_vsoxseg8:
1765 case Intrinsic::riscv_vsuxseg2:
1766 case Intrinsic::riscv_vsuxseg3:
1767 case Intrinsic::riscv_vsuxseg4:
1768 case Intrinsic::riscv_vsuxseg5:
1769 case Intrinsic::riscv_vsuxseg6:
1770 case Intrinsic::riscv_vsuxseg7:
1771 case Intrinsic::riscv_vsuxseg8:
1772 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1773 /*IsStore*/ true,
1774 /*IsUnitStrided*/ false);
1775 case Intrinsic::riscv_vssseg2_mask:
1776 case Intrinsic::riscv_vssseg3_mask:
1777 case Intrinsic::riscv_vssseg4_mask:
1778 case Intrinsic::riscv_vssseg5_mask:
1779 case Intrinsic::riscv_vssseg6_mask:
1780 case Intrinsic::riscv_vssseg7_mask:
1781 case Intrinsic::riscv_vssseg8_mask:
1782 case Intrinsic::riscv_vsoxseg2_mask:
1783 case Intrinsic::riscv_vsoxseg3_mask:
1784 case Intrinsic::riscv_vsoxseg4_mask:
1785 case Intrinsic::riscv_vsoxseg5_mask:
1786 case Intrinsic::riscv_vsoxseg6_mask:
1787 case Intrinsic::riscv_vsoxseg7_mask:
1788 case Intrinsic::riscv_vsoxseg8_mask:
1789 case Intrinsic::riscv_vsuxseg2_mask:
1790 case Intrinsic::riscv_vsuxseg3_mask:
1791 case Intrinsic::riscv_vsuxseg4_mask:
1792 case Intrinsic::riscv_vsuxseg5_mask:
1793 case Intrinsic::riscv_vsuxseg6_mask:
1794 case Intrinsic::riscv_vsuxseg7_mask:
1795 case Intrinsic::riscv_vsuxseg8_mask:
1796 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1797 /*IsStore*/ true,
1798 /*IsUnitStrided*/ false);
1799 }
1800}
1801
1803 const AddrMode &AM, Type *Ty,
1804 unsigned AS,
1805 Instruction *I) const {
1806 // No global is ever allowed as a base.
1807 if (AM.BaseGV)
1808 return false;
1809
1810 // RVV instructions only support register addressing.
1811 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1812 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1813
1814 // Require a 12-bit signed offset.
1815 if (!isInt<12>(AM.BaseOffs))
1816 return false;
1817
1818 switch (AM.Scale) {
1819 case 0: // "r+i" or just "i", depending on HasBaseReg.
1820 break;
1821 case 1:
1822 if (!AM.HasBaseReg) // allow "r+i".
1823 break;
1824 return false; // disallow "r+r" or "r+r+i".
1825 default:
1826 return false;
1827 }
1828
1829 return true;
1830}
1831
1833 return isInt<12>(Imm);
1834}
1835
1837 return isInt<12>(Imm);
1838}
1839
1840// On RV32, 64-bit integers are split into their high and low parts and held
1841// in two different registers, so the trunc is free since the low register can
1842// just be used.
1843// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1844// isTruncateFree?
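// For example (illustrative): on RV32, `trunc i64 %x to i32` simply reuses the
// register already holding the low 32 bits of %x, so no instruction is needed.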
1846 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1847 return false;
1848 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1849 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1850 return (SrcBits == 64 && DestBits == 32);
1851}
1852
1854 // We consider i64->i32 free on RV64 since we have good selection of W
1855 // instructions that make promoting operations back to i64 free in many cases.
1856 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1857 !DstVT.isInteger())
1858 return false;
1859 unsigned SrcBits = SrcVT.getSizeInBits();
1860 unsigned DestBits = DstVT.getSizeInBits();
1861 return (SrcBits == 64 && DestBits == 32);
1862}
1863
1865 // Zexts are free if they can be combined with a load.
1866 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1867 // poorly with type legalization of compares preferring sext.
1868 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1869 EVT MemVT = LD->getMemoryVT();
1870 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1871 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1872 LD->getExtensionType() == ISD::ZEXTLOAD))
1873 return true;
1874 }
1875
1876 return TargetLowering::isZExtFree(Val, VT2);
1877}
1878
1880 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1881}
1882
1884 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1885}
1886
1888 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1889}
1890
1892 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1893 Subtarget.hasVendorXCVbitmanip();
1894}
1895
1897 const Instruction &AndI) const {
1898 // We expect to be able to match a bit extraction instruction if the Zbs
1899 // extension is supported and the mask is a power of two. However, we
1900 // conservatively return false if the mask would fit in an ANDI instruction,
1901 // on the basis that it's possible the sinking+duplication of the AND in
1902 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1903 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1904 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1905 return false;
1906 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1907 if (!Mask)
1908 return false;
1909 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1910}
1911
1913 EVT VT = Y.getValueType();
1914
1915 // FIXME: Support vectors once we have tests.
1916 if (VT.isVector())
1917 return false;
1918
1919 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1920 !isa<ConstantSDNode>(Y);
1921}
1922
1924 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1925 if (Subtarget.hasStdExtZbs())
1926 return X.getValueType().isScalarInteger();
1927 auto *C = dyn_cast<ConstantSDNode>(Y);
1928 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1929 if (Subtarget.hasVendorXTHeadBs())
1930 return C != nullptr;
1931 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
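// (Illustrative note: with Y <= 10 the mask (1 << Y) is at most 1024, which
// fits in ANDI's 12-bit signed immediate; 1 << 11 = 2048 would not.)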
1932 return C && C->getAPIntValue().ule(10);
1933}
1934
1936 EVT VT) const {
1937 // Only enable for rvv.
1938 if (!VT.isVector() || !Subtarget.hasVInstructions())
1939 return false;
1940
1941 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1942 return false;
1943
1944 return true;
1945}
1946
1948 Type *Ty) const {
1949 assert(Ty->isIntegerTy());
1950
1951 unsigned BitSize = Ty->getIntegerBitWidth();
1952 if (BitSize > Subtarget.getXLen())
1953 return false;
1954
1955 // Fast path, assume 32-bit immediates are cheap.
1956 int64_t Val = Imm.getSExtValue();
1957 if (isInt<32>(Val))
1958 return true;
1959
1960 // A constant pool entry may be more aligned than the load we're trying to
1961 // replace. If we don't support unaligned scalar mem, prefer the constant
1962 // pool.
1963 // TODO: Can the caller pass down the alignment?
1964 if (!Subtarget.enableUnalignedScalarMem())
1965 return true;
1966
1967 // Prefer to keep the load if it would require many instructions.
1968 // This uses the same threshold we use for constant pools but doesn't
1969 // check useConstantPoolForLargeInts.
1970 // TODO: Should we keep the load only when we're definitely going to emit a
1971 // constant pool?
1972
1974 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1975}
1976
1980 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1981 SelectionDAG &DAG) const {
1982 // One interesting pattern that we'd want to form is 'bit extract':
1983 // ((1 >> Y) & 1) ==/!= 0
1984 // But we also need to be careful not to try to reverse that fold.
1985
1986 // Is this '((1 >> Y) & 1)'?
1987 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1988 return false; // Keep the 'bit extract' pattern.
1989
1990 // Will this be '((1 >> Y) & 1)' after the transform?
1991 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1992 return true; // Do form the 'bit extract' pattern.
1993
1994 // If 'X' is a constant, and we transform, then we will immediately
1995 // try to undo the fold, thus causing endless combine loop.
1996 // So only do the transform if X is not a constant. This matches the default
1997 // implementation of this function.
1998 return !XC;
1999}
2000
2001bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2002 switch (Opcode) {
2003 case Instruction::Add:
2004 case Instruction::Sub:
2005 case Instruction::Mul:
2006 case Instruction::And:
2007 case Instruction::Or:
2008 case Instruction::Xor:
2009 case Instruction::FAdd:
2010 case Instruction::FSub:
2011 case Instruction::FMul:
2012 case Instruction::FDiv:
2013 case Instruction::ICmp:
2014 case Instruction::FCmp:
2015 return true;
2016 case Instruction::Shl:
2017 case Instruction::LShr:
2018 case Instruction::AShr:
2019 case Instruction::UDiv:
2020 case Instruction::SDiv:
2021 case Instruction::URem:
2022 case Instruction::SRem:
2023 case Instruction::Select:
2024 return Operand == 1;
2025 default:
2026 return false;
2027 }
2028}
2029
2030
2032 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2033 return false;
2034
2035 if (canSplatOperand(I->getOpcode(), Operand))
2036 return true;
2037
2038 auto *II = dyn_cast<IntrinsicInst>(I);
2039 if (!II)
2040 return false;
2041
2042 switch (II->getIntrinsicID()) {
2043 case Intrinsic::fma:
2044 case Intrinsic::vp_fma:
2045 return Operand == 0 || Operand == 1;
2046 case Intrinsic::vp_shl:
2047 case Intrinsic::vp_lshr:
2048 case Intrinsic::vp_ashr:
2049 case Intrinsic::vp_udiv:
2050 case Intrinsic::vp_sdiv:
2051 case Intrinsic::vp_urem:
2052 case Intrinsic::vp_srem:
2053 case Intrinsic::ssub_sat:
2054 case Intrinsic::vp_ssub_sat:
2055 case Intrinsic::usub_sat:
2056 case Intrinsic::vp_usub_sat:
2057 return Operand == 1;
2058 // These intrinsics are commutative.
2059 case Intrinsic::vp_add:
2060 case Intrinsic::vp_mul:
2061 case Intrinsic::vp_and:
2062 case Intrinsic::vp_or:
2063 case Intrinsic::vp_xor:
2064 case Intrinsic::vp_fadd:
2065 case Intrinsic::vp_fmul:
2066 case Intrinsic::vp_icmp:
2067 case Intrinsic::vp_fcmp:
2068 case Intrinsic::smin:
2069 case Intrinsic::vp_smin:
2070 case Intrinsic::umin:
2071 case Intrinsic::vp_umin:
2072 case Intrinsic::smax:
2073 case Intrinsic::vp_smax:
2074 case Intrinsic::umax:
2075 case Intrinsic::vp_umax:
2076 case Intrinsic::sadd_sat:
2077 case Intrinsic::vp_sadd_sat:
2078 case Intrinsic::uadd_sat:
2079 case Intrinsic::vp_uadd_sat:
2080 // These intrinsics have 'vr' versions.
2081 case Intrinsic::vp_sub:
2082 case Intrinsic::vp_fsub:
2083 case Intrinsic::vp_fdiv:
2084 return Operand == 0 || Operand == 1;
2085 default:
2086 return false;
2087 }
2088}
2089
2090/// Check if sinking \p I's operands to I's basic block is profitable, because
2091/// the operands can be folded into a target instruction, e.g.
2092/// splats of scalars can fold into vector instructions.
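/// For example (illustrative), a shufflevector splat of a scalar feeding a
/// vector add can be sunk into the add's block so that instruction selection
/// folds it into a vadd.vx, keeping the scalar in a GPR.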
2094 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2095 using namespace llvm::PatternMatch;
2096
2097 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2098 return false;
2099
2100 // Don't sink splat operands unless the target prefers to sink them. Some
2101 // targets require S2V transfer buffers and can run out of them when copying
2102 // the same value repeatedly.
2103 // FIXME: It could still be worth doing if it would improve vector register
2104 // pressure and prevent a vector spill.
2105 if (!Subtarget.sinkSplatOperands())
2106 return false;
2107
2108 for (auto OpIdx : enumerate(I->operands())) {
2109 if (!canSplatOperand(I, OpIdx.index()))
2110 continue;
2111
2112 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2113 // Make sure we are not already sinking this operand
2114 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2115 continue;
2116
2117 // We are looking for a splat that can be sunk.
2119 m_Undef(), m_ZeroMask())))
2120 continue;
2121
2122 // Don't sink i1 splats.
2123 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2124 continue;
2125
2126 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2127 // and vector registers.
2128 for (Use &U : Op->uses()) {
2129 Instruction *Insn = cast<Instruction>(U.getUser());
2130 if (!canSplatOperand(Insn, U.getOperandNo()))
2131 return false;
2132 }
2133
2134 Ops.push_back(&Op->getOperandUse(0));
2135 Ops.push_back(&OpIdx.value());
2136 }
2137 return true;
2138}
2139
2141 unsigned Opc = VecOp.getOpcode();
2142
2143 // Assume target opcodes can't be scalarized.
2144 // TODO - do we have any exceptions?
2145 if (Opc >= ISD::BUILTIN_OP_END)
2146 return false;
2147
2148 // If the vector op is not supported, try to convert to scalar.
2149 EVT VecVT = VecOp.getValueType();
2150 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2151 return true;
2152
2153 // If the vector op is supported, but the scalar op is not, the transform may
2154 // not be worthwhile.
2155 // Permit converting a vector binary operation to a scalar binary operation
2156 // that is custom lowered with an illegal type.
2157 EVT ScalarVT = VecVT.getScalarType();
2158 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2159 isOperationCustom(Opc, ScalarVT);
2160}
2161
2163 const GlobalAddressSDNode *GA) const {
2164 // In order to maximise the opportunity for common subexpression elimination,
2165 // keep a separate ADD node for the global address offset instead of folding
2166 // it in the global address node. Later peephole optimisations may choose to
2167 // fold it back in when profitable.
2168 return false;
2169}
2170
2171// Return one of the following:
2172// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2173// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2174// positive counterpart, which will be materialized from the first returned
2175// element. The second returned element indicates that an FNEG should
2176// follow.
2177// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2178std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2179 EVT VT) const {
2180 if (!Subtarget.hasStdExtZfa())
2181 return std::make_pair(-1, false);
2182
2183 bool IsSupportedVT = false;
2184 if (VT == MVT::f16) {
2185 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2186 } else if (VT == MVT::f32) {
2187 IsSupportedVT = true;
2188 } else if (VT == MVT::f64) {
2189 assert(Subtarget.hasStdExtD() && "Expect D extension");
2190 IsSupportedVT = true;
2191 }
2192
2193 if (!IsSupportedVT)
2194 return std::make_pair(-1, false);
2195
2197 if (Index < 0 && Imm.isNegative())
2198 // Try the combination of its positive counterpart + FNEG.
2199 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2200 else
2201 return std::make_pair(Index, false);
2202}
2203
2205 bool ForCodeSize) const {
2206 bool IsLegalVT = false;
2207 if (VT == MVT::f16)
2208 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2209 else if (VT == MVT::f32)
2210 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2211 else if (VT == MVT::f64)
2212 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2213 else if (VT == MVT::bf16)
2214 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2215
2216 if (!IsLegalVT)
2217 return false;
2218
2219 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2220 return true;
2221
2222 // Cannot create a 64 bit floating-point immediate value for rv32.
2223 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2224 // td can handle +0.0 or -0.0 already.
2225 // -0.0 can be created by fmv + fneg.
2226 return Imm.isZero();
2227 }
2228
2229 // Special case: fmv + fneg
2230 if (Imm.isNegZero())
2231 return true;
2232
2233 // Building an integer and then converting requires a fmv at the end of
2234 // the integer sequence.
2235 const int Cost =
2236 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2237 Subtarget);
2238 return Cost <= FPImmCost;
2239}
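// Rough worked example (assuming the default FPImmCost of 2): the f32 value
// +1.0 has the bit pattern 0x3f800000, which a single LUI can materialize, so
// the total cost is 1 (integer sequence) + 1 (fmv) = 2 <= FPImmCost and the
// immediate is reported as legal. A value needing a longer integer sequence
// would be rejected here and materialized some other way, e.g. via a constant
// pool load.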
2240
2241// TODO: This is very conservative.
2243 unsigned Index) const {
2245 return false;
2246
2247 // Only support extracting a fixed from a fixed vector for now.
2248 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2249 return false;
2250
2251 EVT EltVT = ResVT.getVectorElementType();
2252 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2253
2254 // The smallest type we can slide is i8.
2255 // TODO: We can extract index 0 from a mask vector without a slide.
2256 if (EltVT == MVT::i1)
2257 return false;
2258
2259 unsigned ResElts = ResVT.getVectorNumElements();
2260 unsigned SrcElts = SrcVT.getVectorNumElements();
2261
2262 unsigned MinVLen = Subtarget.getRealMinVLen();
2263 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2264
2265 // If we're extracting only data from the first VLEN bits of the source
2266 // then we can always do this with an m1 vslidedown.vx. Restricting the
2267 // Index ensures we can use a vslidedown.vi.
2268 // TODO: We can generalize this when the exact VLEN is known.
2269 if (Index + ResElts <= MinVLMAX && Index < 31)
2270 return true;
2271
2272 // Conservatively only handle extracting half of a vector.
2273 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2274 // a cheap extract. However, this case is important in practice for
2275 // shuffled extracts of longer vectors. How do we resolve this?
2276 if ((ResElts * 2) != SrcElts)
2277 return false;
2278
2279 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2280 // cheap.
2281 if (Index >= 32)
2282 return false;
2283
2284 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2285 // the upper half of a vector until we have more test coverage.
2286 return Index == 0 || Index == ResElts;
2287}
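// Worked example (a sketch, assuming VLEN >= 128 so MinVLMAX == 4 for i32
// elements): extracting v4i32 from v8i32 at index 0 fits entirely in the
// first VLEN bits and is cheap; index 4 is exactly the upper half
// (Index == ResElts) and is also cheap; index 2 matches neither rule and is
// reported as not cheap for now.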
2288
2291 EVT VT) const {
2292 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2293 // We might still end up using a GPR but that will be decided based on ABI.
2294 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2295 !Subtarget.hasStdExtZfhminOrZhinxmin())
2296 return MVT::f32;
2297
2299
2300 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2301 return MVT::i64;
2302
2303 return PartVT;
2304}
2305
2308 EVT VT) const {
2309 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2310 // We might still end up using a GPR but that will be decided based on ABI.
2311 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2312 !Subtarget.hasStdExtZfhminOrZhinxmin())
2313 return 1;
2314
2316}
2317
2319 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2320 unsigned &NumIntermediates, MVT &RegisterVT) const {
2322 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2323
2324 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2325 IntermediateVT = MVT::i64;
2326
2327 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2328 RegisterVT = MVT::i64;
2329
2330 return NumRegs;
2331}
2332
2333// Changes the condition code and swaps operands if necessary, so the SetCC
2334// operation matches one of the comparisons supported directly by branches
2335// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2336// with 1/-1.
2337static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2338 ISD::CondCode &CC, SelectionDAG &DAG) {
2339 // If this is a single bit test that can't be handled by ANDI, shift the
2340 // bit to be tested to the MSB and perform a signed compare with 0.
2341 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2342 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2343 isa<ConstantSDNode>(LHS.getOperand(1))) {
2344 uint64_t Mask = LHS.getConstantOperandVal(1);
2345 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2346 unsigned ShAmt = 0;
2347 if (isPowerOf2_64(Mask)) {
2349 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2350 } else {
2351 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2352 }
2353
2354 LHS = LHS.getOperand(0);
2355 if (ShAmt != 0)
2356 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2357 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2358 return;
2359 }
2360 }
2361
2362 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2363 int64_t C = RHSC->getSExtValue();
2364 switch (CC) {
2365 default: break;
2366 case ISD::SETGT:
2367 // Convert X > -1 to X >= 0.
2368 if (C == -1) {
2369 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2370 CC = ISD::SETGE;
2371 return;
2372 }
2373 break;
2374 case ISD::SETLT:
2375 // Convert X < 1 to 0 >= X.
2376 if (C == 1) {
2377 RHS = LHS;
2378 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2379 CC = ISD::SETGE;
2380 return;
2381 }
2382 break;
2383 }
2384 }
2385
2386 switch (CC) {
2387 default:
2388 break;
2389 case ISD::SETGT:
2390 case ISD::SETLE:
2391 case ISD::SETUGT:
2392 case ISD::SETULE:
2394 std::swap(LHS, RHS);
2395 break;
2396 }
2397}
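// Examples of the rewrites above (a sketch; XLEN assumed to be 64):
//   setgt X, -1  -->  setge X, 0
//   setlt X, 1   -->  setge 0, X
//   seteq (and X, 0x10000), 0  -->  a signed compare of (shl X, 47) against 0,
//                                   since 0x10000 does not fit a 12-bit ANDI.
// Codes such as SETGT/SETLE/SETUGT/SETULE are handled by swapping the
// operands (and the condition) so that a directly supported branch compare
// is used.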
2398
2400 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2401 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2402 if (VT.getVectorElementType() == MVT::i1)
2403 KnownSize *= 8;
2404
2405 switch (KnownSize) {
2406 default:
2407 llvm_unreachable("Invalid LMUL.");
2408 case 8:
2410 case 16:
2412 case 32:
2414 case 64:
2416 case 128:
2418 case 256:
2420 case 512:
2422 }
2423}
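// Example mapping (a sketch, assuming the usual RVVBitsPerBlock of 64):
// nxv1i64 and nxv2i32 have a known minimum size of 64 bits and map to LMUL_1,
// nxv4i32 (128 bits) maps to LMUL_2, and nxv8i64 (512 bits) maps to LMUL_8.
// i1 vectors are scaled up by 8 first, so nxv8i1 also maps to LMUL_1.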
2424
2426 switch (LMul) {
2427 default:
2428 llvm_unreachable("Invalid LMUL.");
2433 return RISCV::VRRegClassID;
2435 return RISCV::VRM2RegClassID;
2437 return RISCV::VRM4RegClassID;
2439 return RISCV::VRM8RegClassID;
2440 }
2441}
2442
2444 RISCVII::VLMUL LMUL = getLMUL(VT);
2445 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2446 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2447 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2448 LMUL == RISCVII::VLMUL::LMUL_1) {
2449 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2450 "Unexpected subreg numbering");
2451 return RISCV::sub_vrm1_0 + Index;
2452 }
2453 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2454 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2455 "Unexpected subreg numbering");
2456 return RISCV::sub_vrm2_0 + Index;
2457 }
2458 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2459 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2460 "Unexpected subreg numbering");
2461 return RISCV::sub_vrm4_0 + Index;
2462 }
2463 llvm_unreachable("Invalid vector type.");
2464}
2465
2467 if (VT.getVectorElementType() == MVT::i1)
2468 return RISCV::VRRegClassID;
2469 return getRegClassIDForLMUL(getLMUL(VT));
2470}
2471
2472// Attempt to decompose a subvector insert/extract between VecVT and
2473// SubVecVT via subregister indices. Returns the subregister index that
2474// can perform the subvector insert/extract with the given element index, as
2475// well as the index corresponding to any leftover subvectors that must be
2476// further inserted/extracted within the register class for SubVecVT.
2477std::pair<unsigned, unsigned>
2479 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2480 const RISCVRegisterInfo *TRI) {
2481 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2482 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2483 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2484 "Register classes not ordered");
2485 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2486 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2487 // Try to compose a subregister index that takes us from the incoming
2488 // LMUL>1 register class down to the outgoing one. At each step we half
2489 // the LMUL:
2490 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2491 // Note that this is not guaranteed to find a subregister index, such as
2492 // when we are extracting from one VR type to another.
2493 unsigned SubRegIdx = RISCV::NoSubRegister;
2494 for (const unsigned RCID :
2495 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2496 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2497 VecVT = VecVT.getHalfNumVectorElementsVT();
2498 bool IsHi =
2499 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2500 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2501 getSubregIndexByMVT(VecVT, IsHi));
2502 if (IsHi)
2503 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2504 }
2505 return {SubRegIdx, InsertExtractIdx};
2506}
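// For the example above (extracting nxv2i32 at index 12 from nxv16i32), the
// loop composes sub_vrm4_1, then sub_vrm2_1, then sub_vrm1_0 while reducing
// the element index to 0, so the returned pair is
// {sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0, 0}: the whole extract is a
// plain subregister access with no leftover slide required.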
2507
2508// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2509// stores for those types.
2510bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2511 return !Subtarget.useRVVForFixedLengthVectors() ||
2512 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2513}
2514
2516 if (!ScalarTy.isSimple())
2517 return false;
2518 switch (ScalarTy.getSimpleVT().SimpleTy) {
2519 case MVT::iPTR:
2520 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2521 case MVT::i8:
2522 case MVT::i16:
2523 case MVT::i32:
2524 return true;
2525 case MVT::i64:
2526 return Subtarget.hasVInstructionsI64();
2527 case MVT::f16:
2528 return Subtarget.hasVInstructionsF16();
2529 case MVT::f32:
2530 return Subtarget.hasVInstructionsF32();
2531 case MVT::f64:
2532 return Subtarget.hasVInstructionsF64();
2533 default:
2534 return false;
2535 }
2536}
2537
2538
2539unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2540 return NumRepeatedDivisors;
2541}
2542
2544 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2545 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2546 "Unexpected opcode");
2547 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2548 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2550 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2551 if (!II)
2552 return SDValue();
2553 return Op.getOperand(II->VLOperand + 1 + HasChain);
2554}
2555
2557 const RISCVSubtarget &Subtarget) {
2558 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2559 if (!Subtarget.useRVVForFixedLengthVectors())
2560 return false;
2561
2562 // We only support a set of vector types with a consistent maximum fixed size
2563 // across all supported vector element types to avoid legalization issues.
2564 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2565 // fixed-length vector type we support is 1024 bytes.
2566 if (VT.getFixedSizeInBits() > 1024 * 8)
2567 return false;
2568
2569 unsigned MinVLen = Subtarget.getRealMinVLen();
2570
2571 MVT EltVT = VT.getVectorElementType();
2572
2573 // Don't use RVV for vectors we cannot scalarize if required.
2574 switch (EltVT.SimpleTy) {
2575 // i1 is supported but has different rules.
2576 default:
2577 return false;
2578 case MVT::i1:
2579 // Masks can only use a single register.
2580 if (VT.getVectorNumElements() > MinVLen)
2581 return false;
2582 MinVLen /= 8;
2583 break;
2584 case MVT::i8:
2585 case MVT::i16:
2586 case MVT::i32:
2587 break;
2588 case MVT::i64:
2589 if (!Subtarget.hasVInstructionsI64())
2590 return false;
2591 break;
2592 case MVT::f16:
2593 if (!Subtarget.hasVInstructionsF16Minimal())
2594 return false;
2595 break;
2596 case MVT::bf16:
2597 if (!Subtarget.hasVInstructionsBF16())
2598 return false;
2599 break;
2600 case MVT::f32:
2601 if (!Subtarget.hasVInstructionsF32())
2602 return false;
2603 break;
2604 case MVT::f64:
2605 if (!Subtarget.hasVInstructionsF64())
2606 return false;
2607 break;
2608 }
2609
2610 // Reject elements larger than ELEN.
2611 if (EltVT.getSizeInBits() > Subtarget.getELen())
2612 return false;
2613
2614 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2615 // Don't use RVV for types that don't fit.
2616 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2617 return false;
2618
2619 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2620 // the base fixed length RVV support in place.
2621 if (!VT.isPow2VectorType())
2622 return false;
2623
2624 return true;
2625}
2626
2627bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2628 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2629}
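// Rough examples (assuming Zvl128b, i.e. MinVLen == 128, and the default
// fixed-length LMUL cap of 8): v4i32, v16i8 and v32i32 (1024 bits, LMUL 8)
// are all handled via RVV; v64i32 would need LMUL 16 and is rejected, as is
// any non-power-of-2 type such as v3i32. An i1 mask like v64i1 is fine, but
// v256i1 exceeds the single-register limit of MinVLen elements.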
2630
2631// Return the scalable RVV container type to use for the fixed-length vector type VT; the container keeps VT's element type.
2633 const RISCVSubtarget &Subtarget) {
2634 // This may be called before legal types are setup.
2635 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2636 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2637 "Expected legal fixed length vector!");
2638
2639 unsigned MinVLen = Subtarget.getRealMinVLen();
2640 unsigned MaxELen = Subtarget.getELen();
2641
2642 MVT EltVT = VT.getVectorElementType();
2643 switch (EltVT.SimpleTy) {
2644 default:
2645 llvm_unreachable("unexpected element type for RVV container");
2646 case MVT::i1:
2647 case MVT::i8:
2648 case MVT::i16:
2649 case MVT::i32:
2650 case MVT::i64:
2651 case MVT::bf16:
2652 case MVT::f16:
2653 case MVT::f32:
2654 case MVT::f64: {
2655 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2656 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2657 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2658 unsigned NumElts =
2660 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2661 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2662 return MVT::getScalableVectorVT(EltVT, NumElts);
2663 }
2664 }
2665}
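// Container examples (a sketch, assuming Zvl128b and ELEN == 64): v4i32 maps
// to nxv2i32 and v16i8 maps to nxv8i8 (each exactly one vector register at
// VLEN == 128), while v16i32 maps to nxv8i32 (LMUL 4). Very small types are
// clamped by the 8/ELEN rule, e.g. v1i8 still uses nxv1i8.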
2666
2668 const RISCVSubtarget &Subtarget) {
2670 Subtarget);
2671}
2672
2674 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2675}
2676
2677// Grow V to consume an entire RVV register.
2679 const RISCVSubtarget &Subtarget) {
2680 assert(VT.isScalableVector() &&
2681 "Expected to convert into a scalable vector!");
2682 assert(V.getValueType().isFixedLengthVector() &&
2683 "Expected a fixed length vector operand!");
2684 SDLoc DL(V);
2685 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2686 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2687}
2688
2689// Shrink V so it's just big enough to maintain a VT's worth of data.
2691 const RISCVSubtarget &Subtarget) {
2693 "Expected to convert into a fixed length vector!");
2694 assert(V.getValueType().isScalableVector() &&
2695 "Expected a scalable vector operand!");
2696 SDLoc DL(V);
2697 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2698 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2699}
2700
2701/// Return the mask type suitable for masking the provided
2702/// vector type. This is simply an i1 element type vector of the same
2703/// (possibly scalable) length.
2704static MVT getMaskTypeFor(MVT VecVT) {
2705 assert(VecVT.isVector());
2707 return MVT::getVectorVT(MVT::i1, EC);
2708}
2709
2710/// Creates an all ones mask suitable for masking a vector of type VecTy with
2711/// vector length VL.
2712static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2713 SelectionDAG &DAG) {
2714 MVT MaskVT = getMaskTypeFor(VecVT);
2715 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2716}
2717
2718static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2719 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2720 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2721 // canonicalize the representation. InsertVSETVLI will pick the immediate
2722 // encoding later if profitable.
2723 const auto [MinVLMAX, MaxVLMAX] =
2724 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2725 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2726 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2727
2728 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2729}
2730
2731static std::pair<SDValue, SDValue>
2733 const RISCVSubtarget &Subtarget) {
2734 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2735 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2736 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2737 return {Mask, VL};
2738}
2739
2740static std::pair<SDValue, SDValue>
2741getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2742 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2743 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2744 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2745 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2746 return {Mask, VL};
2747}
2748
2749// Gets the two common "VL" operands: an all-ones mask and the vector length.
2750// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2751// the vector type that the fixed-length vector is contained in. Otherwise if
2752// VecVT is scalable, then ContainerVT should be the same as VecVT.
2753static std::pair<SDValue, SDValue>
2754getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2755 const RISCVSubtarget &Subtarget) {
2756 if (VecVT.isFixedLengthVector())
2757 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2758 Subtarget);
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2761}
2762
2764 SelectionDAG &DAG) const {
2765 assert(VecVT.isScalableVector() && "Expected scalable vector");
2766 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2767 VecVT.getVectorElementCount());
2768}
2769
2770std::pair<unsigned, unsigned>
2772 const RISCVSubtarget &Subtarget) {
2773 assert(VecVT.isScalableVector() && "Expected scalable vector");
2774
2775 unsigned EltSize = VecVT.getScalarSizeInBits();
2776 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2777
2778 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2779 unsigned MaxVLMAX =
2780 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2781
2782 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2783 unsigned MinVLMAX =
2784 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2785
2786 return std::make_pair(MinVLMAX, MaxVLMAX);
2787}
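// Example (a sketch): for nxv4i32 (SEW 32, LMUL 2) on a core whose VLEN is
// known to be exactly 128, both bounds are (128 / 32) * 128 / 64 == 8, i.e.
// VLEN * LMUL / SEW. If only Zvl128b is guaranteed and the maximum VLEN is
// left unbounded, MinVLMAX stays 8 while MaxVLMAX scales with the assumed
// maximum VLEN.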
2788
2789// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2790// of either is (currently) supported. This can get us into an infinite loop
2791// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2792// as a ..., etc.
2793// Until either (or both) of these can reliably lower any node, reporting that
2794// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2795// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2796// which is not desirable.
2798 EVT VT, unsigned DefinedValues) const {
2799 return false;
2800}
2801
2803 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2804 // implementation-defined.
2805 if (!VT.isVector())
2807 unsigned DLenFactor = Subtarget.getDLenFactor();
2808 unsigned Cost;
2809 if (VT.isScalableVector()) {
2810 unsigned LMul;
2811 bool Fractional;
2812 std::tie(LMul, Fractional) =
2814 if (Fractional)
2815 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2816 else
2817 Cost = (LMul * DLenFactor);
2818 } else {
2819 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2820 }
2821 return Cost;
2822}
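// Cost sketch (assuming DLEN == VLEN / 2, i.e. DLenFactor == 2): an LMUL 4
// scalable type costs 4 * 2 = 8, an LMUL 1/2 fractional type costs
// 2 / 2 = 1, and a 256-bit fixed-length type on Zvl128b costs
// divideCeil(256, 128 / 2) = 4.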
2823
2824
2825/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2826/// is generally quadratic in the number of vregs implied by LMUL. Note that
2827/// the operands (index and possibly mask) are handled separately.
2829 return getLMULCost(VT) * getLMULCost(VT);
2830}
2831
2832/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2833/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2834/// or may track the vrgather.vv cost. It is implementation-dependent.
2836 return getLMULCost(VT);
2837}
2838
2839/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2840/// for the type VT. (This does not cover the vslide1up or vslide1down
2841/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2842/// or may track the vrgather.vv cost. It is implementation-dependent.
2844 return getLMULCost(VT);
2845}
2846
2847/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2848/// for the type VT. (This does not cover the vslide1up or vslide1down
2849/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2856 const RISCVSubtarget &Subtarget) {
2857 // RISC-V FP-to-int conversions saturate to the destination register size, but
2858 // don't produce 0 for nan. We can use a conversion instruction and fix the
2859 // nan case with a compare and a select.
2860 SDValue Src = Op.getOperand(0);
2861
2862 MVT DstVT = Op.getSimpleValueType();
2863 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2864
2865 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2866
2867 if (!DstVT.isVector()) {
2868 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2869 // the result.
2870 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2871 Src.getValueType() == MVT::bf16) {
2872 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2873 }
2874
2875 unsigned Opc;
2876 if (SatVT == DstVT)
2877 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2878 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2880 else
2881 return SDValue();
2882 // FIXME: Support other SatVTs by clamping before or after the conversion.
2883
2884 SDLoc DL(Op);
2885 SDValue FpToInt = DAG.getNode(
2886 Opc, DL, DstVT, Src,
2888
2889 if (Opc == RISCVISD::FCVT_WU_RV64)
2890 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2891
2892 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2893 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2895 }
2896
2897 // Vectors.
2898
2899 MVT DstEltVT = DstVT.getVectorElementType();
2900 MVT SrcVT = Src.getSimpleValueType();
2901 MVT SrcEltVT = SrcVT.getVectorElementType();
2902 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2903 unsigned DstEltSize = DstEltVT.getSizeInBits();
2904
2905 // Only handle saturating to the destination type.
2906 if (SatVT != DstEltVT)
2907 return SDValue();
2908
2909 // FIXME: Don't support narrowing by more than 1 step for now.
2910 if (SrcEltSize > (2 * DstEltSize))
2911 return SDValue();
2912
2913 MVT DstContainerVT = DstVT;
2914 MVT SrcContainerVT = SrcVT;
2915 if (DstVT.isFixedLengthVector()) {
2916 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2917 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2918 assert(DstContainerVT.getVectorElementCount() ==
2919 SrcContainerVT.getVectorElementCount() &&
2920 "Expected same element count");
2921 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2922 }
2923
2924 SDLoc DL(Op);
2925
2926 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2927
2928 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2929 {Src, Src, DAG.getCondCode(ISD::SETNE),
2930 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2931
2932 // If we need to widen by more than 1 step, promote the FP type, then do a
2933 // widening convert.
2934 if (DstEltSize > (2 * SrcEltSize)) {
2935 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2936 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2937 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2938 }
2939
2940 unsigned RVVOpc =
2942 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2943
2944 SDValue SplatZero = DAG.getNode(
2945 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2946 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2947 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2948 Res, DAG.getUNDEF(DstContainerVT), VL);
2949
2950 if (DstVT.isFixedLengthVector())
2951 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2952
2953 return Res;
2954}
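// Shape of the lowering (a sketch): a scalar fp_to_sint_sat f64 -> i64
// becomes FCVT_X(Src, RTZ) followed by a select that substitutes 0 when Src
// is unordered with itself (i.e. NaN). The vector path does the analogous
// thing with a truncating vector convert plus a vmerge against a zero splat
// under the IsNan mask.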
2955
2957 switch (Opc) {
2958 case ISD::FROUNDEVEN:
2960 case ISD::VP_FROUNDEVEN:
2961 return RISCVFPRndMode::RNE;
2962 case ISD::FTRUNC:
2963 case ISD::STRICT_FTRUNC:
2964 case ISD::VP_FROUNDTOZERO:
2965 return RISCVFPRndMode::RTZ;
2966 case ISD::FFLOOR:
2967 case ISD::STRICT_FFLOOR:
2968 case ISD::VP_FFLOOR:
2969 return RISCVFPRndMode::RDN;
2970 case ISD::FCEIL:
2971 case ISD::STRICT_FCEIL:
2972 case ISD::VP_FCEIL:
2973 return RISCVFPRndMode::RUP;
2974 case ISD::FROUND:
2975 case ISD::STRICT_FROUND:
2976 case ISD::VP_FROUND:
2977 return RISCVFPRndMode::RMM;
2978 case ISD::FRINT:
2979 return RISCVFPRndMode::DYN;
2980 }
2981
2983}
2984
2985// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2986// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2987// the integer domain and back. Taking care to avoid converting values that are
2988// nan or already correct.
2989static SDValue
2991 const RISCVSubtarget &Subtarget) {
2992 MVT VT = Op.getSimpleValueType();
2993 assert(VT.isVector() && "Unexpected type");
2994
2995 SDLoc DL(Op);
2996
2997 SDValue Src = Op.getOperand(0);
2998
2999 MVT ContainerVT = VT;
3000 if (VT.isFixedLengthVector()) {
3001 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3002 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3003 }
3004
3005 SDValue Mask, VL;
3006 if (Op->isVPOpcode()) {
3007 Mask = Op.getOperand(1);
3008 if (VT.isFixedLengthVector())
3009 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3010 Subtarget);
3011 VL = Op.getOperand(2);
3012 } else {
3013 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3014 }
3015
3016 // Freeze the source since we are increasing the number of uses.
3017 Src = DAG.getFreeze(Src);
3018
3019 // We do the conversion on the absolute value and fix the sign at the end.
3020 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3021
3022 // Determine the largest integer that can be represented exactly. This and
3023 // values larger than it don't have any fractional bits so don't need to
3024 // be converted.
3025 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3026 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3027 APFloat MaxVal = APFloat(FltSem);
3028 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3029 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3030 SDValue MaxValNode =
3031 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3032 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3033 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3034
3035 // If abs(Src) was larger than MaxVal or nan, keep it.
3036 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3037 Mask =
3038 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3039 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3040 Mask, Mask, VL});
3041
3042 // Truncate to integer and convert back to FP.
3043 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3044 MVT XLenVT = Subtarget.getXLenVT();
3045 SDValue Truncated;
3046
3047 switch (Op.getOpcode()) {
3048 default:
3049 llvm_unreachable("Unexpected opcode");
3050 case ISD::FCEIL:
3051 case ISD::VP_FCEIL:
3052 case ISD::FFLOOR:
3053 case ISD::VP_FFLOOR:
3054 case ISD::FROUND:
3055 case ISD::FROUNDEVEN:
3056 case ISD::VP_FROUND:
3057 case ISD::VP_FROUNDEVEN:
3058 case ISD::VP_FROUNDTOZERO: {
3061 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3062 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3063 break;
3064 }
3065 case ISD::FTRUNC:
3066 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3067 Mask, VL);
3068 break;
3069 case ISD::FRINT:
3070 case ISD::VP_FRINT:
3071 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3072 break;
3073 case ISD::FNEARBYINT:
3074 case ISD::VP_FNEARBYINT:
3075 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3076 Mask, VL);
3077 break;
3078 }
3079
3080 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3081 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3082 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3083 Mask, VL);
3084
3085 // Restore the original sign so that -0.0 is preserved.
3086 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3087 Src, Src, Mask, VL);
3088
3089 if (!VT.isFixedLengthVector())
3090 return Truncated;
3091
3092 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3093}
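// Sketch of the sequence for, say, FFLOOR on an f32 vector: compute
// |x| < 2^23 (values at or above that, and NaNs, already have no fractional
// part), convert the selected lanes to integer with the RDN rounding mode,
// convert back with SINT_TO_FP, and finally copy the original sign so that
// -0.0 survives the round trip.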
3094
3095// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3096// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3097// qNaNs and converting the new source to integer and back to FP.
3098static SDValue
3100 const RISCVSubtarget &Subtarget) {
3101 SDLoc DL(Op);
3102 MVT VT = Op.getSimpleValueType();
3103 SDValue Chain = Op.getOperand(0);
3104 SDValue Src = Op.getOperand(1);
3105
3106 MVT ContainerVT = VT;
3107 if (VT.isFixedLengthVector()) {
3108 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3109 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3110 }
3111
3112 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3113
3114 // Freeze the source since we are increasing the number of uses.
3115 Src = DAG.getFreeze(Src);
3116
3117 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3118 MVT MaskVT = Mask.getSimpleValueType();
3120 DAG.getVTList(MaskVT, MVT::Other),
3121 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3122 DAG.getUNDEF(MaskVT), Mask, VL});
3123 Chain = Unorder.getValue(1);
3125 DAG.getVTList(ContainerVT, MVT::Other),
3126 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3127 Chain = Src.getValue(1);
3128
3129 // We do the conversion on the absolute value and fix the sign at the end.
3130 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3131
3132 // Determine the largest integer that can be represented exactly. This and
3133 // values larger than it don't have any fractional bits so don't need to
3134 // be converted.
3135 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3136 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3137 APFloat MaxVal = APFloat(FltSem);
3138 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3139 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3140 SDValue MaxValNode =
3141 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3142 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3143 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3144
3145 // If abs(Src) was larger than MaxVal or nan, keep it.
3146 Mask = DAG.getNode(
3147 RISCVISD::SETCC_VL, DL, MaskVT,
3148 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3149
3150 // Truncate to integer and convert back to FP.
3151 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3152 MVT XLenVT = Subtarget.getXLenVT();
3153 SDValue Truncated;
3154
3155 switch (Op.getOpcode()) {
3156 default:
3157 llvm_unreachable("Unexpected opcode");
3158 case ISD::STRICT_FCEIL:
3159 case ISD::STRICT_FFLOOR:
3160 case ISD::STRICT_FROUND:
3164 Truncated = DAG.getNode(
3165 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3166 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3167 break;
3168 }
3169 case ISD::STRICT_FTRUNC:
3170 Truncated =
3172 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3173 break;
3176 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3177 Mask, VL);
3178 break;
3179 }
3180 Chain = Truncated.getValue(1);
3181
3182 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3183 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3184 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3185 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3186 Truncated, Mask, VL);
3187 Chain = Truncated.getValue(1);
3188 }
3189
3190 // Restore the original sign so that -0.0 is preserved.
3191 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3192 Src, Src, Mask, VL);
3193
3194 if (VT.isFixedLengthVector())
3195 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3196 return DAG.getMergeValues({Truncated, Chain}, DL);
3197}
3198
3199static SDValue
3201 const RISCVSubtarget &Subtarget) {
3202 MVT VT = Op.getSimpleValueType();
3203 if (VT.isVector())
3204 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3205
3206 if (DAG.shouldOptForSize())
3207 return SDValue();
3208
3209 SDLoc DL(Op);
3210 SDValue Src = Op.getOperand(0);
3211
3212 // Create an integer the size of the mantissa with the MSB set. This and all
3213 // values larger than it don't have any fractional bits so don't need to be
3214 // converted.
3215 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3216 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3217 APFloat MaxVal = APFloat(FltSem);
3218 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3219 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3220 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3221
3223 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3224 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3225}
3226
3227// Expand vector LRINT and LLRINT by converting to the integer domain.
3229 const RISCVSubtarget &Subtarget) {
3230 MVT VT = Op.getSimpleValueType();
3231 assert(VT.isVector() && "Unexpected type");
3232
3233 SDLoc DL(Op);
3234 SDValue Src = Op.getOperand(0);
3235 MVT ContainerVT = VT;
3236
3237 if (VT.isFixedLengthVector()) {
3238 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3239 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3240 }
3241
3242 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3243 SDValue Truncated =
3244 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3245
3246 if (!VT.isFixedLengthVector())
3247 return Truncated;
3248
3249 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3250}
3251
3252static SDValue
3254 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3255 SDValue Offset, SDValue Mask, SDValue VL,
3257 if (Merge.isUndef())
3259 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3260 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3261 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3262}
3263
3264static SDValue
3265getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3267 SDValue VL,
3269 if (Merge.isUndef())
3271 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3272 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3273 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3274}
3275
3276static MVT getLMUL1VT(MVT VT) {
3278 "Unexpected vector MVT");
3282}
3283
3287 int64_t Addend;
3288};
3289
3290static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3292 // We will use a SINT_TO_FP to materialize this constant so we should use a
3293 // signed APSInt here.
3294 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3295 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3296 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3297 // the rounding mode changes the output value, then it is not an exact
3298 // integer.
3300 bool IsExact;
3301 // If it is out of signed integer range, it will return an invalid operation.
3302 // If it is not an exact integer, IsExact is false.
3303 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3305 !IsExact)
3306 return std::nullopt;
3307 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3308}
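// Examples: getExactInteger(APFloat(3.0f), 32) yields 3, and
// getExactInteger(APFloat(-1.0f), 32) yields 0xFFFFFFFF (the sign-extended
// value truncated to the requested width), while APFloat(2.5f) is rejected
// with std::nullopt because it is not an exact integer under any rounding
// mode.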
3309
3310// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3311// to the (non-zero) step S and start value X. This can be then lowered as the
3312// RVV sequence (VID * S) + X, for example.
3313// The step S is represented as an integer numerator divided by a positive
3314// denominator. Note that the implementation currently only identifies
3315// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3316// cannot detect 2/3, for example.
3317// Note that this method will also match potentially unappealing index
3318// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3319// determine whether this is worth generating code for.
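// For example, <0, 2, 4, 6> is matched as {StepNumerator=2, StepDenominator=1,
// Addend=0}, and <1, 1, 2, 2, 3, 3> as {1, 2, 1}, i.e. a step of one half
// starting at one. These are only illustrative values; the profitability
// decision is left to the callers.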
3320static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3321 unsigned EltSizeInBits) {
3322 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3323 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3324 return std::nullopt;
3325 bool IsInteger = Op.getValueType().isInteger();
3326
3327 std::optional<unsigned> SeqStepDenom;
3328 std::optional<int64_t> SeqStepNum, SeqAddend;
3329 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3330 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3331
3332 // First extract the ops into a list of constant integer values. This may not
3333 // be possible for floats if they're not all representable as integers.
3335 const unsigned OpSize = Op.getScalarValueSizeInBits();
3336 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3337 if (Elt.isUndef()) {
3338 Elts[Idx] = std::nullopt;
3339 continue;
3340 }
3341 if (IsInteger) {
3342 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3343 } else {
3344 auto ExactInteger =
3345 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3346 if (!ExactInteger)
3347 return std::nullopt;
3348 Elts[Idx] = *ExactInteger;
3349 }
3350 }
3351
3352 for (auto [Idx, Elt] : enumerate(Elts)) {
3353 // Assume undef elements match the sequence; we just have to be careful
3354 // when interpolating across them.
3355 if (!Elt)
3356 continue;
3357
3358 if (PrevElt) {
3359 // Calculate the step since the last non-undef element, and ensure
3360 // it's consistent across the entire sequence.
3361 unsigned IdxDiff = Idx - PrevElt->second;
3362 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3363
3364 // A zero value difference means that we're somewhere in the middle
3365 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3366 // step change before evaluating the sequence.
3367 if (ValDiff == 0)
3368 continue;
3369
3370 int64_t Remainder = ValDiff % IdxDiff;
3371 // Normalize the step if it's greater than 1.
3372 if (Remainder != ValDiff) {
3373 // The difference must cleanly divide the element span.
3374 if (Remainder != 0)
3375 return std::nullopt;
3376 ValDiff /= IdxDiff;
3377 IdxDiff = 1;
3378 }
3379
3380 if (!SeqStepNum)
3381 SeqStepNum = ValDiff;
3382 else if (ValDiff != SeqStepNum)
3383 return std::nullopt;
3384
3385 if (!SeqStepDenom)
3386 SeqStepDenom = IdxDiff;
3387 else if (IdxDiff != *SeqStepDenom)
3388 return std::nullopt;
3389 }
3390
3391 // Record this non-undef element for later.
3392 if (!PrevElt || PrevElt->first != *Elt)
3393 PrevElt = std::make_pair(*Elt, Idx);
3394 }
3395
3396 // We need to have logged a step for this to count as a legal index sequence.
3397 if (!SeqStepNum || !SeqStepDenom)
3398 return std::nullopt;
3399
3400 // Loop back through the sequence and validate elements we might have skipped
3401 // while waiting for a valid step. While doing this, log any sequence addend.
3402 for (auto [Idx, Elt] : enumerate(Elts)) {
3403 if (!Elt)
3404 continue;
3405 uint64_t ExpectedVal =
3406 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3407 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3408 if (!SeqAddend)
3409 SeqAddend = Addend;
3410 else if (Addend != SeqAddend)
3411 return std::nullopt;
3412 }
3413
3414 assert(SeqAddend && "Must have an addend if we have a step");
3415
3416 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3417}
3418
3419// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3420// and lower it as a VRGATHER_VX_VL from the source vector.
3421static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3422 SelectionDAG &DAG,
3423 const RISCVSubtarget &Subtarget) {
3424 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3425 return SDValue();
3426 SDValue Vec = SplatVal.getOperand(0);
3427 // Only perform this optimization on vectors of the same size for simplicity.
3428 // Don't perform this optimization for i1 vectors.
3429 // FIXME: Support i1 vectors, maybe by promoting to i8?
3430 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3431 return SDValue();
3432 SDValue Idx = SplatVal.getOperand(1);
3433 // The index must be a legal type.
3434 if (Idx.getValueType() != Subtarget.getXLenVT())
3435 return SDValue();
3436
3437 MVT ContainerVT = VT;
3438 if (VT.isFixedLengthVector()) {
3439 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3440 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3441 }
3442
3443 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3444
3445 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3446 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3447
3448 if (!VT.isFixedLengthVector())
3449 return Gather;
3450
3451 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3452}
3453
3454
3455/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3456/// which constitute a large proportion of the elements. In such cases we can
3457/// splat a vector with the dominant element and make up the shortfall with
3458/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3459/// Note that this includes vectors of 2 elements by association. The
3460/// upper-most element is the "dominant" one, allowing us to use a splat to
3461/// "insert" the upper element, and an insert of the lower element at position
3462/// 0, which improves codegen.
3464 const RISCVSubtarget &Subtarget) {
3465 MVT VT = Op.getSimpleValueType();
3466 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3467
3468 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3469
3470 SDLoc DL(Op);
3471 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3472
3473 MVT XLenVT = Subtarget.getXLenVT();
3474 unsigned NumElts = Op.getNumOperands();
3475
3476 SDValue DominantValue;
3477 unsigned MostCommonCount = 0;
3478 DenseMap<SDValue, unsigned> ValueCounts;
3479 unsigned NumUndefElts =
3480 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3481
3482 // Track the number of scalar loads we know we'd be inserting, estimated as
3483 // any non-zero floating-point constant. Other kinds of element are either
3484 // already in registers or are materialized on demand. The threshold at which
3485 // a vector load is more desirable than several scalar materialization and
3486 // vector-insertion instructions is not known.
3487 unsigned NumScalarLoads = 0;
3488
3489 for (SDValue V : Op->op_values()) {
3490 if (V.isUndef())
3491 continue;
3492
3493 ValueCounts.insert(std::make_pair(V, 0));
3494 unsigned &Count = ValueCounts[V];
3495 if (0 == Count)
3496 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3497 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3498
3499 // Is this value dominant? In case of a tie, prefer the highest element as
3500 // it's cheaper to insert near the beginning of a vector than it is at the
3501 // end.
3502 if (++Count >= MostCommonCount) {
3503 DominantValue = V;
3504 MostCommonCount = Count;
3505 }
3506 }
3507
3508 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3509 unsigned NumDefElts = NumElts - NumUndefElts;
3510 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3511
3512 // Don't perform this optimization when optimizing for size, since
3513 // materializing elements and inserting them tends to cause code bloat.
3514 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3515 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3516 ((MostCommonCount > DominantValueCountThreshold) ||
3517 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3518 // Start by splatting the most common element.
3519 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3520
3521 DenseSet<SDValue> Processed{DominantValue};
3522
3523 // We can handle an insert into the last element (of a splat) via
3524 // v(f)slide1down. This is slightly better than the vslideup insert
3525 // lowering as it avoids the need for a vector group temporary. It
3526 // is also better than using vmerge.vx as it avoids the need to
3527 // materialize the mask in a vector register.
3528 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3529 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3530 LastOp != DominantValue) {
3531 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3532 auto OpCode =
3534 if (!VT.isFloatingPoint())
3535 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3536 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3537 LastOp, Mask, VL);
3538 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3539 Processed.insert(LastOp);
3540 }
3541
3542 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3543 for (const auto &OpIdx : enumerate(Op->ops())) {
3544 const SDValue &V = OpIdx.value();
3545 if (V.isUndef() || !Processed.insert(V).second)
3546 continue;
3547 if (ValueCounts[V] == 1) {
3548 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3549 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3550 } else {
3551 // Blend in all instances of this value using a VSELECT, using a
3552 // mask where each bit signals whether that element is the one
3553 // we're after.
3555 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3556 return DAG.getConstant(V == V1, DL, XLenVT);
3557 });
3558 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3559 DAG.getBuildVector(SelMaskTy, DL, Ops),
3560 DAG.getSplatBuildVector(VT, DL, V), Vec);
3561 }
3562 }
3563
3564 return Vec;
3565 }
3566
3567 return SDValue();
3568}
3569
3571 const RISCVSubtarget &Subtarget) {
3572 MVT VT = Op.getSimpleValueType();
3573 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3574
3575 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3576
3577 SDLoc DL(Op);
3578 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3579
3580 MVT XLenVT = Subtarget.getXLenVT();
3581 unsigned NumElts = Op.getNumOperands();
3582
3583 if (VT.getVectorElementType() == MVT::i1) {
3584 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3585 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3586 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3587 }
3588
3589 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3590 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3591 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3592 }
3593
3594 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3595 // scalar integer chunks whose bit-width depends on the number of mask
3596 // bits and XLEN.
3597 // First, determine the most appropriate scalar integer type to use. This
3598 // is at most XLenVT, but may be shrunk to a smaller vector element type
3599 // according to the size of the final vector - use i8 chunks rather than
3600 // XLenVT if we're producing a v8i1. This results in more consistent
3601 // codegen across RV32 and RV64.
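    // For example (a sketch), a constant v8i1 <1,0,1,1,0,0,1,1> is built as
    // the single i8 constant 0xCD (element i becomes bit i) in a v1i8 vector
    // and then bitcast to v8i1.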
3602 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3603 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3604 // If we have to use more than one INSERT_VECTOR_ELT then this
3605 // optimization is likely to increase code size; avoid performing it in
3606 // such a case. We can use a load from a constant pool in this case.
3607 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3608 return SDValue();
3609 // Now we can create our integer vector type. Note that it may be larger
3610 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3611 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3612 MVT IntegerViaVecVT =
3613 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3614 IntegerViaVecElts);
3615
3616 uint64_t Bits = 0;
3617 unsigned BitPos = 0, IntegerEltIdx = 0;
3618 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3619
3620 for (unsigned I = 0; I < NumElts;) {
3621 SDValue V = Op.getOperand(I);
3622 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3623 Bits |= ((uint64_t)BitValue << BitPos);
3624 ++BitPos;
3625 ++I;
3626
3627 // Once we accumulate enough bits to fill our scalar type or process the
3628 // last element, insert into our vector and clear our accumulated data.
3629 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3630 if (NumViaIntegerBits <= 32)
3631 Bits = SignExtend64<32>(Bits);
3632 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3633 Elts[IntegerEltIdx] = Elt;
3634 Bits = 0;
3635 BitPos = 0;
3636 IntegerEltIdx++;
3637 }
3638 }
3639
3640 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3641
3642 if (NumElts < NumViaIntegerBits) {
3643 // If we're producing a smaller vector than our minimum legal integer
3644 // type, bitcast to the equivalent (known-legal) mask type, and extract
3645 // our final mask.
3646 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3647 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3648 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3649 DAG.getConstant(0, DL, XLenVT));
3650 } else {
3651 // Else we must have produced an integer type with the same size as the
3652 // mask type; bitcast for the final result.
3653 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3654 Vec = DAG.getBitcast(VT, Vec);
3655 }
3656
3657 return Vec;
3658 }
3659
3660 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3661 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3663 if (!VT.isFloatingPoint())
3664 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3665 Splat =
3666 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3667 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3668 }
3669
3670 // Try and match index sequences, which we can lower to the vid instruction
3671 // with optional modifications. An all-undef vector is matched by
3672 // getSplatValue, above.
3673 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3674 int64_t StepNumerator = SimpleVID->StepNumerator;
3675 unsigned StepDenominator = SimpleVID->StepDenominator;
3676 int64_t Addend = SimpleVID->Addend;
3677
3678 assert(StepNumerator != 0 && "Invalid step");
3679 bool Negate = false;
3680 int64_t SplatStepVal = StepNumerator;
3681 unsigned StepOpcode = ISD::MUL;
3682 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3683 // anyway as the shift of 63 won't fit in uimm5.
3684 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3685 isPowerOf2_64(std::abs(StepNumerator))) {
3686 Negate = StepNumerator < 0;
3687 StepOpcode = ISD::SHL;
3688 SplatStepVal = Log2_64(std::abs(StepNumerator));
3689 }
3690
3691 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3692 // threshold since it's the immediate value many RVV instructions accept.
3693 // There is no vmul.vi instruction so ensure multiply constant can fit in
3694 // a single addi instruction.
3695 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3696 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3697 isPowerOf2_32(StepDenominator) &&
3698 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3699 MVT VIDVT =
3701 MVT VIDContainerVT =
3702 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3703 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3704 // Convert right out of the scalable type so we can use standard ISD
3705 // nodes for the rest of the computation. If we used scalable types with
3706 // these, we'd lose the fixed-length vector info and generate worse
3707 // vsetvli code.
3708 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3709 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3710 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3711 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3712 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3713 }
3714 if (StepDenominator != 1) {
3715 SDValue SplatStep =
3716 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3717 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3718 }
3719 if (Addend != 0 || Negate) {
3720 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3721 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3722 VID);
3723 }
3724 if (VT.isFloatingPoint()) {
3725 // TODO: Use vfwcvt to reduce register pressure.
3726 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3727 }
3728 return VID;
3729 }
3730 }
3731
3732 // For very small build_vectors, use a single scalar insert of a constant.
3733 // TODO: Base this on constant rematerialization cost, not size.
3734 const unsigned EltBitSize = VT.getScalarSizeInBits();
3735 if (VT.getSizeInBits() <= 32 &&
3737 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3738 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3739 "Unexpected sequence type");
3740 // If we can use the original VL with the modified element type, this
3741 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3742 // be moved into InsertVSETVLI?
3743 unsigned ViaVecLen =
3744 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3745 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3746
3747 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3748 uint64_t SplatValue = 0;
3749 // Construct the amalgamated value at this larger vector type.
3750 for (const auto &OpIdx : enumerate(Op->op_values())) {
3751 const auto &SeqV = OpIdx.value();
3752 if (!SeqV.isUndef())
3753 SplatValue |=
3754 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3755 }
3756
3757 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3758 // achieve better constant materialization.
3759 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3760 SplatValue = SignExtend64<32>(SplatValue);
3761
3762 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3763 DAG.getUNDEF(ViaVecVT),
3764 DAG.getConstant(SplatValue, DL, XLenVT),
3765 DAG.getVectorIdxConstant(0, DL));
3766 if (ViaVecLen != 1)
3768 MVT::getVectorVT(ViaIntVT, 1), Vec,
3769 DAG.getConstant(0, DL, XLenVT));
3770 return DAG.getBitcast(VT, Vec);
3771 }
3772
3773
3774 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3775 // when re-interpreted as a vector with a larger element type. For example,
3776 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3777 // could be instead splat as
3778 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3779 // TODO: This optimization could also work on non-constant splats, but it
3780 // would require bit-manipulation instructions to construct the splat value.
3781 SmallVector<SDValue> Sequence;
3782 const auto *BV = cast<BuildVectorSDNode>(Op);
3783 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3785 BV->getRepeatedSequence(Sequence) &&
3786 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3787 unsigned SeqLen = Sequence.size();
3788 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3789 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3790 ViaIntVT == MVT::i64) &&
3791 "Unexpected sequence type");
3792
3793 // If we can use the original VL with the modified element type, this
3794 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3795 // be moved into InsertVSETVLI?
3796 const unsigned RequiredVL = NumElts / SeqLen;
3797 const unsigned ViaVecLen =
3798 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3799 NumElts : RequiredVL;
3800 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3801
3802 unsigned EltIdx = 0;
3803 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3804 uint64_t SplatValue = 0;
3805 // Construct the amalgamated value which can be splatted as this larger
3806 // vector type.
3807 for (const auto &SeqV : Sequence) {
3808 if (!SeqV.isUndef())
3809 SplatValue |=
3810 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3811 EltIdx++;
3812 }
3813
3814 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3815 // achieve better constant materialization.
3816 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3817 SplatValue = SignExtend64<32>(SplatValue);
3818
3819 // Since we can't introduce illegal i64 types at this stage, we can only
3820 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3821 // way we can use RVV instructions to splat.
3822 assert((ViaIntVT.bitsLE(XLenVT) ||
3823 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3824 "Unexpected bitcast sequence");
3825 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3826 SDValue ViaVL =
3827 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3828 MVT ViaContainerVT =
3829 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3830 SDValue Splat =
3831 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3832 DAG.getUNDEF(ViaContainerVT),
3833 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3834 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3835 if (ViaVecLen != RequiredVL)
3836 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3837 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3838 DAG.getConstant(0, DL, XLenVT));
3839 return DAG.getBitcast(VT, Splat);
3840 }
3841 }
3842
3843 // If the number of signbits allows, see if we can lower as a <N x i8>.
3844 // Our main goal here is to reduce LMUL (and thus work) required to
3845 // build the constant, but we will also narrow if the resulting
3846 // narrow vector is known to materialize cheaply.
3847 // TODO: We really should be costing the smaller vector. There are
3848 // profitable cases this misses.
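// For example, a v4i32 build_vector <-1, 0, 1, -2> has at least 31 sign bits
// per element, so it can be built as a v4i8 constant and widened back with a
// sign extend (vsext.vf4).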
3849 if (EltBitSize > 8 && VT.isInteger() &&
3850 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3851 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3852 if (EltBitSize - SignBits < 8) {
3853 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3854 DL, Op->ops());
3855 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3856 Source, DAG, Subtarget);
3857 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3858 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3859 }
3860 }
3861
3862 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3863 return Res;
3864
3865 // For constant vectors, use generic constant pool lowering. Otherwise,
3866 // we'd have to materialize constants in GPRs just to move them into the
3867 // vector.
3868 return SDValue();
3869}
3870
3871 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3872 const RISCVSubtarget &Subtarget) {
3873 MVT VT = Op.getSimpleValueType();
3874 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3875
3876 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3877 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3878 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3879
3880 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3881
3882 SDLoc DL(Op);
3883 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3884
3885 MVT XLenVT = Subtarget.getXLenVT();
3886
3887 if (VT.getVectorElementType() == MVT::i1) {
3888 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3889 // vector type, we have a legal equivalently-sized i8 type, so we can use
3890 // that.
3891 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3892 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3893
3894 SDValue WideVec;
3895 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3896 // For a splat, perform a scalar truncate before creating the wider
3897 // vector.
3898 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3899 DAG.getConstant(1, DL, Splat.getValueType()));
3900 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3901 } else {
3902 SmallVector<SDValue, 8> Ops(Op->op_values());
3903 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3904 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3905 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3906 }
3907
3908 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3909 }
3910
3911 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3912 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3913 return Gather;
3914 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3915 : RISCVISD::VMV_V_X_VL;
3916 if (!VT.isFloatingPoint())
3917 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3918 Splat =
3919 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3920 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3921 }
3922
3923 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3924 return Res;
3925
3926 // If we're compiling for an exact VLEN value, we can split our work per
3927 // register in the register group.
3928 if (const auto VLen = Subtarget.getRealVLen();
3929 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3930 MVT ElemVT = VT.getVectorElementType();
3931 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3932 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3933 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3934 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3935 assert(M1VT == getLMUL1VT(M1VT));
3936
3937 // The following semantically builds up a fixed length concat_vector
3938 // of the component build_vectors. We eagerly lower to scalable and
3939 // insert_subvector here to avoid DAG combining it back to a large
3940 // build_vector.
3941 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3942 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3943 SDValue Vec = DAG.getUNDEF(ContainerVT);
3944 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3945 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3946 SDValue SubBV =
3947 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3948 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3949 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3950 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3951 DAG.getVectorIdxConstant(InsertIdx, DL));
3952 }
3953 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3954 }
3955
3956 // For m1 vectors, if we have non-undef values in both halves of our vector,
3957 // split the vector into low and high halves, build them separately, then
3958 // use a vselect to combine them. For long vectors, this cuts the critical
3959 // path of the vslide1down sequence in half, and gives us an opportunity
3960 // to special case each half independently. Note that we don't change the
3961 // length of the sub-vectors here, so if both fall back to the generic
3962 // vslide1down path, we should be able to fold the vselect into the final
3963 // vslidedown (for the undef tail) for the first half w/ masking.
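// For example, a fully defined v8i16 <a,b,c,d,e,f,g,h> becomes
// vselect(<1,1,1,1,0,0,0,0>, build_vector <a,b,c,d,undef,...>,
// build_vector <undef,...,e,f,g,h>).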
3964 unsigned NumElts = VT.getVectorNumElements();
3965 unsigned NumUndefElts =
3966 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3967 unsigned NumDefElts = NumElts - NumUndefElts;
3968 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3969 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3970 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3971 SmallVector<SDValue> MaskVals;
3972 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3973 SubVecAOps.reserve(NumElts);
3974 SubVecBOps.reserve(NumElts);
3975 for (unsigned i = 0; i < NumElts; i++) {
3976 SDValue Elem = Op->getOperand(i);
3977 if (i < NumElts / 2) {
3978 SubVecAOps.push_back(Elem);
3979 SubVecBOps.push_back(UndefElem);
3980 } else {
3981 SubVecAOps.push_back(UndefElem);
3982 SubVecBOps.push_back(Elem);
3983 }
3984 bool SelectMaskVal = (i < NumElts / 2);
3985 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3986 }
3987 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3988 MaskVals.size() == NumElts);
3989
3990 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3991 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3992 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3993 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3994 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3995 }
3996
3997 // Cap the cost at a value linear to the number of elements in the vector.
3998 // The default lowering is to use the stack. The vector store + scalar loads
3999 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4000 // being (at least) linear in LMUL. As a result, using the vslidedown
4001 // lowering for every element ends up being VL*LMUL.
4002 // TODO: Should we be directly costing the stack alternative? Doing so might
4003 // give us a more accurate upper bound.
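// For example, a fully defined 8-element build_vector has LinearBudget = 16;
// the loop below charges PerSlideCost per defined element, so at LMUL=2 the
// total of 8 x 2 = 16 still fits, while at LMUL=4 (8 x 4 = 32) it would not.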
4004 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4005
4006 // TODO: unify with TTI getSlideCost.
4007 InstructionCost PerSlideCost = 1;
4008 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4009 default: break;
4010 case RISCVII::VLMUL::LMUL_2:
4011 PerSlideCost = 2;
4012 break;
4013 case RISCVII::VLMUL::LMUL_4:
4014 PerSlideCost = 4;
4015 break;
4016 case RISCVII::VLMUL::LMUL_8:
4017 PerSlideCost = 8;
4018 break;
4019 }
4020
4021 // TODO: Should we be using the build instseq then cost + evaluate scheme
4022 // we use for integer constants here?
4023 unsigned UndefCount = 0;
4024 for (const SDValue &V : Op->ops()) {
4025 if (V.isUndef()) {
4026 UndefCount++;
4027 continue;
4028 }
4029 if (UndefCount) {
4030 LinearBudget -= PerSlideCost;
4031 UndefCount = 0;
4032 }
4033 LinearBudget -= PerSlideCost;
4034 }
4035 if (UndefCount) {
4036 LinearBudget -= PerSlideCost;
4037 }
4038
4039 if (LinearBudget < 0)
4040 return SDValue();
4041
4042 assert((!VT.isFloatingPoint() ||
4043 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4044 "Illegal type which will result in reserved encoding");
4045
4046 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4047
4048 SDValue Vec;
4049 UndefCount = 0;
4050 for (SDValue V : Op->ops()) {
4051 if (V.isUndef()) {
4052 UndefCount++;
4053 continue;
4054 }
4055
4056 // Start our sequence with a TA splat in the hopes that hardware is able to
4057 // recognize there's no dependency on the prior value of our temporary
4058 // register.
4059 if (!Vec) {
4060 Vec = DAG.getSplatVector(VT, DL, V);
4061 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4062 UndefCount = 0;
4063 continue;
4064 }
4065
4066 if (UndefCount) {
4067 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4068 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4069 Vec, Offset, Mask, VL, Policy);
4070 UndefCount = 0;
4071 }
4072 auto OpCode =
4073 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4074 if (!VT.isFloatingPoint())
4075 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4076 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4077 V, Mask, VL);
4078 }
4079 if (UndefCount) {
4080 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4081 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4082 Vec, Offset, Mask, VL, Policy);
4083 }
4084 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4085}
4086
4087static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4088 SDValue Lo, SDValue Hi, SDValue VL,
4089 SelectionDAG &DAG) {
4090 if (!Passthru)
4091 Passthru = DAG.getUNDEF(VT);
4092 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4093 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4094 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4095 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4096 // node in order to try and match RVV vector/scalar instructions.
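// For example, the i64 splat value -1 splits into Lo = Hi = 0xFFFFFFFF;
// (LoC >> 31) == -1 == HiC, so a single vmv.v.x of Lo reproduces the full
// 64-bit element.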
4097 if ((LoC >> 31) == HiC)
4098 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4099
4100 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4101 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4102 // vlmax vsetvli or vsetivli to change the VL.
4103 // FIXME: Support larger constants?
4104 // FIXME: Support non-constant VLs by saturating?
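// For example, splatting the i64 value 0x0000000500000005 with VL=4 can
// instead splat the i32 value 5 with VL=8: each 64-bit element is covered by
// two identical 32-bit elements, so only the VL (not the value) changes.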
4105 if (LoC == HiC) {
4106 SDValue NewVL;
4107 if (isAllOnesConstant(VL) ||
4108 (isa<RegisterSDNode>(VL) &&
4109 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4110 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4111 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4112 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4113
4114 if (NewVL) {
4115 MVT InterVT =
4116 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4117 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4118 DAG.getUNDEF(InterVT), Lo, NewVL);
4119 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4120 }
4121 }
4122 }
4123
4124 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4125 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4126 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4127 Hi.getConstantOperandVal(1) == 31)
4128 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4129
4130 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4131 // even if it might be sign extended.
4132 if (Hi.isUndef())
4133 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4134
4135 // Fall back to a stack store and stride x0 vector load.
4136 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4137 Hi, VL);
4138}
4139
4140// Called by type legalization to handle splat of i64 on RV32.
4141// FIXME: We can optimize this when the type has sign or zero bits in one
4142// of the halves.
4143static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4144 SDValue Scalar, SDValue VL,
4145 SelectionDAG &DAG) {
4146 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4147 SDValue Lo, Hi;
4148 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4149 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4150}
4151
4152 // This function lowers a splat of a scalar operand Scalar with the vector
4153// length VL. It ensures the final sequence is type legal, which is useful when
4154// lowering a splat after type legalization.
4155static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4156 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4157 const RISCVSubtarget &Subtarget) {
4158 bool HasPassthru = Passthru && !Passthru.isUndef();
4159 if (!HasPassthru && !Passthru)
4160 Passthru = DAG.getUNDEF(VT);
4161 if (VT.isFloatingPoint())
4162 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4163
4164 MVT XLenVT = Subtarget.getXLenVT();
4165
4166 // Simplest case is that the operand needs to be promoted to XLenVT.
4167 if (Scalar.getValueType().bitsLE(XLenVT)) {
4168 // If the operand is a constant, sign extend to increase our chances
4169 // of being able to use a .vi instruction. ANY_EXTEND would become a
4170 // a zero extend and the simm5 check in isel would fail.
4171 // FIXME: Should we ignore the upper bits in isel instead?
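// For example, splatting the i8 constant -3: a sign extend keeps the XLen
// value -3, which matches the simm5 immediate of vmv.v.i, whereas a zero
// extend would produce 253 and fail the simm5 check.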
4172 unsigned ExtOpc =
4173 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4174 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4175 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4176 }
4177
4178 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4179 "Unexpected scalar for splat lowering!");
4180
4181 if (isOneConstant(VL) && isNullConstant(Scalar))
4182 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4183 DAG.getConstant(0, DL, XLenVT), VL);
4184
4185 // Otherwise use the more complicated splatting algorithm.
4186 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4187}
4188
4189// This function lowers an insert of a scalar operand Scalar into lane
4190// 0 of the vector regardless of the value of VL. The contents of the
4191// remaining lanes of the result vector are unspecified. VL is assumed
4192// to be non-zero.
4193 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4194 const SDLoc &DL, SelectionDAG &DAG,
4195 const RISCVSubtarget &Subtarget) {
4196 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4197
4198 const MVT XLenVT = Subtarget.getXLenVT();
4199 SDValue Passthru = DAG.getUNDEF(VT);
4200
4201 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4202 isNullConstant(Scalar.getOperand(1))) {
4203 SDValue ExtractedVal = Scalar.getOperand(0);
4204 // The element types must be the same.
4205 if (ExtractedVal.getValueType().getVectorElementType() ==
4206 VT.getVectorElementType()) {
4207 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4208 MVT ExtractedContainerVT = ExtractedVT;
4209 if (ExtractedContainerVT.isFixedLengthVector()) {
4210 ExtractedContainerVT = getContainerForFixedLengthVector(
4211 DAG, ExtractedContainerVT, Subtarget);
4212 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4213 ExtractedVal, DAG, Subtarget);
4214 }
4215 if (ExtractedContainerVT.bitsLE(VT))
4216 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4217 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4218 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4219 DAG.getVectorIdxConstant(0, DL));
4220 }
4221 }
4222
4223
4224 if (VT.isFloatingPoint())
4225 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4226 DAG.getUNDEF(VT), Scalar, VL);
4227
4228 // Avoid the tricky legalization cases by falling back to using the
4229 // splat code which already handles it gracefully.
4230 if (!Scalar.getValueType().bitsLE(XLenVT))
4231 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4232 DAG.getConstant(1, DL, XLenVT),
4233 VT, DL, DAG, Subtarget);
4234
4235 // If the operand is a constant, sign extend to increase our chances
4236 // of being able to use a .vi instruction. ANY_EXTEND would become a
4237 // a zero extend and the simm5 check in isel would fail.
4238 // FIXME: Should we ignore the upper bits in isel instead?
4239 unsigned ExtOpc =
4240 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4241 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4242 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4243 DAG.getUNDEF(VT), Scalar, VL);
4244}
4245
4246 // Is this a shuffle that extracts either the even or odd elements of a vector?
4247// That is, specifically, either (a) or (b) below.
4248// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4249// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4250// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4251// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4252 // Returns {Src Vector, Even Elements} on success
4253static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4254 SDValue V2, ArrayRef<int> Mask,
4255 const RISCVSubtarget &Subtarget) {
4256 // Need to be able to widen the vector.
4257 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4258 return false;
4259
4260 // Both input must be extracts.
4261 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4262 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4263 return false;
4264
4265 // Extracting from the same source.
4266 SDValue Src = V1.getOperand(0);
4267 if (Src != V2.getOperand(0))
4268 return false;
4269
4270 // Src needs to have twice the number of elements.
4271 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4272 return false;
4273
4274 // The extracts must extract the two halves of the source.
4275 if (V1.getConstantOperandVal(1) != 0 ||
4276 V2.getConstantOperandVal(1) != Mask.size())
4277 return false;
4278
4279 // First index must be the first even or odd element from V1.
4280 if (Mask[0] != 0 && Mask[0] != 1)
4281 return false;
4282
4283 // The others must increase by 2 each time.
4284 // TODO: Support undef elements?
4285 for (unsigned i = 1; i != Mask.size(); ++i)
4286 if (Mask[i] != Mask[i - 1] + 2)
4287 return false;
4288
4289 return true;
4290}
4291
4292/// Is this shuffle interleaving contiguous elements from one vector into the
4293/// even elements and contiguous elements from another vector into the odd
4294/// elements. \p EvenSrc will contain the element that should be in the first
4295/// even element. \p OddSrc will contain the element that should be in the first
4296/// odd element. These can be the first element in a source or the element half
4297/// way through the source.
4298static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4299 int &OddSrc, const RISCVSubtarget &Subtarget) {
4300 // We need to be able to widen elements to the next larger integer type.
4301 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4302 return false;
4303
4304 int Size = Mask.size();
4305 int NumElts = VT.getVectorNumElements();
4306 assert(Size == (int)NumElts && "Unexpected mask size");
4307
4308 SmallVector<unsigned, 2> StartIndexes;
4309 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4310 return false;
4311
4312 EvenSrc = StartIndexes[0];
4313 OddSrc = StartIndexes[1];
4314
4315 // One source should be low half of first vector.
4316 if (EvenSrc != 0 && OddSrc != 0)
4317 return false;
4318
4319 // Subvectors will be extracted from either the start of the two input
4320 // vectors, or from the start and middle of the first vector if it's a unary
4321 // interleave.
4322 // In both cases, HalfNumElts will be extracted.
4323 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4324 // we'll create an illegal extract_subvector.
4325 // FIXME: We could support other values using a slidedown first.
4326 int HalfNumElts = NumElts / 2;
4327 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4328}
4329
4330/// Match shuffles that concatenate two vectors, rotate the concatenation,
4331/// and then extract the original number of elements from the rotated result.
4332/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4333/// returned rotation amount is for a rotate right, where elements move from
4334/// higher elements to lower elements. \p LoSrc indicates the first source
4335/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4336/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4337/// 0 or 1 if a rotation is found.
4338///
4339/// NOTE: We talk about rotate to the right which matches how bit shift and
4340/// rotate instructions are described where LSBs are on the right, but LLVM IR
4341/// and the table below write vectors with the lowest elements on the left.
4342static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4343 int Size = Mask.size();
4344
4345 // We need to detect various ways of spelling a rotation:
4346 // [11, 12, 13, 14, 15, 0, 1, 2]
4347 // [-1, 12, 13, 14, -1, -1, 1, -1]
4348 // [-1, -1, -1, -1, -1, -1, 1, 2]
4349 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4350 // [-1, 4, 5, 6, -1, -1, 9, -1]
4351 // [-1, 4, 5, 6, -1, -1, -1, -1]
4352 int Rotation = 0;
4353 LoSrc = -1;
4354 HiSrc = -1;
4355 for (int i = 0; i != Size; ++i) {
4356 int M = Mask[i];
4357 if (M < 0)
4358 continue;
4359
4360 // Determine where a rotate vector would have started.
4361 int StartIdx = i - (M % Size);
4362 // The identity rotation isn't interesting, stop.
4363 if (StartIdx == 0)
4364 return -1;
4365
4366 // If we found the tail of a vector the rotation must be the missing
4367 // front. If we found the head of a vector, it must be how much of the
4368 // head.
4369 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4370
4371 if (Rotation == 0)
4372 Rotation = CandidateRotation;
4373 else if (Rotation != CandidateRotation)
4374 // The rotations don't match, so we can't match this mask.
4375 return -1;
4376
4377 // Compute which value this mask is pointing at.
4378 int MaskSrc = M < Size ? 0 : 1;
4379
4380 // Compute which of the two target values this index should be assigned to.
4381 // This reflects whether the high elements are remaining or the low elements
4382 // are remaining.
4383 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4384
4385 // Either set up this value if we've not encountered it before, or check
4386 // that it remains consistent.
4387 if (TargetSrc < 0)
4388 TargetSrc = MaskSrc;
4389 else if (TargetSrc != MaskSrc)
4390 // This may be a rotation, but it pulls from the inputs in some
4391 // unsupported interleaving.
4392 return -1;
4393 }
4394
4395 // Check that we successfully analyzed the mask, and normalize the results.
4396 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4397 assert((LoSrc >= 0 || HiSrc >= 0) &&
4398 "Failed to find a rotated input vector!");
4399
4400 return Rotation;
4401}
4402
4403// Lower a deinterleave shuffle to vnsrl.
4404// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4405// -> [p, q, r, s] (EvenElts == false)
4406// VT is the type of the vector to return, <[vscale x ]n x ty>
4407// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4408 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4409 bool EvenElts,
4410 const RISCVSubtarget &Subtarget,
4411 SelectionDAG &DAG) {
4412 // The result is a vector of type <m x n x ty>
4413 MVT ContainerVT = VT;
4414 // Convert fixed vectors to scalable if needed
4415 if (ContainerVT.isFixedLengthVector()) {
4416 assert(Src.getSimpleValueType().isFixedLengthVector());
4417 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4418
4419 // The source is a vector of type <m x n*2 x ty>
4420 MVT SrcContainerVT =
4421 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4422 ContainerVT.getVectorElementCount() * 2);
4423 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4424 }
4425
4426 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4427
4428 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4429 // This also converts FP to int.
4430 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4431 MVT WideSrcContainerVT = MVT::getVectorVT(
4432 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4433 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4434
4435 // The integer version of the container type.
4436 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4437
4438 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4439 // the original element size.
4440 unsigned Shift = EvenElts ? 0 : EltBits;
4441 SDValue SplatShift = DAG.getNode(
4442 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4443 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4444 SDValue Res =
4445 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4446 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4447 // Cast back to FP if needed.
4448 Res = DAG.getBitcast(ContainerVT, Res);
4449
4450 if (VT.isFixedLengthVector())
4451 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4452 return Res;
4453}
4454
4455// Lower the following shuffle to vslidedown.
4456// a)
4457// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4458// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4459// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4460// b)
4461// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4462// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4463// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4464// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4465// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4466// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4467 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4468 SDValue V1, SDValue V2,
4469 ArrayRef<int> Mask,
4470 const RISCVSubtarget &Subtarget,
4471 SelectionDAG &DAG) {
4472 auto findNonEXTRACT_SUBVECTORParent =
4473 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4474 uint64_t Offset = 0;
4475 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4476 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4477 // a scalable vector. But we don't want to match the case.
4478 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4479 Offset += Parent.getConstantOperandVal(1);
4480 Parent = Parent.getOperand(0);
4481 }
4482 return std::make_pair(Parent, Offset);
4483 };
4484
4485 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4486 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4487
4488 // Extracting from the same source.
4489 SDValue Src = V1Src;
4490 if (Src != V2Src)
4491 return SDValue();
4492
4493 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4494 SmallVector<int, 16> NewMask(Mask);
4495 for (size_t i = 0; i != NewMask.size(); ++i) {
4496 if (NewMask[i] == -1)
4497 continue;
4498
4499 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4500 NewMask[i] = NewMask[i] + V1IndexOffset;
4501 } else {
4502 // Minus NewMask.size() is needed. Otherwise, the b case would be
4503 // <5,6,7,12> instead of <5,6,7,8>.
4504 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4505 }
4506 }
4507
4508 // First index must be known and non-zero. It will be used as the slidedown
4509 // amount.
4510 if (NewMask[0] <= 0)
4511 return SDValue();
4512
4513 // NewMask is also continuous.
4514 for (unsigned i = 1; i != NewMask.size(); ++i)
4515 if (NewMask[i - 1] + 1 != NewMask[i])
4516 return SDValue();
4517
4518 MVT XLenVT = Subtarget.getXLenVT();
4519 MVT SrcVT = Src.getSimpleValueType();
4520 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4521 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4522 SDValue Slidedown =
4523 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4524 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4525 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4526 return DAG.getNode(
4527 ISD::EXTRACT_SUBVECTOR, DL, VT,
4528 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4529 DAG.getConstant(0, DL, XLenVT));
4530}
4531
4532// Because vslideup leaves the destination elements at the start intact, we can
4533// use it to perform shuffles that insert subvectors:
4534//
4535// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4536// ->
4537// vsetvli zero, 8, e8, mf2, ta, ma
4538// vslideup.vi v8, v9, 4
4539//
4540// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4541// ->
4542// vsetvli zero, 5, e8, mf2, tu, ma
4543 // vslideup.vi v8, v9, 2
4544 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4545 SDValue V1, SDValue V2,
4546 ArrayRef<int> Mask,
4547 const RISCVSubtarget &Subtarget,
4548 SelectionDAG &DAG) {
4549 unsigned NumElts = VT.getVectorNumElements();
4550 int NumSubElts, Index;
4551 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4552 Index))
4553 return SDValue();
4554
4555 bool OpsSwapped = Mask[Index] < (int)NumElts;
4556 SDValue InPlace = OpsSwapped ? V2 : V1;
4557 SDValue ToInsert = OpsSwapped ? V1 : V2;
4558
4559 MVT XLenVT = Subtarget.getXLenVT();
4560 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4561 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4562 // We slide up by the index that the subvector is being inserted at, and set
4563 // VL to the index + the number of elements being inserted.
4564 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4565 // If we're adding a suffix to the in place vector, i.e. inserting right
4566 // up to the very end of it, then we don't actually care about the tail.
4567 if (NumSubElts + Index >= (int)NumElts)
4568 Policy |= RISCVII::TAIL_AGNOSTIC;
4569
4570 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4571 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4572 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4573
4574 SDValue Res;
4575 // If we're inserting into the lowest elements, use a tail undisturbed
4576 // vmv.v.v.
4577 if (Index == 0)
4578 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4579 VL);
4580 else
4581 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4582 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4583 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4584}
4585
4586/// Match v(f)slide1up/down idioms. These operations involve sliding
4587/// N-1 elements to make room for an inserted scalar at one end.
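/// For example, vector_shuffle <4, 0, 1, 2> of (v4i8 A, v4i8 splat(S)) keeps
/// A's first three elements shifted up one lane and inserts S into lane 0,
/// i.e. a vslide1up.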
4588 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4589 SDValue V1, SDValue V2,
4590 ArrayRef<int> Mask,
4591 const RISCVSubtarget &Subtarget,
4592 SelectionDAG &DAG) {
4593 bool OpsSwapped = false;
4594 if (!isa<BuildVectorSDNode>(V1)) {
4595 if (!isa<BuildVectorSDNode>(V2))
4596 return SDValue();
4597 std::swap(V1, V2);
4598 OpsSwapped = true;
4599 }
4600 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4601 if (!Splat)
4602 return SDValue();
4603
4604 // Return true if the mask could describe a slide of Mask.size() - 1
4605 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4606 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4607 const unsigned S = (Offset > 0) ? 0 : -Offset;
4608 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4609 for (unsigned i = S; i != E; ++i)
4610 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4611 return false;
4612 return true;
4613 };
4614
4615 const unsigned NumElts = VT.getVectorNumElements();
4616 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4617 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4618 return SDValue();
4619
4620 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4621 // Inserted lane must come from splat; an undef scalar is legal but not profitable.
4622 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4623 return SDValue();
4624
4625 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4626 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4627 auto OpCode = IsVSlidedown ?
4628 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4629 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4630 if (!VT.isFloatingPoint())
4631 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4632 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4633 DAG.getUNDEF(ContainerVT),
4634 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4635 Splat, TrueMask, VL);
4636 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4637}
4638
4639// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4640// to create an interleaved vector of <[vscale x] n*2 x ty>.
4641// This requires that the size of ty is less than the subtarget's maximum ELEN.
4642 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4643 const SDLoc &DL, SelectionDAG &DAG,
4644 const RISCVSubtarget &Subtarget) {
4645 MVT VecVT = EvenV.getSimpleValueType();
4646 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4647 // Convert fixed vectors to scalable if needed
4648 if (VecContainerVT.isFixedLengthVector()) {
4649 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4650 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4651 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4652 }
4653
4654 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4655
4656 // We're working with a vector of the same size as the resulting
4657 // interleaved vector, but with half the number of elements and
4658 // twice the SEW (Hence the restriction on not using the maximum
4659 // ELEN)
4660 MVT WideVT =
4661 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4662 VecVT.getVectorElementCount());
4663 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4664 if (WideContainerVT.isFixedLengthVector())
4665 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4666
4667 // Bitcast the input vectors to integers in case they are FP
4668 VecContainerVT = VecContainerVT.changeTypeToInteger();
4669 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4670 OddV = DAG.getBitcast(VecContainerVT, OddV);
4671
4672 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4673 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4674
4675 SDValue Interleaved;
4676 if (OddV.isUndef()) {
4677 // If OddV is undef, this is a zero extend.
4678 // FIXME: Not only does this optimize the code, it fixes some correctness
4679 // issues because MIR does not have freeze.
4680 Interleaved =
4681 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4682 } else if (Subtarget.hasStdExtZvbb()) {
4683 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4684 SDValue OffsetVec =
4685 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4686 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4687 OffsetVec, Passthru, Mask, VL);
4688 if (!EvenV.isUndef())
4689 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4690 Interleaved, EvenV, Passthru, Mask, VL);
4691 } else if (EvenV.isUndef()) {
4692 Interleaved =
4693 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4694
4695 SDValue OffsetVec =
4696 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4697 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4698 Interleaved, OffsetVec, Passthru, Mask, VL);
4699 } else {
4700 // FIXME: We should freeze the odd vector here. We already handled the case
4701 // of provably undef/poison above.
4702
4703 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4704 // vwaddu.vv
4705 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4706 OddV, Passthru, Mask, VL);
4707
4708 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
4709 SDValue AllOnesVec = DAG.getSplatVector(
4710 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4711 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4712 OddV, AllOnesVec, Passthru, Mask, VL);
4713
4714 // Add the two together so we get
4715 // (OddV * 0xff...ff) + (OddV + EvenV)
4716 // = (OddV * 0x100...00) + EvenV
4717 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4718 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
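// For example, with i8 elements, EvenV[i] = 0x21 and OddV[i] = 0x43:
// vwaddu gives 0x0064, 0x43 * 0xff gives 0x42bd, and their sum is 0x4321,
// i.e. OddV in the high byte and EvenV in the low byte of the i16 result.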
4719 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4720 Interleaved, OddsMul, Passthru, Mask, VL);
4721 }
4722
4723 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4724 MVT ResultContainerVT = MVT::getVectorVT(
4725 VecVT.getVectorElementType(), // Make sure to use original type
4726 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4727 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4728
4729 // Convert back to a fixed vector if needed
4730 MVT ResultVT =
4731 MVT::getVectorVT(VecVT.getVectorElementType(),
4732 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4733 if (ResultVT.isFixedLengthVector())
4734 Interleaved =
4735 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4736
4737 return Interleaved;
4738}
4739
4740// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4741// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4742 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4743 SelectionDAG &DAG,
4744 const RISCVSubtarget &Subtarget) {
4745 SDLoc DL(SVN);
4746 MVT VT = SVN->getSimpleValueType(0);
4747 SDValue V = SVN->getOperand(0);
4748 unsigned NumElts = VT.getVectorNumElements();
4749
4750 assert(VT.getVectorElementType() == MVT::i1);
4751
4752 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4753 SVN->getMask().size()) ||
4754 !SVN->getOperand(1).isUndef())
4755 return SDValue();
4756
4757 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4758 EVT ViaVT = EVT::getVectorVT(
4759 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4760 EVT ViaBitVT =
4761 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4762
4763 // If we don't have zvbb or the larger element type > ELEN, the operation will
4764 // be illegal.
4765 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4766 ViaVT) ||
4767 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4768 return SDValue();
4769
4770 // If the bit vector doesn't fit exactly into the larger element type, we need
4771 // to insert it into the larger vector and then shift up the reversed bits
4772 // afterwards to get rid of the gap introduced.
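// For example, reversing a v4i1 uses a v1i8 bitreverse; the four real bits
// end up in the high nibble, so the result is shifted right (SRL) by 4
// afterwards.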
4773 if (ViaEltSize > NumElts)
4774 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4775 V, DAG.getVectorIdxConstant(0, DL));
4776
4777 SDValue Res =
4778 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4779
4780 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4781 // element type.
4782 if (ViaEltSize > NumElts)
4783 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4784 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4785
4786 Res = DAG.getBitcast(ViaBitVT, Res);
4787
4788 if (ViaEltSize > NumElts)
4789 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4790 DAG.getVectorIdxConstant(0, DL));
4791 return Res;
4792}
4793
4794 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4795 SelectionDAG &DAG,
4796 const RISCVSubtarget &Subtarget,
4797 MVT &RotateVT, unsigned &RotateAmt) {
4798 SDLoc DL(SVN);
4799
4800 EVT VT = SVN->getValueType(0);
4801 unsigned NumElts = VT.getVectorNumElements();
4802 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4803 unsigned NumSubElts;
4804 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4805 NumElts, NumSubElts, RotateAmt))
4806 return false;
4807 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4808 NumElts / NumSubElts);
4809
4810 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4811 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4812}
4813
4814// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4815// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4816// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4817 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4818 SelectionDAG &DAG,
4819 const RISCVSubtarget &Subtarget) {
4820 SDLoc DL(SVN);
4821
4822 EVT VT = SVN->getValueType(0);
4823 unsigned RotateAmt;
4824 MVT RotateVT;
4825 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4826 return SDValue();
4827
4828 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4829
4830 SDValue Rotate;
4831 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4832 // so canonicalize to vrev8.
4833 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4834 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4835 else
4836 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4837 DAG.getConstant(RotateAmt, DL, RotateVT));
4838
4839 return DAG.getBitcast(VT, Rotate);
4840}
4841
4842// If compiling with an exactly known VLEN, see if we can split a
4843// shuffle on m2 or larger into a small number of m1 sized shuffles
4844 // which write each destination register exactly once.
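// For example, with VLEN=128 a v8i64 shuffle (m4) whose mask sources each
// 128-bit destination register from a single source register can be lowered
// as four independent v2i64 (m1) shuffles.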
4845 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4846 SelectionDAG &DAG,
4847 const RISCVSubtarget &Subtarget) {
4848 SDLoc DL(SVN);
4849 MVT VT = SVN->getSimpleValueType(0);
4850 SDValue V1 = SVN->getOperand(0);
4851 SDValue V2 = SVN->getOperand(1);
4852 ArrayRef<int> Mask = SVN->getMask();
4853 unsigned NumElts = VT.getVectorNumElements();
4854
4855 // If we don't know exact data layout, not much we can do. If this
4856 // is already m1 or smaller, no point in splitting further.
4857 const auto VLen = Subtarget.getRealVLen();
4858 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4859 return SDValue();
4860
4861 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4862 // expansion for.
4863 unsigned RotateAmt;
4864 MVT RotateVT;
4865 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4866 return SDValue();
4867
4868 MVT ElemVT = VT.getVectorElementType();
4869 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4870 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4871
4873 OutMasks(VRegsPerSrc, {-1, {}});
4874
4875 // Check if our mask can be done as a 1-to-1 mapping from source
4876 // to destination registers in the group without needing to
4877 // write each destination more than once.
4878 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4879 int DstVecIdx = DstIdx / ElemsPerVReg;
4880 int DstSubIdx = DstIdx % ElemsPerVReg;
4881 int SrcIdx = Mask[DstIdx];
4882 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4883 continue;
4884 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4885 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4886 if (OutMasks[DstVecIdx].first == -1)
4887 OutMasks[DstVecIdx].first = SrcVecIdx;
4888 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4889 // Note: This case could easily be handled by keeping track of a chain
4890 // of source values and generating two element shuffles below. This is
4891 // less an implementation question, and more a profitability one.
4892 return SDValue();
4893
4894 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4895 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4896 }
4897
4898 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4899 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4900 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4901 assert(M1VT == getLMUL1VT(M1VT));
4902 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4903 SDValue Vec = DAG.getUNDEF(ContainerVT);
4904 // The following semantically builds up a fixed length concat_vector
4905 // of the component shuffle_vectors. We eagerly lower to scalable here
4906 // to avoid DAG combining it back to a large shuffle_vector again.
4907 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4908 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4909 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4910 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4911 if (SrcVecIdx == -1)
4912 continue;
4913 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4914 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4915 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4916 DAG.getVectorIdxConstant(ExtractIdx, DL));
4917 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4918 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4919 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4920 unsigned InsertIdx = DstVecIdx * NumOpElts;
4921 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4922 DAG.getVectorIdxConstant(InsertIdx, DL));
4923 }
4924 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4925}
4926
4927 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4928 const RISCVSubtarget &Subtarget) {
4929 SDValue V1 = Op.getOperand(0);
4930 SDValue V2 = Op.getOperand(1);
4931 SDLoc DL(Op);
4932 MVT XLenVT = Subtarget.getXLenVT();
4933 MVT VT = Op.getSimpleValueType();
4934 unsigned NumElts = VT.getVectorNumElements();
4935 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4936
4937 if (VT.getVectorElementType() == MVT::i1) {
4938 // Lower to a vror.vi of a larger element type if possible before we promote
4939 // i1s to i8s.
4940 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4941 return V;
4942 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4943 return V;
4944
4945 // Promote i1 shuffle to i8 shuffle.
4946 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4947 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4948 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4949 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4950 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4951 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4952 ISD::SETNE);
4953 }
4954
4955 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4956
4957 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4958
4959 if (SVN->isSplat()) {
4960 const int Lane = SVN->getSplatIndex();
4961 if (Lane >= 0) {
4962 MVT SVT = VT.getVectorElementType();
4963
4964 // Turn splatted vector load into a strided load with an X0 stride.
4965 SDValue V = V1;
4966 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4967 // with undef.
4968 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4969 int Offset = Lane;
4970 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4971 int OpElements =
4972 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4973 V = V.getOperand(Offset / OpElements);
4974 Offset %= OpElements;
4975 }
4976
4977 // We need to ensure the load isn't atomic or volatile.
4978 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4979 auto *Ld = cast<LoadSDNode>(V);
4980 Offset *= SVT.getStoreSize();
4981 SDValue NewAddr = DAG.getMemBasePlusOffset(
4982 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4983
4984 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4985 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4986 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4987 SDValue IntID =
4988 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4989 SDValue Ops[] = {Ld->getChain(),
4990 IntID,
4991 DAG.getUNDEF(ContainerVT),
4992 NewAddr,
4993 DAG.getRegister(RISCV::X0, XLenVT),
4994 VL};
4995 SDValue NewLoad = DAG.getMemIntrinsicNode(
4996 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4997 DAG.getMachineFunction().getMachineMemOperand(
4998 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4999 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5000 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5001 }
5002
5003 // Otherwise use a scalar load and splat. This will give the best
5004 // opportunity to fold a splat into the operation. ISel can turn it into
5005 // the x0 strided load if we aren't able to fold away the select.
5006 if (SVT.isFloatingPoint())
5007 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5008 Ld->getPointerInfo().getWithOffset(Offset),
5009 Ld->getOriginalAlign(),
5010 Ld->getMemOperand()->getFlags());
5011 else
5012 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5013 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5014 Ld->getOriginalAlign(),
5015 Ld->getMemOperand()->getFlags());
5016 DAG.makeEquivalentMemoryOrdering(Ld, V);
5017
5018 unsigned Opc =
5019 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
5020 SDValue Splat =
5021 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
5022 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5023 }
5024
5025 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5026 assert(Lane < (int)NumElts && "Unexpected lane!");
5027 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5028 V1, DAG.getConstant(Lane, DL, XLenVT),
5029 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5030 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5031 }
5032 }
5033
5034 // For exact VLEN m2 or greater, try to split to m1 operations if we
5035 // can split cleanly.
5036 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5037 return V;
5038
5039 ArrayRef<int> Mask = SVN->getMask();
5040
5041 if (SDValue V =
5042 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5043 return V;
5044
5045 if (SDValue V =
5046 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5047 return V;
5048
5049 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5050 // available.
5051 if (Subtarget.hasStdExtZvkb())
5052 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5053 return V;
5054
5055 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5056 // be undef which can be handled with a single SLIDEDOWN/UP.
5057 int LoSrc, HiSrc;
5058 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5059 if (Rotation > 0) {
5060 SDValue LoV, HiV;
5061 if (LoSrc >= 0) {
5062 LoV = LoSrc == 0 ? V1 : V2;
5063 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5064 }
5065 if (HiSrc >= 0) {
5066 HiV = HiSrc == 0 ? V1 : V2;
5067 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5068 }
5069
5070 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5071 // to slide LoV up by (NumElts - Rotation).
5072 unsigned InvRotate = NumElts - Rotation;
5073
5074 SDValue Res = DAG.getUNDEF(ContainerVT);
5075 if (HiV) {
5076 // Even though we could use a smaller VL, don't, to avoid a vsetivli
5077 // toggle.
5078 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5079 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5080 }
5081 if (LoV)
5082 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5083 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5084 RISCVII::TAIL_AGNOSTIC);
5085
5086 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5087 }
5088
5089 // If this is a deinterleave and we can widen the vector, then we can use
5090 // vnsrl to deinterleave.
5091 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5092 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5093 Subtarget, DAG);
5094 }
5095
5096 if (SDValue V =
5097 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5098 return V;
5099
5100 // Detect an interleave shuffle and lower to
5101 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5102 int EvenSrc, OddSrc;
5103 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5104 // Extract the halves of the vectors.
5105 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5106
5107 int Size = Mask.size();
5108 SDValue EvenV, OddV;
5109 assert(EvenSrc >= 0 && "Undef source?");
5110 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5111 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5112 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5113
5114 assert(OddSrc >= 0 && "Undef source?");
5115 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5116 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5117 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5118
5119 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5120 }
5121
5122
5123 // Handle any remaining single source shuffles
5124 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5125 if (V2.isUndef()) {
5126 // We might be able to express the shuffle as a bitrotate. But even if we
5127 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5128 // shifts and a vor will have a higher throughput than a vrgather.
5129 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5130 return V;
5131
5132 if (VT.getScalarSizeInBits() == 8 &&
5133 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5134 // On such a vector we're unable to use i8 as the index type.
5135 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5136 // may involve vector splitting if we're already at LMUL=8, or our
5137 // user-supplied maximum fixed-length LMUL.
5138 return SDValue();
5139 }
5140
5141 // Base case for the two operand recursion below - handle the worst case
5142 // single source shuffle.
5143 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5144 MVT IndexVT = VT.changeTypeToInteger();
5145 // Since we can't introduce illegal index types at this stage, use i16 and
5146 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5147 // than XLenVT.
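// For example, a single-source v4i64 gather on RV32 would need i64 indices,
// which are illegal here, so a v4i16 index vector with vrgatherei16 is used
// instead.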
5148 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5149 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5150 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5151 }
5152
5153 // If the mask allows, we can do all the index computation in 16 bits. This
5154 // requires less work and less register pressure at high LMUL, and creates
5155 // smaller constants which may be cheaper to materialize.
5156 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5157 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5158 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5159 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5160 }
5161
5162 MVT IndexContainerVT =
5163 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5164
5165 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5166 SmallVector<SDValue> GatherIndicesLHS;
5167 for (int MaskIndex : Mask) {
5168 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5169 GatherIndicesLHS.push_back(IsLHSIndex
5170 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5171 : DAG.getUNDEF(XLenVT));
5172 }
5173 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5174 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5175 Subtarget);
5176 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5177 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5178 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5179 }
5180
5181 // By default we preserve the original operand order, and use a mask to
5182 // select LHS as true and RHS as false. However, since RVV vector selects may
5183 // feature splats but only on the LHS, we may choose to invert our mask and
5184 // instead select between RHS and LHS.
5185 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5186
5187 // Detect shuffles which can be re-expressed as vector selects; these are
5188 // shuffles in which each element in the destination is taken from an element
5189 // at the corresponding index in either source vector.
5190 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5191 int MaskIndex = MaskIdx.value();
5192 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5193 });
5194 if (IsSelect) {
5195 // Now construct the mask that will be used by the vselect operation.
5196 SmallVector<SDValue> MaskVals;
5197 for (int MaskIndex : Mask) {
5198 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5199 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5200 }
5201
5202 if (SwapOps)
5203 std::swap(V1, V2);
5204
5205 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5206 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5207 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5208 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5209 }
5210
5211 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5212 // merged with a second vrgather.
5213 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5214 SmallVector<SDValue> MaskVals;
5215
5216 // Now construct the mask that will be used by the blended vrgather operation.
5217 // Construct the appropriate indices into each vector.
5218 for (int MaskIndex : Mask) {
5219 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5220 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5221 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5222 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5223 ? MaskIndex : -1);
5224 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5225 }
5226
5227 if (SwapOps) {
5228 std::swap(V1, V2);
5229 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5230 }
5231
5232 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5233 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5234 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5235
5236 // Recursively invoke lowering for each operand if we had two
5237 // independent single source shuffles, and then combine the result via a
5238 // vselect. Note that the vselect will likely be folded back into the
5239 // second permute (vrgather, or other) by the post-isel combine.
5240 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5241 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5242 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5243}
5244
5245bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5246 // Support splats for any type. These should type legalize well.
5247 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5248 return true;
5249
5250 // Only support legal VTs for other shuffles for now.
5251 if (!isTypeLegal(VT))
5252 return false;
5253
5254 MVT SVT = VT.getSimpleVT();
5255
5256 // Not for i1 vectors.
5257 if (SVT.getScalarType() == MVT::i1)
5258 return false;
5259
5260 int Dummy1, Dummy2;
5261 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5262 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5263}
5264
5265// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5266// the exponent.
5267SDValue
5268RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5269 SelectionDAG &DAG) const {
5270 MVT VT = Op.getSimpleValueType();
5271 unsigned EltSize = VT.getScalarSizeInBits();
5272 SDValue Src = Op.getOperand(0);
5273 SDLoc DL(Op);
5274 MVT ContainerVT = VT;
5275
5276 SDValue Mask, VL;
5277 if (Op->isVPOpcode()) {
5278 Mask = Op.getOperand(1);
5279 if (VT.isFixedLengthVector())
5280 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5281 Subtarget);
5282 VL = Op.getOperand(2);
5283 }
5284
5285 // We choose an FP type that can represent the value if possible. Otherwise,
5286 // we use a round-towards-zero conversion so the exponent of the result is correct.
5287 // TODO: Use f16 for i8 when possible?
5288 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5289 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5290 FloatEltVT = MVT::f32;
5291 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5292
5293 // Legal types should have been checked in the RISCVTargetLowering
5294 // constructor.
5295 // TODO: Splitting may make sense in some cases.
5296 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5297 "Expected legal float type!");
5298
5299 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5300 // The trailing zero count is equal to log2 of this single bit value.
5301 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5302 SDValue Neg = DAG.getNegative(Src, DL, VT);
5303 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5304 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5305 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5306 Src, Mask, VL);
5307 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5308 }
5309
5310 // We have a legal FP type, convert to it.
5311 SDValue FloatVal;
5312 if (FloatVT.bitsGT(VT)) {
5313 if (Op->isVPOpcode())
5314 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5315 else
5316 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5317 } else {
5318 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5319 if (VT.isFixedLengthVector()) {
5320 ContainerVT = getContainerForFixedLengthVector(VT);
5321 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5322 }
5323 if (!Op->isVPOpcode())
5324 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5325 SDValue RTZRM =
5326 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5327 MVT ContainerFloatVT =
5328 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5329 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5330 Src, Mask, RTZRM, VL);
5331 if (VT.isFixedLengthVector())
5332 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5333 }
5334 // Bitcast to integer and shift the exponent to the LSB.
5335 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5336 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5337 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5338
5339 SDValue Exp;
5340 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5341 if (Op->isVPOpcode()) {
5342 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5343 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5344 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5345 } else {
5346 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5347 DAG.getConstant(ShiftAmt, DL, IntVT));
5348 if (IntVT.bitsLT(VT))
5349 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5350 else if (IntVT.bitsGT(VT))
5351 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5352 }
5353
5354 // The exponent contains log2 of the value in biased form.
5355 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5356 // For trailing zeros, we just need to subtract the bias.
5357 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5358 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5359 DAG.getConstant(ExponentBias, DL, VT));
5360 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5361 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5362 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5363
5364 // For leading zeros, we need to remove the bias and convert from log2 to
5365 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5366 unsigned Adjust = ExponentBias + (EltSize - 1);
5367 SDValue Res;
5368 if (Op->isVPOpcode())
5369 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5370 Mask, VL);
5371 else
5372 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5373
5374 // With a zero input, the result above equals Adjust, which is greater than
5375 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5376 if (Op.getOpcode() == ISD::CTLZ)
5377 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5378 else if (Op.getOpcode() == ISD::VP_CTLZ)
5379 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5380 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5381 return Res;
5382}
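// A minimal standalone sketch of the exponent-extraction trick used above, on
// a scalar uint32_t; it is not part of this lowering and assumes <cstdint> and
// <cstring> (std::memcpy) are available and X is non-zero. The vector code
// does the analogous steps: isolate the lowest set bit for cttz, convert to
// FP, shift out the mantissa, and un-bias the exponent (bias 1023 / shift 52
// for f64, bias 127 / shift 23 for f32).
static unsigned cttzViaFPExponentSketch(uint32_t X) {
  uint32_t Bit = X & -X;               // Lowest set bit, a power of two.
  double D = static_cast<double>(Bit); // Exact: every u32 fits in an f64.
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return unsigned(Bits >> 52) - 1023;  // Biased exponent minus bias == log2.
}
static unsigned ctlzViaFPExponentSketch(uint32_t X) {
  double D = static_cast<double>(X);
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  unsigned Exp = unsigned(Bits >> 52);   // floor(log2(X)) + 1023.
  return (1023 + 31) - Exp;              // Same "Adjust - Exp" form as above.
}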
5383
5384SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5385 SelectionDAG &DAG) const {
5386 SDLoc DL(Op);
5387 MVT XLenVT = Subtarget.getXLenVT();
5388 SDValue Source = Op->getOperand(0);
5389 MVT SrcVT = Source.getSimpleValueType();
5390 SDValue Mask = Op->getOperand(1);
5391 SDValue EVL = Op->getOperand(2);
5392
5393 if (SrcVT.isFixedLengthVector()) {
5394 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5395 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5396 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5397 Subtarget);
5398 SrcVT = ContainerVT;
5399 }
5400
5401 // Convert to boolean vector.
5402 if (SrcVT.getScalarType() != MVT::i1) {
5403 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5404 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5405 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5406 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5407 DAG.getUNDEF(SrcVT), Mask, EVL});
5408 }
5409
5410 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5411 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5412 // In this case, we can interpret poison as -1, so there is nothing further to do.
5413 return Res;
5414
5415 // Convert -1 to VL.
5416 SDValue SetCC =
5417 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5418 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5419 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5420}
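// A scalar sketch of the -1 -> EVL fixup performed above (hypothetical helper,
// not part of this lowering): vfirst reports -1 when no mask element is set,
// and the non-ZERO_UNDEF form of cttz.elts must report the explicit vector
// length instead.
static unsigned cttzEltsFixupSketch(int64_t VFirstResult, unsigned EVL) {
  return VFirstResult < 0 ? EVL : unsigned(VFirstResult);
}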
5421
5422// While RVV has alignment restrictions, we should always be able to load as a
5423// legal equivalently-sized byte-typed vector instead. This method is
5424 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5425// the load is already correctly-aligned, it returns SDValue().
5426SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5427 SelectionDAG &DAG) const {
5428 auto *Load = cast<LoadSDNode>(Op);
5429 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5430
5431 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5432 Load->getMemoryVT(),
5433 *Load->getMemOperand()))
5434 return SDValue();
5435
5436 SDLoc DL(Op);
5437 MVT VT = Op.getSimpleValueType();
5438 unsigned EltSizeBits = VT.getScalarSizeInBits();
5439 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5440 "Unexpected unaligned RVV load type");
5441 MVT NewVT =
5442 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5443 assert(NewVT.isValid() &&
5444 "Expecting equally-sized RVV vector types to be legal");
5445 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5446 Load->getPointerInfo(), Load->getOriginalAlign(),
5447 Load->getMemOperand()->getFlags());
5448 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5449}
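// A C-level sketch of the same reinterpretation (hypothetical helper, not part
// of this lowering), assuming <cstring>: an i32-element vector load that is
// not 4-byte aligned can instead be performed as a byte load of the same size
// and then reinterpreted. std::memcpy plays the role of the i8 vector load
// plus bitcast chosen above.
static void loadUnalignedAsBytesSketch(const void *Src, uint32_t *Dst,
                                       size_t NumElts) {
  // A byte-wise copy has no alignment requirement, matching the i8 vector
  // type used above; Dst then holds the original i32 elements.
  std::memcpy(Dst, Src, NumElts * sizeof(uint32_t));
}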
5450
5451// While RVV has alignment restrictions, we should always be able to store as a
5452// legal equivalently-sized byte-typed vector instead. This method is
5453 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5454// returns SDValue() if the store is already correctly aligned.
5455SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5456 SelectionDAG &DAG) const {
5457 auto *Store = cast<StoreSDNode>(Op);
5458 assert(Store && Store->getValue().getValueType().isVector() &&
5459 "Expected vector store");
5460
5461 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5462 Store->getMemoryVT(),
5463 *Store->getMemOperand()))
5464 return SDValue();
5465
5466 SDLoc DL(Op);
5467 SDValue StoredVal = Store->getValue();
5468 MVT VT = StoredVal.getSimpleValueType();
5469 unsigned EltSizeBits = VT.getScalarSizeInBits();
5470 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5471 "Unexpected unaligned RVV store type");
5472 MVT NewVT =
5473 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5474 assert(NewVT.isValid() &&
5475 "Expecting equally-sized RVV vector types to be legal");
5476 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5477 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5478 Store->getPointerInfo(), Store->getOriginalAlign(),
5479 Store->getMemOperand()->getFlags());
5480}
5481
5482static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5483 const RISCVSubtarget &Subtarget) {
5484 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5485
5486 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5487
5488 // All simm32 constants should be handled by isel.
5489 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5490 // this check redundant, but small immediates are common so this check
5491 // should have better compile time.
5492 if (isInt<32>(Imm))
5493 return Op;
5494
5495 // We only need to cost the immediate, if constant pool lowering is enabled.
5496 if (!Subtarget.useConstantPoolForLargeInts())
5497 return Op;
5498
5499 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5500 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5501 return Op;
5502
5503 // Optimizations below are disabled for opt size. If we're optimizing for
5504 // size, use a constant pool.
5505 if (DAG.shouldOptForSize())
5506 return SDValue();
5507
5508 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5509 // do that if it will avoid a constant pool.
5510 // It will require an extra temporary register, though.
5511 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5512 // low and high 32 bits are the same and bit 31 and 63 are set.
5513 unsigned ShiftAmt, AddOpc;
5514 RISCVMatInt::InstSeq SeqLo =
5515 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5516 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5517 return Op;
5518
5519 return SDValue();
5520}
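// A sketch of the constant shape the Zba (ADD_UW X, (SLLI X, 32)) special
// case above targets (hypothetical helper, not part of this lowering): a
// 64-bit immediate whose low and high 32-bit halves are identical, e.g.
// 0xABCD1234ABCD1234, so the low half is materialized once and then added to
// itself shifted left by 32. generateTwoRegInstSeq checks more than this.
static bool hasIdenticalHalvesSketch(uint64_t Imm) {
  return (Imm >> 32) == (Imm & 0xffffffffu);
}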
5521
5522static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5523 const RISCVSubtarget &Subtarget) {
5524 SDLoc dl(Op);
5525 AtomicOrdering FenceOrdering =
5526 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5527 SyncScope::ID FenceSSID =
5528 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5529
5530 if (Subtarget.hasStdExtZtso()) {
5531 // The only fence that needs an instruction is a sequentially-consistent
5532 // cross-thread fence.
5533 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5534 FenceSSID == SyncScope::System)
5535 return Op;
5536
5537 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5538 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5539 }
5540
5541 // singlethread fences only synchronize with signal handlers on the same
5542 // thread and thus only need to preserve instruction order, not actually
5543 // enforce memory ordering.
5544 if (FenceSSID == SyncScope::SingleThread)
5545 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5546 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5547
5548 return Op;
5549}
5550
5551static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5552 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5553 "Unexpected custom legalisation");
5554
5555 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
5556 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5557 SDLoc DL(Op);
5558 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5559 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5560 SDValue Result =
5561 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5562
5563 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5564 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5565 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5566 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5567 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5568 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5569 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5570}
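// A scalar sketch of the widen-then-clamp scheme above (hypothetical helper,
// not part of this lowering), assuming <cstdint>, <algorithm> and <limits>:
// the i32 operands are sign-extended to i64, added there (no overflow is
// possible), clamped to the i32 range with smin/smax, and truncated back.
static int32_t saddSatViaWideningSketch(int32_t A, int32_t B) {
  int64_t R = int64_t(A) + int64_t(B);
  R = std::min<int64_t>(R, std::numeric_limits<int32_t>::max());
  R = std::max<int64_t>(R, std::numeric_limits<int32_t>::min());
  return int32_t(R);
}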
5571
5572static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5573 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5574 "Unexpected custom legalisation");
5575
5576 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5577 // sign extend allows overflow of the lower 32 bits to be detected on
5578 // the promoted size.
5579 SDLoc DL(Op);
5580 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5581 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5582 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5583 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5584}
5585
5586// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5587static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5588 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5589 "Unexpected custom legalisation");
5590 if (isa<ConstantSDNode>(Op.getOperand(1)))
5591 return SDValue();
5592
5593 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5594 SDLoc DL(Op);
5595 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5596 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5597 SDValue WideOp =
5598 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5599 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5600 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5601 DAG.getValueType(MVT::i32));
5602 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5603 ISD::SETNE);
5604 return DAG.getMergeValues({Res, Ovf}, DL);
5605}
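// A scalar sketch of the overflow test used above (hypothetical helper, not
// part of this lowering): the 64-bit sum overflows the i32 result exactly
// when sign-extending the low 32 bits of the sum does not reproduce the full
// 64-bit value, which is what the SIGN_EXTEND_INREG + SETNE pair checks.
static bool saddOverflowsSketch(int32_t A, int32_t B, int32_t &Res) {
  int64_t Wide = int64_t(A) + int64_t(B);
  Res = int32_t(Wide);          // Truncate, like the TRUNCATE node.
  return Wide != int64_t(Res);  // Differs iff the i32 result overflowed.
}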
5606
5607// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5608static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5609 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5610 "Unexpected custom legalisation");
5611 SDLoc DL(Op);
5612 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5613 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5614 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5615 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5616 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5617 DAG.getValueType(MVT::i32));
5618 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5619 ISD::SETNE);
5620 return DAG.getMergeValues({Res, Ovf}, DL);
5621}
5622
5623SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5624 SelectionDAG &DAG) const {
5625 SDLoc DL(Op);
5626 MVT VT = Op.getSimpleValueType();
5627 MVT XLenVT = Subtarget.getXLenVT();
5628 unsigned Check = Op.getConstantOperandVal(1);
5629 unsigned TDCMask = 0;
5630 if (Check & fcSNan)
5631 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5632 if (Check & fcQNan)
5633 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5634 if (Check & fcPosInf)
5635 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5636 if (Check & fcNegInf)
5637 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5638 if (Check & fcPosNormal)
5639 TDCMask |= RISCV::FPMASK_Positive_Normal;
5640 if (Check & fcNegNormal)
5641 TDCMask |= RISCV::FPMASK_Negative_Normal;
5642 if (Check & fcPosSubnormal)
5643 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5644 if (Check & fcNegSubnormal)
5645 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5646 if (Check & fcPosZero)
5647 TDCMask |= RISCV::FPMASK_Positive_Zero;
5648 if (Check & fcNegZero)
5649 TDCMask |= RISCV::FPMASK_Negative_Zero;
5650
5651 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5652
5653 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5654
5655 if (VT.isVector()) {
5656 SDValue Op0 = Op.getOperand(0);
5657 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5658
5659 if (VT.isScalableVector()) {
5660 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5661 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5662 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5663 Mask = Op.getOperand(2);
5664 VL = Op.getOperand(3);
5665 }
5666 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5667 VL, Op->getFlags());
5668 if (IsOneBitMask)
5669 return DAG.getSetCC(DL, VT, FPCLASS,
5670 DAG.getConstant(TDCMask, DL, DstVT),
5671 ISD::SETEQ);
5672 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5673 DAG.getConstant(TDCMask, DL, DstVT));
5674 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5675 ISD::SETNE);
5676 }
5677
5678 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5679 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5680 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5681 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5682 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5683 Mask = Op.getOperand(2);
5684 MVT MaskContainerVT =
5685 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5686 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5687 VL = Op.getOperand(3);
5688 }
5689 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5690
5691 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5692 Mask, VL, Op->getFlags());
5693
5694 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5695 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5696 if (IsOneBitMask) {
5697 SDValue VMSEQ =
5698 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5699 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5700 DAG.getUNDEF(ContainerVT), Mask, VL});
5701 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5702 }
5703 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5704 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5705
5706 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5707 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5708 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5709
5710 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5711 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5712 DAG.getUNDEF(ContainerVT), Mask, VL});
5713 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5714 }
5715
5716 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5717 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5718 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5719 ISD::SETNE);
5720 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5721}
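// A sketch of how the fclass result is tested above (hypothetical helper, not
// part of this lowering): fclass.{s,d,h} sets exactly one bit describing the
// operand's class, so a query for a single class can compare for equality
// (the IsOneBitMask path), while a multi-class query ANDs with the mask and
// tests for a non-zero intersection.
static bool testFClassBitsSketch(unsigned FClassResult, unsigned TDCMask) {
  if (isPowerOf2_32(TDCMask))        // Single-class query.
    return FClassResult == TDCMask;
  return (FClassResult & TDCMask) != 0;
}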
5722
5723// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5724// operations propagate nans.
5725static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5726 const RISCVSubtarget &Subtarget) {
5727 SDLoc DL(Op);
5728 MVT VT = Op.getSimpleValueType();
5729
5730 SDValue X = Op.getOperand(0);
5731 SDValue Y = Op.getOperand(1);
5732
5733 if (!VT.isVector()) {
5734 MVT XLenVT = Subtarget.getXLenVT();
5735
5736 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5737 // ensures that when one input is a nan, the other will also be a nan,
5738 // allowing the nan to propagate. If both inputs are nan, this will swap the
5739 // inputs, which is harmless.
5740
5741 SDValue NewY = Y;
5742 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5743 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5744 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5745 }
5746
5747 SDValue NewX = X;
5748 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5749 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5750 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5751 }
5752
5753 unsigned Opc =
5754 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5755 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5756 }
5757
5758 // Check the no-NaNs property before converting the fixed-length vectors to scalable ones.
5759 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5760 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5761
5762 MVT ContainerVT = VT;
5763 if (VT.isFixedLengthVector()) {
5764 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5765 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5766 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5767 }
5768
5769 SDValue Mask, VL;
5770 if (Op->isVPOpcode()) {
5771 Mask = Op.getOperand(2);
5772 if (VT.isFixedLengthVector())
5773 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5774 Subtarget);
5775 VL = Op.getOperand(3);
5776 } else {
5777 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5778 }
5779
5780 SDValue NewY = Y;
5781 if (!XIsNeverNan) {
5782 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5783 {X, X, DAG.getCondCode(ISD::SETOEQ),
5784 DAG.getUNDEF(ContainerVT), Mask, VL});
5785 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5786 DAG.getUNDEF(ContainerVT), VL);
5787 }
5788
5789 SDValue NewX = X;
5790 if (!YIsNeverNan) {
5791 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5792 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5793 DAG.getUNDEF(ContainerVT), Mask, VL});
5794 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5795 DAG.getUNDEF(ContainerVT), VL);
5796 }
5797
5798 unsigned Opc =
5799 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5800 ? RISCVISD::VFMAX_VL
5801 : RISCVISD::VFMIN_VL;
5802 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5803 DAG.getUNDEF(ContainerVT), Mask, VL);
5804 if (VT.isFixedLengthVector())
5805 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5806 return Res;
5807}
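// A scalar sketch of the NaN handling above (hypothetical helper, not part of
// this lowering), assuming <cmath>: a self-comparison (X == X, i.e. SETOEQ)
// is false only for NaN, so each operand is replaced by the other's NaN
// before the non-propagating min/max runs; when neither input is NaN nothing
// changes.
static double fmaximumSketch(double X, double Y) {
  double NewY = (X == X) ? Y : X;    // If X is NaN, force Y to that NaN.
  double NewX = (Y == Y) ? X : Y;    // If Y is NaN, force X to that NaN.
  return std::fmax(NewX, NewY);      // fmax now sees NaN on both sides.
}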
5808
5809/// Get a RISC-V target specified VL op for a given SDNode.
5810static unsigned getRISCVVLOp(SDValue Op) {
5811#define OP_CASE(NODE) \
5812 case ISD::NODE: \
5813 return RISCVISD::NODE##_VL;
5814#define VP_CASE(NODE) \
5815 case ISD::VP_##NODE: \
5816 return RISCVISD::NODE##_VL;
5817 // clang-format off
5818 switch (Op.getOpcode()) {
5819 default:
5820 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5821 OP_CASE(ADD)
5822 OP_CASE(SUB)
5823 OP_CASE(MUL)
5824 OP_CASE(MULHS)
5825 OP_CASE(MULHU)
5826 OP_CASE(SDIV)
5827 OP_CASE(SREM)
5828 OP_CASE(UDIV)
5829 OP_CASE(UREM)
5830 OP_CASE(SHL)
5831 OP_CASE(SRA)
5832 OP_CASE(SRL)
5833 OP_CASE(ROTL)
5834 OP_CASE(ROTR)
5835 OP_CASE(BSWAP)
5836 OP_CASE(CTTZ)
5837 OP_CASE(CTLZ)
5838 OP_CASE(CTPOP)
5839 OP_CASE(BITREVERSE)
5840 OP_CASE(SADDSAT)
5841 OP_CASE(UADDSAT)
5842 OP_CASE(SSUBSAT)
5843 OP_CASE(USUBSAT)
5844 OP_CASE(AVGFLOORU)
5845 OP_CASE(AVGCEILU)
5846 OP_CASE(FADD)
5847 OP_CASE(FSUB)
5848 OP_CASE(FMUL)
5849 OP_CASE(FDIV)
5850 OP_CASE(FNEG)
5851 OP_CASE(FABS)
5852 OP_CASE(FSQRT)
5853 OP_CASE(SMIN)
5854 OP_CASE(SMAX)
5855 OP_CASE(UMIN)
5856 OP_CASE(UMAX)
5857 OP_CASE(STRICT_FADD)
5858 OP_CASE(STRICT_FSUB)
5859 OP_CASE(STRICT_FMUL)
5860 OP_CASE(STRICT_FDIV)
5861 OP_CASE(STRICT_FSQRT)
5862 VP_CASE(ADD) // VP_ADD
5863 VP_CASE(SUB) // VP_SUB
5864 VP_CASE(MUL) // VP_MUL
5865 VP_CASE(SDIV) // VP_SDIV
5866 VP_CASE(SREM) // VP_SREM
5867 VP_CASE(UDIV) // VP_UDIV
5868 VP_CASE(UREM) // VP_UREM
5869 VP_CASE(SHL) // VP_SHL
5870 VP_CASE(FADD) // VP_FADD
5871 VP_CASE(FSUB) // VP_FSUB
5872 VP_CASE(FMUL) // VP_FMUL
5873 VP_CASE(FDIV) // VP_FDIV
5874 VP_CASE(FNEG) // VP_FNEG
5875 VP_CASE(FABS) // VP_FABS
5876 VP_CASE(SMIN) // VP_SMIN
5877 VP_CASE(SMAX) // VP_SMAX
5878 VP_CASE(UMIN) // VP_UMIN
5879 VP_CASE(UMAX) // VP_UMAX
5880 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5881 VP_CASE(SETCC) // VP_SETCC
5882 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5883 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5884 VP_CASE(BITREVERSE) // VP_BITREVERSE
5885 VP_CASE(SADDSAT) // VP_SADDSAT
5886 VP_CASE(UADDSAT) // VP_UADDSAT
5887 VP_CASE(SSUBSAT) // VP_SSUBSAT
5888 VP_CASE(USUBSAT) // VP_USUBSAT
5889 VP_CASE(BSWAP) // VP_BSWAP
5890 VP_CASE(CTLZ) // VP_CTLZ
5891 VP_CASE(CTTZ) // VP_CTTZ
5892 VP_CASE(CTPOP) // VP_CTPOP
5893 case ISD::CTLZ_ZERO_UNDEF:
5894 case ISD::VP_CTLZ_ZERO_UNDEF:
5895 return RISCVISD::CTLZ_VL;
5896 case ISD::CTTZ_ZERO_UNDEF:
5897 case ISD::VP_CTTZ_ZERO_UNDEF:
5898 return RISCVISD::CTTZ_VL;
5899 case ISD::FMA:
5900 case ISD::VP_FMA:
5901 return RISCVISD::VFMADD_VL;
5902 case ISD::STRICT_FMA:
5903 return RISCVISD::STRICT_VFMADD_VL;
5904 case ISD::AND:
5905 case ISD::VP_AND:
5906 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5907 return RISCVISD::VMAND_VL;
5908 return RISCVISD::AND_VL;
5909 case ISD::OR:
5910 case ISD::VP_OR:
5911 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5912 return RISCVISD::VMOR_VL;
5913 return RISCVISD::OR_VL;
5914 case ISD::XOR:
5915 case ISD::VP_XOR:
5916 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5917 return RISCVISD::VMXOR_VL;
5918 return RISCVISD::XOR_VL;
5919 case ISD::VP_SELECT:
5920 case ISD::VP_MERGE:
5921 return RISCVISD::VMERGE_VL;
5922 case ISD::VP_ASHR:
5923 return RISCVISD::SRA_VL;
5924 case ISD::VP_LSHR:
5925 return RISCVISD::SRL_VL;
5926 case ISD::VP_SQRT:
5927 return RISCVISD::FSQRT_VL;
5928 case ISD::VP_SIGN_EXTEND:
5929 return RISCVISD::VSEXT_VL;
5930 case ISD::VP_ZERO_EXTEND:
5931 return RISCVISD::VZEXT_VL;
5932 case ISD::VP_FP_TO_SINT:
5933 return RISCVISD::VFCVT_RTZ_X_F_VL;
5934 case ISD::VP_FP_TO_UINT:
5935 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5936 case ISD::FMINNUM:
5937 case ISD::VP_FMINNUM:
5938 return RISCVISD::VFMIN_VL;
5939 case ISD::FMAXNUM:
5940 case ISD::VP_FMAXNUM:
5941 return RISCVISD::VFMAX_VL;
5942 case ISD::LRINT:
5943 case ISD::VP_LRINT:
5944 case ISD::LLRINT:
5945 case ISD::VP_LLRINT:
5946 return RISCVISD::VFCVT_X_F_VL;
5947 }
5948 // clang-format on
5949#undef OP_CASE
5950#undef VP_CASE
5951}
5952
5953/// Return true if a RISC-V target specified op has a merge operand.
5954static bool hasMergeOp(unsigned Opcode) {
5955 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5956 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5957 "not a RISC-V target specific op");
5958 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5959 126 &&
5960 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5961 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5962 21 &&
5963 "adding target specific op should update this function");
5964 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5965 return true;
5966 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5967 return true;
5968 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5969 return true;
5970 if (Opcode == RISCVISD::SETCC_VL)
5971 return true;
5972 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5973 return true;
5974 if (Opcode == RISCVISD::VMERGE_VL)
5975 return true;
5976 return false;
5977}
5978
5979/// Return true if a RISC-V target specified op has a mask operand.
5980static bool hasMaskOp(unsigned Opcode) {
5981 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5982 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5983 "not a RISC-V target specific op");
5984 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5985 126 &&
5986 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5987 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5988 21 &&
5989 "adding target specific op should update this function");
5990 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5991 return true;
5992 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5993 return true;
5994 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5995 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5996 return true;
5997 return false;
5998}
5999
6001 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6002 SDLoc DL(Op);
6003
6004 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6005 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6006
6007 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6008 if (!Op.getOperand(j).getValueType().isVector()) {
6009 LoOperands[j] = Op.getOperand(j);
6010 HiOperands[j] = Op.getOperand(j);
6011 continue;
6012 }
6013 std::tie(LoOperands[j], HiOperands[j]) =
6014 DAG.SplitVector(Op.getOperand(j), DL);
6015 }
6016
6017 SDValue LoRes =
6018 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6019 SDValue HiRes =
6020 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6021
6022 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6023}
6024
6026 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6027 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6028 SDLoc DL(Op);
6029
6030 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6031 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6032
6033 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6034 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6035 std::tie(LoOperands[j], HiOperands[j]) =
6036 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6037 continue;
6038 }
6039 if (!Op.getOperand(j).getValueType().isVector()) {
6040 LoOperands[j] = Op.getOperand(j);
6041 HiOperands[j] = Op.getOperand(j);
6042 continue;
6043 }
6044 std::tie(LoOperands[j], HiOperands[j]) =
6045 DAG.SplitVector(Op.getOperand(j), DL);
6046 }
6047
6048 SDValue LoRes =
6049 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6050 SDValue HiRes =
6051 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6052
6053 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6054}
6055
6057 SDLoc DL(Op);
6058
6059 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6060 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6061 auto [EVLLo, EVLHi] =
6062 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6063
6064 SDValue ResLo =
6065 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6066 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6067 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6068 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6069}
6070
6072
6073 assert(Op->isStrictFPOpcode());
6074
6075 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6076
6077 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6078 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6079
6080 SDLoc DL(Op);
6081
6082 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6083 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6084
6085 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6086 if (!Op.getOperand(j).getValueType().isVector()) {
6087 LoOperands[j] = Op.getOperand(j);
6088 HiOperands[j] = Op.getOperand(j);
6089 continue;
6090 }
6091 std::tie(LoOperands[j], HiOperands[j]) =
6092 DAG.SplitVector(Op.getOperand(j), DL);
6093 }
6094
6095 SDValue LoRes =
6096 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6097 HiOperands[0] = LoRes.getValue(1);
6098 SDValue HiRes =
6099 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6100
6101 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6102 LoRes.getValue(0), HiRes.getValue(0));
6103 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6104}
6105
6106SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6107 SelectionDAG &DAG) const {
6108 switch (Op.getOpcode()) {
6109 default:
6110 report_fatal_error("unimplemented operand");
6111 case ISD::ATOMIC_FENCE:
6112 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6113 case ISD::GlobalAddress:
6114 return lowerGlobalAddress(Op, DAG);
6115 case ISD::BlockAddress:
6116 return lowerBlockAddress(Op, DAG);
6117 case ISD::ConstantPool:
6118 return lowerConstantPool(Op, DAG);
6119 case ISD::JumpTable:
6120 return lowerJumpTable(Op, DAG);
6121 case ISD::GlobalTLSAddress:
6122 return lowerGlobalTLSAddress(Op, DAG);
6123 case ISD::Constant:
6124 return lowerConstant(Op, DAG, Subtarget);
6125 case ISD::SELECT:
6126 return lowerSELECT(Op, DAG);
6127 case ISD::BRCOND:
6128 return lowerBRCOND(Op, DAG);
6129 case ISD::VASTART:
6130 return lowerVASTART(Op, DAG);
6131 case ISD::FRAMEADDR:
6132 return lowerFRAMEADDR(Op, DAG);
6133 case ISD::RETURNADDR:
6134 return lowerRETURNADDR(Op, DAG);
6135 case ISD::SADDO:
6136 case ISD::SSUBO:
6137 return lowerSADDO_SSUBO(Op, DAG);
6138 case ISD::SMULO:
6139 return lowerSMULO(Op, DAG);
6140 case ISD::SHL_PARTS:
6141 return lowerShiftLeftParts(Op, DAG);
6142 case ISD::SRA_PARTS:
6143 return lowerShiftRightParts(Op, DAG, true);
6144 case ISD::SRL_PARTS:
6145 return lowerShiftRightParts(Op, DAG, false);
6146 case ISD::ROTL:
6147 case ISD::ROTR:
6148 if (Op.getValueType().isFixedLengthVector()) {
6149 assert(Subtarget.hasStdExtZvkb());
6150 return lowerToScalableOp(Op, DAG);
6151 }
6152 assert(Subtarget.hasVendorXTHeadBb() &&
6153 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6154 "Unexpected custom legalization");
6155 // XTHeadBb only supports rotate by constant.
6156 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6157 return SDValue();
6158 return Op;
6159 case ISD::BITCAST: {
6160 SDLoc DL(Op);
6161 EVT VT = Op.getValueType();
6162 SDValue Op0 = Op.getOperand(0);
6163 EVT Op0VT = Op0.getValueType();
6164 MVT XLenVT = Subtarget.getXLenVT();
6165 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6166 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6167 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6168 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6169 return FPConv;
6170 }
6171 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6172 Subtarget.hasStdExtZfbfmin()) {
6173 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6174 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6175 return FPConv;
6176 }
6177 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6178 Subtarget.hasStdExtFOrZfinx()) {
6179 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6180 SDValue FPConv =
6181 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6182 return FPConv;
6183 }
6184 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6185 SDValue Lo, Hi;
6186 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6187 SDValue RetReg =
6188 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6189 return RetReg;
6190 }
6191
6192 // Consider other scalar<->scalar casts as legal if the types are legal.
6193 // Otherwise expand them.
6194 if (!VT.isVector() && !Op0VT.isVector()) {
6195 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6196 return Op;
6197 return SDValue();
6198 }
6199
6200 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6201 "Unexpected types");
6202
6203 if (VT.isFixedLengthVector()) {
6204 // We can handle fixed length vector bitcasts with a simple replacement
6205 // in isel.
6206 if (Op0VT.isFixedLengthVector())
6207 return Op;
6208 // When bitcasting from scalar to fixed-length vector, insert the scalar
6209 // into a one-element vector of the result type, and perform a vector
6210 // bitcast.
6211 if (!Op0VT.isVector()) {
6212 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6213 if (!isTypeLegal(BVT))
6214 return SDValue();
6215 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6216 DAG.getUNDEF(BVT), Op0,
6217 DAG.getVectorIdxConstant(0, DL)));
6218 }
6219 return SDValue();
6220 }
6221 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6222 // thus: bitcast the vector to a one-element vector type whose element type
6223 // is the same as the result type, and extract the first element.
6224 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6225 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6226 if (!isTypeLegal(BVT))
6227 return SDValue();
6228 SDValue BVec = DAG.getBitcast(BVT, Op0);
6229 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6230 DAG.getVectorIdxConstant(0, DL));
6231 }
6232 return SDValue();
6233 }
6234 case ISD::INTRINSIC_WO_CHAIN:
6235 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6236 case ISD::INTRINSIC_W_CHAIN:
6237 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6238 case ISD::INTRINSIC_VOID:
6239 return LowerINTRINSIC_VOID(Op, DAG);
6240 case ISD::IS_FPCLASS:
6241 return LowerIS_FPCLASS(Op, DAG);
6242 case ISD::BITREVERSE: {
6243 MVT VT = Op.getSimpleValueType();
6244 if (VT.isFixedLengthVector()) {
6245 assert(Subtarget.hasStdExtZvbb());
6246 return lowerToScalableOp(Op, DAG);
6247 }
6248 SDLoc DL(Op);
6249 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6250 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6251 // Expand bitreverse to a bswap(rev8) followed by brev8.
6252 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6253 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6254 }
6255 case ISD::TRUNCATE:
6256 // Only custom-lower vector truncates
6257 if (!Op.getSimpleValueType().isVector())
6258 return Op;
6259 return lowerVectorTruncLike(Op, DAG);
6260 case ISD::ANY_EXTEND:
6261 case ISD::ZERO_EXTEND:
6262 if (Op.getOperand(0).getValueType().isVector() &&
6263 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6264 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6265 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6266 case ISD::SIGN_EXTEND:
6267 if (Op.getOperand(0).getValueType().isVector() &&
6268 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6269 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6270 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6271 case ISD::SPLAT_VECTOR_PARTS:
6272 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6273 case ISD::INSERT_VECTOR_ELT:
6274 return lowerINSERT_VECTOR_ELT(Op, DAG);
6275 case ISD::EXTRACT_VECTOR_ELT:
6276 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6277 case ISD::SCALAR_TO_VECTOR: {
6278 MVT VT = Op.getSimpleValueType();
6279 SDLoc DL(Op);
6280 SDValue Scalar = Op.getOperand(0);
6281 if (VT.getVectorElementType() == MVT::i1) {
6282 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6283 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6284 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6285 }
6286 MVT ContainerVT = VT;
6287 if (VT.isFixedLengthVector())
6288 ContainerVT = getContainerForFixedLengthVector(VT);
6289 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6290 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6291 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6292 DAG.getUNDEF(ContainerVT), Scalar, VL);
6293 if (VT.isFixedLengthVector())
6294 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6295 return V;
6296 }
6297 case ISD::VSCALE: {
6298 MVT XLenVT = Subtarget.getXLenVT();
6299 MVT VT = Op.getSimpleValueType();
6300 SDLoc DL(Op);
6301 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6302 // We define our scalable vector types for lmul=1 to use a 64 bit known
6303 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6304 // vscale as VLENB / 8.
6305 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6306 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6307 report_fatal_error("Support for VLEN==32 is incomplete.");
6308 // We assume VLENB is a multiple of 8. We manually choose the best shift
6309 // here because SimplifyDemandedBits isn't always able to simplify it.
6310 uint64_t Val = Op.getConstantOperandVal(0);
6311 if (isPowerOf2_64(Val)) {
6312 uint64_t Log2 = Log2_64(Val);
6313 if (Log2 < 3)
6314 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6315 DAG.getConstant(3 - Log2, DL, VT));
6316 else if (Log2 > 3)
6317 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6318 DAG.getConstant(Log2 - 3, DL, XLenVT));
6319 } else if ((Val % 8) == 0) {
6320 // If the multiplier is a multiple of 8, scale it down to avoid needing
6321 // to shift the VLENB value.
6322 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6323 DAG.getConstant(Val / 8, DL, XLenVT));
6324 } else {
6325 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6326 DAG.getConstant(3, DL, XLenVT));
6327 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6328 DAG.getConstant(Val, DL, XLenVT));
6329 }
6330 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6331 }
6332 case ISD::FPOWI: {
6333 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6334 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6335 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6336 Op.getOperand(1).getValueType() == MVT::i32) {
6337 SDLoc DL(Op);
6338 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6339 SDValue Powi =
6340 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6341 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6342 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6343 }
6344 return SDValue();
6345 }
6346 case ISD::FMAXIMUM:
6347 case ISD::FMINIMUM:
6348 if (Op.getValueType() == MVT::nxv32f16 &&
6349 (Subtarget.hasVInstructionsF16Minimal() &&
6350 !Subtarget.hasVInstructionsF16()))
6351 return SplitVectorOp(Op, DAG);
6352 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6353 case ISD::FP_EXTEND: {
6354 SDLoc DL(Op);
6355 EVT VT = Op.getValueType();
6356 SDValue Op0 = Op.getOperand(0);
6357 EVT Op0VT = Op0.getValueType();
6358 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6359 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6360 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6361 SDValue FloatVal =
6362 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6363 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6364 }
6365
6366 if (!Op.getValueType().isVector())
6367 return Op;
6368 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6369 }
6370 case ISD::FP_ROUND: {
6371 SDLoc DL(Op);
6372 EVT VT = Op.getValueType();
6373 SDValue Op0 = Op.getOperand(0);
6374 EVT Op0VT = Op0.getValueType();
6375 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6376 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6377 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6378 Subtarget.hasStdExtDOrZdinx()) {
6379 SDValue FloatVal =
6380 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6381 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6382 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6383 }
6384
6385 if (!Op.getValueType().isVector())
6386 return Op;
6387 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6388 }
6389 case ISD::STRICT_FP_ROUND:
6390 case ISD::STRICT_FP_EXTEND:
6391 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6392 case ISD::SINT_TO_FP:
6393 case ISD::UINT_TO_FP:
6394 if (Op.getValueType().isVector() &&
6395 Op.getValueType().getScalarType() == MVT::f16 &&
6396 (Subtarget.hasVInstructionsF16Minimal() &&
6397 !Subtarget.hasVInstructionsF16())) {
6398 if (Op.getValueType() == MVT::nxv32f16)
6399 return SplitVectorOp(Op, DAG);
6400 // int -> f32
6401 SDLoc DL(Op);
6402 MVT NVT =
6403 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6404 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6405 // f32 -> f16
6406 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6407 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6408 }
6409 [[fallthrough]];
6410 case ISD::FP_TO_SINT:
6411 case ISD::FP_TO_UINT:
6412 if (SDValue Op1 = Op.getOperand(0);
6413 Op1.getValueType().isVector() &&
6414 Op1.getValueType().getScalarType() == MVT::f16 &&
6415 (Subtarget.hasVInstructionsF16Minimal() &&
6416 !Subtarget.hasVInstructionsF16())) {
6417 if (Op1.getValueType() == MVT::nxv32f16)
6418 return SplitVectorOp(Op, DAG);
6419 // f16 -> f32
6420 SDLoc DL(Op);
6421 MVT NVT = MVT::getVectorVT(MVT::f32,
6422 Op1.getValueType().getVectorElementCount());
6423 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6424 // f32 -> int
6425 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6426 }
6427 [[fallthrough]];
6428 case ISD::STRICT_FP_TO_SINT:
6429 case ISD::STRICT_FP_TO_UINT:
6430 case ISD::STRICT_SINT_TO_FP:
6431 case ISD::STRICT_UINT_TO_FP: {
6432 // RVV can only do fp<->int conversions to types half/double the size as
6433 // the source. We custom-lower any conversions that do two hops into
6434 // sequences.
6435 MVT VT = Op.getSimpleValueType();
6436 if (!VT.isVector())
6437 return Op;
6438 SDLoc DL(Op);
6439 bool IsStrict = Op->isStrictFPOpcode();
6440 SDValue Src = Op.getOperand(0 + IsStrict);
6441 MVT EltVT = VT.getVectorElementType();
6442 MVT SrcVT = Src.getSimpleValueType();
6443 MVT SrcEltVT = SrcVT.getVectorElementType();
6444 unsigned EltSize = EltVT.getSizeInBits();
6445 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6446 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6447 "Unexpected vector element types");
6448
6449 bool IsInt2FP = SrcEltVT.isInteger();
6450 // Widening conversions
6451 if (EltSize > (2 * SrcEltSize)) {
6452 if (IsInt2FP) {
6453 // Do a regular integer sign/zero extension then convert to float.
6454 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6455 VT.getVectorElementCount());
6456 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6457 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6458 ? ISD::ZERO_EXTEND
6459 : ISD::SIGN_EXTEND;
6460 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6461 if (IsStrict)
6462 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6463 Op.getOperand(0), Ext);
6464 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6465 }
6466 // FP2Int
6467 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6468 // Do one doubling fp_extend then complete the operation by converting
6469 // to int.
6470 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6471 if (IsStrict) {
6472 auto [FExt, Chain] =
6473 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6474 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6475 }
6476 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6477 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6478 }
6479
6480 // Narrowing conversions
6481 if (SrcEltSize > (2 * EltSize)) {
6482 if (IsInt2FP) {
6483 // One narrowing int_to_fp, then an fp_round.
6484 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6485 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6486 if (IsStrict) {
6487 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6488 DAG.getVTList(InterimFVT, MVT::Other),
6489 Op.getOperand(0), Src);
6490 SDValue Chain = Int2FP.getValue(1);
6491 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6492 }
6493 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6494 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6495 }
6496 // FP2Int
6497 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6498 // representable by the integer, the result is poison.
6499 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6500 VT.getVectorElementCount());
6501 if (IsStrict) {
6502 SDValue FP2Int =
6503 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6504 Op.getOperand(0), Src);
6505 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6506 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6507 }
6508 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6509 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6510 }
6511
6512 // Scalable vectors can exit here. Patterns will handle equally-sized
6513 // conversions as well as halving/doubling ones.
6514 if (!VT.isFixedLengthVector())
6515 return Op;
6516
6517 // For fixed-length vectors we lower to a custom "VL" node.
6518 unsigned RVVOpc = 0;
6519 switch (Op.getOpcode()) {
6520 default:
6521 llvm_unreachable("Impossible opcode");
6522 case ISD::FP_TO_SINT:
6523 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6524 break;
6525 case ISD::FP_TO_UINT:
6526 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6527 break;
6528 case ISD::SINT_TO_FP:
6529 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6530 break;
6531 case ISD::UINT_TO_FP:
6532 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6533 break;
6534 case ISD::STRICT_FP_TO_SINT:
6535 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6536 break;
6537 case ISD::STRICT_FP_TO_UINT:
6538 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6539 break;
6540 case ISD::STRICT_SINT_TO_FP:
6541 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6542 break;
6543 case ISD::STRICT_UINT_TO_FP:
6544 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6545 break;
6546 }
6547
6548 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6549 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6550 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6551 "Expected same element count");
6552
6553 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6554
6555 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6556 if (IsStrict) {
6557 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6558 Op.getOperand(0), Src, Mask, VL);
6559 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6560 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6561 }
6562 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6563 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6564 }
6565 case ISD::FP_TO_SINT_SAT:
6566 case ISD::FP_TO_UINT_SAT:
6567 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6568 case ISD::FP_TO_BF16: {
6569 // Custom lower to ensure the libcall return is passed in an FPR on hard
6570 // float ABIs.
6571 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6572 SDLoc DL(Op);
6573 MakeLibCallOptions CallOptions;
6574 RTLIB::Libcall LC =
6575 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6576 SDValue Res =
6577 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6578 if (Subtarget.is64Bit() && !RV64LegalI32)
6579 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6580 return DAG.getBitcast(MVT::i32, Res);
6581 }
6582 case ISD::BF16_TO_FP: {
6583 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6584 MVT VT = Op.getSimpleValueType();
6585 SDLoc DL(Op);
6586 Op = DAG.getNode(
6587 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6588 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6589 SDValue Res = Subtarget.is64Bit()
6590 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6591 : DAG.getBitcast(MVT::f32, Op);
6592 // fp_extend if the target VT is bigger than f32.
6593 if (VT != MVT::f32)
6594 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6595 return Res;
6596 }
6597 case ISD::FP_TO_FP16: {
6598 // Custom lower to ensure the libcall return is passed in an FPR on hard
6599 // float ABIs.
6600 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6601 SDLoc DL(Op);
6602 MakeLibCallOptions CallOptions;
6603 RTLIB::Libcall LC =
6604 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6605 SDValue Res =
6606 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6607 if (Subtarget.is64Bit() && !RV64LegalI32)
6608 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6609 return DAG.getBitcast(MVT::i32, Res);
6610 }
6611 case ISD::FP16_TO_FP: {
6612 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6613 // float ABIs.
6614 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6615 SDLoc DL(Op);
6616 MakeLibCallOptions CallOptions;
6617 SDValue Arg = Subtarget.is64Bit()
6618 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6619 Op.getOperand(0))
6620 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6621 SDValue Res =
6622 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6623 .first;
6624 return Res;
6625 }
6626 case ISD::FTRUNC:
6627 case ISD::FCEIL:
6628 case ISD::FFLOOR:
6629 case ISD::FNEARBYINT:
6630 case ISD::FRINT:
6631 case ISD::FROUND:
6632 case ISD::FROUNDEVEN:
6633 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6634 case ISD::LRINT:
6635 case ISD::LLRINT:
6636 return lowerVectorXRINT(Op, DAG, Subtarget);
6637 case ISD::VECREDUCE_ADD:
6638 case ISD::VECREDUCE_UMAX:
6639 case ISD::VECREDUCE_SMAX:
6640 case ISD::VECREDUCE_UMIN:
6641 case ISD::VECREDUCE_SMIN:
6642 return lowerVECREDUCE(Op, DAG);
6643 case ISD::VECREDUCE_AND:
6644 case ISD::VECREDUCE_OR:
6645 case ISD::VECREDUCE_XOR:
6646 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6647 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6648 return lowerVECREDUCE(Op, DAG);
6649 case ISD::VECREDUCE_FADD:
6650 case ISD::VECREDUCE_SEQ_FADD:
6651 case ISD::VECREDUCE_FMIN:
6652 case ISD::VECREDUCE_FMAX:
6653 case ISD::VECREDUCE_FMAXIMUM:
6654 case ISD::VECREDUCE_FMINIMUM:
6655 return lowerFPVECREDUCE(Op, DAG);
6656 case ISD::VP_REDUCE_ADD:
6657 case ISD::VP_REDUCE_UMAX:
6658 case ISD::VP_REDUCE_SMAX:
6659 case ISD::VP_REDUCE_UMIN:
6660 case ISD::VP_REDUCE_SMIN:
6661 case ISD::VP_REDUCE_FADD:
6662 case ISD::VP_REDUCE_SEQ_FADD:
6663 case ISD::VP_REDUCE_FMIN:
6664 case ISD::VP_REDUCE_FMAX:
6665 case ISD::VP_REDUCE_FMINIMUM:
6666 case ISD::VP_REDUCE_FMAXIMUM:
6667 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6668 (Subtarget.hasVInstructionsF16Minimal() &&
6669 !Subtarget.hasVInstructionsF16()))
6670 return SplitVectorReductionOp(Op, DAG);
6671 return lowerVPREDUCE(Op, DAG);
6672 case ISD::VP_REDUCE_AND:
6673 case ISD::VP_REDUCE_OR:
6674 case ISD::VP_REDUCE_XOR:
6675 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6676 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6677 return lowerVPREDUCE(Op, DAG);
6678 case ISD::VP_CTTZ_ELTS:
6679 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6680 return lowerVPCttzElements(Op, DAG);
6681 case ISD::UNDEF: {
6682 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6683 return convertFromScalableVector(Op.getSimpleValueType(),
6684 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6685 }
6686 case ISD::INSERT_SUBVECTOR:
6687 return lowerINSERT_SUBVECTOR(Op, DAG);
6688 case ISD::EXTRACT_SUBVECTOR:
6689 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6690 case ISD::VECTOR_DEINTERLEAVE:
6691 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6692 case ISD::VECTOR_INTERLEAVE:
6693 return lowerVECTOR_INTERLEAVE(Op, DAG);
6694 case ISD::STEP_VECTOR:
6695 return lowerSTEP_VECTOR(Op, DAG);
6696 case ISD::VECTOR_REVERSE:
6697 return lowerVECTOR_REVERSE(Op, DAG);
6698 case ISD::VECTOR_SPLICE:
6699 return lowerVECTOR_SPLICE(Op, DAG);
6700 case ISD::BUILD_VECTOR:
6701 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6702 case ISD::SPLAT_VECTOR:
6703 if (Op.getValueType().getScalarType() == MVT::f16 &&
6704 (Subtarget.hasVInstructionsF16Minimal() &&
6705 !Subtarget.hasVInstructionsF16())) {
6706 if (Op.getValueType() == MVT::nxv32f16)
6707 return SplitVectorOp(Op, DAG);
6708 SDLoc DL(Op);
6709 SDValue NewScalar =
6710 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6711 SDValue NewSplat = DAG.getNode(
6712 ISD::SPLAT_VECTOR, DL,
6713 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6714 NewScalar);
6715 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6716 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6717 }
6718 if (Op.getValueType().getVectorElementType() == MVT::i1)
6719 return lowerVectorMaskSplat(Op, DAG);
6720 return SDValue();
6721 case ISD::VECTOR_SHUFFLE:
6722 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6723 case ISD::CONCAT_VECTORS: {
6724 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6725 // better than going through the stack, as the default expansion does.
6726 SDLoc DL(Op);
6727 MVT VT = Op.getSimpleValueType();
6728 MVT ContainerVT = VT;
6729 if (VT.isFixedLengthVector())
6730 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6731
6732 // Recursively split concat_vectors with more than 2 operands:
6733 //
6734 // concat_vector op1, op2, op3, op4
6735 // ->
6736 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6737 //
6738 // This reduces the length of the chain of vslideups and allows us to
6739 // perform the vslideups at a smaller LMUL, limited to MF2.
6740 if (Op.getNumOperands() > 2 &&
6741 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6742 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6744 size_t HalfNumOps = Op.getNumOperands() / 2;
6745 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6746 Op->ops().take_front(HalfNumOps));
6747 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6748 Op->ops().drop_front(HalfNumOps));
6749 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6750 }
6751
6752 unsigned NumOpElts =
6753 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6754 SDValue Vec = DAG.getUNDEF(VT);
6755 for (const auto &OpIdx : enumerate(Op->ops())) {
6756 SDValue SubVec = OpIdx.value();
6757 // Don't insert undef subvectors.
6758 if (SubVec.isUndef())
6759 continue;
6760 Vec =
6761 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6762 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6763 }
6764 return Vec;
6765 }
6766 case ISD::LOAD:
6767 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6768 return V;
6769 if (Op.getValueType().isFixedLengthVector())
6770 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6771 return Op;
6772 case ISD::STORE:
6773 if (auto V = expandUnalignedRVVStore(Op, DAG))
6774 return V;
6775 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6776 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6777 return Op;
6778 case ISD::MLOAD:
6779 case ISD::VP_LOAD:
6780 return lowerMaskedLoad(Op, DAG);
6781 case ISD::MSTORE:
6782 case ISD::VP_STORE:
6783 return lowerMaskedStore(Op, DAG);
6784 case ISD::SELECT_CC: {
6785 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6786 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6787 // into separate SETCC+SELECT just like LegalizeDAG.
6788 SDValue Tmp1 = Op.getOperand(0);
6789 SDValue Tmp2 = Op.getOperand(1);
6790 SDValue True = Op.getOperand(2);
6791 SDValue False = Op.getOperand(3);
6792 EVT VT = Op.getValueType();
6793 SDValue CC = Op.getOperand(4);
6794 EVT CmpVT = Tmp1.getValueType();
6795 EVT CCVT =
6796 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6797 SDLoc DL(Op);
6798 SDValue Cond =
6799 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6800 return DAG.getSelect(DL, VT, Cond, True, False);
6801 }
6802 case ISD::SETCC: {
6803 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6804 if (OpVT.isScalarInteger()) {
6805 MVT VT = Op.getSimpleValueType();
6806 SDValue LHS = Op.getOperand(0);
6807 SDValue RHS = Op.getOperand(1);
6808 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6809 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6810 "Unexpected CondCode");
6811
6812 SDLoc DL(Op);
6813
6814 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6815 // convert this to the equivalent of (set(u)ge X, C+1) by using
6816 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6817 // in a register.
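      // For example, (setugt X, 7) becomes (xori (sltiu X, 8), 1), since
      // X >u 7 is equivalent to X >=u 8, i.e. the logical NOT of X <u 8.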
6818 if (isa<ConstantSDNode>(RHS)) {
6819 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6820 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6821 // If this is an unsigned compare and the constant is -1, incrementing
6822 // the constant would change behavior. The result should be false.
6823 if (CCVal == ISD::SETUGT && Imm == -1)
6824 return DAG.getConstant(0, DL, VT);
6825 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6826 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6827 SDValue SetCC = DAG.getSetCC(
6828 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6829 return DAG.getLogicalNOT(DL, SetCC, VT);
6830 }
6831 }
6832
6833 // Not a constant we could handle, swap the operands and condition code to
6834 // SETLT/SETULT.
6835 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6836 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6837 }
6838
6839 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6840 (Subtarget.hasVInstructionsF16Minimal() &&
6841 !Subtarget.hasVInstructionsF16()))
6842 return SplitVectorOp(Op, DAG);
6843
6844 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6845 }
6846 case ISD::ADD:
6847 case ISD::SUB:
6848 case ISD::MUL:
6849 case ISD::MULHS:
6850 case ISD::MULHU:
6851 case ISD::AND:
6852 case ISD::OR:
6853 case ISD::XOR:
6854 case ISD::SDIV:
6855 case ISD::SREM:
6856 case ISD::UDIV:
6857 case ISD::UREM:
6858 case ISD::BSWAP:
6859 case ISD::CTPOP:
6860 return lowerToScalableOp(Op, DAG);
6861 case ISD::SHL:
6862 case ISD::SRA:
6863 case ISD::SRL:
6864 if (Op.getSimpleValueType().isFixedLengthVector())
6865 return lowerToScalableOp(Op, DAG);
6866 // This can be called for an i32 shift amount that needs to be promoted.
6867 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6868 "Unexpected custom legalisation");
6869 return SDValue();
6870 case ISD::FADD:
6871 case ISD::FSUB:
6872 case ISD::FMUL:
6873 case ISD::FDIV:
6874 case ISD::FNEG:
6875 case ISD::FABS:
6876 case ISD::FSQRT:
6877 case ISD::FMA:
6878 case ISD::FMINNUM:
6879 case ISD::FMAXNUM:
6880 if (Op.getValueType() == MVT::nxv32f16 &&
6881 (Subtarget.hasVInstructionsF16Minimal() &&
6882 !Subtarget.hasVInstructionsF16()))
6883 return SplitVectorOp(Op, DAG);
6884 [[fallthrough]];
6885 case ISD::AVGFLOORU:
6886 case ISD::AVGCEILU:
6887 case ISD::SMIN:
6888 case ISD::SMAX:
6889 case ISD::UMIN:
6890 case ISD::UMAX:
6891 return lowerToScalableOp(Op, DAG);
6892 case ISD::UADDSAT:
6893 case ISD::USUBSAT:
6894 if (!Op.getValueType().isVector())
6895 return lowerUADDSAT_USUBSAT(Op, DAG);
6896 return lowerToScalableOp(Op, DAG);
6897 case ISD::SADDSAT:
6898 case ISD::SSUBSAT:
6899 if (!Op.getValueType().isVector())
6900 return lowerSADDSAT_SSUBSAT(Op, DAG);
6901 return lowerToScalableOp(Op, DAG);
6902 case ISD::ABDS:
6903 case ISD::ABDU: {
6904 SDLoc dl(Op);
6905 EVT VT = Op->getValueType(0);
6906 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6907 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6908 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6909
6910 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6911 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
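    // For example, abds(-3, 5) = smax(-3, 5) - smin(-3, 5) = 5 - (-3) = 8.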
6912 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6913 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6914 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6915 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6916 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6917 }
6918 case ISD::ABS:
6919 case ISD::VP_ABS:
6920 return lowerABS(Op, DAG);
6921   case ISD::CTLZ:
6922   case ISD::CTLZ_ZERO_UNDEF:
6923   case ISD::CTTZ:
6924   case ISD::CTTZ_ZERO_UNDEF:
6925 if (Subtarget.hasStdExtZvbb())
6926 return lowerToScalableOp(Op, DAG);
6927 assert(Op.getOpcode() != ISD::CTTZ);
6928 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6929 case ISD::VSELECT:
6930 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6931 case ISD::FCOPYSIGN:
6932 if (Op.getValueType() == MVT::nxv32f16 &&
6933 (Subtarget.hasVInstructionsF16Minimal() &&
6934 !Subtarget.hasVInstructionsF16()))
6935 return SplitVectorOp(Op, DAG);
6936 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6937 case ISD::STRICT_FADD:
6938 case ISD::STRICT_FSUB:
6939 case ISD::STRICT_FMUL:
6940 case ISD::STRICT_FDIV:
6941 case ISD::STRICT_FSQRT:
6942 case ISD::STRICT_FMA:
6943 if (Op.getValueType() == MVT::nxv32f16 &&
6944 (Subtarget.hasVInstructionsF16Minimal() &&
6945 !Subtarget.hasVInstructionsF16()))
6946 return SplitStrictFPVectorOp(Op, DAG);
6947 return lowerToScalableOp(Op, DAG);
6948   case ISD::STRICT_FSETCC:
6949   case ISD::STRICT_FSETCCS:
6950 return lowerVectorStrictFSetcc(Op, DAG);
6951 case ISD::STRICT_FCEIL:
6952 case ISD::STRICT_FRINT:
6953 case ISD::STRICT_FFLOOR:
6954   case ISD::STRICT_FTRUNC:
6955   case ISD::STRICT_FNEARBYINT:
6956   case ISD::STRICT_FROUND:
6957   case ISD::STRICT_FROUNDEVEN:
6958 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6959 case ISD::MGATHER:
6960 case ISD::VP_GATHER:
6961 return lowerMaskedGather(Op, DAG);
6962 case ISD::MSCATTER:
6963 case ISD::VP_SCATTER:
6964 return lowerMaskedScatter(Op, DAG);
6965 case ISD::GET_ROUNDING:
6966 return lowerGET_ROUNDING(Op, DAG);
6967 case ISD::SET_ROUNDING:
6968 return lowerSET_ROUNDING(Op, DAG);
6969 case ISD::EH_DWARF_CFA:
6970 return lowerEH_DWARF_CFA(Op, DAG);
6971 case ISD::VP_SELECT:
6972 case ISD::VP_MERGE:
6973 case ISD::VP_ADD:
6974 case ISD::VP_SUB:
6975 case ISD::VP_MUL:
6976 case ISD::VP_SDIV:
6977 case ISD::VP_UDIV:
6978 case ISD::VP_SREM:
6979 case ISD::VP_UREM:
6980 case ISD::VP_UADDSAT:
6981 case ISD::VP_USUBSAT:
6982 case ISD::VP_SADDSAT:
6983 case ISD::VP_SSUBSAT:
6984 case ISD::VP_LRINT:
6985 case ISD::VP_LLRINT:
6986 return lowerVPOp(Op, DAG);
6987 case ISD::VP_AND:
6988 case ISD::VP_OR:
6989 case ISD::VP_XOR:
6990 return lowerLogicVPOp(Op, DAG);
6991 case ISD::VP_FADD:
6992 case ISD::VP_FSUB:
6993 case ISD::VP_FMUL:
6994 case ISD::VP_FDIV:
6995 case ISD::VP_FNEG:
6996 case ISD::VP_FABS:
6997 case ISD::VP_SQRT:
6998 case ISD::VP_FMA:
6999 case ISD::VP_FMINNUM:
7000 case ISD::VP_FMAXNUM:
7001 case ISD::VP_FCOPYSIGN:
7002 if (Op.getValueType() == MVT::nxv32f16 &&
7003 (Subtarget.hasVInstructionsF16Minimal() &&
7004 !Subtarget.hasVInstructionsF16()))
7005 return SplitVPOp(Op, DAG);
7006 [[fallthrough]];
7007 case ISD::VP_ASHR:
7008 case ISD::VP_LSHR:
7009 case ISD::VP_SHL:
7010 return lowerVPOp(Op, DAG);
7011 case ISD::VP_IS_FPCLASS:
7012 return LowerIS_FPCLASS(Op, DAG);
7013 case ISD::VP_SIGN_EXTEND:
7014 case ISD::VP_ZERO_EXTEND:
7015 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7016 return lowerVPExtMaskOp(Op, DAG);
7017 return lowerVPOp(Op, DAG);
7018 case ISD::VP_TRUNCATE:
7019 return lowerVectorTruncLike(Op, DAG);
7020 case ISD::VP_FP_EXTEND:
7021 case ISD::VP_FP_ROUND:
7022 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7023 case ISD::VP_SINT_TO_FP:
7024 case ISD::VP_UINT_TO_FP:
7025 if (Op.getValueType().isVector() &&
7026 Op.getValueType().getScalarType() == MVT::f16 &&
7027 (Subtarget.hasVInstructionsF16Minimal() &&
7028 !Subtarget.hasVInstructionsF16())) {
7029 if (Op.getValueType() == MVT::nxv32f16)
7030 return SplitVPOp(Op, DAG);
7031 // int -> f32
7032 SDLoc DL(Op);
7033 MVT NVT =
7034 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7035 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7036 // f32 -> f16
7037 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7038 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7039 }
7040 [[fallthrough]];
7041 case ISD::VP_FP_TO_SINT:
7042 case ISD::VP_FP_TO_UINT:
7043 if (SDValue Op1 = Op.getOperand(0);
7044 Op1.getValueType().isVector() &&
7045 Op1.getValueType().getScalarType() == MVT::f16 &&
7046 (Subtarget.hasVInstructionsF16Minimal() &&
7047 !Subtarget.hasVInstructionsF16())) {
7048 if (Op1.getValueType() == MVT::nxv32f16)
7049 return SplitVPOp(Op, DAG);
7050 // f16 -> f32
7051 SDLoc DL(Op);
7052 MVT NVT = MVT::getVectorVT(MVT::f32,
7053 Op1.getValueType().getVectorElementCount());
7054 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7055 // f32 -> int
7056 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7057 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7058 }
7059 return lowerVPFPIntConvOp(Op, DAG);
7060 case ISD::VP_SETCC:
7061 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7062 (Subtarget.hasVInstructionsF16Minimal() &&
7063 !Subtarget.hasVInstructionsF16()))
7064 return SplitVPOp(Op, DAG);
7065 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7066 return lowerVPSetCCMaskOp(Op, DAG);
7067 [[fallthrough]];
7068 case ISD::VP_SMIN:
7069 case ISD::VP_SMAX:
7070 case ISD::VP_UMIN:
7071 case ISD::VP_UMAX:
7072 case ISD::VP_BITREVERSE:
7073 case ISD::VP_BSWAP:
7074 return lowerVPOp(Op, DAG);
7075 case ISD::VP_CTLZ:
7076 case ISD::VP_CTLZ_ZERO_UNDEF:
7077 if (Subtarget.hasStdExtZvbb())
7078 return lowerVPOp(Op, DAG);
7079 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7080 case ISD::VP_CTTZ:
7081 case ISD::VP_CTTZ_ZERO_UNDEF:
7082 if (Subtarget.hasStdExtZvbb())
7083 return lowerVPOp(Op, DAG);
7084 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7085 case ISD::VP_CTPOP:
7086 return lowerVPOp(Op, DAG);
7087 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7088 return lowerVPStridedLoad(Op, DAG);
7089 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7090 return lowerVPStridedStore(Op, DAG);
7091 case ISD::VP_FCEIL:
7092 case ISD::VP_FFLOOR:
7093 case ISD::VP_FRINT:
7094 case ISD::VP_FNEARBYINT:
7095 case ISD::VP_FROUND:
7096 case ISD::VP_FROUNDEVEN:
7097 case ISD::VP_FROUNDTOZERO:
7098 if (Op.getValueType() == MVT::nxv32f16 &&
7099 (Subtarget.hasVInstructionsF16Minimal() &&
7100 !Subtarget.hasVInstructionsF16()))
7101 return SplitVPOp(Op, DAG);
7102 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7103 case ISD::VP_FMAXIMUM:
7104 case ISD::VP_FMINIMUM:
7105 if (Op.getValueType() == MVT::nxv32f16 &&
7106 (Subtarget.hasVInstructionsF16Minimal() &&
7107 !Subtarget.hasVInstructionsF16()))
7108 return SplitVPOp(Op, DAG);
7109 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7110 case ISD::EXPERIMENTAL_VP_SPLICE:
7111 return lowerVPSpliceExperimental(Op, DAG);
7112 case ISD::EXPERIMENTAL_VP_REVERSE:
7113 return lowerVPReverseExperimental(Op, DAG);
7114 }
7115}
7116
7117 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7118                              SelectionDAG &DAG, unsigned Flags) {
7119 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7120}
7121
7122 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7123                              SelectionDAG &DAG, unsigned Flags) {
7124 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7125 Flags);
7126}
7127
7128 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7129                              SelectionDAG &DAG, unsigned Flags) {
7130 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7131 N->getOffset(), Flags);
7132}
7133
7134 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7135                              SelectionDAG &DAG, unsigned Flags) {
7136 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7137}
7138
7139template <class NodeTy>
7140SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7141 bool IsLocal, bool IsExternWeak) const {
7142 SDLoc DL(N);
7143 EVT Ty = getPointerTy(DAG.getDataLayout());
7144
7145 // When HWASAN is used and tagging of global variables is enabled
7146 // they should be accessed via the GOT, since the tagged address of a global
7147 // is incompatible with existing code models. This also applies to non-pic
7148 // mode.
7149 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7150 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7151 if (IsLocal && !Subtarget.allowTaggedGlobals())
7152 // Use PC-relative addressing to access the symbol. This generates the
7153 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7154 // %pcrel_lo(auipc)).
7155 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7156
7157 // Use PC-relative addressing to access the GOT for this symbol, then load
7158 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7159 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7160 SDValue Load =
7161 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7162     MachineFunction &MF = DAG.getMachineFunction();
7163     MachineMemOperand *MemOp = MF.getMachineMemOperand(
7164         MachinePointerInfo::getGOT(MF),
7165         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7166             MachineMemOperand::MOInvariant,
7167         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7168 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7169 return Load;
7170 }
7171
7172 switch (getTargetMachine().getCodeModel()) {
7173 default:
7174 report_fatal_error("Unsupported code model for lowering");
7175 case CodeModel::Small: {
7176 // Generate a sequence for accessing addresses within the first 2 GiB of
7177 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
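    // Roughly:
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)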
7178 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7179 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7180 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7181 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7182 }
7183 case CodeModel::Medium: {
7184 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7185 if (IsExternWeak) {
7186 // An extern weak symbol may be undefined, i.e. have value 0, which may
7187 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7188 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7189 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7190 SDValue Load =
7191 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7192       MachineFunction &MF = DAG.getMachineFunction();
7193       MachineMemOperand *MemOp = MF.getMachineMemOperand(
7194           MachinePointerInfo::getGOT(MF),
7195           MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7196               MachineMemOperand::MOInvariant,
7197           LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7198 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7199 return Load;
7200 }
7201
7202 // Generate a sequence for accessing addresses within any 2GiB range within
7203 // the address space. This generates the pattern (PseudoLLA sym), which
7204 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7205 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7206 }
7207 }
7208}
7209
7210SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7211 SelectionDAG &DAG) const {
7212 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7213 assert(N->getOffset() == 0 && "unexpected offset in global node");
7214 const GlobalValue *GV = N->getGlobal();
7215 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7216}
7217
7218SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7219 SelectionDAG &DAG) const {
7220 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7221
7222 return getAddr(N, DAG);
7223}
7224
7225SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7226 SelectionDAG &DAG) const {
7227 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7228
7229 return getAddr(N, DAG);
7230}
7231
7232SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7233 SelectionDAG &DAG) const {
7234 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7235
7236 return getAddr(N, DAG);
7237}
7238
7239SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7240 SelectionDAG &DAG,
7241 bool UseGOT) const {
7242 SDLoc DL(N);
7243 EVT Ty = getPointerTy(DAG.getDataLayout());
7244 const GlobalValue *GV = N->getGlobal();
7245 MVT XLenVT = Subtarget.getXLenVT();
7246
7247 if (UseGOT) {
7248 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7249 // load the address from the GOT and add the thread pointer. This generates
7250 // the pattern (PseudoLA_TLS_IE sym), which expands to
7251 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7252 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7253 SDValue Load =
7254 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7255     MachineFunction &MF = DAG.getMachineFunction();
7256     MachineMemOperand *MemOp = MF.getMachineMemOperand(
7257         MachinePointerInfo::getGOT(MF),
7258         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7259             MachineMemOperand::MOInvariant,
7260         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7261 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7262
7263 // Add the thread pointer.
7264 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7265 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7266 }
7267
7268 // Generate a sequence for accessing the address relative to the thread
7269 // pointer, with the appropriate adjustment for the thread pointer offset.
7270 // This generates the pattern
7271 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
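  // which corresponds roughly to the local-exec sequence:
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)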
7272   SDValue AddrHi =
7273       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7274   SDValue AddrAdd =
7275       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7276   SDValue AddrLo =
7277       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7278
7279 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7280 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7281 SDValue MNAdd =
7282 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7283 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7284}
7285
7286SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7287 SelectionDAG &DAG) const {
7288 SDLoc DL(N);
7289 EVT Ty = getPointerTy(DAG.getDataLayout());
7290 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7291 const GlobalValue *GV = N->getGlobal();
7292
7293 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7294 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7295 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7296 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7297 SDValue Load =
7298 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7299
7300 // Prepare argument list to generate call.
7301   ArgListTy Args;
7302   ArgListEntry Entry;
7303 Entry.Node = Load;
7304 Entry.Ty = CallTy;
7305 Args.push_back(Entry);
7306
7307   // Setup call to __tls_get_addr.
7308   TargetLowering::CallLoweringInfo CLI(DAG);
7309 CLI.setDebugLoc(DL)
7310 .setChain(DAG.getEntryNode())
7311 .setLibCallee(CallingConv::C, CallTy,
7312 DAG.getExternalSymbol("__tls_get_addr", Ty),
7313 std::move(Args));
7314
7315 return LowerCallTo(CLI).first;
7316}
7317
7318SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7319 SelectionDAG &DAG) const {
7320 SDLoc DL(N);
7321 EVT Ty = getPointerTy(DAG.getDataLayout());
7322 const GlobalValue *GV = N->getGlobal();
7323
7324 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7325 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7326 //
7327 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7328 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7329 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7330 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7331 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7332 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7333}
7334
7335SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7336 SelectionDAG &DAG) const {
7337 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7338 assert(N->getOffset() == 0 && "unexpected offset in global node");
7339
7340 if (DAG.getTarget().useEmulatedTLS())
7341 return LowerToTLSEmulatedModel(N, DAG);
7342
7343   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7344 
7345   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7346       CallingConv::GHC)
7347 report_fatal_error("In GHC calling convention TLS is not supported");
7348
7349 SDValue Addr;
7350   switch (Model) {
7351   case TLSModel::LocalExec:
7352 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7353     break;
7354   case TLSModel::InitialExec:
7355 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7356     break;
7357   case TLSModel::LocalDynamic:
7358   case TLSModel::GeneralDynamic:
7359 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7360 : getDynamicTLSAddr(N, DAG);
7361 break;
7362 }
7363
7364 return Addr;
7365}
7366
7367// Return true if Val is equal to (setcc LHS, RHS, CC).
7368// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7369// Otherwise, return std::nullopt.
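// For example, with Val = (setcc a, b, SETGE): matchSetCC(a, b, SETGE, Val)
// returns true, while matchSetCC(a, b, SETLT, Val) returns false because
// SETLT is the inverse of SETGE.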
7370static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7371 ISD::CondCode CC, SDValue Val) {
7372 assert(Val->getOpcode() == ISD::SETCC);
7373 SDValue LHS2 = Val.getOperand(0);
7374 SDValue RHS2 = Val.getOperand(1);
7375 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7376
7377 if (LHS == LHS2 && RHS == RHS2) {
7378 if (CC == CC2)
7379 return true;
7380 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7381 return false;
7382   } else if (LHS == RHS2 && RHS == LHS2) {
7383     CC2 = ISD::getSetCCSwappedOperands(CC2);
7384 if (CC == CC2)
7385 return true;
7386 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7387 return false;
7388 }
7389
7390 return std::nullopt;
7391}
7392
7394 const RISCVSubtarget &Subtarget) {
7395 SDValue CondV = N->getOperand(0);
7396 SDValue TrueV = N->getOperand(1);
7397 SDValue FalseV = N->getOperand(2);
7398 MVT VT = N->getSimpleValueType(0);
7399 SDLoc DL(N);
7400
7401 if (!Subtarget.hasConditionalMoveFusion()) {
7402 // (select c, -1, y) -> -c | y
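    // (c is 0 or 1, so -c is either 0 or all-ones and the OR yields either
    // y or -1 as required.)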
7403 if (isAllOnesConstant(TrueV)) {
7404 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7405 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7406 }
7407 // (select c, y, -1) -> (c-1) | y
7408 if (isAllOnesConstant(FalseV)) {
7409 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7410 DAG.getAllOnesConstant(DL, VT));
7411 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7412 }
7413
7414 // (select c, 0, y) -> (c-1) & y
7415 if (isNullConstant(TrueV)) {
7416 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7417 DAG.getAllOnesConstant(DL, VT));
7418 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7419 }
7420 // (select c, y, 0) -> -c & y
7421 if (isNullConstant(FalseV)) {
7422 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7423 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7424 }
7425 }
7426
7427 // select c, ~x, x --> xor -c, x
7428 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7429 const APInt &TrueVal = TrueV->getAsAPIntVal();
7430 const APInt &FalseVal = FalseV->getAsAPIntVal();
7431 if (~TrueVal == FalseVal) {
7432 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7433 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7434 }
7435 }
7436
7437 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7438 // when both truev and falsev are also setcc.
7439 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7440 FalseV.getOpcode() == ISD::SETCC) {
7441 SDValue LHS = CondV.getOperand(0);
7442 SDValue RHS = CondV.getOperand(1);
7443 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7444
7445 // (select x, x, y) -> x | y
7446 // (select !x, x, y) -> x & y
7447 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7448 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7449 DAG.getFreeze(FalseV));
7450 }
7451 // (select x, y, x) -> x & y
7452 // (select !x, y, x) -> x | y
7453 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7454 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7455 DAG.getFreeze(TrueV), FalseV);
7456 }
7457 }
7458
7459 return SDValue();
7460}
7461
7462// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7463// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7464// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7465// being `0` or `-1`. In such cases we can replace `select` with `and`.
7466// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7467// than `c0`?
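// For example, `and (select cond, x, 0), 1` becomes
// `select cond, (and x, 1), 0`, since `binOp(c0, c1)` = `and 0, 1` = 0.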
7468static SDValue
7470 const RISCVSubtarget &Subtarget) {
7471 if (Subtarget.hasShortForwardBranchOpt())
7472 return SDValue();
7473
7474 unsigned SelOpNo = 0;
7475 SDValue Sel = BO->getOperand(0);
7476 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7477 SelOpNo = 1;
7478 Sel = BO->getOperand(1);
7479 }
7480
7481 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7482 return SDValue();
7483
7484 unsigned ConstSelOpNo = 1;
7485 unsigned OtherSelOpNo = 2;
7486 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7487 ConstSelOpNo = 2;
7488 OtherSelOpNo = 1;
7489 }
7490 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7491 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7492 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7493 return SDValue();
7494
7495 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7496 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7497 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7498 return SDValue();
7499
7500 SDLoc DL(Sel);
7501 EVT VT = BO->getValueType(0);
7502
7503 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7504 if (SelOpNo == 1)
7505 std::swap(NewConstOps[0], NewConstOps[1]);
7506
7507 SDValue NewConstOp =
7508 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7509 if (!NewConstOp)
7510 return SDValue();
7511
7512 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7513 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7514 return SDValue();
7515
7516 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7517 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7518 if (SelOpNo == 1)
7519 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7520 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7521
7522 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7523 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7524 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7525}
7526
7527SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7528 SDValue CondV = Op.getOperand(0);
7529 SDValue TrueV = Op.getOperand(1);
7530 SDValue FalseV = Op.getOperand(2);
7531 SDLoc DL(Op);
7532 MVT VT = Op.getSimpleValueType();
7533 MVT XLenVT = Subtarget.getXLenVT();
7534
7535 // Lower vector SELECTs to VSELECTs by splatting the condition.
7536 if (VT.isVector()) {
7537 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7538 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7539 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7540 }
7541
7542 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7543 // nodes to implement the SELECT. Performing the lowering here allows for
7544 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7545 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7546 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7547 VT.isScalarInteger()) {
7548 // (select c, t, 0) -> (czero_eqz t, c)
7549 if (isNullConstant(FalseV))
7550 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7551 // (select c, 0, f) -> (czero_nez f, c)
7552 if (isNullConstant(TrueV))
7553 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7554
7555 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7556 if (TrueV.getOpcode() == ISD::AND &&
7557 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7558 return DAG.getNode(
7559 ISD::OR, DL, VT, TrueV,
7560 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7561 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7562 if (FalseV.getOpcode() == ISD::AND &&
7563 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7564 return DAG.getNode(
7565 ISD::OR, DL, VT, FalseV,
7566 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7567
7568 // Try some other optimizations before falling back to generic lowering.
7569 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7570 return V;
7571
7572 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7573 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
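    // For example, (select c, 5, 3) with czero.nez: the czero.nez result is
    // 0 when c != 0, so (add (czero_nez (3 - 5), c), 5) yields 5 when c is
    // true and 3 otherwise.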
7574 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7575 const APInt &TrueVal = TrueV->getAsAPIntVal();
7576 const APInt &FalseVal = FalseV->getAsAPIntVal();
7577 const int TrueValCost = RISCVMatInt::getIntMatCost(
7578 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7579 const int FalseValCost = RISCVMatInt::getIntMatCost(
7580 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7581 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7582 SDValue LHSVal = DAG.getConstant(
7583 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7584 SDValue RHSVal =
7585 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7586       SDValue CMOV =
7587           DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7588 DL, VT, LHSVal, CondV);
7589 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7590 }
7591
7592 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7593 // Unless we have the short forward branch optimization.
7594 if (!Subtarget.hasConditionalMoveFusion())
7595 return DAG.getNode(
7596 ISD::OR, DL, VT,
7597 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7598 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7599 }
7600
7601 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7602 return V;
7603
7604 if (Op.hasOneUse()) {
7605 unsigned UseOpc = Op->use_begin()->getOpcode();
7606 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7607 SDNode *BinOp = *Op->use_begin();
7608 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7609 DAG, Subtarget)) {
7610 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7611 return lowerSELECT(NewSel, DAG);
7612 }
7613 }
7614 }
7615
7616 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7617 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7618 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7619 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7620 if (FPTV && FPFV) {
7621 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7622 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7623 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7624 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7625 DAG.getConstant(1, DL, XLenVT));
7626 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7627 }
7628 }
7629
7630 // If the condition is not an integer SETCC which operates on XLenVT, we need
7631 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7632 // (select condv, truev, falsev)
7633 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7634 if (CondV.getOpcode() != ISD::SETCC ||
7635 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7636 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7637 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7638
7639 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7640
7641 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7642 }
7643
7644 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7645 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7646 // advantage of the integer compare+branch instructions. i.e.:
7647 // (select (setcc lhs, rhs, cc), truev, falsev)
7648 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7649 SDValue LHS = CondV.getOperand(0);
7650 SDValue RHS = CondV.getOperand(1);
7651 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7652
7653   // Special case for a select of 2 constants that have a difference of 1.
7654 // Normally this is done by DAGCombine, but if the select is introduced by
7655 // type legalization or op legalization, we miss it. Restricting to SETLT
7656 // case for now because that is what signed saturating add/sub need.
7657 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7658 // but we would probably want to swap the true/false values if the condition
7659 // is SETGE/SETLE to avoid an XORI.
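  // For example, (select (setlt a, b), 4, 3) becomes (add (setlt a, b), 3),
  // using the fact that the setcc result is 0 or 1.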
7660 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7661 CCVal == ISD::SETLT) {
7662 const APInt &TrueVal = TrueV->getAsAPIntVal();
7663 const APInt &FalseVal = FalseV->getAsAPIntVal();
7664 if (TrueVal - 1 == FalseVal)
7665 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7666 if (TrueVal + 1 == FalseVal)
7667 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7668 }
7669
7670 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7671 // 1 < x ? x : 1 -> 0 < x ? x : 1
7672 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7673 RHS == TrueV && LHS == FalseV) {
7674 LHS = DAG.getConstant(0, DL, VT);
7675 // 0 <u x is the same as x != 0.
7676 if (CCVal == ISD::SETULT) {
7677 std::swap(LHS, RHS);
7678 CCVal = ISD::SETNE;
7679 }
7680 }
7681
7682 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7683 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7684 RHS == FalseV) {
7685 RHS = DAG.getConstant(0, DL, VT);
7686 }
7687
7688 SDValue TargetCC = DAG.getCondCode(CCVal);
7689
7690 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7691 // (select (setcc lhs, rhs, CC), constant, falsev)
7692 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7693 std::swap(TrueV, FalseV);
7694 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7695 }
7696
7697 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7698 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7699}
7700
7701SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7702 SDValue CondV = Op.getOperand(1);
7703 SDLoc DL(Op);
7704 MVT XLenVT = Subtarget.getXLenVT();
7705
7706 if (CondV.getOpcode() == ISD::SETCC &&
7707 CondV.getOperand(0).getValueType() == XLenVT) {
7708 SDValue LHS = CondV.getOperand(0);
7709 SDValue RHS = CondV.getOperand(1);
7710 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7711
7712 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7713
7714 SDValue TargetCC = DAG.getCondCode(CCVal);
7715 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7716 LHS, RHS, TargetCC, Op.getOperand(2));
7717 }
7718
7719 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7720 CondV, DAG.getConstant(0, DL, XLenVT),
7721 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7722}
7723
7724SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7725   MachineFunction &MF = DAG.getMachineFunction();
7726   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7727 
7728 SDLoc DL(Op);
7729   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7730                                  getPointerTy(MF.getDataLayout()));
7731
7732 // vastart just stores the address of the VarArgsFrameIndex slot into the
7733 // memory location argument.
7734 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7735 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7736 MachinePointerInfo(SV));
7737}
7738
7739SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7740 SelectionDAG &DAG) const {
7741   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7742   MachineFunction &MF = DAG.getMachineFunction();
7743 MachineFrameInfo &MFI = MF.getFrameInfo();
7744 MFI.setFrameAddressIsTaken(true);
7745 Register FrameReg = RI.getFrameRegister(MF);
7746 int XLenInBytes = Subtarget.getXLen() / 8;
7747
7748 EVT VT = Op.getValueType();
7749 SDLoc DL(Op);
7750 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7751 unsigned Depth = Op.getConstantOperandVal(0);
7752 while (Depth--) {
7753 int Offset = -(XLenInBytes * 2);
7754     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7755                               DAG.getConstant(Offset, DL, VT));
7756 FrameAddr =
7757 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7758 }
7759 return FrameAddr;
7760}
7761
7762SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7763 SelectionDAG &DAG) const {
7764   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7765   MachineFunction &MF = DAG.getMachineFunction();
7766 MachineFrameInfo &MFI = MF.getFrameInfo();
7767 MFI.setReturnAddressIsTaken(true);
7768 MVT XLenVT = Subtarget.getXLenVT();
7769 int XLenInBytes = Subtarget.getXLen() / 8;
7770
7771   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7772     return SDValue();
7773
7774 EVT VT = Op.getValueType();
7775 SDLoc DL(Op);
7776 unsigned Depth = Op.getConstantOperandVal(0);
7777 if (Depth) {
7778 int Off = -XLenInBytes;
7779 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7780 SDValue Offset = DAG.getConstant(Off, DL, VT);
7781 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7782                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7783                        MachinePointerInfo());
7784 }
7785
7786 // Return the value of the return address register, marking it an implicit
7787 // live-in.
7788 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7789 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7790}
7791
7792SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7793 SelectionDAG &DAG) const {
7794 SDLoc DL(Op);
7795 SDValue Lo = Op.getOperand(0);
7796 SDValue Hi = Op.getOperand(1);
7797 SDValue Shamt = Op.getOperand(2);
7798 EVT VT = Lo.getValueType();
7799
7800 // if Shamt-XLEN < 0: // Shamt < XLEN
7801 // Lo = Lo << Shamt
7802 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7803 // else:
7804 // Lo = 0
7805 // Hi = Lo << (Shamt-XLEN)
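  //
  // For example, with XLEN=32:
  //   Shamt = 8:  Lo = Lo << 8, Hi = (Hi << 8) | (Lo >>u 24)
  //   Shamt = 40: Lo = 0,       Hi = Lo << 8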
7806
7807 SDValue Zero = DAG.getConstant(0, DL, VT);
7808 SDValue One = DAG.getConstant(1, DL, VT);
7809 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7810 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7811 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7812 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7813
7814 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7815 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7816 SDValue ShiftRightLo =
7817 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7818 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7819 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7820 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7821
7822 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7823
7824 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7825 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7826
7827 SDValue Parts[2] = {Lo, Hi};
7828 return DAG.getMergeValues(Parts, DL);
7829}
7830
7831SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7832 bool IsSRA) const {
7833 SDLoc DL(Op);
7834 SDValue Lo = Op.getOperand(0);
7835 SDValue Hi = Op.getOperand(1);
7836 SDValue Shamt = Op.getOperand(2);
7837 EVT VT = Lo.getValueType();
7838
7839 // SRA expansion:
7840 // if Shamt-XLEN < 0: // Shamt < XLEN
7841 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7842 // Hi = Hi >>s Shamt
7843 // else:
7844 // Lo = Hi >>s (Shamt-XLEN);
7845 // Hi = Hi >>s (XLEN-1)
7846 //
7847 // SRL expansion:
7848 // if Shamt-XLEN < 0: // Shamt < XLEN
7849 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7850 // Hi = Hi >>u Shamt
7851 // else:
7852 // Lo = Hi >>u (Shamt-XLEN);
7853 // Hi = 0;
7854
7855 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7856
7857 SDValue Zero = DAG.getConstant(0, DL, VT);
7858 SDValue One = DAG.getConstant(1, DL, VT);
7859 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7860 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7861 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7862 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7863
7864 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7865 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7866 SDValue ShiftLeftHi =
7867 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7868 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7869 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7870 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7871 SDValue HiFalse =
7872 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7873
7874 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7875
7876 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7877 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7878
7879 SDValue Parts[2] = {Lo, Hi};
7880 return DAG.getMergeValues(Parts, DL);
7881}
7882
7883// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7884// legal equivalently-sized i8 type, so we can use that as a go-between.
7885SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7886 SelectionDAG &DAG) const {
7887 SDLoc DL(Op);
7888 MVT VT = Op.getSimpleValueType();
7889 SDValue SplatVal = Op.getOperand(0);
7890 // All-zeros or all-ones splats are handled specially.
7891 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7892 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7893 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7894 }
7895 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7896 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7897 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7898 }
7899 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7900 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7901 DAG.getConstant(1, DL, SplatVal.getValueType()));
7902 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7903 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7904 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7905}
7906
7907// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7908// illegal (currently only vXi64 RV32).
7909// FIXME: We could also catch non-constant sign-extended i32 values and lower
7910// them to VMV_V_X_VL.
7911SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7912 SelectionDAG &DAG) const {
7913 SDLoc DL(Op);
7914 MVT VecVT = Op.getSimpleValueType();
7915 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7916 "Unexpected SPLAT_VECTOR_PARTS lowering");
7917
7918 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7919 SDValue Lo = Op.getOperand(0);
7920 SDValue Hi = Op.getOperand(1);
7921
7922 MVT ContainerVT = VecVT;
7923 if (VecVT.isFixedLengthVector())
7924 ContainerVT = getContainerForFixedLengthVector(VecVT);
7925
7926 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7927
7928 SDValue Res =
7929 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7930
7931 if (VecVT.isFixedLengthVector())
7932 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7933
7934 return Res;
7935}
7936
7937// Custom-lower extensions from mask vectors by using a vselect either with 1
7938// for zero/any-extension or -1 for sign-extension:
7939// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7940// Note that any-extension is lowered identically to zero-extension.
7941SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7942 int64_t ExtTrueVal) const {
7943 SDLoc DL(Op);
7944 MVT VecVT = Op.getSimpleValueType();
7945 SDValue Src = Op.getOperand(0);
7946 // Only custom-lower extensions from mask types
7947 assert(Src.getValueType().isVector() &&
7948 Src.getValueType().getVectorElementType() == MVT::i1);
7949
7950 if (VecVT.isScalableVector()) {
7951 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7952 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7953 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7954 }
7955
7956 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7957 MVT I1ContainerVT =
7958 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7959
7960 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7961
7962 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7963
7964 MVT XLenVT = Subtarget.getXLenVT();
7965 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7966 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7967
7968 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7969 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7970 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7971 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7972 SDValue Select =
7973 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7974 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7975
7976 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7977}
7978
7979SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7980 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7981 MVT ExtVT = Op.getSimpleValueType();
7982 // Only custom-lower extensions from fixed-length vector types.
7983 if (!ExtVT.isFixedLengthVector())
7984 return Op;
7985 MVT VT = Op.getOperand(0).getSimpleValueType();
7986 // Grab the canonical container type for the extended type. Infer the smaller
7987 // type from that to ensure the same number of vector elements, as we know
7988 // the LMUL will be sufficient to hold the smaller type.
7989 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7990 // Get the extended container type manually to ensure the same number of
7991 // vector elements between source and dest.
7992 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7993 ContainerExtVT.getVectorElementCount());
7994
7995 SDValue Op1 =
7996 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7997
7998 SDLoc DL(Op);
7999 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8000
8001 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8002
8003 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8004}
8005
8006// Custom-lower truncations from vectors to mask vectors by using a mask and a
8007// setcc operation:
8008// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8009SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8010 SelectionDAG &DAG) const {
8011 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8012 SDLoc DL(Op);
8013 EVT MaskVT = Op.getValueType();
8014 // Only expect to custom-lower truncations to mask types
8015 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8016 "Unexpected type for vector mask lowering");
8017 SDValue Src = Op.getOperand(0);
8018 MVT VecVT = Src.getSimpleValueType();
8019 SDValue Mask, VL;
8020 if (IsVPTrunc) {
8021 Mask = Op.getOperand(1);
8022 VL = Op.getOperand(2);
8023 }
8024 // If this is a fixed vector, we need to convert it to a scalable vector.
8025 MVT ContainerVT = VecVT;
8026
8027 if (VecVT.isFixedLengthVector()) {
8028 ContainerVT = getContainerForFixedLengthVector(VecVT);
8029 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8030 if (IsVPTrunc) {
8031 MVT MaskContainerVT =
8032 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8033 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8034 }
8035 }
8036
8037 if (!IsVPTrunc) {
8038 std::tie(Mask, VL) =
8039 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8040 }
8041
8042 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8043 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8044
8045 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8046 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8047 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8048 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8049
8050 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8051 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8052 DAG.getUNDEF(ContainerVT), Mask, VL);
8053 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8054 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8055 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8056 if (MaskVT.isFixedLengthVector())
8057 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8058 return Trunc;
8059}
8060
8061SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8062 SelectionDAG &DAG) const {
8063 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8064 SDLoc DL(Op);
8065
8066 MVT VT = Op.getSimpleValueType();
8067 // Only custom-lower vector truncates
8068 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8069
8070 // Truncates to mask types are handled differently
8071 if (VT.getVectorElementType() == MVT::i1)
8072 return lowerVectorMaskTruncLike(Op, DAG);
8073
8074 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8075 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8076 // truncate by one power of two at a time.
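  // For example, an i64->i8 truncate is emitted as three such nodes:
  // i64 -> i32 -> i16 -> i8.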
8077 MVT DstEltVT = VT.getVectorElementType();
8078
8079 SDValue Src = Op.getOperand(0);
8080 MVT SrcVT = Src.getSimpleValueType();
8081 MVT SrcEltVT = SrcVT.getVectorElementType();
8082
8083 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8084 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8085 "Unexpected vector truncate lowering");
8086
8087 MVT ContainerVT = SrcVT;
8088 SDValue Mask, VL;
8089 if (IsVPTrunc) {
8090 Mask = Op.getOperand(1);
8091 VL = Op.getOperand(2);
8092 }
8093 if (SrcVT.isFixedLengthVector()) {
8094 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8095 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8096 if (IsVPTrunc) {
8097 MVT MaskVT = getMaskTypeFor(ContainerVT);
8098 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8099 }
8100 }
8101
8102 SDValue Result = Src;
8103 if (!IsVPTrunc) {
8104 std::tie(Mask, VL) =
8105 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8106 }
8107
8108 LLVMContext &Context = *DAG.getContext();
8109 const ElementCount Count = ContainerVT.getVectorElementCount();
8110 do {
8111 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8112 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8113 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8114 Mask, VL);
8115 } while (SrcEltVT != DstEltVT);
8116
8117 if (SrcVT.isFixedLengthVector())
8118 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8119
8120 return Result;
8121}
8122
8123SDValue
8124RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8125 SelectionDAG &DAG) const {
8126 SDLoc DL(Op);
8127 SDValue Chain = Op.getOperand(0);
8128 SDValue Src = Op.getOperand(1);
8129 MVT VT = Op.getSimpleValueType();
8130 MVT SrcVT = Src.getSimpleValueType();
8131 MVT ContainerVT = VT;
8132 if (VT.isFixedLengthVector()) {
8133 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8134 ContainerVT =
8135 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8136 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8137 }
8138
8139 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8140
8141   // RVV can only widen/truncate fp to types double/half the size of the source.
8142 if ((VT.getVectorElementType() == MVT::f64 &&
8143 (SrcVT.getVectorElementType() == MVT::f16 ||
8144 SrcVT.getVectorElementType() == MVT::bf16)) ||
8145 ((VT.getVectorElementType() == MVT::f16 ||
8146 VT.getVectorElementType() == MVT::bf16) &&
8147 SrcVT.getVectorElementType() == MVT::f64)) {
8148 // For double rounding, the intermediate rounding should be round-to-odd.
8149     unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8150                                 ? RISCVISD::STRICT_FP_EXTEND_VL
8151                                 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8152 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8153 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8154 Chain, Src, Mask, VL);
8155 Chain = Src.getValue(1);
8156 }
8157
8158   unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8159                          ? RISCVISD::STRICT_FP_EXTEND_VL
8160                          : RISCVISD::STRICT_FP_ROUND_VL;
8161 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8162 Chain, Src, Mask, VL);
8163 if (VT.isFixedLengthVector()) {
8164 // StrictFP operations have two result values. Their lowered result should
8165 // have same result count.
8166 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8167 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8168 }
8169 return Res;
8170}
8171
8172SDValue
8173RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8174 SelectionDAG &DAG) const {
8175 bool IsVP =
8176 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8177 bool IsExtend =
8178 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8179   // RVV can only truncate fp to types half the size of the source. We
8180 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8181 // conversion instruction.
8182 SDLoc DL(Op);
8183 MVT VT = Op.getSimpleValueType();
8184
8185 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8186
8187 SDValue Src = Op.getOperand(0);
8188 MVT SrcVT = Src.getSimpleValueType();
8189
8190 bool IsDirectExtend =
8191 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8192 (SrcVT.getVectorElementType() != MVT::f16 &&
8193 SrcVT.getVectorElementType() != MVT::bf16));
8194 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8195 VT.getVectorElementType() != MVT::bf16) ||
8196 SrcVT.getVectorElementType() != MVT::f64);
8197
8198 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8199
8200 // Prepare any fixed-length vector operands.
8201 MVT ContainerVT = VT;
8202 SDValue Mask, VL;
8203 if (IsVP) {
8204 Mask = Op.getOperand(1);
8205 VL = Op.getOperand(2);
8206 }
8207 if (VT.isFixedLengthVector()) {
8208 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8209 ContainerVT =
8210 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8211 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8212 if (IsVP) {
8213 MVT MaskVT = getMaskTypeFor(ContainerVT);
8214 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8215 }
8216 }
8217
8218 if (!IsVP)
8219 std::tie(Mask, VL) =
8220 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8221
8222 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8223
8224 if (IsDirectConv) {
8225 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8226 if (VT.isFixedLengthVector())
8227 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8228 return Src;
8229 }
8230
8231   unsigned InterConvOpc =
8232       IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8233
8234 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8235 SDValue IntermediateConv =
8236 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8237 SDValue Result =
8238 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8239 if (VT.isFixedLengthVector())
8240 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8241 return Result;
8242}
8243
8244// Given a scalable vector type and an index into it, returns the type for the
8245// smallest subvector that the index fits in. This can be used to reduce LMUL
8246// for operations like vslidedown.
8247//
8248// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8249static std::optional<MVT>
8250getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8251 const RISCVSubtarget &Subtarget) {
8252 assert(VecVT.isScalableVector());
8253 const unsigned EltSize = VecVT.getScalarSizeInBits();
8254 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8255 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8256 MVT SmallerVT;
8257 if (MaxIdx < MinVLMAX)
8258 SmallerVT = getLMUL1VT(VecVT);
8259 else if (MaxIdx < MinVLMAX * 2)
8260 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8261 else if (MaxIdx < MinVLMAX * 4)
8262 SmallerVT = getLMUL1VT(VecVT)
8263 .getDoubleNumVectorElementsVT()
8264 .getDoubleNumVectorElementsVT();
8265 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8266 return std::nullopt;
8267 return SmallerVT;
8268}
8269
8270// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8271// first position of a vector, and that vector is slid up to the insert index.
8272// By limiting the active vector length to index+1 and merging with the
8273// original vector (with an undisturbed tail policy for elements >= VL), we
8274// achieve the desired result of leaving all elements untouched except the one
8275// at VL-1, which is replaced with the desired value.
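// For example (illustrative), inserting a scalar held in a0 at index 2 of a
// v4i32 is roughly:
//   vsetivli zero, 3, e32, m1, tu, ma   ; VL = idx + 1, tail undisturbed
//   vmv.s.x  v9, a0                     ; scalar into element 0 of a temp
//   vslideup.vi v8, v9, 2               ; slide it up to the insert index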
8276SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8277 SelectionDAG &DAG) const {
8278 SDLoc DL(Op);
8279 MVT VecVT = Op.getSimpleValueType();
8280 SDValue Vec = Op.getOperand(0);
8281 SDValue Val = Op.getOperand(1);
8282 SDValue Idx = Op.getOperand(2);
8283
8284 if (VecVT.getVectorElementType() == MVT::i1) {
8285 // FIXME: For now we just promote to an i8 vector and insert into that,
8286 // but this is probably not optimal.
8287 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8288 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8289 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8290 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8291 }
8292
8293 MVT ContainerVT = VecVT;
8294 // If the operand is a fixed-length vector, convert to a scalable one.
8295 if (VecVT.isFixedLengthVector()) {
8296 ContainerVT = getContainerForFixedLengthVector(VecVT);
8297 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8298 }
8299
8300 // If we know the index we're going to insert at, we can shrink Vec so that
8301 // we're performing the scalar inserts and slideup on a smaller LMUL.
8302 MVT OrigContainerVT = ContainerVT;
8303 SDValue OrigVec = Vec;
8304 SDValue AlignedIdx;
8305 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8306 const unsigned OrigIdx = IdxC->getZExtValue();
8307 // Do we know an upper bound on LMUL?
8308 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8309 DL, DAG, Subtarget)) {
8310 ContainerVT = *ShrunkVT;
8311 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8312 }
8313
8314 // If we're compiling for an exact VLEN value, we can always perform
8315 // the insert in m1 as we can determine the register corresponding to
8316 // the index in the register group.
8317 const MVT M1VT = getLMUL1VT(ContainerVT);
8318 if (auto VLEN = Subtarget.getRealVLen();
8319 VLEN && ContainerVT.bitsGT(M1VT)) {
8320 EVT ElemVT = VecVT.getVectorElementType();
8321 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8322 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8323 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8324 unsigned ExtractIdx =
8325 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8326 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8327 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8328 ContainerVT = M1VT;
8329 }
8330
8331 if (AlignedIdx)
8332 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8333 AlignedIdx);
8334 }
8335
8336 MVT XLenVT = Subtarget.getXLenVT();
8337
8338 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8339 // Even i64-element vectors on RV32 can be lowered without scalar
8340 // legalization if the most-significant 32 bits of the value are not affected
8341 // by the sign-extension of the lower 32 bits.
8342 // TODO: We could also catch sign extensions of a 32-bit value.
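// For example (illustrative): inserting the i64 constant 7 takes this path,
// since 7 survives isInt<32> and the vector unit's sign-extension of the
// 32-bit scalar to SEW=64 reproduces the full value.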
8343 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8344 const auto *CVal = cast<ConstantSDNode>(Val);
8345 if (isInt<32>(CVal->getSExtValue())) {
8346 IsLegalInsert = true;
8347 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8348 }
8349 }
8350
8351 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8352
8353 SDValue ValInVec;
8354
8355 if (IsLegalInsert) {
8356 unsigned Opc =
8357 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8358 if (isNullConstant(Idx)) {
8359 if (!VecVT.isFloatingPoint())
8360 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8361 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8362
8363 if (AlignedIdx)
8364 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8365 Vec, AlignedIdx);
8366 if (!VecVT.isFixedLengthVector())
8367 return Vec;
8368 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8369 }
8370 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8371 } else {
8372 // On RV32, i64-element vectors must be specially handled to place the
8373 // value at element 0, by using two vslide1down instructions in sequence on
8374 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8375 // this.
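// For example (illustrative), with the value split as lo in a0 and hi in a1,
// this emits roughly:
//   vsetivli zero, 2, e32, m1, ta, ma
//   vslide1down.vx v8, v8, a0   ; a0 lands in element VL-1
//   vslide1down.vx v8, v8, a1   ; after both: lo in elt 0, hi in elt 1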
8376 SDValue ValLo, ValHi;
8377 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8378 MVT I32ContainerVT =
8379 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8380 SDValue I32Mask =
8381 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8382 // Limit the active VL to two.
8383 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8384 // If the Idx is 0 we can insert directly into the vector.
8385 if (isNullConstant(Idx)) {
8386 // First slide in the lo value, then the hi in above it. We use slide1down
8387 // to avoid the register group overlap constraint of vslide1up.
8388 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8389 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8390 // If the source vector is undef don't pass along the tail elements from
8391 // the previous slide1down.
8392 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8393 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8394 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8395 // Bitcast back to the right container type.
8396 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8397
8398 if (AlignedIdx)
8399 ValInVec =
8400 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8401 ValInVec, AlignedIdx);
8402 if (!VecVT.isFixedLengthVector())
8403 return ValInVec;
8404 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8405 }
8406
8407 // First slide in the lo value, then the hi in above it. We use slide1down
8408 // to avoid the register group overlap constraint of vslide1up.
8409 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8410 DAG.getUNDEF(I32ContainerVT),
8411 DAG.getUNDEF(I32ContainerVT), ValLo,
8412 I32Mask, InsertI64VL);
8413 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8414 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8415 I32Mask, InsertI64VL);
8416 // Bitcast back to the right container type.
8417 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8418 }
8419
8420 // Now that the value is in a vector, slide it into position.
8421 SDValue InsertVL =
8422 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8423
8424 // Use tail agnostic policy if Idx is the last index of Vec.
8425 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8426 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8427 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8428 Policy = RISCVII::TAIL_AGNOSTIC;
8429 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8430 Idx, Mask, InsertVL, Policy);
8431
8432 if (AlignedIdx)
8433 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8434 Slideup, AlignedIdx);
8435 if (!VecVT.isFixedLengthVector())
8436 return Slideup;
8437 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8438}
8439
8440// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8441// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8442// types this is done using VMV_X_S to allow us to glean information about the
8443// sign bits of the result.
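// For example (illustrative), extracting element 2 of a v4i32 becomes roughly:
//   vsetivli zero, 1, e32, m1, ta, ma   ; only one element is needed
//   vslidedown.vi v8, v8, 2
//   vmv.x.s a0, v8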
8444SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8445 SelectionDAG &DAG) const {
8446 SDLoc DL(Op);
8447 SDValue Idx = Op.getOperand(1);
8448 SDValue Vec = Op.getOperand(0);
8449 EVT EltVT = Op.getValueType();
8450 MVT VecVT = Vec.getSimpleValueType();
8451 MVT XLenVT = Subtarget.getXLenVT();
8452
8453 if (VecVT.getVectorElementType() == MVT::i1) {
8454 // Use vfirst.m to extract the first bit.
8455 if (isNullConstant(Idx)) {
8456 MVT ContainerVT = VecVT;
8457 if (VecVT.isFixedLengthVector()) {
8458 ContainerVT = getContainerForFixedLengthVector(VecVT);
8459 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8460 }
8461 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8462 SDValue Vfirst =
8463 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8464 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8465 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8466 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8467 }
8468 if (VecVT.isFixedLengthVector()) {
8469 unsigned NumElts = VecVT.getVectorNumElements();
8470 if (NumElts >= 8) {
8471 MVT WideEltVT;
8472 unsigned WidenVecLen;
8473 SDValue ExtractElementIdx;
8474 SDValue ExtractBitIdx;
8475 unsigned MaxEEW = Subtarget.getELen();
8476 MVT LargestEltVT = MVT::getIntegerVT(
8477 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8478 if (NumElts <= LargestEltVT.getSizeInBits()) {
8479 assert(isPowerOf2_32(NumElts) &&
8480 "the number of elements should be power of 2");
8481 WideEltVT = MVT::getIntegerVT(NumElts);
8482 WidenVecLen = 1;
8483 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8484 ExtractBitIdx = Idx;
8485 } else {
8486 WideEltVT = LargestEltVT;
8487 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8488 // extract element index = index / element width
8489 ExtractElementIdx = DAG.getNode(
8490 ISD::SRL, DL, XLenVT, Idx,
8491 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8492 // mask bit index = index % element width
8493 ExtractBitIdx = DAG.getNode(
8494 ISD::AND, DL, XLenVT, Idx,
8495 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8496 }
8497 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8498 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8499 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8500 Vec, ExtractElementIdx);
8501 // Extract the bit from GPR.
8502 SDValue ShiftRight =
8503 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8504 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8505 DAG.getConstant(1, DL, XLenVT));
8506 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8507 }
8508 }
8509 // Otherwise, promote to an i8 vector and extract from that.
8510 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8511 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8512 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8513 }
8514
8515 // If this is a fixed vector, we need to convert it to a scalable vector.
8516 MVT ContainerVT = VecVT;
8517 if (VecVT.isFixedLengthVector()) {
8518 ContainerVT = getContainerForFixedLengthVector(VecVT);
8519 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8520 }
8521
8522 // If we're compiling for an exact VLEN value and we have a known
8523 // constant index, we can always perform the extract in m1 (or
8524 // smaller) as we can determine the register corresponding to
8525 // the index in the register group.
8526 const auto VLen = Subtarget.getRealVLen();
8527 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8528 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8529 MVT M1VT = getLMUL1VT(ContainerVT);
8530 unsigned OrigIdx = IdxC->getZExtValue();
8531 EVT ElemVT = VecVT.getVectorElementType();
8532 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8533 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8534 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8535 unsigned ExtractIdx =
8536 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8537 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8538 DAG.getVectorIdxConstant(ExtractIdx, DL));
8539 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8540 ContainerVT = M1VT;
8541 }
8542
8543 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8544 // contains our index.
8545 std::optional<uint64_t> MaxIdx;
8546 if (VecVT.isFixedLengthVector())
8547 MaxIdx = VecVT.getVectorNumElements() - 1;
8548 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8549 MaxIdx = IdxC->getZExtValue();
8550 if (MaxIdx) {
8551 if (auto SmallerVT =
8552 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8553 ContainerVT = *SmallerVT;
8554 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8555 DAG.getConstant(0, DL, XLenVT));
8556 }
8557 }
8558
8559 // If after narrowing, the required slide is still greater than LMUL2,
8560 // fallback to generic expansion and go through the stack. This is done
8561 // for a subtle reason: extracting *all* elements out of a vector is
8562 // widely expected to be linear in vector size, but because vslidedown
8563 // is linear in LMUL, performing N extracts using vslidedown becomes
8564 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8565 // seems to have the same problem (the store is linear in LMUL), but the
8566 // generic expansion *memoizes* the store, and thus for many extracts of
8567 // the same vector we end up with one store and a bunch of loads.
8568 // TODO: We don't have the same code for insert_vector_elt because we
8569 // have BUILD_VECTOR and handle the degenerate case there. Should we
8570 // consider adding an inverse BUILD_VECTOR node?
8571 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8572 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8573 return SDValue();
8574
8575 // If the index is 0, the vector is already in the right position.
8576 if (!isNullConstant(Idx)) {
8577 // Use a VL of 1 to avoid processing more elements than we need.
8578 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8579 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8580 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8581 }
8582
8583 if (!EltVT.isInteger()) {
8584 // Floating-point extracts are handled in TableGen.
8585 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8586 DAG.getVectorIdxConstant(0, DL));
8587 }
8588
8589 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8590 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8591}
8592
8593// Some RVV intrinsics may claim that they want an integer operand to be
8594 // promoted or expanded.
8595 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8596 const RISCVSubtarget &Subtarget) {
8597 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8598 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8599 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8600 "Unexpected opcode");
8601
8602 if (!Subtarget.hasVInstructions())
8603 return SDValue();
8604
8605 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8606 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8607 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8608
8609 SDLoc DL(Op);
8610
8611 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8612 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8613 if (!II || !II->hasScalarOperand())
8614 return SDValue();
8615
8616 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8617 assert(SplatOp < Op.getNumOperands());
8618
8619 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8620 SDValue &ScalarOp = Operands[SplatOp];
8621 MVT OpVT = ScalarOp.getSimpleValueType();
8622 MVT XLenVT = Subtarget.getXLenVT();
8623
8624 // If this isn't a scalar, or its type is XLenVT we're done.
8625 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8626 return SDValue();
8627
8628 // Simplest case is that the operand needs to be promoted to XLenVT.
8629 if (OpVT.bitsLT(XLenVT)) {
8630 // If the operand is a constant, sign extend to increase our chances
8631 // of being able to use a .vi instruction. ANY_EXTEND would become a
8632 // zero extend and the simm5 check in isel would fail.
8633 // FIXME: Should we ignore the upper bits in isel instead?
8634 unsigned ExtOpc =
8635 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8636 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8637 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8638 }
8639
8640 // Use the previous operand to get the vXi64 VT. The result might be a mask
8641 // VT for compares. Using the previous operand assumes that the previous
8642 // operand will never have a smaller element size than a scalar operand and
8643 // that a widening operation never uses SEW=64.
8644 // NOTE: If this fails the below assert, we can probably just find the
8645 // element count from any operand or result and use it to construct the VT.
8646 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8647 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8648
8649 // The more complex case is when the scalar is larger than XLenVT.
8650 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8651 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8652
8653 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8654 // instruction to sign-extend since SEW>XLEN.
8655 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8656 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8657 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8658 }
8659
8660 switch (IntNo) {
8661 case Intrinsic::riscv_vslide1up:
8662 case Intrinsic::riscv_vslide1down:
8663 case Intrinsic::riscv_vslide1up_mask:
8664 case Intrinsic::riscv_vslide1down_mask: {
8665 // We need to special case these when the scalar is larger than XLen.
8666 unsigned NumOps = Op.getNumOperands();
8667 bool IsMasked = NumOps == 7;
8668
8669 // Convert the vector source to the equivalent nxvXi32 vector.
8670 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8671 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8672 SDValue ScalarLo, ScalarHi;
8673 std::tie(ScalarLo, ScalarHi) =
8674 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8675
8676 // Double the VL since we halved SEW.
8677 SDValue AVL = getVLOperand(Op);
8678 SDValue I32VL;
8679
8680 // Optimize for constant AVL
8681 if (isa<ConstantSDNode>(AVL)) {
8682 const auto [MinVLMAX, MaxVLMAX] =
8683 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8684
8685 uint64_t AVLInt = AVL->getAsZExtVal();
8686 if (AVLInt <= MinVLMAX) {
8687 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8688 } else if (AVLInt >= 2 * MaxVLMAX) {
8689 // Just set vl to VLMAX in this situation
8690 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8691 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8692 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8693 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8694 SDValue SETVLMAX = DAG.getTargetConstant(
8695 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8696 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8697 LMUL);
8698 } else {
8699 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8700 // depends on the hardware implementation, so leave it to the
8701 // vsetvli-based code below to compute it.
8702 }
8703 }
8704 if (!I32VL) {
8705 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8706 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8707 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8708 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8709 SDValue SETVL =
8710 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8711 // Use the vsetvli instruction to get the actual vector length, which
8712 // depends on the hardware implementation.
8713 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8714 SEW, LMUL);
8715 I32VL =
8716 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8717 }
8718
8719 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8720
8721 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8722 // instructions.
8723 SDValue Passthru;
8724 if (IsMasked)
8725 Passthru = DAG.getUNDEF(I32VT);
8726 else
8727 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8728
8729 if (IntNo == Intrinsic::riscv_vslide1up ||
8730 IntNo == Intrinsic::riscv_vslide1up_mask) {
8731 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8732 ScalarHi, I32Mask, I32VL);
8733 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8734 ScalarLo, I32Mask, I32VL);
8735 } else {
8736 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8737 ScalarLo, I32Mask, I32VL);
8738 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8739 ScalarHi, I32Mask, I32VL);
8740 }
8741
8742 // Convert back to nxvXi64.
8743 Vec = DAG.getBitcast(VT, Vec);
8744
8745 if (!IsMasked)
8746 return Vec;
8747 // Apply mask after the operation.
8748 SDValue Mask = Operands[NumOps - 3];
8749 SDValue MaskedOff = Operands[1];
8750 // Assume Policy operand is the last operand.
8751 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8752 // We don't need to select maskedoff if it's undef.
8753 if (MaskedOff.isUndef())
8754 return Vec;
8755 // TAMU
8756 if (Policy == RISCVII::TAIL_AGNOSTIC)
8757 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8758 DAG.getUNDEF(VT), AVL);
8759 // TUMA or TUMU: Currently we always emit the tumu policy regardless of tuma.
8760 // That is fine because vmerge does not care about mask policy.
8761 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8762 MaskedOff, AVL);
8763 }
8764 }
8765
8766 // We need to convert the scalar to a splat vector.
8767 SDValue VL = getVLOperand(Op);
8768 assert(VL.getValueType() == XLenVT);
8769 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8770 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8771}
8772
8773// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8774// scalable vector llvm.get.vector.length for now.
8775//
8776// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8777// (vscale * VF). The vscale and VF are independent of element width. We use
8778// SEW=8 for the vsetvli because it is the only element width that supports all
8779 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8780// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
8781// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8782 // SEW and LMUL are better for the surrounding vector instructions.
8783 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8784 const RISCVSubtarget &Subtarget) {
8785 MVT XLenVT = Subtarget.getXLenVT();
8786
8787 // The smallest LMUL is only valid for the smallest element width.
8788 const unsigned ElementWidth = 8;
8789
8790 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8791 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8792 // We don't support VF==1 with ELEN==32.
8793 [[maybe_unused]] unsigned MinVF =
8794 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8795
8796 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8797 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8798 "Unexpected VF");
8799
8800 bool Fractional = VF < LMul1VF;
8801 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8802 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8803 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8804
8805 SDLoc DL(N);
8806
8807 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8808 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8809
8810 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8811
8812 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8813 SDValue Res =
8814 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8815 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8816}
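// For example (illustrative): with RVVBitsPerBlock = 64, a call to
// llvm.experimental.get.vector.length(%avl, i32 4, i1 true) picks SEW=8 and
// LMUL=1/2 (VLMax = VLEN/16 = vscale * 4), i.e. roughly:
//   vsetvli a0, a0, e8, mf2, ta, ma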
8817
8818 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8819 const RISCVSubtarget &Subtarget) {
8820 SDValue Op0 = N->getOperand(1);
8821 MVT OpVT = Op0.getSimpleValueType();
8822 MVT ContainerVT = OpVT;
8823 if (OpVT.isFixedLengthVector()) {
8824 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8825 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8826 }
8827 MVT XLenVT = Subtarget.getXLenVT();
8828 SDLoc DL(N);
8829 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8830 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8831 if (isOneConstant(N->getOperand(2)))
8832 return Res;
8833
8834 // Convert -1 to VL.
8835 SDValue Setcc =
8836 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8837 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8838 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8839}
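// The lowering above maps llvm.experimental.cttz.elts over a mask to vfirst.m;
// when the zero-is-poison flag is not set, the -1 "no bit set" result is
// converted to the element count by the final setcc/select.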
8840
8841 static inline void promoteVCIXScalar(const SDValue &Op,
8842 SmallVectorImpl<SDValue> &Operands,
8843 SelectionDAG &DAG) {
8844 const RISCVSubtarget &Subtarget =
8845 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8846
8847 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8848 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8849 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8850 SDLoc DL(Op);
8851
8852 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8853 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8854 if (!II || !II->hasScalarOperand())
8855 return;
8856
8857 unsigned SplatOp = II->ScalarOperand + 1;
8858 assert(SplatOp < Op.getNumOperands());
8859
8860 SDValue &ScalarOp = Operands[SplatOp];
8861 MVT OpVT = ScalarOp.getSimpleValueType();
8862 MVT XLenVT = Subtarget.getXLenVT();
8863
8864 // The code below is partially copied from lowerVectorIntrinsicScalars.
8865 // If this isn't a scalar, or its type is XLenVT we're done.
8866 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8867 return;
8868
8869 // Manually emit promote operation for scalar operation.
8870 if (OpVT.bitsLT(XLenVT)) {
8871 unsigned ExtOpc =
8872 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8873 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8874 }
8875
8876 return;
8877}
8878
8879 static void processVCIXOperands(SDValue &OrigOp,
8880 SmallVectorImpl<SDValue> &Operands,
8881 SelectionDAG &DAG) {
8882 promoteVCIXScalar(OrigOp, Operands, DAG);
8883 const RISCVSubtarget &Subtarget =
8884 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8885 for (SDValue &V : Operands) {
8886 EVT ValType = V.getValueType();
8887 if (ValType.isVector() && ValType.isFloatingPoint()) {
8888 MVT InterimIVT =
8889 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8890 ValType.getVectorElementCount());
8891 V = DAG.getBitcast(InterimIVT, V);
8892 }
8893 if (ValType.isFixedLengthVector()) {
8894 MVT OpContainerVT = getContainerForFixedLengthVector(
8895 DAG, V.getSimpleValueType(), Subtarget);
8896 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8897 }
8898 }
8899}
8900
8901// LMUL * VLEN should be greater than or equal to EGS * SEW
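// For example (illustrative): with VLEN=128, an LMUL=1 operand such as nxv4i32
// gives 128 >= 4 * 32 for EGS=4 and is accepted, whereas a fractional nxv1i32
// (effectively 64 bits) would be rejected.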
8902static inline bool isValidEGW(int EGS, EVT VT,
8903 const RISCVSubtarget &Subtarget) {
8904 return (Subtarget.getRealMinVLen() *
8905 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8906 EGS * VT.getScalarSizeInBits();
8907}
8908
8909SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8910 SelectionDAG &DAG) const {
8911 unsigned IntNo = Op.getConstantOperandVal(0);
8912 SDLoc DL(Op);
8913 MVT XLenVT = Subtarget.getXLenVT();
8914
8915 switch (IntNo) {
8916 default:
8917 break; // Don't custom lower most intrinsics.
8918 case Intrinsic::thread_pointer: {
8919 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8920 return DAG.getRegister(RISCV::X4, PtrVT);
8921 }
8922 case Intrinsic::riscv_orc_b:
8923 case Intrinsic::riscv_brev8:
8924 case Intrinsic::riscv_sha256sig0:
8925 case Intrinsic::riscv_sha256sig1:
8926 case Intrinsic::riscv_sha256sum0:
8927 case Intrinsic::riscv_sha256sum1:
8928 case Intrinsic::riscv_sm3p0:
8929 case Intrinsic::riscv_sm3p1: {
8930 unsigned Opc;
8931 switch (IntNo) {
8932 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8933 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8934 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8935 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8936 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8937 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8938 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8939 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8940 }
8941
8942 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8943 SDValue NewOp =
8944 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8945 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8946 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8947 }
8948
8949 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8950 }
8951 case Intrinsic::riscv_sm4ks:
8952 case Intrinsic::riscv_sm4ed: {
8953 unsigned Opc =
8954 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8955
8956 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8957 SDValue NewOp0 =
8958 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8959 SDValue NewOp1 =
8960 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8961 SDValue Res =
8962 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8963 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8964 }
8965
8966 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8967 Op.getOperand(3));
8968 }
8969 case Intrinsic::riscv_zip:
8970 case Intrinsic::riscv_unzip: {
8971 unsigned Opc =
8972 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8973 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8974 }
8975 case Intrinsic::riscv_mopr: {
8976 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8977 SDValue NewOp =
8978 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8979 SDValue Res = DAG.getNode(
8980 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8981 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8982 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8983 }
8984 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8985 Op.getOperand(2));
8986 }
8987
8988 case Intrinsic::riscv_moprr: {
8989 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8990 SDValue NewOp0 =
8991 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8992 SDValue NewOp1 =
8993 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8994 SDValue Res = DAG.getNode(
8995 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8996 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8997 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8998 }
8999 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9000 Op.getOperand(2), Op.getOperand(3));
9001 }
9002 case Intrinsic::riscv_clmul:
9003 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9004 SDValue NewOp0 =
9005 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9006 SDValue NewOp1 =
9007 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9008 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
9009 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9010 }
9011 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9012 Op.getOperand(2));
9013 case Intrinsic::riscv_clmulh:
9014 case Intrinsic::riscv_clmulr: {
9015 unsigned Opc =
9016 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9017 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9018 SDValue NewOp0 =
9019 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9020 SDValue NewOp1 =
9021 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9022 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
9023 DAG.getConstant(32, DL, MVT::i64));
9024 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
9025 DAG.getConstant(32, DL, MVT::i64));
9026 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
9027 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
9028 DAG.getConstant(32, DL, MVT::i64));
9029 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9030 }
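// In the RV64LegalI32 path above, shifting both 32-bit inputs into the upper
// word lets the 64-bit clmulh/clmulr compute the 32-bit result in bits
// [63:32]; the final shift right by 32 moves it back down.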
9031
9032 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9033 }
9034 case Intrinsic::experimental_get_vector_length:
9035 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9036 case Intrinsic::experimental_cttz_elts:
9037 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9038 case Intrinsic::riscv_vmv_x_s: {
9039 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9040 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9041 }
9042 case Intrinsic::riscv_vfmv_f_s:
9043 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9044 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9045 case Intrinsic::riscv_vmv_v_x:
9046 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9047 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9048 Subtarget);
9049 case Intrinsic::riscv_vfmv_v_f:
9050 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9051 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9052 case Intrinsic::riscv_vmv_s_x: {
9053 SDValue Scalar = Op.getOperand(2);
9054
9055 if (Scalar.getValueType().bitsLE(XLenVT)) {
9056 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9057 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9058 Op.getOperand(1), Scalar, Op.getOperand(3));
9059 }
9060
9061 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9062
9063 // This is an i64 value that lives in two scalar registers. We have to
9064 // insert this in a convoluted way. First we build a vXi64 splat containing
9065 // the two values that we assemble using some bit math. Next we'll use
9066 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9067 // to merge element 0 from our splat into the source vector.
9068 // FIXME: This is probably not the best way to do this, but it is
9069 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9070 // point.
9071 // sw lo, (a0)
9072 // sw hi, 4(a0)
9073 // vlse vX, (a0)
9074 //
9075 // vid.v vVid
9076 // vmseq.vx mMask, vVid, 0
9077 // vmerge.vvm vDest, vSrc, vVal, mMask
9078 MVT VT = Op.getSimpleValueType();
9079 SDValue Vec = Op.getOperand(1);
9080 SDValue VL = getVLOperand(Op);
9081
9082 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9083 if (Op.getOperand(1).isUndef())
9084 return SplattedVal;
9085 SDValue SplattedIdx =
9086 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9087 DAG.getConstant(0, DL, MVT::i32), VL);
9088
9089 MVT MaskVT = getMaskTypeFor(VT);
9090 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9091 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9092 SDValue SelectCond =
9093 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9094 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9095 DAG.getUNDEF(MaskVT), Mask, VL});
9096 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9097 Vec, DAG.getUNDEF(VT), VL);
9098 }
9099 case Intrinsic::riscv_vfmv_s_f:
9100 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9101 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9102 // EGS * EEW >= 128 bits
9103 case Intrinsic::riscv_vaesdf_vv:
9104 case Intrinsic::riscv_vaesdf_vs:
9105 case Intrinsic::riscv_vaesdm_vv:
9106 case Intrinsic::riscv_vaesdm_vs:
9107 case Intrinsic::riscv_vaesef_vv:
9108 case Intrinsic::riscv_vaesef_vs:
9109 case Intrinsic::riscv_vaesem_vv:
9110 case Intrinsic::riscv_vaesem_vs:
9111 case Intrinsic::riscv_vaeskf1:
9112 case Intrinsic::riscv_vaeskf2:
9113 case Intrinsic::riscv_vaesz_vs:
9114 case Intrinsic::riscv_vsm4k:
9115 case Intrinsic::riscv_vsm4r_vv:
9116 case Intrinsic::riscv_vsm4r_vs: {
9117 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9118 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9119 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9120 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9121 return Op;
9122 }
9123 // EGS * EEW >= 256 bits
9124 case Intrinsic::riscv_vsm3c:
9125 case Intrinsic::riscv_vsm3me: {
9126 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9127 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9128 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9129 return Op;
9130 }
9131 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9132 case Intrinsic::riscv_vsha2ch:
9133 case Intrinsic::riscv_vsha2cl:
9134 case Intrinsic::riscv_vsha2ms: {
9135 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9136 !Subtarget.hasStdExtZvknhb())
9137 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9138 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9139 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9140 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9141 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9142 return Op;
9143 }
9144 case Intrinsic::riscv_sf_vc_v_x:
9145 case Intrinsic::riscv_sf_vc_v_i:
9146 case Intrinsic::riscv_sf_vc_v_xv:
9147 case Intrinsic::riscv_sf_vc_v_iv:
9148 case Intrinsic::riscv_sf_vc_v_vv:
9149 case Intrinsic::riscv_sf_vc_v_fv:
9150 case Intrinsic::riscv_sf_vc_v_xvv:
9151 case Intrinsic::riscv_sf_vc_v_ivv:
9152 case Intrinsic::riscv_sf_vc_v_vvv:
9153 case Intrinsic::riscv_sf_vc_v_fvv:
9154 case Intrinsic::riscv_sf_vc_v_xvw:
9155 case Intrinsic::riscv_sf_vc_v_ivw:
9156 case Intrinsic::riscv_sf_vc_v_vvw:
9157 case Intrinsic::riscv_sf_vc_v_fvw: {
9158 MVT VT = Op.getSimpleValueType();
9159
9160 SmallVector<SDValue> Operands{Op->op_values()};
9161 processVCIXOperands(Op, Operands, DAG);
9162
9163 MVT RetVT = VT;
9164 if (VT.isFixedLengthVector())
9165 RetVT = getContainerForFixedLengthVector(RetVT);
9166 else if (VT.isFloatingPoint())
9167 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9168 VT.getVectorElementCount());
9169
9170 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9171
9172 if (VT.isFixedLengthVector())
9173 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9174 else if (VT.isFloatingPoint())
9175 NewNode = DAG.getBitcast(VT, NewNode);
9176
9177 if (Op == NewNode)
9178 break;
9179
9180 return NewNode;
9181 }
9182 }
9183
9184 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9185}
9186
9187 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9188 unsigned Type) {
9189 SDLoc DL(Op);
9190 SmallVector<SDValue> Operands{Op->op_values()};
9191 Operands.erase(Operands.begin() + 1);
9192
9193 const RISCVSubtarget &Subtarget =
9194 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9195 MVT VT = Op.getSimpleValueType();
9196 MVT RetVT = VT;
9197 MVT FloatVT = VT;
9198
9199 if (VT.isFloatingPoint()) {
9200 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9201 VT.getVectorElementCount());
9202 FloatVT = RetVT;
9203 }
9204 if (VT.isFixedLengthVector())
9205 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9206 Subtarget);
9207
9208 processVCIXOperands(Op, Operands, DAG);
9209
9210 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9211 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9212 SDValue Chain = NewNode.getValue(1);
9213
9214 if (VT.isFixedLengthVector())
9215 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9216 if (VT.isFloatingPoint())
9217 NewNode = DAG.getBitcast(VT, NewNode);
9218
9219 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9220
9221 return NewNode;
9222}
9223
9224 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9225 unsigned Type) {
9226 SmallVector<SDValue> Operands{Op->op_values()};
9227 Operands.erase(Operands.begin() + 1);
9228 processVCIXOperands(Op, Operands, DAG);
9229
9230 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9231}
9232
9233SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9234 SelectionDAG &DAG) const {
9235 unsigned IntNo = Op.getConstantOperandVal(1);
9236 switch (IntNo) {
9237 default:
9238 break;
9239 case Intrinsic::riscv_masked_strided_load: {
9240 SDLoc DL(Op);
9241 MVT XLenVT = Subtarget.getXLenVT();
9242
9243 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9244 // the selection of the masked intrinsics doesn't do this for us.
9245 SDValue Mask = Op.getOperand(5);
9246 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9247
9248 MVT VT = Op->getSimpleValueType(0);
9249 MVT ContainerVT = VT;
9250 if (VT.isFixedLengthVector())
9251 ContainerVT = getContainerForFixedLengthVector(VT);
9252
9253 SDValue PassThru = Op.getOperand(2);
9254 if (!IsUnmasked) {
9255 MVT MaskVT = getMaskTypeFor(ContainerVT);
9256 if (VT.isFixedLengthVector()) {
9257 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9258 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9259 }
9260 }
9261
9262 auto *Load = cast<MemIntrinsicSDNode>(Op);
9263 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9264 SDValue Ptr = Op.getOperand(3);
9265 SDValue Stride = Op.getOperand(4);
9266 SDValue Result, Chain;
9267
9268 // TODO: We currently restrict this to unmasked loads because of the
9269 // complexity of handling all-false masks.
9270 MVT ScalarVT = ContainerVT.getVectorElementType();
9271 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9272 SDValue ScalarLoad =
9273 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9274 ScalarVT, Load->getMemOperand());
9275 Chain = ScalarLoad.getValue(1);
9276 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9277 Subtarget);
9278 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9279 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9280 Load->getMemOperand());
9281 Chain = ScalarLoad.getValue(1);
9282 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9283 } else {
9284 SDValue IntID = DAG.getTargetConstant(
9285 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9286 XLenVT);
9287
9288 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9289 if (IsUnmasked)
9290 Ops.push_back(DAG.getUNDEF(ContainerVT));
9291 else
9292 Ops.push_back(PassThru);
9293 Ops.push_back(Ptr);
9294 Ops.push_back(Stride);
9295 if (!IsUnmasked)
9296 Ops.push_back(Mask);
9297 Ops.push_back(VL);
9298 if (!IsUnmasked) {
9299 SDValue Policy =
9300 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9301 Ops.push_back(Policy);
9302 }
9303
9304 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9305 Result =
9306 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9307 Load->getMemoryVT(), Load->getMemOperand());
9308 Chain = Result.getValue(1);
9309 }
9310 if (VT.isFixedLengthVector())
9311 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9312 return DAG.getMergeValues({Result, Chain}, DL);
9313 }
9314 case Intrinsic::riscv_seg2_load:
9315 case Intrinsic::riscv_seg3_load:
9316 case Intrinsic::riscv_seg4_load:
9317 case Intrinsic::riscv_seg5_load:
9318 case Intrinsic::riscv_seg6_load:
9319 case Intrinsic::riscv_seg7_load:
9320 case Intrinsic::riscv_seg8_load: {
9321 SDLoc DL(Op);
9322 static const Intrinsic::ID VlsegInts[7] = {
9323 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9324 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9325 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9326 Intrinsic::riscv_vlseg8};
9327 unsigned NF = Op->getNumValues() - 1;
9328 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9329 MVT XLenVT = Subtarget.getXLenVT();
9330 MVT VT = Op->getSimpleValueType(0);
9331 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9332
9333 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9334 Subtarget);
9335 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9336 auto *Load = cast<MemIntrinsicSDNode>(Op);
9337 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9338 ContainerVTs.push_back(MVT::Other);
9339 SDVTList VTs = DAG.getVTList(ContainerVTs);
9340 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9341 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9342 Ops.push_back(Op.getOperand(2));
9343 Ops.push_back(VL);
9344 SDValue Result =
9345 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9346 Load->getMemoryVT(), Load->getMemOperand());
9347 SmallVector<SDValue, 9> Results;
9348 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9349 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9350 DAG, Subtarget));
9351 Results.push_back(Result.getValue(NF));
9352 return DAG.getMergeValues(Results, DL);
9353 }
9354 case Intrinsic::riscv_sf_vc_v_x_se:
9355 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9356 case Intrinsic::riscv_sf_vc_v_i_se:
9357 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9358 case Intrinsic::riscv_sf_vc_v_xv_se:
9359 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9360 case Intrinsic::riscv_sf_vc_v_iv_se:
9361 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9362 case Intrinsic::riscv_sf_vc_v_vv_se:
9363 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9364 case Intrinsic::riscv_sf_vc_v_fv_se:
9365 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9366 case Intrinsic::riscv_sf_vc_v_xvv_se:
9367 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9368 case Intrinsic::riscv_sf_vc_v_ivv_se:
9369 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9370 case Intrinsic::riscv_sf_vc_v_vvv_se:
9371 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9372 case Intrinsic::riscv_sf_vc_v_fvv_se:
9373 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9374 case Intrinsic::riscv_sf_vc_v_xvw_se:
9375 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9376 case Intrinsic::riscv_sf_vc_v_ivw_se:
9377 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9378 case Intrinsic::riscv_sf_vc_v_vvw_se:
9379 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9380 case Intrinsic::riscv_sf_vc_v_fvw_se:
9381 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9382 }
9383
9384 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9385}
9386
9387SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9388 SelectionDAG &DAG) const {
9389 unsigned IntNo = Op.getConstantOperandVal(1);
9390 switch (IntNo) {
9391 default:
9392 break;
9393 case Intrinsic::riscv_masked_strided_store: {
9394 SDLoc DL(Op);
9395 MVT XLenVT = Subtarget.getXLenVT();
9396
9397 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9398 // the selection of the masked intrinsics doesn't do this for us.
9399 SDValue Mask = Op.getOperand(5);
9400 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9401
9402 SDValue Val = Op.getOperand(2);
9403 MVT VT = Val.getSimpleValueType();
9404 MVT ContainerVT = VT;
9405 if (VT.isFixedLengthVector()) {
9406 ContainerVT = getContainerForFixedLengthVector(VT);
9407 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9408 }
9409 if (!IsUnmasked) {
9410 MVT MaskVT = getMaskTypeFor(ContainerVT);
9411 if (VT.isFixedLengthVector())
9412 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9413 }
9414
9415 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9416
9417 SDValue IntID = DAG.getTargetConstant(
9418 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9419 XLenVT);
9420
9421 auto *Store = cast<MemIntrinsicSDNode>(Op);
9422 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9423 Ops.push_back(Val);
9424 Ops.push_back(Op.getOperand(3)); // Ptr
9425 Ops.push_back(Op.getOperand(4)); // Stride
9426 if (!IsUnmasked)
9427 Ops.push_back(Mask);
9428 Ops.push_back(VL);
9429
9430 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9431 Ops, Store->getMemoryVT(),
9432 Store->getMemOperand());
9433 }
9434 case Intrinsic::riscv_seg2_store:
9435 case Intrinsic::riscv_seg3_store:
9436 case Intrinsic::riscv_seg4_store:
9437 case Intrinsic::riscv_seg5_store:
9438 case Intrinsic::riscv_seg6_store:
9439 case Intrinsic::riscv_seg7_store:
9440 case Intrinsic::riscv_seg8_store: {
9441 SDLoc DL(Op);
9442 static const Intrinsic::ID VssegInts[] = {
9443 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9444 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9445 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9446 Intrinsic::riscv_vsseg8};
9447 // Operands are (chain, int_id, vec*, ptr, vl)
9448 unsigned NF = Op->getNumOperands() - 4;
9449 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9450 MVT XLenVT = Subtarget.getXLenVT();
9451 MVT VT = Op->getOperand(2).getSimpleValueType();
9452 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9453
9454 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9455 Subtarget);
9456 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9457 SDValue Ptr = Op->getOperand(NF + 2);
9458
9459 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9460 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9461 for (unsigned i = 0; i < NF; i++)
9462 Ops.push_back(convertToScalableVector(
9463 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9464 Ops.append({Ptr, VL});
9465
9466 return DAG.getMemIntrinsicNode(
9467 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9468 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9469 }
9470 case Intrinsic::riscv_sf_vc_xv_se:
9471 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9472 case Intrinsic::riscv_sf_vc_iv_se:
9473 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9474 case Intrinsic::riscv_sf_vc_vv_se:
9475 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9476 case Intrinsic::riscv_sf_vc_fv_se:
9477 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9478 case Intrinsic::riscv_sf_vc_xvv_se:
9479 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9480 case Intrinsic::riscv_sf_vc_ivv_se:
9481 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9482 case Intrinsic::riscv_sf_vc_vvv_se:
9483 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9484 case Intrinsic::riscv_sf_vc_fvv_se:
9485 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9486 case Intrinsic::riscv_sf_vc_xvw_se:
9487 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9488 case Intrinsic::riscv_sf_vc_ivw_se:
9489 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9490 case Intrinsic::riscv_sf_vc_vvw_se:
9491 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9492 case Intrinsic::riscv_sf_vc_fvw_se:
9493 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9494 }
9495
9496 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9497}
9498
9499static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9500 switch (ISDOpcode) {
9501 default:
9502 llvm_unreachable("Unhandled reduction");
9503 case ISD::VP_REDUCE_ADD:
9504 case ISD::VECREDUCE_ADD:
9505 return RISCVISD::VECREDUCE_ADD_VL;
9506 case ISD::VP_REDUCE_UMAX:
9507 case ISD::VECREDUCE_UMAX:
9508 return RISCVISD::VECREDUCE_UMAX_VL;
9509 case ISD::VP_REDUCE_SMAX:
9510 case ISD::VECREDUCE_SMAX:
9511 return RISCVISD::VECREDUCE_SMAX_VL;
9512 case ISD::VP_REDUCE_UMIN:
9513 case ISD::VECREDUCE_UMIN:
9514 return RISCVISD::VECREDUCE_UMIN_VL;
9515 case ISD::VP_REDUCE_SMIN:
9516 case ISD::VECREDUCE_SMIN:
9517 return RISCVISD::VECREDUCE_SMIN_VL;
9518 case ISD::VP_REDUCE_AND:
9519 case ISD::VECREDUCE_AND:
9520 return RISCVISD::VECREDUCE_AND_VL;
9521 case ISD::VP_REDUCE_OR:
9522 case ISD::VECREDUCE_OR:
9523 return RISCVISD::VECREDUCE_OR_VL;
9524 case ISD::VP_REDUCE_XOR:
9525 case ISD::VECREDUCE_XOR:
9526 return RISCVISD::VECREDUCE_XOR_VL;
9527 case ISD::VP_REDUCE_FADD:
9528 return RISCVISD::VECREDUCE_FADD_VL;
9529 case ISD::VP_REDUCE_SEQ_FADD:
9530 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9531 case ISD::VP_REDUCE_FMAX:
9532 case ISD::VP_REDUCE_FMAXIMUM:
9533 return RISCVISD::VECREDUCE_FMAX_VL;
9534 case ISD::VP_REDUCE_FMIN:
9535 case ISD::VP_REDUCE_FMINIMUM:
9536 return RISCVISD::VECREDUCE_FMIN_VL;
9537 }
9538
9539}
9540
9541SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9542 SelectionDAG &DAG,
9543 bool IsVP) const {
9544 SDLoc DL(Op);
9545 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9546 MVT VecVT = Vec.getSimpleValueType();
9547 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9548 Op.getOpcode() == ISD::VECREDUCE_OR ||
9549 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9550 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9551 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9552 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9553 "Unexpected reduction lowering");
9554
9555 MVT XLenVT = Subtarget.getXLenVT();
9556
9557 MVT ContainerVT = VecVT;
9558 if (VecVT.isFixedLengthVector()) {
9559 ContainerVT = getContainerForFixedLengthVector(VecVT);
9560 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9561 }
9562
9563 SDValue Mask, VL;
9564 if (IsVP) {
9565 Mask = Op.getOperand(2);
9566 VL = Op.getOperand(3);
9567 } else {
9568 std::tie(Mask, VL) =
9569 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9570 }
9571
9572 unsigned BaseOpc;
9573 ISD::CondCode CC;
9574 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9575
9576 switch (Op.getOpcode()) {
9577 default:
9578 llvm_unreachable("Unhandled reduction");
9579 case ISD::VECREDUCE_AND:
9580 case ISD::VP_REDUCE_AND: {
9581 // vcpop ~x == 0
9582 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9583 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9584 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9585 CC = ISD::SETEQ;
9586 BaseOpc = ISD::AND;
9587 break;
9588 }
9589 case ISD::VECREDUCE_OR:
9590 case ISD::VP_REDUCE_OR:
9591 // vcpop x != 0
9592 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9593 CC = ISD::SETNE;
9594 BaseOpc = ISD::OR;
9595 break;
9596 case ISD::VECREDUCE_XOR:
9597 case ISD::VP_REDUCE_XOR: {
9598 // ((vcpop x) & 1) != 0
9599 SDValue One = DAG.getConstant(1, DL, XLenVT);
9600 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9601 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9602 CC = ISD::SETNE;
9603 BaseOpc = ISD::XOR;
9604 break;
9605 }
9606 }
9607
9608 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9609 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9610
9611 if (!IsVP)
9612 return SetCC;
9613
9614 // Now include the start value in the operation.
9615 // Note that we must return the start value when no elements are operated
9616 // upon. The vcpop instructions we've emitted in each case above will return
9617 // 0 for an inactive vector, and so we've already received the neutral value:
9618 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9619 // can simply include the start value.
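// For example (illustrative), a plain vector.reduce.or over a fixed-length
// mask is roughly "vcpop.m a0, v8; snez a0, a0", and the VP form then folds
// the start value in with the scalar OR emitted below.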
9620 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9621}
9622
9623static bool isNonZeroAVL(SDValue AVL) {
9624 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9625 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9626 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9627 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9628}
9629
9630/// Helper to lower a reduction sequence of the form:
9631/// scalar = reduce_op vec, scalar_start
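/// For example (illustrative), an integer add reduction with start value in a0
/// lowers to roughly:
///   vmv.s.x    v9, a0     ; start value into element 0 of an LMUL1 temp
///   vredsum.vs v9, v8, v9
///   vmv.x.s    a0, v9     ; read the scalar result back out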
9632static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9633 SDValue StartValue, SDValue Vec, SDValue Mask,
9634 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9635 const RISCVSubtarget &Subtarget) {
9636 const MVT VecVT = Vec.getSimpleValueType();
9637 const MVT M1VT = getLMUL1VT(VecVT);
9638 const MVT XLenVT = Subtarget.getXLenVT();
9639 const bool NonZeroAVL = isNonZeroAVL(VL);
9640
9641 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9642 // or the original VT if fractional.
9643 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9644 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9645 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9646 // be the result of the reduction operation.
9647 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9648 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9649 DAG, Subtarget);
9650 if (M1VT != InnerVT)
9651 InitialValue =
9652 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9653 InitialValue, DAG.getVectorIdxConstant(0, DL));
9654 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9655 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9656 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9657 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9658 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9659 DAG.getVectorIdxConstant(0, DL));
9660}
9661
9662SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9663 SelectionDAG &DAG) const {
9664 SDLoc DL(Op);
9665 SDValue Vec = Op.getOperand(0);
9666 EVT VecEVT = Vec.getValueType();
9667
9668 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9669
9670 // Due to ordering in legalize types we may have a vector type that needs to
9671 // be split. Do that manually so we can get down to a legal type.
9672 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9673 TargetLowering::TypeSplitVector) {
9674 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9675 VecEVT = Lo.getValueType();
9676 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9677 }
9678
9679 // TODO: The type may need to be widened rather than split. Or widened before
9680 // it can be split.
9681 if (!isTypeLegal(VecEVT))
9682 return SDValue();
9683
9684 MVT VecVT = VecEVT.getSimpleVT();
9685 MVT VecEltVT = VecVT.getVectorElementType();
9686 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9687
9688 MVT ContainerVT = VecVT;
9689 if (VecVT.isFixedLengthVector()) {
9690 ContainerVT = getContainerForFixedLengthVector(VecVT);
9691 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9692 }
9693
9694 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9695
9696 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9697 switch (BaseOpc) {
9698 case ISD::AND:
9699 case ISD::OR:
9700 case ISD::UMAX:
9701 case ISD::UMIN:
9702 case ISD::SMAX:
9703 case ISD::SMIN:
9704 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9705 DAG.getVectorIdxConstant(0, DL));
9706 }
9707 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9708 Mask, VL, DL, DAG, Subtarget);
9709}
9710
9711// Given a reduction op, this function returns the matching reduction opcode,
9712// the vector SDValue and the scalar SDValue required to lower this to a
9713// RISCVISD node.
9714 static std::tuple<unsigned, SDValue, SDValue>
9715 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9716 const RISCVSubtarget &Subtarget) {
9717 SDLoc DL(Op);
9718 auto Flags = Op->getFlags();
9719 unsigned Opcode = Op.getOpcode();
9720 switch (Opcode) {
9721 default:
9722 llvm_unreachable("Unhandled reduction");
9723 case ISD::VECREDUCE_FADD: {
9724 // Use positive zero if we can. It is cheaper to materialize.
9725 SDValue Zero =
9726 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9727 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9728 }
9729 case ISD::VECREDUCE_SEQ_FADD:
9730 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9731 Op.getOperand(0));
9732 case ISD::VECREDUCE_FMINIMUM:
9733 case ISD::VECREDUCE_FMAXIMUM:
9734 case ISD::VECREDUCE_FMIN:
9735 case ISD::VECREDUCE_FMAX: {
9736 SDValue Front =
9737 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9738 DAG.getVectorIdxConstant(0, DL));
9739 unsigned RVVOpc =
9740 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9741 ? RISCVISD::VECREDUCE_FMIN_VL
9742 : RISCVISD::VECREDUCE_FMAX_VL;
9743 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9744 }
9745 }
9746}
9747
9748SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9749 SelectionDAG &DAG) const {
9750 SDLoc DL(Op);
9751 MVT VecEltVT = Op.getSimpleValueType();
9752
9753 unsigned RVVOpcode;
9754 SDValue VectorVal, ScalarVal;
9755 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9756 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9757 MVT VecVT = VectorVal.getSimpleValueType();
9758
9759 MVT ContainerVT = VecVT;
9760 if (VecVT.isFixedLengthVector()) {
9761 ContainerVT = getContainerForFixedLengthVector(VecVT);
9762 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9763 }
9764
9765 MVT ResVT = Op.getSimpleValueType();
9766 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9767 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9768 VL, DL, DAG, Subtarget);
9769 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9770 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9771 return Res;
9772
9773 if (Op->getFlags().hasNoNaNs())
9774 return Res;
9775
9776 // Force output to NaN if any element is NaN.
9777 SDValue IsNan =
9778 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9779 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9780 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9781 MVT XLenVT = Subtarget.getXLenVT();
9782 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9783 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9784 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9785 return DAG.getSelect(
9786 DL, ResVT, NoNaNs, Res,
9787 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9788 ResVT));
9789}
9790
9791SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9792 SelectionDAG &DAG) const {
9793 SDLoc DL(Op);
9794 unsigned Opc = Op.getOpcode();
9795 SDValue Start = Op.getOperand(0);
9796 SDValue Vec = Op.getOperand(1);
9797 EVT VecEVT = Vec.getValueType();
9798 MVT XLenVT = Subtarget.getXLenVT();
9799
9800 // TODO: The type may need to be widened rather than split. Or widened before
9801 // it can be split.
9802 if (!isTypeLegal(VecEVT))
9803 return SDValue();
9804
9805 MVT VecVT = VecEVT.getSimpleVT();
9806 unsigned RVVOpcode = getRVVReductionOp(Opc);
9807
9808 if (VecVT.isFixedLengthVector()) {
9809 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9810 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9811 }
9812
9813 SDValue VL = Op.getOperand(3);
9814 SDValue Mask = Op.getOperand(2);
9815 SDValue Res =
9816 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9817 Vec, Mask, VL, DL, DAG, Subtarget);
9818 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9819 Op->getFlags().hasNoNaNs())
9820 return Res;
9821
9822 // Propagate NaNs.
9823 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9824 // Check if any of the elements in Vec is NaN.
9825 SDValue IsNaN = DAG.getNode(
9826 RISCVISD::SETCC_VL, DL, PredVT,
9827 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9828 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9829 // Check if the start value is NaN.
9830 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9831 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9832 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9833 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9834 MVT ResVT = Res.getSimpleValueType();
9835 return DAG.getSelect(
9836 DL, ResVT, NoNaNs, Res,
9837 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9838 ResVT));
9839}
9840
9841SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9842 SelectionDAG &DAG) const {
9843 SDValue Vec = Op.getOperand(0);
9844 SDValue SubVec = Op.getOperand(1);
9845 MVT VecVT = Vec.getSimpleValueType();
9846 MVT SubVecVT = SubVec.getSimpleValueType();
9847
9848 SDLoc DL(Op);
9849 MVT XLenVT = Subtarget.getXLenVT();
9850 unsigned OrigIdx = Op.getConstantOperandVal(2);
9851 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9852
9853 // We don't have the ability to slide mask vectors up indexed by their i1
9854 // elements; the smallest we can do is i8. Often we are able to bitcast to
9855 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9856 // into a scalable one, we might not necessarily have enough scalable
9857 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
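  // For example: inserting a v16i1 subvector at index 8 into nxv8i1 can be
  // handled as inserting a v2i8 subvector at index 1 into nxv1i8, since
  // 16/8 = 2, 8/8 = 1 and nxv8i1 bitcasts to nxv1i8.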
9858 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9859 (OrigIdx != 0 || !Vec.isUndef())) {
9860 if (VecVT.getVectorMinNumElements() >= 8 &&
9861 SubVecVT.getVectorMinNumElements() >= 8) {
9862 assert(OrigIdx % 8 == 0 && "Invalid index");
9863 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9864 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9865 "Unexpected mask vector lowering");
9866 OrigIdx /= 8;
9867 SubVecVT =
9868 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9869 SubVecVT.isScalableVector());
9870 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9871 VecVT.isScalableVector());
9872 Vec = DAG.getBitcast(VecVT, Vec);
9873 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9874 } else {
9875 // We can't slide this mask vector up indexed by its i1 elements.
9876 // This poses a problem when we wish to insert a scalable vector which
9877 // can't be re-expressed as a larger type. Just choose the slow path and
9878 // extend to a larger type, then truncate back down.
9879 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9880 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9881 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9882 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9883 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9884 Op.getOperand(2));
9885 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9886 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9887 }
9888 }
9889
9890 // If the subvector is a fixed-length type and we don't know VLEN
9891 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9892 // don't know which register of a LMUL group contains the specific subvector
9893 // as we only know the minimum register size. Therefore we must slide the
9894 // vector group up the full amount.
9895 const auto VLen = Subtarget.getRealVLen();
9896 if (SubVecVT.isFixedLengthVector() && !VLen) {
9897 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9898 return Op;
9899 MVT ContainerVT = VecVT;
9900 if (VecVT.isFixedLengthVector()) {
9901 ContainerVT = getContainerForFixedLengthVector(VecVT);
9902 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9903 }
9904
9905 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9906 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9907 DAG.getUNDEF(ContainerVT), SubVec,
9908 DAG.getVectorIdxConstant(0, DL));
9909 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9910 return DAG.getBitcast(Op.getValueType(), SubVec);
9911 }
9912
9913 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9914 DAG.getUNDEF(ContainerVT), SubVec,
9915 DAG.getVectorIdxConstant(0, DL));
9916 SDValue Mask =
9917 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9918 // Set the vector length to only the number of elements we care about. Note
9919 // that for slideup this includes the offset.
9920 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9921 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9922
9923 // Use tail agnostic policy if we're inserting over Vec's tail.
9924 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9925 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9926 Policy = RISCVII::TAIL_AGNOSTIC;
9927
9928 // If we're inserting into the lowest elements, use a tail undisturbed
9929 // vmv.v.v.
9930 if (OrigIdx == 0) {
9931 SubVec =
9932 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9933 } else {
9934 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9935 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9936 SlideupAmt, Mask, VL, Policy);
9937 }
9938
9939 if (VecVT.isFixedLengthVector())
9940 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9941 return DAG.getBitcast(Op.getValueType(), SubVec);
9942 }
9943
9944 MVT ContainerVecVT = VecVT;
9945 if (VecVT.isFixedLengthVector()) {
9946 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9947 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9948 }
9949
9950 MVT ContainerSubVecVT = SubVecVT;
9951 if (SubVecVT.isFixedLengthVector()) {
9952 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9953 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9954 }
9955
9956 unsigned SubRegIdx;
9957 ElementCount RemIdx;
9958 // insert_subvector scales the index by vscale if the subvector is scalable,
9959 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9960 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
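  // (E.g. with VLEN=256, Vscale is 4, so a fixed-length index of 10 is passed
  //  to the decomposition as 10/4 = 2 and the remainder is rebuilt afterwards
  //  as Decompose.second * 4 + 10 % 4.)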
9961 if (SubVecVT.isFixedLengthVector()) {
9962 assert(VLen);
9963 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9964 auto Decompose =
9965 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9966 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9967 SubRegIdx = Decompose.first;
9968 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9969 (OrigIdx % Vscale));
9970 } else {
9971 auto Decompose =
9972 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9973 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9974 SubRegIdx = Decompose.first;
9975 RemIdx = ElementCount::getScalable(Decompose.second);
9976 }
9977
9978 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9979 assert(isPowerOf2_64(
9980 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9981 bool ExactlyVecRegSized =
9982 Subtarget.expandVScale(SubVecVT.getSizeInBits())
9983 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9984
9985 // 1. If the Idx has been completely eliminated and this subvector's size is
9986 // a vector register or a multiple thereof, or the surrounding elements are
9987 // undef, then this is a subvector insert which naturally aligns to a vector
9988 // register. These can easily be handled using subregister manipulation.
9989 // 2. If the subvector isn't an exact multiple of a valid register group size,
9990 // then the insertion must preserve the undisturbed elements of the register.
9991 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9992 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9993 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9994 // of that LMUL=1 type back into the larger vector (resolving to another
9995 // subregister operation). See below for how our VSLIDEUP works. We go via a
9996 // LMUL=1 type to avoid allocating a large register group to hold our
9997 // subvector.
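  // For example, inserting nxv2i32 at index 2 into nxv8i32 is case 1 (exactly
  // one vector register placed at a register boundary), whereas inserting
  // nxv1i32 at index 1 leaves RemIdx == 1 and takes the slideup path below.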
9998 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9999 if (SubVecVT.isFixedLengthVector()) {
10000 // We may get NoSubRegister if inserting at index 0 and the subvec
10001 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10002 if (SubRegIdx == RISCV::NoSubRegister) {
10003 assert(OrigIdx == 0);
10004 return Op;
10005 }
10006
10007 SDValue Insert =
10008 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
10009 if (VecVT.isFixedLengthVector())
10010 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10011 return Insert;
10012 }
10013 return Op;
10014 }
10015
10016 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10017 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10018 // (in our case undisturbed). This means we can set up a subvector insertion
10019 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10020 // size of the subvector.
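  // E.g. for an insert at OFFSET=2 of a 4-element subvector, the slideup uses
  // VL=6: lanes [0,2) keep the old values, lanes [2,6) receive the subvector,
  // and lanes >= 6 are governed by the tail policy.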
10021 MVT InterSubVT = ContainerVecVT;
10022 SDValue AlignedExtract = Vec;
10023 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10024 if (SubVecVT.isFixedLengthVector())
10025 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10026 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10027 InterSubVT = getLMUL1VT(ContainerVecVT);
10028 // Extract a subvector equal to the nearest full vector register type. This
10029 // should resolve to an EXTRACT_SUBREG instruction.
10030 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10031 DAG.getVectorIdxConstant(AlignedIdx, DL));
10032 }
10033
10034 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10035 DAG.getUNDEF(InterSubVT), SubVec,
10036 DAG.getVectorIdxConstant(0, DL));
10037
10038 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10039
10040 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10041 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10042
10043 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10044 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10045 if (Subtarget.expandVScale(EndIndex) ==
10046 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10047 Policy = RISCVII::TAIL_AGNOSTIC;
10048
10049 // If we're inserting into the lowest elements, use a tail undisturbed
10050 // vmv.v.v.
10051 if (RemIdx.isZero()) {
10052 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10053 SubVec, VL);
10054 } else {
10055 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10056
10057 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10058 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10059
10060 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10061 SlideupAmt, Mask, VL, Policy);
10062 }
10063
10064 // If required, insert this subvector back into the correct vector register.
10065 // This should resolve to an INSERT_SUBREG instruction.
10066 if (ContainerVecVT.bitsGT(InterSubVT))
10067 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10068 DAG.getVectorIdxConstant(AlignedIdx, DL));
10069
10070 if (VecVT.isFixedLengthVector())
10071 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10072
10073 // We might have bitcast from a mask type: cast back to the original type if
10074 // required.
10075 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10076}
10077
10078SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10079 SelectionDAG &DAG) const {
10080 SDValue Vec = Op.getOperand(0);
10081 MVT SubVecVT = Op.getSimpleValueType();
10082 MVT VecVT = Vec.getSimpleValueType();
10083
10084 SDLoc DL(Op);
10085 MVT XLenVT = Subtarget.getXLenVT();
10086 unsigned OrigIdx = Op.getConstantOperandVal(1);
10087 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10088
10089 // We don't have the ability to slide mask vectors down indexed by their i1
10090 // elements; the smallest we can do is i8. Often we are able to bitcast to
10091 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10092 // from a scalable one, we might not necessarily have enough scalable
10093 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
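  // For example, extracting a v8i1 subvector at index 16 from nxv64i1 can be
  // rewritten as extracting a v1i8 subvector at index 2 from nxv8i8.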
10094 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
10095 if (VecVT.getVectorMinNumElements() >= 8 &&
10096 SubVecVT.getVectorMinNumElements() >= 8) {
10097 assert(OrigIdx % 8 == 0 && "Invalid index");
10098 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10099 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10100 "Unexpected mask vector lowering");
10101 OrigIdx /= 8;
10102 SubVecVT =
10103 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10104 SubVecVT.isScalableVector());
10105 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10106 VecVT.isScalableVector());
10107 Vec = DAG.getBitcast(VecVT, Vec);
10108 } else {
10109 // We can't slide this mask vector down, indexed by its i1 elements.
10110 // This poses a problem when we wish to extract a scalable vector which
10111 // can't be re-expressed as a larger type. Just choose the slow path and
10112 // extend to a larger type, then truncate back down.
10113 // TODO: We could probably improve this when extracting certain fixed
10114 // from fixed, where we can extract as i8 and shift the correct element
10115 // right to reach the desired subvector?
10116 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10117 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10118 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10119 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10120 Op.getOperand(1));
10121 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10122 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10123 }
10124 }
10125
10126 // With an index of 0 this is a cast-like subvector, which can be performed
10127 // with subregister operations.
10128 if (OrigIdx == 0)
10129 return Op;
10130
10131 const auto VLen = Subtarget.getRealVLen();
10132
10133 // If the subvector is a fixed-length type and we don't know VLEN
10134 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10135 // don't know which register of a LMUL group contains the specific subvector
10136 // as we only know the minimum register size. Therefore we must slide the
10137 // vector group down the full amount.
10138 if (SubVecVT.isFixedLengthVector() && !VLen) {
10139 MVT ContainerVT = VecVT;
10140 if (VecVT.isFixedLengthVector()) {
10141 ContainerVT = getContainerForFixedLengthVector(VecVT);
10142 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10143 }
10144
10145 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10146 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10147 if (auto ShrunkVT =
10148 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10149 ContainerVT = *ShrunkVT;
10150 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10151 DAG.getVectorIdxConstant(0, DL));
10152 }
10153
10154 SDValue Mask =
10155 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10156 // Set the vector length to only the number of elements we care about. This
10157 // avoids sliding down elements we're going to discard straight away.
10158 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
10159 Subtarget);
10160 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10161 SDValue Slidedown =
10162 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10163 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10164 // Now we can use a cast-like subvector extract to get the result.
10165 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10166 DAG.getVectorIdxConstant(0, DL));
10167 return DAG.getBitcast(Op.getValueType(), Slidedown);
10168 }
10169
10170 if (VecVT.isFixedLengthVector()) {
10171 VecVT = getContainerForFixedLengthVector(VecVT);
10172 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10173 }
10174
10175 MVT ContainerSubVecVT = SubVecVT;
10176 if (SubVecVT.isFixedLengthVector())
10177 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10178
10179 unsigned SubRegIdx;
10180 ElementCount RemIdx;
10181 // extract_subvector scales the index by vscale if the subvector is scalable,
10182 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10183 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10184 if (SubVecVT.isFixedLengthVector()) {
10185 assert(VLen);
10186 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10187 auto Decompose =
10188 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10189 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10190 SubRegIdx = Decompose.first;
10191 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10192 (OrigIdx % Vscale));
10193 } else {
10194 auto Decompose =
10195 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10196 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10197 SubRegIdx = Decompose.first;
10198 RemIdx = ElementCount::getScalable(Decompose.second);
10199 }
10200
10201 // If the Idx has been completely eliminated then this is a subvector extract
10202 // which naturally aligns to a vector register. These can easily be handled
10203 // using subregister manipulation.
10204 if (RemIdx.isZero()) {
10205 if (SubVecVT.isFixedLengthVector()) {
10206 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10207 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10208 }
10209 return Op;
10210 }
10211
10212 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10213 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10214 // divide exactly.
10215 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10216 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10217
10218 // If the vector type is an LMUL-group type, extract a subvector equal to the
10219 // nearest full vector register type.
10220 MVT InterSubVT = VecVT;
10221 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10222 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10223 // we should have successfully decomposed the extract into a subregister.
10224 assert(SubRegIdx != RISCV::NoSubRegister);
10225 InterSubVT = getLMUL1VT(VecVT);
10226 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10227 }
10228
10229 // Slide this vector register down by the desired number of elements in order
10230 // to place the desired subvector starting at element 0.
10231 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10232 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10233 if (SubVecVT.isFixedLengthVector())
10234 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10235 Subtarget);
10236 SDValue Slidedown =
10237 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10238 Vec, SlidedownAmt, Mask, VL);
10239
10240 // Now the vector is in the right position, extract our final subvector. This
10241 // should resolve to a COPY.
10242 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10243 DAG.getVectorIdxConstant(0, DL));
10244
10245 // We might have bitcast from a mask type: cast back to the original type if
10246 // required.
10247 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10248}
10249
10250// Widen a vector's operands to i8, then truncate its results back to the
10251// original type, typically i1. All operand and result types must be the same.
10252static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10253 SelectionDAG &DAG) {
10254 MVT VT = N.getSimpleValueType();
10255 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10256 SmallVector<SDValue, 4> WideOps;
10257 for (SDValue Op : N->ops()) {
10258 assert(Op.getSimpleValueType() == VT &&
10259 "Operands and result must be same type");
10260 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10261 }
10262
10263 unsigned NumVals = N->getNumValues();
10264
10265 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10266 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10267 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10268 SmallVector<SDValue, 4> TruncVals;
10269 for (unsigned I = 0; I < NumVals; I++) {
10270 TruncVals.push_back(
10271 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10272 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10273 }
10274
10275 if (TruncVals.size() > 1)
10276 return DAG.getMergeValues(TruncVals, DL);
10277 return TruncVals.front();
10278}
10279
10280SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10281 SelectionDAG &DAG) const {
10282 SDLoc DL(Op);
10283 MVT VecVT = Op.getSimpleValueType();
10284
10285 assert(VecVT.isScalableVector() &&
10286 "vector_interleave on non-scalable vector!");
10287
10288 // 1 bit element vectors need to be widened to e8
10289 if (VecVT.getVectorElementType() == MVT::i1)
10290 return widenVectorOpsToi8(Op, DL, DAG);
10291
10292 // If the VT is LMUL=8, we need to split and reassemble.
10293 if (VecVT.getSizeInBits().getKnownMinValue() ==
10294 (8 * RISCV::RVVBitsPerBlock)) {
10295 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10296 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10297 EVT SplitVT = Op0Lo.getValueType();
10298
10299 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10300 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10301 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10302 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10303
10304 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10305 ResLo.getValue(0), ResHi.getValue(0));
10306 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10307 ResHi.getValue(1));
10308 return DAG.getMergeValues({Even, Odd}, DL);
10309 }
10310
10311 // Concatenate the two vectors as one vector to deinterleave
10312 MVT ConcatVT =
10313 MVT::getVectorVT(VecVT.getVectorElementType(),
10314 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10315 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10316 Op.getOperand(0), Op.getOperand(1));
10317
10318 // We want to operate on all lanes, so get the mask and VL for it
10319 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10320 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10321
10322 // We can deinterleave through vnsrl.wi if the element type is smaller than
10323 // ELEN
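  // (Roughly: the concatenated vector is reinterpreted with elements of twice
  //  the width; a narrowing shift right by 0 bits keeps the even elements and
  //  a shift by SEW bits keeps the odd elements.)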
10324 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10325 SDValue Even =
10326 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10327 SDValue Odd =
10328 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10329 return DAG.getMergeValues({Even, Odd}, DL);
10330 }
10331
10332 // For the indices, use the same SEW to avoid an extra vsetvli
10333 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10334 // Create a vector of even indices {0, 2, 4, ...}
10335 SDValue EvenIdx =
10336 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10337 // Create a vector of odd indices {1, 3, 5, ... }
10338 SDValue OddIdx =
10339 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10340
10341 // Gather the even and odd elements into two separate vectors
10342 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10343 Concat, EvenIdx, Passthru, Mask, VL);
10344 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10345 Concat, OddIdx, Passthru, Mask, VL);
10346
10347 // Extract the result half of the gather for even and odd
10348 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10349 DAG.getVectorIdxConstant(0, DL));
10350 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10351 DAG.getVectorIdxConstant(0, DL));
10352
10353 return DAG.getMergeValues({Even, Odd}, DL);
10354}
10355
10356SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10357 SelectionDAG &DAG) const {
10358 SDLoc DL(Op);
10359 MVT VecVT = Op.getSimpleValueType();
10360
10361 assert(VecVT.isScalableVector() &&
10362 "vector_interleave on non-scalable vector!");
10363
10364 // i1 vectors need to be widened to i8
10365 if (VecVT.getVectorElementType() == MVT::i1)
10366 return widenVectorOpsToi8(Op, DL, DAG);
10367
10368 MVT XLenVT = Subtarget.getXLenVT();
10369 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10370
10371 // If the VT is LMUL=8, we need to split and reassemble.
10372 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10373 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10374 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10375 EVT SplitVT = Op0Lo.getValueType();
10376
10377 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10378 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10379 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10380 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10381
10382 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10383 ResLo.getValue(0), ResLo.getValue(1));
10384 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10385 ResHi.getValue(0), ResHi.getValue(1));
10386 return DAG.getMergeValues({Lo, Hi}, DL);
10387 }
10388
10389 SDValue Interleaved;
10390
10391 // If the element type is smaller than ELEN, then we can interleave with
10392 // vwaddu.vv and vwmaccu.vx
10393 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10394 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10395 DAG, Subtarget);
10396 } else {
10397 // Otherwise, fallback to using vrgathere16.vv
10398 MVT ConcatVT =
10399 MVT::getVectorVT(VecVT.getVectorElementType(),
10400 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10401 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10402 Op.getOperand(0), Op.getOperand(1));
10403
10404 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10405
10406 // 0 1 2 3 4 5 6 7 ...
10407 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10408
10409 // 1 1 1 1 1 1 1 1 ...
10410 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10411
10412 // 1 0 1 0 1 0 1 0 ...
10413 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10414 OddMask = DAG.getSetCC(
10415 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10416 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10417 ISD::CondCode::SETNE);
10418
10419 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10420
10421 // Build up the index vector for interleaving the concatenated vector
10422 // 0 0 1 1 2 2 3 3 ...
10423 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10424 // 0 n 1 n+1 2 n+2 3 n+3 ...
10425 Idx =
10426 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10427
10428 // Then perform the interleave
10429 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10430 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10431 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10432 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10433 }
10434
10435 // Extract the two halves from the interleaved result
10436 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10437 DAG.getVectorIdxConstant(0, DL));
10438 SDValue Hi = DAG.getNode(
10439 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10440 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10441
10442 return DAG.getMergeValues({Lo, Hi}, DL);
10443}
10444
10445// Lower step_vector to the vid instruction. Any non-identity step value must
10446// be accounted for by manual expansion.
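// For example, a step of 4 is lowered as vid.v followed by a shift left by 2,
// while a non-power-of-two step such as 3 uses a vector multiply instead.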
10447SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10448 SelectionDAG &DAG) const {
10449 SDLoc DL(Op);
10450 MVT VT = Op.getSimpleValueType();
10451 assert(VT.isScalableVector() && "Expected scalable vector");
10452 MVT XLenVT = Subtarget.getXLenVT();
10453 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10454 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10455 uint64_t StepValImm = Op.getConstantOperandVal(0);
10456 if (StepValImm != 1) {
10457 if (isPowerOf2_64(StepValImm)) {
10458 SDValue StepVal =
10459 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10460 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10461 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10462 } else {
10463 SDValue StepVal = lowerScalarSplat(
10464 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10465 VL, VT, DL, DAG, Subtarget);
10466 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10467 }
10468 }
10469 return StepVec;
10470}
10471
10472// Implement vector_reverse using vrgather.vv with indices determined by
10473// subtracting the id of each element from (VLMAX-1). This will convert
10474// the indices like so:
10475// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10476// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10477SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10478 SelectionDAG &DAG) const {
10479 SDLoc DL(Op);
10480 MVT VecVT = Op.getSimpleValueType();
10481 if (VecVT.getVectorElementType() == MVT::i1) {
10482 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10483 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10484 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10485 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10486 }
10487 unsigned EltSize = VecVT.getScalarSizeInBits();
10488 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10489 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10490 unsigned MaxVLMAX =
10491 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10492
10493 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10494 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10495
10496 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10497 // to use vrgatherei16.vv.
10498 // TODO: It's also possible to use vrgatherei16.vv for other types to
10499 // decrease register width for the index calculation.
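  // (For instance, with VLEN=1024 and LMUL=8, VLMAX for SEW=8 is 1024, and
  //  indices up to 1023 do not fit into 8-bit index elements.)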
10500 if (MaxVLMAX > 256 && EltSize == 8) {
10501 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10502 // Reverse each half, then reassemble them in reverse order.
10503 // NOTE: It's also possible that after splitting that VLMAX no longer
10504 // requires vrgatherei16.vv.
10505 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10506 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10507 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10508 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10509 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10510 // Reassemble the low and high pieces reversed.
10511 // FIXME: This is a CONCAT_VECTORS.
10512 SDValue Res =
10513 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10514 DAG.getVectorIdxConstant(0, DL));
10515 return DAG.getNode(
10516 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10517 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10518 }
10519
10520 // Just promote the int type to i16 which will double the LMUL.
10521 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10522 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10523 }
10524
10525 MVT XLenVT = Subtarget.getXLenVT();
10526 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10527
10528 // Calculate VLMAX-1 for the desired SEW.
10529 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10530 computeVLMax(VecVT, DL, DAG),
10531 DAG.getConstant(1, DL, XLenVT));
10532
10533 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10534 bool IsRV32E64 =
10535 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10536 SDValue SplatVL;
10537 if (!IsRV32E64)
10538 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10539 else
10540 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10541 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10542
10543 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10544 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10545 DAG.getUNDEF(IntVT), Mask, VL);
10546
10547 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10548 DAG.getUNDEF(VecVT), Mask, VL);
10549}
10550
10551SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10552 SelectionDAG &DAG) const {
10553 SDLoc DL(Op);
10554 SDValue V1 = Op.getOperand(0);
10555 SDValue V2 = Op.getOperand(1);
10556 MVT XLenVT = Subtarget.getXLenVT();
10557 MVT VecVT = Op.getSimpleValueType();
10558
10559 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10560
10561 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10562 SDValue DownOffset, UpOffset;
10563 if (ImmValue >= 0) {
10564 // The operand is a TargetConstant, we need to rebuild it as a regular
10565 // constant.
10566 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10567 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10568 } else {
10569 // The operand is a TargetConstant, we need to rebuild it as a regular
10570 // constant rather than negating the original operand.
10571 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10572 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10573 }
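  // E.g. a splice of two 8-element vectors with immediate 5 uses DownOffset=5
  // and UpOffset=3, producing V1[5..7] in lanes 0..2 followed by V2[0..4].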
10574
10575 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10576
10577 SDValue SlideDown =
10578 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10579 DownOffset, TrueMask, UpOffset);
10580 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10581 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10582 RISCVII::TAIL_AGNOSTIC);
10583}
10584
10585SDValue
10586RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10587 SelectionDAG &DAG) const {
10588 SDLoc DL(Op);
10589 auto *Load = cast<LoadSDNode>(Op);
10590
10591 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10592 Load->getMemoryVT(),
10593 *Load->getMemOperand()) &&
10594 "Expecting a correctly-aligned load");
10595
10596 MVT VT = Op.getSimpleValueType();
10597 MVT XLenVT = Subtarget.getXLenVT();
10598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10599
10600 // If we know the exact VLEN and our fixed length vector completely fills
10601 // the container, use a whole register load instead.
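  // (For example, with VLEN known to be exactly 128, a v4i32 load exactly
  //  fills its container, so a plain whole-register load is emitted instead
  //  of a VL-constrained vle; the store lowering below does the same.)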
10602 const auto [MinVLMAX, MaxVLMAX] =
10603 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10604 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10605 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10606 MachineMemOperand *MMO = Load->getMemOperand();
10607 SDValue NewLoad =
10608 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10609 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10610 MMO->getAAInfo(), MMO->getRanges());
10611 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10612 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10613 }
10614
10615 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10616
10617 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10618 SDValue IntID = DAG.getTargetConstant(
10619 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10620 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10621 if (!IsMaskOp)
10622 Ops.push_back(DAG.getUNDEF(ContainerVT));
10623 Ops.push_back(Load->getBasePtr());
10624 Ops.push_back(VL);
10625 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10626 SDValue NewLoad =
10627 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10628 Load->getMemoryVT(), Load->getMemOperand());
10629
10630 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10631 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10632}
10633
10634SDValue
10635RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10636 SelectionDAG &DAG) const {
10637 SDLoc DL(Op);
10638 auto *Store = cast<StoreSDNode>(Op);
10639
10640 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10641 Store->getMemoryVT(),
10642 *Store->getMemOperand()) &&
10643 "Expecting a correctly-aligned store");
10644
10645 SDValue StoreVal = Store->getValue();
10646 MVT VT = StoreVal.getSimpleValueType();
10647 MVT XLenVT = Subtarget.getXLenVT();
10648
10649 // If the size is less than a byte, we need to pad with zeros to make a byte.
10650 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10651 VT = MVT::v8i1;
10652 StoreVal =
10653 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10654 StoreVal, DAG.getVectorIdxConstant(0, DL));
10655 }
10656
10657 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10658
10659 SDValue NewValue =
10660 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10661
10662
10663 // If we know the exact VLEN and our fixed length vector completely fills
10664 // the container, use a whole register store instead.
10665 const auto [MinVLMAX, MaxVLMAX] =
10666 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10667 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10668 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10669 MachineMemOperand *MMO = Store->getMemOperand();
10670 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10671 MMO->getPointerInfo(), MMO->getBaseAlign(),
10672 MMO->getFlags(), MMO->getAAInfo());
10673 }
10674
10675 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10676 Subtarget);
10677
10678 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10679 SDValue IntID = DAG.getTargetConstant(
10680 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10681 return DAG.getMemIntrinsicNode(
10682 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10683 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10684 Store->getMemoryVT(), Store->getMemOperand());
10685}
10686
10687SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10688 SelectionDAG &DAG) const {
10689 SDLoc DL(Op);
10690 MVT VT = Op.getSimpleValueType();
10691
10692 const auto *MemSD = cast<MemSDNode>(Op);
10693 EVT MemVT = MemSD->getMemoryVT();
10694 MachineMemOperand *MMO = MemSD->getMemOperand();
10695 SDValue Chain = MemSD->getChain();
10696 SDValue BasePtr = MemSD->getBasePtr();
10697
10698 SDValue Mask, PassThru, VL;
10699 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10700 Mask = VPLoad->getMask();
10701 PassThru = DAG.getUNDEF(VT);
10702 VL = VPLoad->getVectorLength();
10703 } else {
10704 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10705 Mask = MLoad->getMask();
10706 PassThru = MLoad->getPassThru();
10707 }
10708
10709 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10710
10711 MVT XLenVT = Subtarget.getXLenVT();
10712
10713 MVT ContainerVT = VT;
10714 if (VT.isFixedLengthVector()) {
10715 ContainerVT = getContainerForFixedLengthVector(VT);
10716 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10717 if (!IsUnmasked) {
10718 MVT MaskVT = getMaskTypeFor(ContainerVT);
10719 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10720 }
10721 }
10722
10723 if (!VL)
10724 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10725
10726 unsigned IntID =
10727 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10728 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10729 if (IsUnmasked)
10730 Ops.push_back(DAG.getUNDEF(ContainerVT));
10731 else
10732 Ops.push_back(PassThru);
10733 Ops.push_back(BasePtr);
10734 if (!IsUnmasked)
10735 Ops.push_back(Mask);
10736 Ops.push_back(VL);
10737 if (!IsUnmasked)
10738 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10739
10740 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10741
10742 SDValue Result =
10743 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10744 Chain = Result.getValue(1);
10745
10746 if (VT.isFixedLengthVector())
10747 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10748
10749 return DAG.getMergeValues({Result, Chain}, DL);
10750}
10751
10752SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10753 SelectionDAG &DAG) const {
10754 SDLoc DL(Op);
10755
10756 const auto *MemSD = cast<MemSDNode>(Op);
10757 EVT MemVT = MemSD->getMemoryVT();
10758 MachineMemOperand *MMO = MemSD->getMemOperand();
10759 SDValue Chain = MemSD->getChain();
10760 SDValue BasePtr = MemSD->getBasePtr();
10761 SDValue Val, Mask, VL;
10762
10763 bool IsCompressingStore = false;
10764 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10765 Val = VPStore->getValue();
10766 Mask = VPStore->getMask();
10767 VL = VPStore->getVectorLength();
10768 } else {
10769 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10770 Val = MStore->getValue();
10771 Mask = MStore->getMask();
10772 IsCompressingStore = MStore->isCompressingStore();
10773 }
10774
10775 bool IsUnmasked =
10776 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10777
10778 MVT VT = Val.getSimpleValueType();
10779 MVT XLenVT = Subtarget.getXLenVT();
10780
10781 MVT ContainerVT = VT;
10782 if (VT.isFixedLengthVector()) {
10783 ContainerVT = getContainerForFixedLengthVector(VT);
10784
10785 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10786 if (!IsUnmasked || IsCompressingStore) {
10787 MVT MaskVT = getMaskTypeFor(ContainerVT);
10788 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10789 }
10790 }
10791
10792 if (!VL)
10793 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10794
10795 if (IsCompressingStore) {
10796 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10797 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10798 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10799 VL =
10800 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10801 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10802 }
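  // E.g. a compressing store of <a,b,c,d> under mask <1,0,1,1> vcompresses
  // the data to <a,c,d,?> and stores only the first vcpop(mask) = 3 elements.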
10803
10804 unsigned IntID =
10805 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10806 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10807 Ops.push_back(Val);
10808 Ops.push_back(BasePtr);
10809 if (!IsUnmasked)
10810 Ops.push_back(Mask);
10811 Ops.push_back(VL);
10812
10813 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10814 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10815}
10816
10817SDValue
10818RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10819 SelectionDAG &DAG) const {
10820 MVT InVT = Op.getOperand(0).getSimpleValueType();
10821 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10822
10823 MVT VT = Op.getSimpleValueType();
10824
10825 SDValue Op1 =
10826 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10827 SDValue Op2 =
10828 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10829
10830 SDLoc DL(Op);
10831 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10832 DAG, Subtarget);
10833 MVT MaskVT = getMaskTypeFor(ContainerVT);
10834
10835 SDValue Cmp =
10836 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10837 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10838
10839 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10840}
10841
10842SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10843 SelectionDAG &DAG) const {
10844 unsigned Opc = Op.getOpcode();
10845 SDLoc DL(Op);
10846 SDValue Chain = Op.getOperand(0);
10847 SDValue Op1 = Op.getOperand(1);
10848 SDValue Op2 = Op.getOperand(2);
10849 SDValue CC = Op.getOperand(3);
10850 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10851 MVT VT = Op.getSimpleValueType();
10852 MVT InVT = Op1.getSimpleValueType();
10853
10854 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10855 // condition code.
10856 if (Opc == ISD::STRICT_FSETCCS) {
10857 // Expand strict_fsetccs(x, oeq) to
10858 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10859 SDVTList VTList = Op->getVTList();
10860 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10861 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10862 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10863 Op2, OLECCVal);
10864 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10865 Op1, OLECCVal);
10866 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10867 Tmp1.getValue(1), Tmp2.getValue(1));
10868 // Tmp1 and Tmp2 might be the same node.
10869 if (Tmp1 != Tmp2)
10870 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10871 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10872 }
10873
10874 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10875 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10876 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10877 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10878 Op2, OEQCCVal);
10879 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10880 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10881 }
10882 }
10883
10884 MVT ContainerInVT = InVT;
10885 if (InVT.isFixedLengthVector()) {
10886 ContainerInVT = getContainerForFixedLengthVector(InVT);
10887 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10888 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10889 }
10890 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10891
10892 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10893
10894 SDValue Res;
10895 if (Opc == ISD::STRICT_FSETCC &&
10896 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10897 CCVal == ISD::SETOLE)) {
10898 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10899 // is only active when both input elements are ordered.
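    // (The ordered mask is computed as (Op1 == Op1) & (Op2 == Op2), i.e. a
    //  lane is active only when neither input is NaN in that lane.)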
10900 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10901 SDValue OrderMask1 = DAG.getNode(
10902 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10903 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10904 True, VL});
10905 SDValue OrderMask2 = DAG.getNode(
10906 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10907 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10908 True, VL});
10909 Mask =
10910 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10911 // Use Mask as the merge operand to let the result be 0 if either of the
10912 // inputs is unordered.
10913 Res = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
10914 DAG.getVTList(MaskVT, MVT::Other),
10915 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10916 } else {
10917 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10918 : RISCVISD::STRICT_FSETCCS_VL;
10919 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10920 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10921 }
10922
10923 if (VT.isFixedLengthVector()) {
10924 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10925 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10926 }
10927 return Res;
10928}
10929
10930// Lower vector ABS to smax(X, sub(0, X)).
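// E.g. abs(<-3, 4>) = smax(<-3, 4>, <3, -4>) = <3, 4>.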
10931SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10932 SDLoc DL(Op);
10933 MVT VT = Op.getSimpleValueType();
10934 SDValue X = Op.getOperand(0);
10935
10936 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10937 "Unexpected type for ISD::ABS");
10938
10939 MVT ContainerVT = VT;
10940 if (VT.isFixedLengthVector()) {
10941 ContainerVT = getContainerForFixedLengthVector(VT);
10942 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10943 }
10944
10945 SDValue Mask, VL;
10946 if (Op->getOpcode() == ISD::VP_ABS) {
10947 Mask = Op->getOperand(1);
10948 if (VT.isFixedLengthVector())
10949 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10950 Subtarget);
10951 VL = Op->getOperand(2);
10952 } else
10953 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10954
10955 SDValue SplatZero = DAG.getNode(
10956 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10957 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10958 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10959 DAG.getUNDEF(ContainerVT), Mask, VL);
10960 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10961 DAG.getUNDEF(ContainerVT), Mask, VL);
10962
10963 if (VT.isFixedLengthVector())
10964 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10965 return Max;
10966}
10967
10968SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10969 SDValue Op, SelectionDAG &DAG) const {
10970 SDLoc DL(Op);
10971 MVT VT = Op.getSimpleValueType();
10972 SDValue Mag = Op.getOperand(0);
10973 SDValue Sign = Op.getOperand(1);
10974 assert(Mag.getValueType() == Sign.getValueType() &&
10975 "Can only handle COPYSIGN with matching types.");
10976
10977 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10978 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10979 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10980
10981 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10982
10983 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10984 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10985
10986 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10987}
10988
10989SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10990 SDValue Op, SelectionDAG &DAG) const {
10991 MVT VT = Op.getSimpleValueType();
10992 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10993
10994 MVT I1ContainerVT =
10995 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10996
10997 SDValue CC =
10998 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10999 SDValue Op1 =
11000 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11001 SDValue Op2 =
11002 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11003
11004 SDLoc DL(Op);
11005 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11006
11007 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11008 Op2, DAG.getUNDEF(ContainerVT), VL);
11009
11010 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11011}
11012
11013SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11014 SelectionDAG &DAG) const {
11015 unsigned NewOpc = getRISCVVLOp(Op);
11016 bool HasMergeOp = hasMergeOp(NewOpc);
11017 bool HasMask = hasMaskOp(NewOpc);
11018
11019 MVT VT = Op.getSimpleValueType();
11020 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11021
11022 // Create list of operands by converting existing ones to scalable types.
11023 SmallVector<SDValue, 6> Ops;
11024 for (const SDValue &V : Op->op_values()) {
11025 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11026
11027 // Pass through non-vector operands.
11028 if (!V.getValueType().isVector()) {
11029 Ops.push_back(V);
11030 continue;
11031 }
11032
11033 // "cast" fixed length vector to a scalable vector.
11034 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11035 "Only fixed length vectors are supported!");
11036 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11037 }
11038
11039 SDLoc DL(Op);
11040 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11041 if (HasMergeOp)
11042 Ops.push_back(DAG.getUNDEF(ContainerVT));
11043 if (HasMask)
11044 Ops.push_back(Mask);
11045 Ops.push_back(VL);
11046
11047 // StrictFP operations have two result values. Their lowered result should
11048 // have the same result count.
11049 if (Op->isStrictFPOpcode()) {
11050 SDValue ScalableRes =
11051 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11052 Op->getFlags());
11053 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11054 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11055 }
11056
11057 SDValue ScalableRes =
11058 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11059 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11060}
11061
11062// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11063// * Operands of each node are assumed to be in the same order.
11064// * The EVL operand is promoted from i32 to i64 on RV64.
11065// * Fixed-length vectors are converted to their scalable-vector container
11066// types.
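// For example, a fixed-length vp.add on v4i32 becomes RISCVISD::ADD_VL on the
// scalable container type, with the mask converted to the container's mask
// type, an undef merge operand inserted where the VL node expects one, and
// the EVL operand used as the VL.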
11067SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11068 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11069 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
11070
11071 SDLoc DL(Op);
11072 MVT VT = Op.getSimpleValueType();
11073 SmallVector<SDValue, 16> Ops;
11074
11075 MVT ContainerVT = VT;
11076 if (VT.isFixedLengthVector())
11077 ContainerVT = getContainerForFixedLengthVector(VT);
11078
11079 for (const auto &OpIdx : enumerate(Op->ops())) {
11080 SDValue V = OpIdx.value();
11081 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11082 // Add dummy merge value before the mask. Or if there isn't a mask, before
11083 // EVL.
11084 if (HasMergeOp) {
11085 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11086 if (MaskIdx) {
11087 if (*MaskIdx == OpIdx.index())
11088 Ops.push_back(DAG.getUNDEF(ContainerVT));
11089 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11090 OpIdx.index()) {
11091 if (Op.getOpcode() == ISD::VP_MERGE) {
11092 // For VP_MERGE, copy the false operand instead of an undef value.
11093 Ops.push_back(Ops.back());
11094 } else {
11095 assert(Op.getOpcode() == ISD::VP_SELECT);
11096 // For VP_SELECT, add an undef value.
11097 Ops.push_back(DAG.getUNDEF(ContainerVT));
11098 }
11099 }
11100 }
11101 // Pass through operands which aren't fixed-length vectors.
11102 if (!V.getValueType().isFixedLengthVector()) {
11103 Ops.push_back(V);
11104 continue;
11105 }
11106 // "cast" fixed length vector to a scalable vector.
11107 MVT OpVT = V.getSimpleValueType();
11108 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11109 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11110 "Only fixed length vectors are supported!");
11111 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11112 }
11113
11114 if (!VT.isFixedLengthVector())
11115 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11116
11117 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11118
11119 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11120}
11121
11122SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11123 SelectionDAG &DAG) const {
11124 SDLoc DL(Op);
11125 MVT VT = Op.getSimpleValueType();
11126
11127 SDValue Src = Op.getOperand(0);
11128 // NOTE: Mask is dropped.
11129 SDValue VL = Op.getOperand(2);
11130
11131 MVT ContainerVT = VT;
11132 if (VT.isFixedLengthVector()) {
11133 ContainerVT = getContainerForFixedLengthVector(VT);
11134 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11135 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11136 }
11137
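// A VP zero/sign extension from an i1 vector is implemented as a select:
// build splats of 0 and of 1 (zero-extend) or -1 (sign-extend), then vmerge
// them under the i1 source. The VP mask operand is dropped above because
// masked-off lanes of a VP operation may be undefined.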
11138 MVT XLenVT = Subtarget.getXLenVT();
11139 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11140 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11141 DAG.getUNDEF(ContainerVT), Zero, VL);
11142
11143 SDValue SplatValue = DAG.getConstant(
11144 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11145 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11146 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11147
11148 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11149 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11150 if (!VT.isFixedLengthVector())
11151 return Result;
11152 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11153}
11154
11155SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11156 SelectionDAG &DAG) const {
11157 SDLoc DL(Op);
11158 MVT VT = Op.getSimpleValueType();
11159
11160 SDValue Op1 = Op.getOperand(0);
11161 SDValue Op2 = Op.getOperand(1);
11162 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11163 // NOTE: Mask is dropped.
11164 SDValue VL = Op.getOperand(4);
11165
11166 MVT ContainerVT = VT;
11167 if (VT.isFixedLengthVector()) {
11168 ContainerVT = getContainerForFixedLengthVector(VT);
11169 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11170 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11171 }
11172
11173 SDValue Result;
11174 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11175
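// With i1 element types, each comparison reduces to boolean logic on mask
// registers: VMXOR computes inequality, and XOR with the all-ones mask
// (VMSET) acts as a logical NOT. For example, X <u Y holds for i1 values
// only when X == 0 and Y == 1, i.e. (~X) & Y, as in the SETULT case below.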
11176 switch (Condition) {
11177 default:
11178 break;
11179 // X != Y --> (X^Y)
11180 case ISD::SETNE:
11181 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11182 break;
11183 // X == Y --> ~(X^Y)
11184 case ISD::SETEQ: {
11185 SDValue Temp =
11186 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11187 Result =
11188 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11189 break;
11190 }
11191 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11192 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11193 case ISD::SETGT:
11194 case ISD::SETULT: {
11195 SDValue Temp =
11196 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11197 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11198 break;
11199 }
11200 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11201 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11202 case ISD::SETLT:
11203 case ISD::SETUGT: {
11204 SDValue Temp =
11205 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11206 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11207 break;
11208 }
11209 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11210 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11211 case ISD::SETGE:
11212 case ISD::SETULE: {
11213 SDValue Temp =
11214 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11215 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11216 break;
11217 }
11218 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11219 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11220 case ISD::SETLE:
11221 case ISD::SETUGE: {
11222 SDValue Temp =
11223 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11224 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11225 break;
11226 }
11227 }
11228
11229 if (!VT.isFixedLengthVector())
11230 return Result;
11231 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11232}
11233
11234// Lower Floating-Point/Integer Type-Convert VP SDNodes
11235SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11236 SelectionDAG &DAG) const {
11237 SDLoc DL(Op);
11238
11239 SDValue Src = Op.getOperand(0);
11240 SDValue Mask = Op.getOperand(1);
11241 SDValue VL = Op.getOperand(2);
11242 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11243
11244 MVT DstVT = Op.getSimpleValueType();
11245 MVT SrcVT = Src.getSimpleValueType();
11246 if (DstVT.isFixedLengthVector()) {
11247 DstVT = getContainerForFixedLengthVector(DstVT);
11248 SrcVT = getContainerForFixedLengthVector(SrcVT);
11249 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11250 MVT MaskVT = getMaskTypeFor(DstVT);
11251 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11252 }
11253
11254 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11255 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11256
11257 SDValue Result;
11258 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11259 if (SrcVT.isInteger()) {
11260 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11261
11262 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11263 ? RISCVISD::VSEXT_VL
11264 : RISCVISD::VZEXT_VL;
11265
11266 // Do we need to do any pre-widening before converting?
11267 if (SrcEltSize == 1) {
11268 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11269 MVT XLenVT = Subtarget.getXLenVT();
11270 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11271 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11272 DAG.getUNDEF(IntVT), Zero, VL);
11273 SDValue One = DAG.getConstant(
11274 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11275 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11276 DAG.getUNDEF(IntVT), One, VL);
11277 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11278 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11279 } else if (DstEltSize > (2 * SrcEltSize)) {
11280 // Widen before converting.
11281 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11282 DstVT.getVectorElementCount());
11283 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11284 }
11285
11286 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11287 } else {
11288 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11289 "Wrong input/output vector types");
11290
11291 // Convert f16 to f32 then convert f32 to i64.
11292 if (DstEltSize > (2 * SrcEltSize)) {
11293 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11294 MVT InterimFVT =
11295 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11296 Src =
11297 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11298 }
11299
11300 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11301 }
11302 } else { // Narrowing + Conversion
11303 if (SrcVT.isInteger()) {
11304 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11305 // First do a narrowing conversion to an FP type half the size, then round
11306 // to a smaller FP type if needed.
11307
11308 MVT InterimFVT = DstVT;
11309 if (SrcEltSize > (2 * DstEltSize)) {
11310 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11311 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11312 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11313 }
11314
11315 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11316
11317 if (InterimFVT != DstVT) {
11318 Src = Result;
11319 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11320 }
11321 } else {
11322 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11323 "Wrong input/output vector types");
11324 // First do a narrowing conversion to an integer half the size, then
11325 // truncate if needed.
11326
11327 if (DstEltSize == 1) {
11328 // First convert to the same size integer, then convert to mask using
11329 // setcc.
11330 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11331 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11332 DstVT.getVectorElementCount());
11333 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11334
11335 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11336 // otherwise the conversion was undefined.
11337 MVT XLenVT = Subtarget.getXLenVT();
11338 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11339 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11340 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11341 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11342 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11343 DAG.getUNDEF(DstVT), Mask, VL});
11344 } else {
11345 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11346 DstVT.getVectorElementCount());
11347
11348 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11349
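// The truncate only narrows the element width by a factor of two per step,
// so keep halving the interim integer type until the destination type is
// reached.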
11350 while (InterimIVT != DstVT) {
11351 SrcEltSize /= 2;
11352 Src = Result;
11353 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11354 DstVT.getVectorElementCount());
11355 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11356 Src, Mask, VL);
11357 }
11358 }
11359 }
11360 }
11361
11362 MVT VT = Op.getSimpleValueType();
11363 if (!VT.isFixedLengthVector())
11364 return Result;
11365 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11366}
11367
11368SDValue
11369RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11370 SelectionDAG &DAG) const {
11371 SDLoc DL(Op);
11372
11373 SDValue Op1 = Op.getOperand(0);
11374 SDValue Op2 = Op.getOperand(1);
11375 SDValue Offset = Op.getOperand(2);
11376 SDValue Mask = Op.getOperand(3);
11377 SDValue EVL1 = Op.getOperand(4);
11378 SDValue EVL2 = Op.getOperand(5);
11379
11380 const MVT XLenVT = Subtarget.getXLenVT();
11381 MVT VT = Op.getSimpleValueType();
11382 MVT ContainerVT = VT;
11383 if (VT.isFixedLengthVector()) {
11384 ContainerVT = getContainerForFixedLengthVector(VT);
11385 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11386 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11387 MVT MaskVT = getMaskTypeFor(ContainerVT);
11388 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11389 }
11390
11391 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11392 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11393
11394 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11395 if (IsMaskVector) {
11396 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11397
11398 // Expand input operands
11399 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11400 DAG.getUNDEF(ContainerVT),
11401 DAG.getConstant(1, DL, XLenVT), EVL1);
11402 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11403 DAG.getUNDEF(ContainerVT),
11404 DAG.getConstant(0, DL, XLenVT), EVL1);
11405 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11406 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11407
11408 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11409 DAG.getUNDEF(ContainerVT),
11410 DAG.getConstant(1, DL, XLenVT), EVL2);
11411 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11412 DAG.getUNDEF(ContainerVT),
11413 DAG.getConstant(0, DL, XLenVT), EVL2);
11414 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11415 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11416 }
11417
11418 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11419 SDValue DownOffset, UpOffset;
11420 if (ImmValue >= 0) {
11421 // The operand is a TargetConstant, we need to rebuild it as a regular
11422 // constant.
11423 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11424 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11425 } else {
11426 // The operand is a TargetConstant, we need to rebuild it as a regular
11427 // constant rather than negating the original operand.
11428 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11429 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11430 }
11431
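// For a non-negative offset, the slidedown keeps Op1[Offset..EVL1) in the
// low lanes and the slideup then appends the leading elements of Op2 after
// them, forming the spliced result.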
11432 SDValue SlideDown =
11433 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11434 Op1, DownOffset, Mask, UpOffset);
11435 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11436 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11437
11438 if (IsMaskVector) {
11439 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11440 Result = DAG.getNode(
11441 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11442 {Result, DAG.getConstant(0, DL, ContainerVT),
11443 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11444 Mask, EVL2});
11445 }
11446
11447 if (!VT.isFixedLengthVector())
11448 return Result;
11449 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11450}
11451
11452SDValue
11453RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11454 SelectionDAG &DAG) const {
11455 SDLoc DL(Op);
11456 MVT VT = Op.getSimpleValueType();
11457 MVT XLenVT = Subtarget.getXLenVT();
11458
11459 SDValue Op1 = Op.getOperand(0);
11460 SDValue Mask = Op.getOperand(1);
11461 SDValue EVL = Op.getOperand(2);
11462
11463 MVT ContainerVT = VT;
11464 if (VT.isFixedLengthVector()) {
11465 ContainerVT = getContainerForFixedLengthVector(VT);
11466 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11467 MVT MaskVT = getMaskTypeFor(ContainerVT);
11468 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11469 }
11470
11471 MVT GatherVT = ContainerVT;
11472 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11473 // Check if we are working with mask vectors
11474 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11475 if (IsMaskVector) {
11476 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11477
11478 // Expand input operand
11479 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11480 DAG.getUNDEF(IndicesVT),
11481 DAG.getConstant(1, DL, XLenVT), EVL);
11482 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11483 DAG.getUNDEF(IndicesVT),
11484 DAG.getConstant(0, DL, XLenVT), EVL);
11485 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11486 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11487 }
11488
11489 unsigned EltSize = GatherVT.getScalarSizeInBits();
11490 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11491 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11492 unsigned MaxVLMAX =
11493 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11494
11495 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11496 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11497 // to use vrgatherei16.vv.
11498 // TODO: It's also possible to use vrgatherei16.vv for other types to
11499 // decrease register width for the index calculation.
11500 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11501 if (MaxVLMAX > 256 && EltSize == 8) {
11502 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11503 // Split the vector in half and reverse each half using a full register
11504 // reverse.
11505 // Swap the halves and concatenate them.
11506 // Slide the concatenated result by (VLMax - VL).
11507 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11508 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11509 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11510
11511 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11512 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11513
11514 // Reassemble the low and high pieces reversed.
11515 // NOTE: this Result is unmasked (because we do not need masks for
11516 // shuffles). If in the future this has to change, we can use a SELECT_VL
11517 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11518 SDValue Result =
11519 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11520
11521 // Slide off any elements from past EVL that were reversed into the low
11522 // elements.
11523 unsigned MinElts = GatherVT.getVectorMinNumElements();
11524 SDValue VLMax =
11525 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11526 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11527
11528 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11529 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11530
11531 if (IsMaskVector) {
11532 // Truncate Result back to a mask vector
11533 Result =
11534 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11535 {Result, DAG.getConstant(0, DL, GatherVT),
11536 DAG.getCondCode(ISD::SETNE),
11537 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11538 }
11539
11540 if (!VT.isFixedLengthVector())
11541 return Result;
11542 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11543 }
11544
11545 // Just promote the int type to i16 which will double the LMUL.
11546 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11547 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11548 }
11549
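// General case: reverse the first EVL elements with a gather whose indices
// are (EVL - 1) - vid, so element i of the result reads element
// EVL - 1 - i of the source.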
11550 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11551 SDValue VecLen =
11552 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11553 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11554 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11555 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11556 DAG.getUNDEF(IndicesVT), Mask, EVL);
11557 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11558 DAG.getUNDEF(GatherVT), Mask, EVL);
11559
11560 if (IsMaskVector) {
11561 // Truncate Result back to a mask vector
11562 Result = DAG.getNode(
11563 RISCVISD::SETCC_VL, DL, ContainerVT,
11564 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11565 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11566 }
11567
11568 if (!VT.isFixedLengthVector())
11569 return Result;
11570 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11571}
11572
11573SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11574 SelectionDAG &DAG) const {
11575 MVT VT = Op.getSimpleValueType();
11576 if (VT.getVectorElementType() != MVT::i1)
11577 return lowerVPOp(Op, DAG);
11578
11579 // It is safe to drop mask parameter as masked-off elements are undef.
11580 SDValue Op1 = Op->getOperand(0);
11581 SDValue Op2 = Op->getOperand(1);
11582 SDValue VL = Op->getOperand(3);
11583
11584 MVT ContainerVT = VT;
11585 const bool IsFixed = VT.isFixedLengthVector();
11586 if (IsFixed) {
11587 ContainerVT = getContainerForFixedLengthVector(VT);
11588 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11589 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11590 }
11591
11592 SDLoc DL(Op);
11593 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11594 if (!IsFixed)
11595 return Val;
11596 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11597}
11598
11599SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11600 SelectionDAG &DAG) const {
11601 SDLoc DL(Op);
11602 MVT XLenVT = Subtarget.getXLenVT();
11603 MVT VT = Op.getSimpleValueType();
11604 MVT ContainerVT = VT;
11605 if (VT.isFixedLengthVector())
11606 ContainerVT = getContainerForFixedLengthVector(VT);
11607
11608 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11609
11610 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11611 // Check if the mask is known to be all ones
11612 SDValue Mask = VPNode->getMask();
11613 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11614
11615 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11616 : Intrinsic::riscv_vlse_mask,
11617 DL, XLenVT);
11618 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11619 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11620 VPNode->getStride()};
11621 if (!IsUnmasked) {
11622 if (VT.isFixedLengthVector()) {
11623 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11624 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11625 }
11626 Ops.push_back(Mask);
11627 }
11628 Ops.push_back(VPNode->getVectorLength());
11629 if (!IsUnmasked) {
11630 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11631 Ops.push_back(Policy);
11632 }
11633
11634 SDValue Result =
11635 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11636 VPNode->getMemoryVT(), VPNode->getMemOperand());
11637 SDValue Chain = Result.getValue(1);
11638
11639 if (VT.isFixedLengthVector())
11640 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11641
11642 return DAG.getMergeValues({Result, Chain}, DL);
11643}
11644
11645SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11646 SelectionDAG &DAG) const {
11647 SDLoc DL(Op);
11648 MVT XLenVT = Subtarget.getXLenVT();
11649
11650 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11651 SDValue StoreVal = VPNode->getValue();
11652 MVT VT = StoreVal.getSimpleValueType();
11653 MVT ContainerVT = VT;
11654 if (VT.isFixedLengthVector()) {
11655 ContainerVT = getContainerForFixedLengthVector(VT);
11656 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11657 }
11658
11659 // Check if the mask is known to be all ones
11660 SDValue Mask = VPNode->getMask();
11661 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11662
11663 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11664 : Intrinsic::riscv_vsse_mask,
11665 DL, XLenVT);
11666 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11667 VPNode->getBasePtr(), VPNode->getStride()};
11668 if (!IsUnmasked) {
11669 if (VT.isFixedLengthVector()) {
11670 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 }
11673 Ops.push_back(Mask);
11674 }
11675 Ops.push_back(VPNode->getVectorLength());
11676
11677 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11678 Ops, VPNode->getMemoryVT(),
11679 VPNode->getMemOperand());
11680}
11681
11682// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11683 // matched to an RVV indexed load. The RVV indexed load instructions only
11684// support the "unsigned unscaled" addressing mode; indices are implicitly
11685// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11686// signed or scaled indexing is extended to the XLEN value type and scaled
11687// accordingly.
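// Both forms funnel into the riscv_vluxei / riscv_vluxei_mask intrinsics
// below; when the mask is a constant all-ones splat, the unmasked intrinsic
// is used and the passthru operand becomes undef.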
11688SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11689 SelectionDAG &DAG) const {
11690 SDLoc DL(Op);
11691 MVT VT = Op.getSimpleValueType();
11692
11693 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11694 EVT MemVT = MemSD->getMemoryVT();
11695 MachineMemOperand *MMO = MemSD->getMemOperand();
11696 SDValue Chain = MemSD->getChain();
11697 SDValue BasePtr = MemSD->getBasePtr();
11698
11699 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11700 SDValue Index, Mask, PassThru, VL;
11701
11702 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11703 Index = VPGN->getIndex();
11704 Mask = VPGN->getMask();
11705 PassThru = DAG.getUNDEF(VT);
11706 VL = VPGN->getVectorLength();
11707 // VP doesn't support extending loads.
11708 LoadExtType = ISD::NON_EXTLOAD;
11709 } else {
11710 // Else it must be a MGATHER.
11711 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11712 Index = MGN->getIndex();
11713 Mask = MGN->getMask();
11714 PassThru = MGN->getPassThru();
11715 LoadExtType = MGN->getExtensionType();
11716 }
11717
11718 MVT IndexVT = Index.getSimpleValueType();
11719 MVT XLenVT = Subtarget.getXLenVT();
11720
11722 "Unexpected VTs!");
11723 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11724 // Targets have to explicitly opt-in for extending vector loads.
11725 assert(LoadExtType == ISD::NON_EXTLOAD &&
11726 "Unexpected extending MGATHER/VP_GATHER");
11727
11728 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11729 // the selection of the masked intrinsics doesn't do this for us.
11730 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11731
11732 MVT ContainerVT = VT;
11733 if (VT.isFixedLengthVector()) {
11734 ContainerVT = getContainerForFixedLengthVector(VT);
11735 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11736 ContainerVT.getVectorElementCount());
11737
11738 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11739
11740 if (!IsUnmasked) {
11741 MVT MaskVT = getMaskTypeFor(ContainerVT);
11742 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11743 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11744 }
11745 }
11746
11747 if (!VL)
11748 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11749
11750 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11751 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11752 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11753 }
11754
11755 unsigned IntID =
11756 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11757 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11758 if (IsUnmasked)
11759 Ops.push_back(DAG.getUNDEF(ContainerVT));
11760 else
11761 Ops.push_back(PassThru);
11762 Ops.push_back(BasePtr);
11763 Ops.push_back(Index);
11764 if (!IsUnmasked)
11765 Ops.push_back(Mask);
11766 Ops.push_back(VL);
11767 if (!IsUnmasked)
11768 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11769
11770 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11771 SDValue Result =
11772 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11773 Chain = Result.getValue(1);
11774
11775 if (VT.isFixedLengthVector())
11776 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11777
11778 return DAG.getMergeValues({Result, Chain}, DL);
11779}
11780
11781// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11782 // matched to an RVV indexed store. The RVV indexed store instructions only
11783// support the "unsigned unscaled" addressing mode; indices are implicitly
11784// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11785// signed or scaled indexing is extended to the XLEN value type and scaled
11786// accordingly.
11787SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11788 SelectionDAG &DAG) const {
11789 SDLoc DL(Op);
11790 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11791 EVT MemVT = MemSD->getMemoryVT();
11792 MachineMemOperand *MMO = MemSD->getMemOperand();
11793 SDValue Chain = MemSD->getChain();
11794 SDValue BasePtr = MemSD->getBasePtr();
11795
11796 [[maybe_unused]] bool IsTruncatingStore = false;
11797 SDValue Index, Mask, Val, VL;
11798
11799 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11800 Index = VPSN->getIndex();
11801 Mask = VPSN->getMask();
11802 Val = VPSN->getValue();
11803 VL = VPSN->getVectorLength();
11804 // VP doesn't support truncating stores.
11805 IsTruncatingStore = false;
11806 } else {
11807 // Else it must be a MSCATTER.
11808 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11809 Index = MSN->getIndex();
11810 Mask = MSN->getMask();
11811 Val = MSN->getValue();
11812 IsTruncatingStore = MSN->isTruncatingStore();
11813 }
11814
11815 MVT VT = Val.getSimpleValueType();
11816 MVT IndexVT = Index.getSimpleValueType();
11817 MVT XLenVT = Subtarget.getXLenVT();
11818
11820 "Unexpected VTs!");
11821 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11822 // Targets have to explicitly opt-in for extending vector loads and
11823 // truncating vector stores.
11824 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11825
11826 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11827 // the selection of the masked intrinsics doesn't do this for us.
11828 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11829
11830 MVT ContainerVT = VT;
11831 if (VT.isFixedLengthVector()) {
11832 ContainerVT = getContainerForFixedLengthVector(VT);
11833 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11834 ContainerVT.getVectorElementCount());
11835
11836 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11837 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11838
11839 if (!IsUnmasked) {
11840 MVT MaskVT = getMaskTypeFor(ContainerVT);
11841 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11842 }
11843 }
11844
11845 if (!VL)
11846 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11847
11848 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11849 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11850 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11851 }
11852
11853 unsigned IntID =
11854 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11855 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11856 Ops.push_back(Val);
11857 Ops.push_back(BasePtr);
11858 Ops.push_back(Index);
11859 if (!IsUnmasked)
11860 Ops.push_back(Mask);
11861 Ops.push_back(VL);
11862
11863 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11864 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11865}
11866
11867SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11868 SelectionDAG &DAG) const {
11869 const MVT XLenVT = Subtarget.getXLenVT();
11870 SDLoc DL(Op);
11871 SDValue Chain = Op->getOperand(0);
11872 SDValue SysRegNo = DAG.getTargetConstant(
11873 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11874 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11875 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11876
11877 // Encoding used for rounding mode in RISC-V differs from that used in
11878 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
11879 // into a table, which consists of a sequence of 4-bit fields, each
11880 // representing the corresponding FLT_ROUNDS mode.
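// For example, when FRM holds RTZ (1), the shift below is 4, so the 4-bit
// field at bits [7:4] of the table is selected, yielding
// int(RoundingMode::TowardZero).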
11881 static const int Table =
11882 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11883 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11884 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11885 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11886 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11887
11888 SDValue Shift =
11889 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11890 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11891 DAG.getConstant(Table, DL, XLenVT), Shift);
11892 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11893 DAG.getConstant(7, DL, XLenVT));
11894
11895 return DAG.getMergeValues({Masked, Chain}, DL);
11896}
11897
11898SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11899 SelectionDAG &DAG) const {
11900 const MVT XLenVT = Subtarget.getXLenVT();
11901 SDLoc DL(Op);
11902 SDValue Chain = Op->getOperand(0);
11903 SDValue RMValue = Op->getOperand(1);
11904 SDValue SysRegNo = DAG.getTargetConstant(
11905 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11906
11907 // Encoding used for rounding mode in RISC-V differs from that used in
11908 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
11909 // a table, which consists of a sequence of 4-bit fields, each representing
11910 // the corresponding RISC-V mode.
11911 static const unsigned Table =
11912 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11913 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11914 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11915 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11916 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11917
11918 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11919
11920 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11921 DAG.getConstant(2, DL, XLenVT));
11922 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11923 DAG.getConstant(Table, DL, XLenVT), Shift);
11924 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11925 DAG.getConstant(0x7, DL, XLenVT));
11926 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11927 RMValue);
11928}
11929
11930SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11931 SelectionDAG &DAG) const {
11932 MachineFunction &MF = DAG.getMachineFunction();
11933
11934 bool isRISCV64 = Subtarget.is64Bit();
11935 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11936
11937 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11938 return DAG.getFrameIndex(FI, PtrVT);
11939}
11940
11941// Returns the opcode of the target-specific SDNode that implements the 32-bit
11942// form of the given Opcode.
11943static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11944 switch (Opcode) {
11945 default:
11946 llvm_unreachable("Unexpected opcode");
11947 case ISD::SHL:
11948 return RISCVISD::SLLW;
11949 case ISD::SRA:
11950 return RISCVISD::SRAW;
11951 case ISD::SRL:
11952 return RISCVISD::SRLW;
11953 case ISD::SDIV:
11954 return RISCVISD::DIVW;
11955 case ISD::UDIV:
11956 return RISCVISD::DIVUW;
11957 case ISD::UREM:
11958 return RISCVISD::REMUW;
11959 case ISD::ROTL:
11960 return RISCVISD::ROLW;
11961 case ISD::ROTR:
11962 return RISCVISD::RORW;
11963 }
11964}
11965
11966// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11967// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11968// otherwise be promoted to i64, making it difficult to select the
11969 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
11970 // originally of type i8/i16/i32 is lost.
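// For example, an i32 UDIV on RV64 becomes: any-extend both operands to
// i64, emit RISCVISD::DIVUW (which only reads the low 32 bits of its
// inputs), then truncate the result back to i32.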
11971 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11972 unsigned ExtOpc = ISD::ANY_EXTEND) {
11973 SDLoc DL(N);
11974 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11975 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11976 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11977 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11978 // ReplaceNodeResults requires we maintain the same type for the return value.
11979 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11980}
11981
11982 // Converts the given 32-bit operation to an i64 operation with sign-extension
11983 // semantics, reducing the number of sign-extension instructions required.
11984 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11985 SDLoc DL(N);
11986 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11987 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11988 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11989 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11990 DAG.getValueType(MVT::i32));
11991 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11992}
11993
11994 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11995 SmallVectorImpl<SDValue> &Results,
11996 SelectionDAG &DAG) const {
11997 SDLoc DL(N);
11998 switch (N->getOpcode()) {
11999 default:
12000 llvm_unreachable("Don't know how to custom type legalize this operation!");
12001 case ISD::STRICT_FP_TO_SINT:
12002 case ISD::STRICT_FP_TO_UINT:
12003 case ISD::FP_TO_SINT:
12004 case ISD::FP_TO_UINT: {
12005 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12006 "Unexpected custom legalisation");
12007 bool IsStrict = N->isStrictFPOpcode();
12008 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12009 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12010 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12011 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12012 TargetLowering::TypeSoftenFloat) {
12013 if (!isTypeLegal(Op0.getValueType()))
12014 return;
12015 if (IsStrict) {
12016 SDValue Chain = N->getOperand(0);
12017 // In the absence of Zfh, promote f16 to f32, then convert.
12018 if (Op0.getValueType() == MVT::f16 &&
12019 !Subtarget.hasStdExtZfhOrZhinx()) {
12020 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12021 {Chain, Op0});
12022 Chain = Op0.getValue(1);
12023 }
12024 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12025 : RISCVISD::STRICT_FCVT_WU_RV64;
12026 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12027 SDValue Res = DAG.getNode(
12028 Opc, DL, VTs, Chain, Op0,
12029 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12030 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12031 Results.push_back(Res.getValue(1));
12032 return;
12033 }
12034 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12035 // convert.
12036 if ((Op0.getValueType() == MVT::f16 &&
12037 !Subtarget.hasStdExtZfhOrZhinx()) ||
12038 Op0.getValueType() == MVT::bf16)
12039 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12040
12041 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12042 SDValue Res =
12043 DAG.getNode(Opc, DL, MVT::i64, Op0,
12044 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12045 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12046 return;
12047 }
12048 // If the FP type needs to be softened, emit a library call using the 'si'
12049 // version. If we left it to default legalization we'd end up with 'di'. If
12050 // the FP type doesn't need to be softened just let generic type
12051 // legalization promote the result type.
12052 RTLIB::Libcall LC;
12053 if (IsSigned)
12054 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12055 else
12056 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12057 MakeLibCallOptions CallOptions;
12058 EVT OpVT = Op0.getValueType();
12059 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12060 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12061 SDValue Result;
12062 std::tie(Result, Chain) =
12063 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12064 Results.push_back(Result);
12065 if (IsStrict)
12066 Results.push_back(Chain);
12067 break;
12068 }
12069 case ISD::LROUND: {
12070 SDValue Op0 = N->getOperand(0);
12071 EVT Op0VT = Op0.getValueType();
12072 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12073 TargetLowering::TypeSoftenFloat) {
12074 if (!isTypeLegal(Op0VT))
12075 return;
12076
12077 // In the absence of Zfh, promote f16 to f32, then convert.
12078 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12079 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12080
12081 SDValue Res =
12082 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12083 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12084 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12085 return;
12086 }
12087 // If the FP type needs to be softened, emit a library call to lround. We'll
12088 // need to truncate the result. We assume any value that doesn't fit in i32
12089 // is allowed to return an unspecified value.
12090 RTLIB::Libcall LC =
12091 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12092 MakeLibCallOptions CallOptions;
12093 EVT OpVT = Op0.getValueType();
12094 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12095 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12096 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12097 Results.push_back(Result);
12098 break;
12099 }
12100 case ISD::READCYCLECOUNTER:
12101 case ISD::READSTEADYCOUNTER: {
12102 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12103 "has custom type legalization on riscv32");
12104
12105 SDValue LoCounter, HiCounter;
12106 MVT XLenVT = Subtarget.getXLenVT();
12107 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12108 LoCounter = DAG.getTargetConstant(
12109 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12110 HiCounter = DAG.getTargetConstant(
12111 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12112 } else {
12113 LoCounter = DAG.getTargetConstant(
12114 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12115 HiCounter = DAG.getTargetConstant(
12116 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12117 }
12118 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12119 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12120 N->getOperand(0), LoCounter, HiCounter);
12121
12122 Results.push_back(
12123 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12124 Results.push_back(RCW.getValue(2));
12125 break;
12126 }
12127 case ISD::LOAD: {
12128 if (!ISD::isNON_EXTLoad(N))
12129 return;
12130
12131 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12132 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12133 LoadSDNode *Ld = cast<LoadSDNode>(N);
12134
12135 SDLoc dl(N);
12136 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12137 Ld->getBasePtr(), Ld->getMemoryVT(),
12138 Ld->getMemOperand());
12139 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12140 Results.push_back(Res.getValue(1));
12141 return;
12142 }
12143 case ISD::MUL: {
12144 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12145 unsigned XLen = Subtarget.getXLen();
12146 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12147 if (Size > XLen) {
12148 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12149 SDValue LHS = N->getOperand(0);
12150 SDValue RHS = N->getOperand(1);
12151 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12152
12153 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12154 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12155 // We need exactly one side to be unsigned.
12156 if (LHSIsU == RHSIsU)
12157 return;
12158
12159 auto MakeMULPair = [&](SDValue S, SDValue U) {
12160 MVT XLenVT = Subtarget.getXLenVT();
12161 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12162 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12163 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12164 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12165 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12166 };
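// MULHSU supplies the high XLen bits of the signed*unsigned product and MUL
// supplies the low XLen bits; BUILD_PAIR reassembles them into the full
// 2*XLen-bit result.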
12167
12168 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12169 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12170
12171 // The other operand should be signed, but still prefer MULH when
12172 // possible.
12173 if (RHSIsU && LHSIsS && !RHSIsS)
12174 Results.push_back(MakeMULPair(LHS, RHS));
12175 else if (LHSIsU && RHSIsS && !LHSIsS)
12176 Results.push_back(MakeMULPair(RHS, LHS));
12177
12178 return;
12179 }
12180 [[fallthrough]];
12181 }
12182 case ISD::ADD:
12183 case ISD::SUB:
12184 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12185 "Unexpected custom legalisation");
12186 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12187 break;
12188 case ISD::SHL:
12189 case ISD::SRA:
12190 case ISD::SRL:
12191 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12192 "Unexpected custom legalisation");
12193 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12194 // If we can use a BSET instruction, allow default promotion to apply.
12195 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12196 isOneConstant(N->getOperand(0)))
12197 break;
12198 Results.push_back(customLegalizeToWOp(N, DAG));
12199 break;
12200 }
12201
12202 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12203 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12204 // shift amount.
12205 if (N->getOpcode() == ISD::SHL) {
12206 SDLoc DL(N);
12207 SDValue NewOp0 =
12208 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12209 SDValue NewOp1 =
12210 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12211 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12212 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12213 DAG.getValueType(MVT::i32));
12214 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12215 }
12216
12217 break;
12218 case ISD::ROTL:
12219 case ISD::ROTR:
12220 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12221 "Unexpected custom legalisation");
12222 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12223 Subtarget.hasVendorXTHeadBb()) &&
12224 "Unexpected custom legalization");
12225 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12226 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12227 return;
12228 Results.push_back(customLegalizeToWOp(N, DAG));
12229 break;
12230 case ISD::CTTZ:
12231 case ISD::CTTZ_ZERO_UNDEF:
12232 case ISD::CTLZ:
12233 case ISD::CTLZ_ZERO_UNDEF: {
12234 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12235 "Unexpected custom legalisation");
12236
12237 SDValue NewOp0 =
12238 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12239 bool IsCTZ =
12240 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12241 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12242 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12243 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12244 return;
12245 }
12246 case ISD::SDIV:
12247 case ISD::UDIV:
12248 case ISD::UREM: {
12249 MVT VT = N->getSimpleValueType(0);
12250 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12251 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12252 "Unexpected custom legalisation");
12253 // Don't promote division/remainder by a constant since we should expand
12254 // those to a multiply by a magic constant.
12255 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12256 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12257 !isIntDivCheap(N->getValueType(0), Attr))
12258 return;
12259
12260 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12261 // the upper 32 bits. For other types we need to sign or zero extend
12262 // based on the opcode.
12263 unsigned ExtOpc = ISD::ANY_EXTEND;
12264 if (VT != MVT::i32)
12265 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12266 : ISD::ZERO_EXTEND;
12267
12268 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12269 break;
12270 }
12271 case ISD::SADDO: {
12272 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12273 "Unexpected custom legalisation");
12274
12275 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12276 // use the default legalization.
12277 if (!isa<ConstantSDNode>(N->getOperand(1)))
12278 return;
12279
12280 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12281 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12282 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12283 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12284 DAG.getValueType(MVT::i32));
12285
12286 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12287
12288 // For an addition, the result should be less than one of the operands (LHS)
12289 // if and only if the other operand (RHS) is negative, otherwise there will
12290 // be overflow.
12291 // For a subtraction, the result should be less than one of the operands
12292 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12293 // otherwise there will be overflow.
12294 EVT OType = N->getValueType(1);
12295 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12296 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12297
12298 SDValue Overflow =
12299 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12300 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12301 Results.push_back(Overflow);
12302 return;
12303 }
12304 case ISD::UADDO:
12305 case ISD::USUBO: {
12306 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12307 "Unexpected custom legalisation");
12308 bool IsAdd = N->getOpcode() == ISD::UADDO;
12309 // Create an ADDW or SUBW.
12310 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12311 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12312 SDValue Res =
12313 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12314 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12315 DAG.getValueType(MVT::i32));
12316
12317 SDValue Overflow;
12318 if (IsAdd && isOneConstant(RHS)) {
12319 // Special case uaddo X, 1 overflowed if the addition result is 0.
12320 // The general case (X + C) < C is not necessarily beneficial. Although we
12321 // reduce the live range of X, we may introduce the materialization of
12322 // constant C, especially when the setcc result is used by a branch. We have
12323 // no compare-with-constant-and-branch instructions.
12324 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12325 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12326 } else if (IsAdd && isAllOnesConstant(RHS)) {
12327 // Special case uaddo X, -1 overflowed if X != 0.
12328 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12329 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12330 } else {
12331 // Sign extend the LHS and perform an unsigned compare with the ADDW
12332 // result. Since the inputs are sign extended from i32, this is equivalent
12333 // to comparing the lower 32 bits.
12334 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12335 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12336 IsAdd ? ISD::SETULT : ISD::SETUGT);
12337 }
12338
12339 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12340 Results.push_back(Overflow);
12341 return;
12342 }
12343 case ISD::UADDSAT:
12344 case ISD::USUBSAT: {
12345 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12346 "Unexpected custom legalisation");
12347 if (Subtarget.hasStdExtZbb()) {
12348 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12349 // sign extend allows overflow of the lower 32 bits to be detected on
12350 // the promoted size.
12351 SDValue LHS =
12352 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12353 SDValue RHS =
12354 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12355 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12356 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12357 return;
12358 }
12359
12360 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12361 // promotion for UADDO/USUBO.
12362 Results.push_back(expandAddSubSat(N, DAG));
12363 return;
12364 }
12365 case ISD::SADDSAT:
12366 case ISD::SSUBSAT: {
12367 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12368 "Unexpected custom legalisation");
12369 Results.push_back(expandAddSubSat(N, DAG));
12370 return;
12371 }
12372 case ISD::ABS: {
12373 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12374 "Unexpected custom legalisation");
12375
12376 if (Subtarget.hasStdExtZbb()) {
12377 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12378 // This allows us to remember that the result is sign extended. Expanding
12379 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12380 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12381 N->getOperand(0));
12382 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12383 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12384 return;
12385 }
12386
12387 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12388 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12389
12390 // Freeze the source so we can increase its use count.
12391 Src = DAG.getFreeze(Src);
12392
12393 // Copy sign bit to all bits using the sraiw pattern.
12394 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12395 DAG.getValueType(MVT::i32));
12396 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12397 DAG.getConstant(31, DL, MVT::i64));
12398
12399 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12400 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12401
12402 // NOTE: The result is only required to be anyextended, but sext is
12403 // consistent with type legalization of sub.
12404 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12405 DAG.getValueType(MVT::i32));
12406 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12407 return;
12408 }
12409 case ISD::BITCAST: {
12410 EVT VT = N->getValueType(0);
12411 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12412 SDValue Op0 = N->getOperand(0);
12413 EVT Op0VT = Op0.getValueType();
12414 MVT XLenVT = Subtarget.getXLenVT();
12415 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12416 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12417 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12418 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12419 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12420 Subtarget.hasStdExtZfbfmin()) {
12421 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12422 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12423 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12424 Subtarget.hasStdExtFOrZfinx()) {
12425 SDValue FPConv =
12426 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12427 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12428 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12429 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12430 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12431 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12432 NewReg.getValue(0), NewReg.getValue(1));
12433 Results.push_back(RetReg);
12434 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12435 isTypeLegal(Op0VT)) {
12436 // Custom-legalize bitcasts from fixed-length vector types to illegal
12437 // scalar types in order to improve codegen. Bitcast the vector to a
12438 // one-element vector type whose element type is the same as the result
12439 // type, and extract the first element.
12440 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12441 if (isTypeLegal(BVT)) {
12442 SDValue BVec = DAG.getBitcast(BVT, Op0);
12443 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12444 DAG.getVectorIdxConstant(0, DL)));
12445 }
12446 }
12447 break;
12448 }
12449 case RISCVISD::BREV8: {
12450 MVT VT = N->getSimpleValueType(0);
12451 MVT XLenVT = Subtarget.getXLenVT();
12452 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12453 "Unexpected custom legalisation");
12454 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12455 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12456 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12457 // ReplaceNodeResults requires we maintain the same type for the return
12458 // value.
12459 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12460 break;
12461 }
12462 case ISD::EXTRACT_VECTOR_ELT: {
12463 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12464 // type is illegal (currently only vXi64 RV32).
12465 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12466 // transferred to the destination register. We issue two of these from the
12467 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12468 // first element.
12469 SDValue Vec = N->getOperand(0);
12470 SDValue Idx = N->getOperand(1);
12471
12472 // The vector type hasn't been legalized yet so we can't issue target
12473 // specific nodes if it needs legalization.
12474 // FIXME: We would manually legalize if it's important.
12475 if (!isTypeLegal(Vec.getValueType()))
12476 return;
12477
12478 MVT VecVT = Vec.getSimpleValueType();
12479
12480 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12481 VecVT.getVectorElementType() == MVT::i64 &&
12482 "Unexpected EXTRACT_VECTOR_ELT legalization");
12483
12484 // If this is a fixed vector, we need to convert it to a scalable vector.
12485 MVT ContainerVT = VecVT;
12486 if (VecVT.isFixedLengthVector()) {
12487 ContainerVT = getContainerForFixedLengthVector(VecVT);
12488 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12489 }
12490
12491 MVT XLenVT = Subtarget.getXLenVT();
12492
12493 // Use a VL of 1 to avoid processing more elements than we need.
12494 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12495
12496 // Unless the index is known to be 0, we must slide the vector down to get
12497 // the desired element into index 0.
12498 if (!isNullConstant(Idx)) {
12499 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12500 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12501 }
12502
12503 // Extract the lower XLEN bits of the correct vector element.
12504 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12505
12506 // To extract the upper XLEN bits of the vector element, shift the first
12507 // element right by 32 bits and re-extract the lower XLEN bits.
12508 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12509 DAG.getUNDEF(ContainerVT),
12510 DAG.getConstant(32, DL, XLenVT), VL);
12511 SDValue LShr32 =
12512 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12513 DAG.getUNDEF(ContainerVT), Mask, VL);
12514
12515 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12516
12517 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12518 break;
12519 }
12520 case ISD::INTRINSIC_WO_CHAIN: {
12521 unsigned IntNo = N->getConstantOperandVal(0);
12522 switch (IntNo) {
12523 default:
12524 llvm_unreachable(
12525 "Don't know how to custom type legalize this intrinsic!");
12526 case Intrinsic::experimental_get_vector_length: {
12527 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12528 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12529 return;
12530 }
12531 case Intrinsic::experimental_cttz_elts: {
12532 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12533 Results.push_back(
12534 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12535 return;
12536 }
12537 case Intrinsic::riscv_orc_b:
12538 case Intrinsic::riscv_brev8:
12539 case Intrinsic::riscv_sha256sig0:
12540 case Intrinsic::riscv_sha256sig1:
12541 case Intrinsic::riscv_sha256sum0:
12542 case Intrinsic::riscv_sha256sum1:
12543 case Intrinsic::riscv_sm3p0:
12544 case Intrinsic::riscv_sm3p1: {
12545 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12546 return;
12547 unsigned Opc;
12548 switch (IntNo) {
12549 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12550 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12551 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12552 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12553 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12554 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12555 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12556 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12557 }
12558
12559 SDValue NewOp =
12560 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12561 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12562 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12563 return;
12564 }
12565 case Intrinsic::riscv_sm4ks:
12566 case Intrinsic::riscv_sm4ed: {
12567 unsigned Opc =
12568 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12569 SDValue NewOp0 =
12570 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12571 SDValue NewOp1 =
12572 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12573 SDValue Res =
12574 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12575 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12576 return;
12577 }
12578 case Intrinsic::riscv_mopr: {
12579 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12580 return;
12581 SDValue NewOp =
12582 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12583 SDValue Res = DAG.getNode(
12584 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12585 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12586 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12587 return;
12588 }
12589 case Intrinsic::riscv_moprr: {
12590 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12591 return;
12592 SDValue NewOp0 =
12593 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12594 SDValue NewOp1 =
12595 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12596 SDValue Res = DAG.getNode(
12597 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12598 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12599 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12600 return;
12601 }
12602 case Intrinsic::riscv_clmul: {
12603 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12604 return;
12605
12606 SDValue NewOp0 =
12607 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12608 SDValue NewOp1 =
12609 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12610 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12611 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12612 return;
12613 }
12614 case Intrinsic::riscv_clmulh:
12615 case Intrinsic::riscv_clmulr: {
12616 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12617 return;
12618
12619 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12620 // to the full 128-bit clmul result of multiplying two xlen values.
12621 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12622 // upper 32 bits.
12623 //
12624 // The alternative is to mask the inputs to 32 bits and use clmul, but
12625 // that requires two shifts to mask each input without zext.w.
12626 // FIXME: If the inputs are known zero extended or could be freely
12627 // zero extended, the mask form would be better.
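// For example, a 32-bit clmulh(a, b) needs bits [63:32] of the 64-bit
// carry-less product of a and b. Shifting both inputs left by 32 makes the
// 64-bit clmulh of the shifted values produce that full product, so the final
// 32-bit right shift plus truncate leaves exactly bits [63:32].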
12628 SDValue NewOp0 =
12629 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12630 SDValue NewOp1 =
12631 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12632 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12633 DAG.getConstant(32, DL, MVT::i64));
12634 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12635 DAG.getConstant(32, DL, MVT::i64));
12636 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12637 : RISCVISD::CLMULR;
12638 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12639 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12640 DAG.getConstant(32, DL, MVT::i64));
12641 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12642 return;
12643 }
12644 case Intrinsic::riscv_vmv_x_s: {
12645 EVT VT = N->getValueType(0);
12646 MVT XLenVT = Subtarget.getXLenVT();
12647 if (VT.bitsLT(XLenVT)) {
12648 // Simple case just extract using vmv.x.s and truncate.
12649 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12650 Subtarget.getXLenVT(), N->getOperand(1));
12651 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12652 return;
12653 }
12654
12655 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12656 "Unexpected custom legalization");
12657
12658 // We need to do the move in two steps.
12659 SDValue Vec = N->getOperand(1);
12660 MVT VecVT = Vec.getSimpleValueType();
12661
12662 // First extract the lower XLEN bits of the element.
12663 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12664
12665 // To extract the upper XLEN bits of the vector element, shift the first
12666 // element right by 32 bits and re-extract the lower XLEN bits.
12667 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12668
12669 SDValue ThirtyTwoV =
12670 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12671 DAG.getConstant(32, DL, XLenVT), VL);
12672 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12673 DAG.getUNDEF(VecVT), Mask, VL);
12674 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12675
12676 Results.push_back(
12677 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12678 break;
12679 }
12680 }
12681 break;
12682 }
12683 case ISD::VECREDUCE_ADD:
12684 case ISD::VECREDUCE_AND:
12685 case ISD::VECREDUCE_OR:
12686 case ISD::VECREDUCE_XOR:
12687 case ISD::VECREDUCE_SMAX:
12688 case ISD::VECREDUCE_UMAX:
12689 case ISD::VECREDUCE_SMIN:
12690 case ISD::VECREDUCE_UMIN:
12691 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12692 Results.push_back(V);
12693 break;
12694 case ISD::VP_REDUCE_ADD:
12695 case ISD::VP_REDUCE_AND:
12696 case ISD::VP_REDUCE_OR:
12697 case ISD::VP_REDUCE_XOR:
12698 case ISD::VP_REDUCE_SMAX:
12699 case ISD::VP_REDUCE_UMAX:
12700 case ISD::VP_REDUCE_SMIN:
12701 case ISD::VP_REDUCE_UMIN:
12702 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12703 Results.push_back(V);
12704 break;
12705 case ISD::GET_ROUNDING: {
12706 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12707 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12708 Results.push_back(Res.getValue(0));
12709 Results.push_back(Res.getValue(1));
12710 break;
12711 }
12712 }
12713}
12714
12715/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12716/// which corresponds to it.
12717static unsigned getVecReduceOpcode(unsigned Opc) {
12718 switch (Opc) {
12719 default:
12720 llvm_unreachable("Unhandled binary to transform reduction");
12721 case ISD::ADD:
12722 return ISD::VECREDUCE_ADD;
12723 case ISD::UMAX:
12724 return ISD::VECREDUCE_UMAX;
12725 case ISD::SMAX:
12726 return ISD::VECREDUCE_SMAX;
12727 case ISD::UMIN:
12728 return ISD::VECREDUCE_UMIN;
12729 case ISD::SMIN:
12730 return ISD::VECREDUCE_SMIN;
12731 case ISD::AND:
12732 return ISD::VECREDUCE_AND;
12733 case ISD::OR:
12734 return ISD::VECREDUCE_OR;
12735 case ISD::XOR:
12736 return ISD::VECREDUCE_XOR;
12737 case ISD::FADD:
12738 // Note: This is the associative form of the generic reduction opcode.
12739 return ISD::VECREDUCE_FADD;
12740 }
12741}
12742
12743/// Perform two related transforms whose purpose is to incrementally recognize
12744/// an explode_vector followed by scalar reduction as a vector reduction node.
12745/// This exists to recover from a deficiency in SLP which can't handle
12746/// forests with multiple roots sharing common nodes. In some cases, one
12747/// of the trees will be vectorized, and the other will remain (unprofitably)
12748/// scalarized.
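/// For example, (add (extract_vector_elt v, 0), (extract_vector_elt v, 1))
/// becomes (vecreduce_add (extract_subvector v, 0)) over two elements, and
/// (add (vecreduce_add (extract_subvector v, 0)), (extract_vector_elt v, k)),
/// where the subvector has k elements, grows that reduction by one element.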
12749 static SDValue
12750 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12751 const RISCVSubtarget &Subtarget) {
12752
12753 // This transform needs to run before all integer types have been legalized
12754 // to i64 (so that the vector element type matches the add type), and while
12755 // it's safe to introduce odd sized vector types.
12756 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64))
12757 return SDValue();
12758
12759 // Without V, this transform isn't useful. We could form the (illegal)
12760 // operations and let them be scalarized again, but there's really no point.
12761 if (!Subtarget.hasVInstructions())
12762 return SDValue();
12763
12764 const SDLoc DL(N);
12765 const EVT VT = N->getValueType(0);
12766 const unsigned Opc = N->getOpcode();
12767
12768 // For FADD, we only handle the case with reassociation allowed. We
12769 // could handle strict reduction order, but at the moment, there's no
12770 // known reason to, and the complexity isn't worth it.
12771 // TODO: Handle fminnum and fmaxnum here
12772 if (!VT.isInteger() &&
12773 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12774 return SDValue();
12775
12776 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12777 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12778 "Inconsistent mappings");
12779 SDValue LHS = N->getOperand(0);
12780 SDValue RHS = N->getOperand(1);
12781
12782 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12783 return SDValue();
12784
12785 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12786 std::swap(LHS, RHS);
12787
12788 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12789 !isa<ConstantSDNode>(RHS.getOperand(1)))
12790 return SDValue();
12791
12792 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12793 SDValue SrcVec = RHS.getOperand(0);
12794 EVT SrcVecVT = SrcVec.getValueType();
12795 assert(SrcVecVT.getVectorElementType() == VT);
12796 if (SrcVecVT.isScalableVector())
12797 return SDValue();
12798
12799 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12800 return SDValue();
12801
12802 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12803 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12804 // root of our reduction tree. TODO: We could extend this to any two
12805 // adjacent aligned constant indices if desired.
12806 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12807 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12808 uint64_t LHSIdx =
12809 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12810 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12811 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12812 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12813 DAG.getVectorIdxConstant(0, DL));
12814 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12815 }
12816 }
12817
12818 // Match (binop (reduce (extract_subvector V, 0),
12819 // (extract_vector_elt V, sizeof(SubVec))))
12820 // into a reduction of one more element from the original vector V.
12821 if (LHS.getOpcode() != ReduceOpc)
12822 return SDValue();
12823
12824 SDValue ReduceVec = LHS.getOperand(0);
12825 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12826 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12827 isNullConstant(ReduceVec.getOperand(1)) &&
12828 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12829 // For illegal types (e.g. 3xi32), most will be combined again into a
12830 // wider (hopefully legal) type. If this is a terminal state, we are
12831 // relying on type legalization here to produce something reasonable
12832 // and this lowering quality could probably be improved. (TODO)
12833 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12834 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12835 DAG.getVectorIdxConstant(0, DL));
12836 auto Flags = ReduceVec->getFlags();
12837 Flags.intersectWith(N->getFlags());
12838 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12839 }
12840
12841 return SDValue();
12842}
12843
12844
12845// Try to fold (<bop> x, (reduction.<bop> vec, start))
12846 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12847 const RISCVSubtarget &Subtarget) {
12848 auto BinOpToRVVReduce = [](unsigned Opc) {
12849 switch (Opc) {
12850 default:
12851 llvm_unreachable("Unhandled binary to transform reduction");
12852 case ISD::ADD:
12853 return RISCVISD::VECREDUCE_ADD_VL;
12854 case ISD::UMAX:
12855 return RISCVISD::VECREDUCE_UMAX_VL;
12856 case ISD::SMAX:
12857 return RISCVISD::VECREDUCE_SMAX_VL;
12858 case ISD::UMIN:
12859 return RISCVISD::VECREDUCE_UMIN_VL;
12860 case ISD::SMIN:
12861 return RISCVISD::VECREDUCE_SMIN_VL;
12862 case ISD::AND:
12863 return RISCVISD::VECREDUCE_AND_VL;
12864 case ISD::OR:
12865 return RISCVISD::VECREDUCE_OR_VL;
12866 case ISD::XOR:
12867 return RISCVISD::VECREDUCE_XOR_VL;
12868 case ISD::FADD:
12869 return RISCVISD::VECREDUCE_FADD_VL;
12870 case ISD::FMAXNUM:
12871 return RISCVISD::VECREDUCE_FMAX_VL;
12872 case ISD::FMINNUM:
12873 return RISCVISD::VECREDUCE_FMIN_VL;
12874 }
12875 };
12876
12877 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12878 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12879 isNullConstant(V.getOperand(1)) &&
12880 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12881 };
12882
12883 unsigned Opc = N->getOpcode();
12884 unsigned ReduceIdx;
12885 if (IsReduction(N->getOperand(0), Opc))
12886 ReduceIdx = 0;
12887 else if (IsReduction(N->getOperand(1), Opc))
12888 ReduceIdx = 1;
12889 else
12890 return SDValue();
12891
12892 // Skip if FADD disallows reassociation but the combiner needs it.
12893 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12894 return SDValue();
12895
12896 SDValue Extract = N->getOperand(ReduceIdx);
12897 SDValue Reduce = Extract.getOperand(0);
12898 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12899 return SDValue();
12900
12901 SDValue ScalarV = Reduce.getOperand(2);
12902 EVT ScalarVT = ScalarV.getValueType();
12903 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12904 ScalarV.getOperand(0)->isUndef() &&
12905 isNullConstant(ScalarV.getOperand(2)))
12906 ScalarV = ScalarV.getOperand(1);
12907
12908 // Make sure that ScalarV is a splat with VL=1.
12909 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12910 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12911 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12912 return SDValue();
12913
12914 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12915 return SDValue();
12916
12917 // Check the scalar of ScalarV is neutral element
12918 // TODO: Deal with value other than neutral element.
12919 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12920 0))
12921 return SDValue();
12922
12923 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12924 // FIXME: We might be able to improve this if operand 0 is undef.
12925 if (!isNonZeroAVL(Reduce.getOperand(5)))
12926 return SDValue();
12927
12928 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12929
12930 SDLoc DL(N);
12931 SDValue NewScalarV =
12932 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12933 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12934
12935 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12936 if (ScalarVT != ScalarV.getValueType())
12937 NewScalarV =
12938 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12939 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12940
12941 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12942 NewScalarV, Reduce.getOperand(3),
12943 Reduce.getOperand(4), Reduce.getOperand(5)};
12944 SDValue NewReduce =
12945 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12946 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12947 Extract.getOperand(1));
12948}
12949
12950// Optimize (add (shl x, c0), (shl y, c1)) ->
12951// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
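// For example, with c0 = 2 and c1 = 4 (difference 2):
//   (add (shl x, 2), (shl y, 4)) -> (shl (sh2add y, x), 2)
// since (x << 2) + (y << 4) == ((x + (y << 2)) << 2).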
12952 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12953 const RISCVSubtarget &Subtarget) {
12954 // Perform this optimization only in the zba extension.
12955 if (!Subtarget.hasStdExtZba())
12956 return SDValue();
12957
12958 // Skip for vector types and larger types.
12959 EVT VT = N->getValueType(0);
12960 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12961 return SDValue();
12962
12963 // The two operand nodes must be SHL and have no other use.
12964 SDValue N0 = N->getOperand(0);
12965 SDValue N1 = N->getOperand(1);
12966 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12967 !N0->hasOneUse() || !N1->hasOneUse())
12968 return SDValue();
12969
12970 // Check c0 and c1.
12971 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12972 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12973 if (!N0C || !N1C)
12974 return SDValue();
12975 int64_t C0 = N0C->getSExtValue();
12976 int64_t C1 = N1C->getSExtValue();
12977 if (C0 <= 0 || C1 <= 0)
12978 return SDValue();
12979
12980 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12981 int64_t Bits = std::min(C0, C1);
12982 int64_t Diff = std::abs(C0 - C1);
12983 if (Diff != 1 && Diff != 2 && Diff != 3)
12984 return SDValue();
12985
12986 // Build nodes.
12987 SDLoc DL(N);
12988 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12989 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12990 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
12991 DAG.getConstant(Diff, DL, VT), NS);
12992 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
12993}
12994
12995// Combine a constant select operand into its use:
12996//
12997// (and (select cond, -1, c), x)
12998// -> (select cond, x, (and x, c)) [AllOnes=1]
12999// (or (select cond, 0, c), x)
13000// -> (select cond, x, (or x, c)) [AllOnes=0]
13001// (xor (select cond, 0, c), x)
13002// -> (select cond, x, (xor x, c)) [AllOnes=0]
13003// (add (select cond, 0, c), x)
13004// -> (select cond, x, (add x, c)) [AllOnes=0]
13005// (sub x, (select cond, 0, c))
13006// -> (select cond, x, (sub x, c)) [AllOnes=0]
13007 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13008 SelectionDAG &DAG, bool AllOnes,
13009 const RISCVSubtarget &Subtarget) {
13010 EVT VT = N->getValueType(0);
13011
13012 // Skip vectors.
13013 if (VT.isVector())
13014 return SDValue();
13015
13016 if (!Subtarget.hasConditionalMoveFusion()) {
13017 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13018 if ((!Subtarget.hasStdExtZicond() &&
13019 !Subtarget.hasVendorXVentanaCondOps()) ||
13020 N->getOpcode() != ISD::AND)
13021 return SDValue();
13022
13023 // Maybe harmful when the condition code has multiple uses.
13024 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13025 return SDValue();
13026
13027 // Maybe harmful when VT is wider than XLen.
13028 if (VT.getSizeInBits() > Subtarget.getXLen())
13029 return SDValue();
13030 }
13031
13032 if ((Slct.getOpcode() != ISD::SELECT &&
13033 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13034 !Slct.hasOneUse())
13035 return SDValue();
13036
13037 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13038 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13039 };
13040
13041 bool SwapSelectOps;
13042 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13043 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13044 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13045 SDValue NonConstantVal;
13046 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13047 SwapSelectOps = false;
13048 NonConstantVal = FalseVal;
13049 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13050 SwapSelectOps = true;
13051 NonConstantVal = TrueVal;
13052 } else
13053 return SDValue();
13054
13055 // Slct is now known to be the desired identity constant when CC is true.
13056 TrueVal = OtherOp;
13057 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13058 // Unless SwapSelectOps says the condition should be false.
13059 if (SwapSelectOps)
13060 std::swap(TrueVal, FalseVal);
13061
13062 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13063 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13064 {Slct.getOperand(0), Slct.getOperand(1),
13065 Slct.getOperand(2), TrueVal, FalseVal});
13066
13067 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13068 {Slct.getOperand(0), TrueVal, FalseVal});
13069}
13070
13071// Attempt combineSelectAndUse on each operand of a commutative operator N.
13072 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13073 bool AllOnes,
13074 const RISCVSubtarget &Subtarget) {
13075 SDValue N0 = N->getOperand(0);
13076 SDValue N1 = N->getOperand(1);
13077 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13078 return Result;
13079 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13080 return Result;
13081 return SDValue();
13082}
13083
13084// Transform (add (mul x, c0), c1) ->
13085// (add (mul (add x, c1/c0), c0), c1%c0).
13086// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13087// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13088// to an infinite loop in DAGCombine if transformed.
13089// Or transform (add (mul x, c0), c1) ->
13090// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13091// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13092// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13093// lead to an infinite loop in DAGCombine if transformed.
13094// Or transform (add (mul x, c0), c1) ->
13095// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13096// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13097// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13098// lead to an infinite loop in DAGCombine if transformed.
13099// Or transform (add (mul x, c0), c1) ->
13100// (mul (add x, c1/c0), c0).
13101// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
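// For example (values chosen for illustration): with c0 = 100 and c1 = 4097,
// c1 is not simm12, but c1/c0 = 40 and c1%c0 = 97 are, and c0*(c1/c0) = 4000
// is not, so (add (mul x, 100), 4097) -> (add (mul (add x, 40), 100), 97),
// because (x + 40) * 100 + 97 == 100*x + 4097.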
13102 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13103 const RISCVSubtarget &Subtarget) {
13104 // Skip for vector types and larger types.
13105 EVT VT = N->getValueType(0);
13106 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13107 return SDValue();
13108 // The first operand node must be a MUL and has no other use.
13109 SDValue N0 = N->getOperand(0);
13110 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13111 return SDValue();
13112 // Check if c0 and c1 match above conditions.
13113 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13114 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13115 if (!N0C || !N1C)
13116 return SDValue();
13117 // If N0C has multiple uses it's possible one of the cases in
13118 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13119 // in an infinite loop.
13120 if (!N0C->hasOneUse())
13121 return SDValue();
13122 int64_t C0 = N0C->getSExtValue();
13123 int64_t C1 = N1C->getSExtValue();
13124 int64_t CA, CB;
13125 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13126 return SDValue();
13127 // Search for proper CA (non-zero) and CB that both are simm12.
13128 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13129 !isInt<12>(C0 * (C1 / C0))) {
13130 CA = C1 / C0;
13131 CB = C1 % C0;
13132 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13133 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13134 CA = C1 / C0 + 1;
13135 CB = C1 % C0 - C0;
13136 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13137 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13138 CA = C1 / C0 - 1;
13139 CB = C1 % C0 + C0;
13140 } else
13141 return SDValue();
13142 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13143 SDLoc DL(N);
13144 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13145 DAG.getConstant(CA, DL, VT));
13146 SDValue New1 =
13147 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
13148 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
13149}
13150
13151// add (zext, zext) -> zext (add (zext, zext))
13152// sub (zext, zext) -> sext (sub (zext, zext))
13153// mul (zext, zext) -> zext (mul (zext, zext))
13154// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13155// udiv (zext, zext) -> zext (udiv (zext, zext))
13156// srem (zext, zext) -> zext (srem (zext, zext))
13157// urem (zext, zext) -> zext (urem (zext, zext))
13158//
13159 // where the sum of the extend widths matches, and the range of the bin op
13160// fits inside the width of the narrower bin op. (For profitability on rvv, we
13161// use a power of two for both inner and outer extend.)
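// For example, with i8 elements zero extended to i32 lanes,
//   (add (zext v to i32), (zext w to i32))
// becomes (zext (add (zext v to i16), (zext w to i16)) to i32), since the sum
// of two unsigned 8-bit lanes always fits in 16 bits.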
13162 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13163
13164 EVT VT = N->getValueType(0);
13165 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13166 return SDValue();
13167
13168 SDValue N0 = N->getOperand(0);
13169 SDValue N1 = N->getOperand(1);
13170 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13171 return SDValue();
13172 if (!N0.hasOneUse() || !N1.hasOneUse())
13173 return SDValue();
13174
13175 SDValue Src0 = N0.getOperand(0);
13176 SDValue Src1 = N1.getOperand(0);
13177 EVT SrcVT = Src0.getValueType();
13178 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13179 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13180 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13181 return SDValue();
13182
13183 LLVMContext &C = *DAG.getContext();
13184 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13185 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13186
13187 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13188 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13189
13190 // Src0 and Src1 are zero extended, so they're always positive if signed.
13191 //
13192 // sub can produce a negative from two positive operands, so it needs sign
13193 // extended. Other nodes produce a positive from two positive operands, so
13194 // zero extend instead.
13195 unsigned OuterExtend =
13196 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13197
13198 return DAG.getNode(
13199 OuterExtend, SDLoc(N), VT,
13200 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13201}
13202
13203// Try to turn (add (xor bool, 1) -1) into (neg bool).
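// For a 0/1 value b, (xor b, 1) is 1 - b, so adding -1 gives -b, which is
// exactly (sub 0, b).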
13204 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13205 SDValue N0 = N->getOperand(0);
13206 SDValue N1 = N->getOperand(1);
13207 EVT VT = N->getValueType(0);
13208 SDLoc DL(N);
13209
13210 // RHS should be -1.
13211 if (!isAllOnesConstant(N1))
13212 return SDValue();
13213
13214 // Look for (xor X, 1).
13215 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13216 return SDValue();
13217
13218 // First xor input should be 0 or 1.
13219 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13220 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13221 return SDValue();
13222
13223 // Emit a negate of the setcc.
13224 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13225 N0.getOperand(0));
13226}
13227
13228 static SDValue performADDCombine(SDNode *N,
13229 TargetLowering::DAGCombinerInfo &DCI,
13230 const RISCVSubtarget &Subtarget) {
13231 SelectionDAG &DAG = DCI.DAG;
13232 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13233 return V;
13234 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13235 return V;
13236 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13237 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13238 return V;
13239 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13240 return V;
13241 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13242 return V;
13243 if (SDValue V = combineBinOpOfZExt(N, DAG))
13244 return V;
13245
13246 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13247 // (select lhs, rhs, cc, x, (add x, y))
13248 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13249}
13250
13251// Try to turn a sub boolean RHS and constant LHS into an addi.
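// For example, (sub 8, (seteq x, y)) -> (add (setne x, y), 7): both forms give
// 7 when x == y and 8 otherwise, and 7 fits in a simm12 for the addi.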
13252 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13253 SDValue N0 = N->getOperand(0);
13254 SDValue N1 = N->getOperand(1);
13255 EVT VT = N->getValueType(0);
13256 SDLoc DL(N);
13257
13258 // Require a constant LHS.
13259 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13260 if (!N0C)
13261 return SDValue();
13262
13263 // All our optimizations involve subtracting 1 from the immediate and forming
13264 // an ADDI. Make sure the new immediate is valid for an ADDI.
13265 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13266 if (!ImmValMinus1.isSignedIntN(12))
13267 return SDValue();
13268
13269 SDValue NewLHS;
13270 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13271 // (sub constant, (setcc x, y, eq/neq)) ->
13272 // (add (setcc x, y, neq/eq), constant - 1)
13273 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13274 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13275 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13276 return SDValue();
13277 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13278 NewLHS =
13279 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13280 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13281 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13282 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13283 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13284 NewLHS = N1.getOperand(0);
13285 } else
13286 return SDValue();
13287
13288 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13289 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13290}
13291
13292 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13293 const RISCVSubtarget &Subtarget) {
13294 if (SDValue V = combineSubOfBoolean(N, DAG))
13295 return V;
13296
13297 EVT VT = N->getValueType(0);
13298 SDValue N0 = N->getOperand(0);
13299 SDValue N1 = N->getOperand(1);
13300 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13301 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13302 isNullConstant(N1.getOperand(1))) {
13303 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13304 if (CCVal == ISD::SETLT) {
13305 SDLoc DL(N);
13306 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13307 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13308 DAG.getConstant(ShAmt, DL, VT));
13309 }
13310 }
13311
13312 if (SDValue V = combineBinOpOfZExt(N, DAG))
13313 return V;
13314
13315 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13316 // (select lhs, rhs, cc, x, (sub x, y))
13317 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13318}
13319
13320// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13321// Legalizing setcc can introduce xors like this. Doing this transform reduces
13322// the number of xors and may allow the xor to fold into a branch condition.
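// For example, with 0/1 values x and y:
//   (and (xor x, 1), (xor y, 1)) -> (xor (or x, y), 1)
//   (or (xor x, 1), (xor y, 1))  -> (xor (and x, y), 1)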
13323 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13324 SDValue N0 = N->getOperand(0);
13325 SDValue N1 = N->getOperand(1);
13326 bool IsAnd = N->getOpcode() == ISD::AND;
13327
13328 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13329 return SDValue();
13330
13331 if (!N0.hasOneUse() || !N1.hasOneUse())
13332 return SDValue();
13333
13334 SDValue N01 = N0.getOperand(1);
13335 SDValue N11 = N1.getOperand(1);
13336
13337 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13338 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13339 // operation is And, allow one of the Xors to use -1.
13340 if (isOneConstant(N01)) {
13341 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13342 return SDValue();
13343 } else if (isOneConstant(N11)) {
13344 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13345 if (!(IsAnd && isAllOnesConstant(N01)))
13346 return SDValue();
13347 } else
13348 return SDValue();
13349
13350 EVT VT = N->getValueType(0);
13351
13352 SDValue N00 = N0.getOperand(0);
13353 SDValue N10 = N1.getOperand(0);
13354
13355 // The LHS of the xors needs to be 0/1.
13356 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13357 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13358 return SDValue();
13359
13360 // Invert the opcode and insert a new xor.
13361 SDLoc DL(N);
13362 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13363 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13364 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13365}
13366
13367 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13368 const RISCVSubtarget &Subtarget) {
13369 SDValue N0 = N->getOperand(0);
13370 EVT VT = N->getValueType(0);
13371
13372 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13373 // extending X. This is safe since we only need the LSB after the shift and
13374 // shift amounts larger than 31 would produce poison. If we wait until
13375 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13376 // to use a BEXT instruction.
13377 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13378 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13379 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13380 SDLoc DL(N0);
13381 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13382 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13383 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13384 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13385 }
13386
13387 return SDValue();
13388}
13389
13390 // Combines two comparison operations and a logic operation into one selection
13391 // operation (min, max) and one logic operation. Returns the newly constructed
13392 // node if the conditions for the optimization are satisfied.
13393 static SDValue performANDCombine(SDNode *N,
13394 TargetLowering::DAGCombinerInfo &DCI,
13395 const RISCVSubtarget &Subtarget) {
13396 SelectionDAG &DAG = DCI.DAG;
13397
13398 SDValue N0 = N->getOperand(0);
13399 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13400 // extending X. This is safe since we only need the LSB after the shift and
13401 // shift amounts larger than 31 would produce poison. If we wait until
13402 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13403 // to use a BEXT instruction.
13404 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13405 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13406 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13407 N0.hasOneUse()) {
13408 SDLoc DL(N);
13409 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13410 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13411 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13412 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13413 DAG.getConstant(1, DL, MVT::i64));
13414 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13415 }
13416
13417 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13418 return V;
13419 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13420 return V;
13421
13422 if (DCI.isAfterLegalizeDAG())
13423 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13424 return V;
13425
13426 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13427 // (select lhs, rhs, cc, x, (and x, y))
13428 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13429}
13430
13431// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13432// FIXME: Generalize to other binary operators with same operand.
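// For example, (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1), hoisting the common
// "xor with 1" out of both arms of the select idiom.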
13433 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13434 SelectionDAG &DAG) {
13435 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13436
13437 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13438 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13439 !N0.hasOneUse() || !N1.hasOneUse())
13440 return SDValue();
13441
13442 // Should have the same condition.
13443 SDValue Cond = N0.getOperand(1);
13444 if (Cond != N1.getOperand(1))
13445 return SDValue();
13446
13447 SDValue TrueV = N0.getOperand(0);
13448 SDValue FalseV = N1.getOperand(0);
13449
13450 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13451 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13452 !isOneConstant(TrueV.getOperand(1)) ||
13453 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13454 return SDValue();
13455
13456 EVT VT = N->getValueType(0);
13457 SDLoc DL(N);
13458
13459 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13460 Cond);
13461 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13462 Cond);
13463 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13464 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13465}
13466
13467 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13468 const RISCVSubtarget &Subtarget) {
13469 SelectionDAG &DAG = DCI.DAG;
13470
13471 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13472 return V;
13473 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13474 return V;
13475
13476 if (DCI.isAfterLegalizeDAG())
13477 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13478 return V;
13479
13480 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13481 // We may be able to pull a common operation out of the true and false value.
13482 SDValue N0 = N->getOperand(0);
13483 SDValue N1 = N->getOperand(1);
13484 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13485 return V;
13486 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13487 return V;
13488
13489 // fold (or (select cond, 0, y), x) ->
13490 // (select cond, x, (or x, y))
13491 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13492}
13493
13494 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13495 const RISCVSubtarget &Subtarget) {
13496 SDValue N0 = N->getOperand(0);
13497 SDValue N1 = N->getOperand(1);
13498
13499 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13500 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13501 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13502 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13503 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13504 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13505 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13506 SDLoc DL(N);
13507 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13508 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13509 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13510 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13511 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13512 }
13513
13514 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13515 // NOTE: Assumes ROL being legal means ROLW is legal.
13516 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13517 if (N0.getOpcode() == RISCVISD::SLLW &&
13518 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13519 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13520 SDLoc DL(N);
13521 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13522 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13523 }
13524
13525 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13526 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13527 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13528 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13529 if (ConstN00 && CC == ISD::SETLT) {
13530 EVT VT = N0.getValueType();
13531 SDLoc DL(N0);
13532 const APInt &Imm = ConstN00->getAPIntValue();
13533 if ((Imm + 1).isSignedIntN(12))
13534 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13535 DAG.getConstant(Imm + 1, DL, VT), CC);
13536 }
13537 }
13538
13539 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13540 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13541 // would have been promoted to i32, but the setcc would have i64 result.
13542 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13543 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13544 SDValue N00 = N0.getOperand(0);
13545 SDLoc DL(N);
13546 SDValue LHS = N00.getOperand(0);
13547 SDValue RHS = N00.getOperand(1);
13548 SDValue CC = N00.getOperand(2);
13549 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13550 LHS.getValueType());
13551 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13552 LHS, RHS, NotCC);
13553 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13554 }
13555
13556 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13557 return V;
13558 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13559 return V;
13560
13561 // fold (xor (select cond, 0, y), x) ->
13562 // (select cond, x, (xor x, y))
13563 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13564}
13565
13566// Try to expand a scalar multiply to a faster sequence.
13567 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13568 TargetLowering::DAGCombinerInfo &DCI,
13569 const RISCVSubtarget &Subtarget) {
13570
13571 EVT VT = N->getValueType(0);
13572
13573 // LI + MUL is usually smaller than the alternative sequence.
13574 if (DAG.getMachineFunction().getFunction().hasMinSize())
13575 return SDValue();
13576
13577 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13578 return SDValue();
13579
13580 if (VT != Subtarget.getXLenVT())
13581 return SDValue();
13582
13583 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13584 return SDValue();
13585
13586 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13587 if (!CNode)
13588 return SDValue();
13589 uint64_t MulAmt = CNode->getZExtValue();
13590
13591 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13592 // We're adding additional uses of X here, and in principle, we should be freezing
13593 // X before doing so. However, adding freeze here causes real regressions, and no
13594 // other target properly freezes X in these cases either.
13595 SDValue X = N->getOperand(0);
13596
13597 for (uint64_t Divisor : {3, 5, 9}) {
13598 if (MulAmt % Divisor != 0)
13599 continue;
13600 uint64_t MulAmt2 = MulAmt / Divisor;
13601 // 3/5/9 * 2^N -> shl (shXadd X, X), N
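// For example, MulAmt = 40 = 5 * 8: Divisor = 5 and MulAmt2 = 8, so
// (mul X, 40) -> (shl (sh2add X, X), 3), i.e. (X + 4*X) << 3 == 40*X.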
13602 if (isPowerOf2_64(MulAmt2)) {
13603 SDLoc DL(N);
13604 SDValue X = N->getOperand(0);
13605 // Put the shift first if we can fold a zext into the
13606 // shift forming a slli.uw.
13607 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13608 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13609 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13610 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13611 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13612 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), Shl);
13613 }
13614 // Otherwise, put the shl second so that it can fold with following
13615 // instructions (e.g. sext or add).
13616 SDValue Mul359 =
13617 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13618 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13619 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13620 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13621 }
13622
13623 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13624 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13625 SDLoc DL(N);
13626 SDValue Mul359 =
13627 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13628 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13629 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13630 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13631 Mul359);
13632 }
13633 }
13634
13635 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
13636 // shXadd. First check if this is a sum of two powers of 2 because that's
13637 // easy. Then count how many zeros are up to the first bit.
13638 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13639 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13640 if (ScaleShift >= 1 && ScaleShift < 4) {
13641 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13642 SDLoc DL(N);
13643 SDValue Shift1 =
13644 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13645 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13646 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13647 }
13648 }
13649
13650 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13651 // This is the two instruction form, there are also three instruction
13652 // variants we could implement. e.g.
13653 // (2^(1,2,3) * 3,5,9 + 1) << C2
13654 // 2^(C1>3) * 3,5,9 +/- 1
13655 for (uint64_t Divisor : {3, 5, 9}) {
13656 uint64_t C = MulAmt - 1;
13657 if (C <= Divisor)
13658 continue;
13659 unsigned TZ = llvm::countr_zero(C);
13660 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13661 SDLoc DL(N);
13662 SDValue Mul359 =
13663 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13664 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13665 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13666 DAG.getConstant(TZ, DL, VT), X);
13667 }
13668 }
13669
13670 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13671 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13672 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13673 if (ScaleShift >= 1 && ScaleShift < 4) {
13674 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13675 SDLoc DL(N);
13676 SDValue Shift1 =
13677 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13678 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13679 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13680 DAG.getConstant(ScaleShift, DL, VT), X));
13681 }
13682 }
13683
13684 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13685 for (uint64_t Offset : {3, 5, 9}) {
13686 if (isPowerOf2_64(MulAmt + Offset)) {
13687 SDLoc DL(N);
13688 SDValue Shift1 =
13689 DAG.getNode(ISD::SHL, DL, VT, X,
13690 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13691 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13692 DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13693 X);
13694 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13695 }
13696 }
13697
13698 return SDValue();
13699}
13700
13701
13702 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13703 TargetLowering::DAGCombinerInfo &DCI,
13704 const RISCVSubtarget &Subtarget) {
13705 EVT VT = N->getValueType(0);
13706 if (!VT.isVector())
13707 return expandMul(N, DAG, DCI, Subtarget);
13708
13709 SDLoc DL(N);
13710 SDValue N0 = N->getOperand(0);
13711 SDValue N1 = N->getOperand(1);
13712 SDValue MulOper;
13713 unsigned AddSubOpc;
13714
13715 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13716 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13717 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13718 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13719 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13720 AddSubOpc = V->getOpcode();
13721 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13722 SDValue Opnd = V->getOperand(1);
13723 MulOper = V->getOperand(0);
13724 if (AddSubOpc == ISD::SUB)
13725 std::swap(Opnd, MulOper);
13726 if (isOneOrOneSplat(Opnd))
13727 return true;
13728 }
13729 return false;
13730 };
13731
13732 if (IsAddSubWith1(N0)) {
13733 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13734 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13735 }
13736
13737 if (IsAddSubWith1(N1)) {
13738 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13739 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13740 }
13741
13742 if (SDValue V = combineBinOpOfZExt(N, DAG))
13743 return V;
13744
13745 return SDValue();
13746}
13747
13748/// According to the property that indexed load/store instructions zero-extend
13749 /// their indices, try to narrow the type of the index operand.
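/// For example, an index of (shl (zext v_i8 to v_i64), splat 2) uses at most
/// 8 + 2 = 10 significant bits, so it can be rebuilt as
/// (shl (zext v_i8 to v_i16), splat 2) and still address the same offsets.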
13750static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13751 if (isIndexTypeSigned(IndexType))
13752 return false;
13753
13754 if (!N->hasOneUse())
13755 return false;
13756
13757 EVT VT = N.getValueType();
13758 SDLoc DL(N);
13759
13760 // In general, what we're doing here is seeing if we can sink a truncate to
13761 // a smaller element type into the expression tree building our index.
13762 // TODO: We can generalize this and handle a bunch more cases if useful.
13763
13764 // Narrow a buildvector to the narrowest element type. This requires less
13765 // work and less register pressure at high LMUL, and creates smaller constants
13766 // which may be cheaper to materialize.
13767 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13768 KnownBits Known = DAG.computeKnownBits(N);
13769 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13770 LLVMContext &C = *DAG.getContext();
13771 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13772 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13773 N = DAG.getNode(ISD::TRUNCATE, DL,
13774 VT.changeVectorElementType(ResultVT), N);
13775 return true;
13776 }
13777 }
13778
13779 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13780 if (N.getOpcode() != ISD::SHL)
13781 return false;
13782
13783 SDValue N0 = N.getOperand(0);
13784 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13785 N0.getOpcode() != RISCVISD::VZEXT_VL)
13786 return false;
13787 if (!N0->hasOneUse())
13788 return false;
13789
13790 APInt ShAmt;
13791 SDValue N1 = N.getOperand(1);
13792 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13793 return false;
13794
13795 SDValue Src = N0.getOperand(0);
13796 EVT SrcVT = Src.getValueType();
13797 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13798 unsigned ShAmtV = ShAmt.getZExtValue();
13799 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13800 NewElen = std::max(NewElen, 8U);
13801
13802 // Skip if NewElen is not narrower than the original extended type.
13803 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13804 return false;
13805
13806 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13807 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13808
13809 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13810 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13811 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13812 return true;
13813}
13814
13815// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13816// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13817// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13818// can become a sext.w instead of a shift pair.
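// For example, (seteq (and X, 0xffffffff), 0xfffff000) becomes
// (seteq (sext_inreg X, i32), 0xfffffffffffff000); the sign-extended constant
// can be cheaper to materialize and the masking shift pair becomes a sext.w.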
13819 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13820 const RISCVSubtarget &Subtarget) {
13821 SDValue N0 = N->getOperand(0);
13822 SDValue N1 = N->getOperand(1);
13823 EVT VT = N->getValueType(0);
13824 EVT OpVT = N0.getValueType();
13825
13826 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13827 return SDValue();
13828
13829 // RHS needs to be a constant.
13830 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13831 if (!N1C)
13832 return SDValue();
13833
13834 // LHS needs to be (and X, 0xffffffff).
13835 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13836 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13837 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13838 return SDValue();
13839
13840 // Looking for an equality compare.
13841 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13842 if (!isIntEqualitySetCC(Cond))
13843 return SDValue();
13844
13845 // Don't do this if the sign bit is provably zero, it will be turned back into
13846 // an AND.
13847 APInt SignMask = APInt::getOneBitSet(64, 31);
13848 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13849 return SDValue();
13850
13851 const APInt &C1 = N1C->getAPIntValue();
13852
13853 SDLoc dl(N);
13854 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13855 // to be equal.
13856 if (C1.getActiveBits() > 32)
13857 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13858
13859 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13860 N0.getOperand(0), DAG.getValueType(MVT::i32));
13861 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13862 dl, OpVT), Cond);
13863}
13864
13865static SDValue
13866 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13867 const RISCVSubtarget &Subtarget) {
13868 SDValue Src = N->getOperand(0);
13869 EVT VT = N->getValueType(0);
13870
13871 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13872 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13873 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13874 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13875 Src.getOperand(0));
13876
13877 return SDValue();
13878}
13879
13880namespace {
13881// Forward declaration of the structure holding the necessary information to
13882// apply a combine.
13883struct CombineResult;
13884
13885enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13886/// Helper class for folding sign/zero extensions.
13887/// In particular, this class is used for the following combines:
13888/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13889/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13890/// mul | mul_vl -> vwmul(u) | vwmul_su
13891/// shl | shl_vl -> vwsll
13892/// fadd -> vfwadd | vfwadd_w
13893/// fsub -> vfwsub | vfwsub_w
13894/// fmul -> vfwmul
13895/// An object of this class represents an operand of the operation we want to
13896/// combine.
13897/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13898/// NodeExtensionHelper for `a` and one for `b`.
13899///
13900/// This class abstracts away how the extension is materialized and
13901/// how its number of users affect the combines.
13902///
13903/// In particular:
13904/// - VWADD_W is conceptually == add(op0, sext(op1))
13905/// - VWADDU_W == add(op0, zext(op1))
13906/// - VWSUB_W == sub(op0, sext(op1))
13907/// - VWSUBU_W == sub(op0, zext(op1))
13908/// - VFWADD_W == fadd(op0, fpext(op1))
13909/// - VFWSUB_W == fsub(op0, fpext(op1))
13910/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13911/// zext|sext(smaller_value).
13912struct NodeExtensionHelper {
13913 /// Records if this operand is like being zero extended.
13914 bool SupportsZExt;
13915 /// Records if this operand is like being sign extended.
13916 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13917 /// instance, a splat constant (e.g., 3), would support being both sign and
13918 /// zero extended.
13919 bool SupportsSExt;
13920 /// Records if this operand is like being floating-point extended.
13921 bool SupportsFPExt;
13922 /// This boolean captures whether we care if this operand would still be
13923 /// around after the folding happens.
13924 bool EnforceOneUse;
13925 /// Original value that this NodeExtensionHelper represents.
13926 SDValue OrigOperand;
13927
13928 /// Get the value feeding the extension or the value itself.
13929 /// E.g., for zext(a), this would return a.
13930 SDValue getSource() const {
13931 switch (OrigOperand.getOpcode()) {
13932 case ISD::ZERO_EXTEND:
13933 case ISD::SIGN_EXTEND:
13934 case RISCVISD::VSEXT_VL:
13935 case RISCVISD::VZEXT_VL:
13936 case RISCVISD::FP_EXTEND_VL:
13937 return OrigOperand.getOperand(0);
13938 default:
13939 return OrigOperand;
13940 }
13941 }
13942
13943 /// Check if this instance represents a splat.
13944 bool isSplat() const {
13945 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13946 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13947 }
13948
13949 /// Get the extended opcode.
13950 unsigned getExtOpc(ExtKind SupportsExt) const {
13951 switch (SupportsExt) {
13952 case ExtKind::SExt:
13953 return RISCVISD::VSEXT_VL;
13954 case ExtKind::ZExt:
13955 return RISCVISD::VZEXT_VL;
13956 case ExtKind::FPExt:
13957 return RISCVISD::FP_EXTEND_VL;
13958 }
13959 llvm_unreachable("Unknown ExtKind enum");
13960 }
13961
13962 /// Get or create a value that can feed \p Root with the given extension \p
13963 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13964 /// operand. \see ::getSource().
13965 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13966 const RISCVSubtarget &Subtarget,
13967 std::optional<ExtKind> SupportsExt) const {
13968 if (!SupportsExt.has_value())
13969 return OrigOperand;
13970
13971 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13972
13973 SDValue Source = getSource();
13974 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13975 if (Source.getValueType() == NarrowVT)
13976 return Source;
13977
13978 unsigned ExtOpc = getExtOpc(*SupportsExt);
13979
13980 // If we need an extension, we should be changing the type.
13981 SDLoc DL(OrigOperand);
13982 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13983 switch (OrigOperand.getOpcode()) {
13984 case ISD::ZERO_EXTEND:
13985 case ISD::SIGN_EXTEND:
13986 case RISCVISD::VSEXT_VL:
13987 case RISCVISD::VZEXT_VL:
13988 case RISCVISD::FP_EXTEND_VL:
13989 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13990 case ISD::SPLAT_VECTOR:
13991 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13992 case RISCVISD::VMV_V_X_VL:
13993 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13994 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13995 default:
13996 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13997 // and that operand should already have the right NarrowVT so no
13998 // extension should be required at this point.
13999 llvm_unreachable("Unsupported opcode");
14000 }
14001 }
14002
14003 /// Helper function to get the narrow type for \p Root.
14004 /// The narrow type is the type of \p Root where we divided the size of each
14005 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14006 /// \pre Both the narrow type and the original type should be legal.
14007 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14008 MVT VT = Root->getSimpleValueType(0);
14009
14010 // Determine the narrow size.
14011 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14012
14013 MVT EltVT = SupportsExt == ExtKind::FPExt
14014 ? MVT::getFloatingPointVT(NarrowSize)
14015 : MVT::getIntegerVT(NarrowSize);
14016
14017 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14018 "Trying to extend something we can't represent");
14019 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14020 return NarrowVT;
14021 }
14022
14023 /// Get the opcode to materialize:
14024 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14025 static unsigned getSExtOpcode(unsigned Opcode) {
14026 switch (Opcode) {
14027 case ISD::ADD:
14028 case RISCVISD::ADD_VL:
14029 case RISCVISD::VWADD_W_VL:
14030 case RISCVISD::VWADDU_W_VL:
14031 case ISD::OR:
14032 return RISCVISD::VWADD_VL;
14033 case ISD::SUB:
14034 case RISCVISD::SUB_VL:
14035 case RISCVISD::VWSUB_W_VL:
14036 case RISCVISD::VWSUBU_W_VL:
14037 return RISCVISD::VWSUB_VL;
14038 case ISD::MUL:
14039 case RISCVISD::MUL_VL:
14040 return RISCVISD::VWMUL_VL;
14041 default:
14042 llvm_unreachable("Unexpected opcode");
14043 }
14044 }
14045
14046 /// Get the opcode to materialize:
14047 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14048 static unsigned getZExtOpcode(unsigned Opcode) {
14049 switch (Opcode) {
14050 case ISD::ADD:
14051 case RISCVISD::ADD_VL:
14052 case RISCVISD::VWADD_W_VL:
14053 case RISCVISD::VWADDU_W_VL:
14054 case ISD::OR:
14055 return RISCVISD::VWADDU_VL;
14056 case ISD::SUB:
14057 case RISCVISD::SUB_VL:
14058 case RISCVISD::VWSUB_W_VL:
14059 case RISCVISD::VWSUBU_W_VL:
14060 return RISCVISD::VWSUBU_VL;
14061 case ISD::MUL:
14062 case RISCVISD::MUL_VL:
14063 return RISCVISD::VWMULU_VL;
14064 case ISD::SHL:
14065 case RISCVISD::SHL_VL:
14066 return RISCVISD::VWSLL_VL;
14067 default:
14068 llvm_unreachable("Unexpected opcode");
14069 }
14070 }
14071
14072 /// Get the opcode to materialize:
14073 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14074 static unsigned getFPExtOpcode(unsigned Opcode) {
14075 switch (Opcode) {
14076 case RISCVISD::FADD_VL:
14077 case RISCVISD::VFWADD_W_VL:
14078 return RISCVISD::VFWADD_VL;
14079 case RISCVISD::FSUB_VL:
14080 case RISCVISD::VFWSUB_W_VL:
14081 return RISCVISD::VFWSUB_VL;
14082 case RISCVISD::FMUL_VL:
14083 return RISCVISD::VFWMUL_VL;
14084 default:
14085 llvm_unreachable("Unexpected opcode");
14086 }
14087 }
14088
14089 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14090 /// newOpcode(a, b).
14091 static unsigned getSUOpcode(unsigned Opcode) {
14092 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14093 "SU is only supported for MUL");
14094 return RISCVISD::VWMULSU_VL;
14095 }
14096
14097 /// Get the opcode to materialize
14098 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14099 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14100 switch (Opcode) {
14101 case ISD::ADD:
14102 case RISCVISD::ADD_VL:
14103 case ISD::OR:
14104 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14105 : RISCVISD::VWADDU_W_VL;
14106 case ISD::SUB:
14107 case RISCVISD::SUB_VL:
14108 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14109 : RISCVISD::VWSUBU_W_VL;
14110 case RISCVISD::FADD_VL:
14111 return RISCVISD::VFWADD_W_VL;
14112 case RISCVISD::FSUB_VL:
14113 return RISCVISD::VFWSUB_W_VL;
14114 default:
14115 llvm_unreachable("Unexpected opcode");
14116 }
14117 }
14118
14119 using CombineToTry = std::function<std::optional<CombineResult>(
14120 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14121 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14122 const RISCVSubtarget &)>;
14123
14124 /// Check if this node needs to be fully folded or extended for all users.
14125 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14126
14127 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14128 const RISCVSubtarget &Subtarget) {
14129 unsigned Opc = OrigOperand.getOpcode();
14130 MVT VT = OrigOperand.getSimpleValueType();
14131
14132 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14133 "Unexpected Opcode");
14134
14135 // The passthru must be undef for tail agnostic.
14136 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14137 return;
14138
14139 // Get the scalar value.
14140 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14141 : OrigOperand.getOperand(1);
14142
14143 // See if we have enough sign bits or zero bits in the scalar to use a
14144 // widening opcode by splatting to smaller element size.
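 // For example, a splat of the constant 3 into nxv4i32 has enough sign and
 // zero bits to instead be treated as an extension of a splat into nxv4i16.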
14145 unsigned EltBits = VT.getScalarSizeInBits();
14146 unsigned ScalarBits = Op.getValueSizeInBits();
14147 // Make sure we're getting all element bits from the scalar register.
14148 // FIXME: Support implicit sign extension of vmv.v.x?
14149 if (ScalarBits < EltBits)
14150 return;
14151
14152 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14153 // If the narrow type cannot be expressed with a legal VMV,
14154 // this is not a valid candidate.
14155 if (NarrowSize < 8)
14156 return;
14157
14158 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14159 SupportsSExt = true;
14160
14161 if (DAG.MaskedValueIsZero(Op,
14162 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14163 SupportsZExt = true;
14164
14165 EnforceOneUse = false;
14166 }
14167
14168 /// Helper method to set the various fields of this struct based on the
14169 /// type of \p Root.
14170 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14171 const RISCVSubtarget &Subtarget) {
14172 SupportsZExt = false;
14173 SupportsSExt = false;
14174 SupportsFPExt = false;
14175 EnforceOneUse = true;
14176 unsigned Opc = OrigOperand.getOpcode();
14177 // For the nodes we handle below, we end up using their inputs directly: see
14178 // getSource(). However since they either don't have a passthru or we check
14179 // that their passthru is undef, we can safely ignore their mask and VL.
14180 switch (Opc) {
14181 case ISD::ZERO_EXTEND:
14182 case ISD::SIGN_EXTEND: {
14183 MVT VT = OrigOperand.getSimpleValueType();
14184 if (!VT.isVector())
14185 break;
14186
14187 SDValue NarrowElt = OrigOperand.getOperand(0);
14188 MVT NarrowVT = NarrowElt.getSimpleValueType();
14189 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14190 if (NarrowVT.getVectorElementType() == MVT::i1)
14191 break;
14192
14193 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14194 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14195 break;
14196 }
14197 case RISCVISD::VZEXT_VL:
14198 SupportsZExt = true;
14199 break;
14200 case RISCVISD::VSEXT_VL:
14201 SupportsSExt = true;
14202 break;
14203 case RISCVISD::FP_EXTEND_VL:
14204 SupportsFPExt = true;
14205 break;
14206 case ISD::SPLAT_VECTOR:
14207 case RISCVISD::VMV_V_X_VL:
14208 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14209 break;
14210 default:
14211 break;
14212 }
14213 }
14214
14215 /// Check if \p Root supports any extension folding combines.
14216 static bool isSupportedRoot(const SDNode *Root,
14217 const RISCVSubtarget &Subtarget) {
14218 switch (Root->getOpcode()) {
14219 case ISD::ADD:
14220 case ISD::SUB:
14221 case ISD::MUL: {
14222 return Root->getValueType(0).isScalableVector();
14223 }
14224 case ISD::OR: {
14225 return Root->getValueType(0).isScalableVector() &&
14226 Root->getFlags().hasDisjoint();
14227 }
14228 // Vector Widening Integer Add/Sub/Mul Instructions
14229 case RISCVISD::ADD_VL:
14230 case RISCVISD::MUL_VL:
14231 case RISCVISD::VWADD_W_VL:
14232 case RISCVISD::VWADDU_W_VL:
14233 case RISCVISD::SUB_VL:
14234 case RISCVISD::VWSUB_W_VL:
14235 case RISCVISD::VWSUBU_W_VL:
14236 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14237 case RISCVISD::FADD_VL:
14238 case RISCVISD::FSUB_VL:
14239 case RISCVISD::FMUL_VL:
14240 case RISCVISD::VFWADD_W_VL:
14241 case RISCVISD::VFWSUB_W_VL:
14242 return true;
14243 case ISD::SHL:
14244 return Root->getValueType(0).isScalableVector() &&
14245 Subtarget.hasStdExtZvbb();
14246 case RISCVISD::SHL_VL:
14247 return Subtarget.hasStdExtZvbb();
14248 default:
14249 return false;
14250 }
14251 }
14252
14253 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14254 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14255 const RISCVSubtarget &Subtarget) {
14256 assert(isSupportedRoot(Root, Subtarget) &&
14257 "Trying to build a helper with an "
14258 "unsupported root");
14259 assert(OperandIdx < 2 && "Requesting something other than LHS or RHS");
14261 OrigOperand = Root->getOperand(OperandIdx);
14262
14263 unsigned Opc = Root->getOpcode();
14264 switch (Opc) {
14265 // We consider
14266 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14267 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14268 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14269 case RISCVISD::VWADD_W_VL:
14270 case RISCVISD::VWADDU_W_VL:
14271 case RISCVISD::VWSUB_W_VL:
14272 case RISCVISD::VWSUBU_W_VL:
14273 case RISCVISD::VFWADD_W_VL:
14274 case RISCVISD::VFWSUB_W_VL:
14275 if (OperandIdx == 1) {
14276 SupportsZExt =
14277 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14278 SupportsSExt =
14279 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14280 SupportsFPExt =
14281 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14282 // There's no existing extension here, so we don't have to worry about
14283 // making sure it gets removed.
14284 EnforceOneUse = false;
14285 break;
14286 }
14287 [[fallthrough]];
14288 default:
14289 fillUpExtensionSupport(Root, DAG, Subtarget);
14290 break;
14291 }
14292 }
14293
14294 /// Helper function to get the Mask and VL from \p Root.
14295 static std::pair<SDValue, SDValue>
14296 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14297 const RISCVSubtarget &Subtarget) {
14298 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14299 switch (Root->getOpcode()) {
14300 case ISD::ADD:
14301 case ISD::SUB:
14302 case ISD::MUL:
14303 case ISD::OR:
14304 case ISD::SHL: {
14305 SDLoc DL(Root);
14306 MVT VT = Root->getSimpleValueType(0);
14307 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14308 }
14309 default:
14310 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14311 }
14312 }
14313
14314 /// Helper function to check if \p N is commutative with respect to the
14315 /// foldings that are supported by this class.
14316 static bool isCommutative(const SDNode *N) {
14317 switch (N->getOpcode()) {
14318 case ISD::ADD:
14319 case ISD::MUL:
14320 case ISD::OR:
14321 case RISCVISD::ADD_VL:
14322 case RISCVISD::MUL_VL:
14323 case RISCVISD::VWADD_W_VL:
14324 case RISCVISD::VWADDU_W_VL:
14325 case RISCVISD::FADD_VL:
14326 case RISCVISD::FMUL_VL:
14327 case RISCVISD::VFWADD_W_VL:
14328 return true;
14329 case ISD::SUB:
14330 case RISCVISD::SUB_VL:
14331 case RISCVISD::VWSUB_W_VL:
14332 case RISCVISD::VWSUBU_W_VL:
14333 case RISCVISD::FSUB_VL:
14334 case RISCVISD::VFWSUB_W_VL:
14335 case ISD::SHL:
14336 case RISCVISD::SHL_VL:
14337 return false;
14338 default:
14339 llvm_unreachable("Unexpected opcode");
14340 }
14341 }
14342
14343 /// Get a list of combine to try for folding extensions in \p Root.
14344 /// Note that each returned CombineToTry function doesn't actually modify
14345 /// anything. Instead, they produce an optional CombineResult that, if not None,
14346 /// needs to be materialized for the combine to be applied.
14347 /// \see CombineResult::materialize.
14348 /// If the related CombineToTry function returns std::nullopt, that means the
14349 /// combine didn't match.
14350 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14351};
14352
14353/// Helper structure that holds all the necessary information to materialize a
14354/// combine that does some extension folding.
14355struct CombineResult {
14356 /// Opcode to be generated when materializing the combine.
14357 unsigned TargetOpcode;
14358 // No value means no extension is needed.
14359 std::optional<ExtKind> LHSExt;
14360 std::optional<ExtKind> RHSExt;
14361 /// Root of the combine.
14362 SDNode *Root;
14363 /// LHS of the TargetOpcode.
14364 NodeExtensionHelper LHS;
14365 /// RHS of the TargetOpcode.
14366 NodeExtensionHelper RHS;
14367
14368 CombineResult(unsigned TargetOpcode, SDNode *Root,
14369 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14370 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14371 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14372 LHS(LHS), RHS(RHS) {}
14373
14374 /// Return a value that uses TargetOpcode and that can be used to replace
14375 /// Root.
14376 /// The actual replacement is *not* done in that method.
14377 SDValue materialize(SelectionDAG &DAG,
14378 const RISCVSubtarget &Subtarget) const {
14379 SDValue Mask, VL, Merge;
14380 std::tie(Mask, VL) =
14381 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14382 switch (Root->getOpcode()) {
14383 default:
14384 Merge = Root->getOperand(2);
14385 break;
14386 case ISD::ADD:
14387 case ISD::SUB:
14388 case ISD::MUL:
14389 case ISD::OR:
14390 case ISD::SHL:
14391 Merge = DAG.getUNDEF(Root->getValueType(0));
14392 break;
14393 }
14394 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14395 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14396 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14397 Merge, Mask, VL);
14398 }
14399};
14400
14401/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14402/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14403/// are zext) and LHS and RHS can be folded into Root.
14404/// AllowExtMask defines which form `ext` can take in this pattern.
14405///
14406/// \note If the pattern can match with both zext and sext, the returned
14407/// CombineResult will feature the zext result.
14408///
14409/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14410/// can be used to apply the pattern.
14411static std::optional<CombineResult>
14412canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14413 const NodeExtensionHelper &RHS,
14414 uint8_t AllowExtMask, SelectionDAG &DAG,
14415 const RISCVSubtarget &Subtarget) {
14416 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14417 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14418 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14419 /*RHSExt=*/{ExtKind::ZExt});
14420 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14421 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14422 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14423 /*RHSExt=*/{ExtKind::SExt});
14424 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14425 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14426 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14427 /*RHSExt=*/{ExtKind::FPExt});
14428 return std::nullopt;
14429}
14430
14431/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14432/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14433/// are zext) and LHS and RHS can be folded into Root.
14434///
14435/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14436/// can be used to apply the pattern.
14437static std::optional<CombineResult>
14438canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14439 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14440 const RISCVSubtarget &Subtarget) {
14441 return canFoldToVWWithSameExtensionImpl(
14442 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14443 Subtarget);
14444}
14445
14446/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14447///
14448/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14449/// can be used to apply the pattern.
14450static std::optional<CombineResult>
14451canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14452 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14453 const RISCVSubtarget &Subtarget) {
14454 if (RHS.SupportsFPExt)
14455 return CombineResult(
14456 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14457 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14458
14459 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14460 // sext/zext?
14461 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14462 // purposes.
14463 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14464 return CombineResult(
14465 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14466 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14467 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14468 return CombineResult(
14469 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14470 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14471 return std::nullopt;
14472}
14473
14474/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14475///
14476/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14477/// can be used to apply the pattern.
14478static std::optional<CombineResult>
14479canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14480 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14481 const RISCVSubtarget &Subtarget) {
14482 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14483 Subtarget);
14484}
14485
14486/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14487///
14488/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14489/// can be used to apply the pattern.
14490static std::optional<CombineResult>
14491canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14492 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14493 const RISCVSubtarget &Subtarget) {
14494 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14495 Subtarget);
14496}
14497
14498/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14499///
14500/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14501/// can be used to apply the pattern.
14502static std::optional<CombineResult>
14503canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14504 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14505 const RISCVSubtarget &Subtarget) {
14506 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14507 Subtarget);
14508}
14509
14510/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14511///
14512/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14513/// can be used to apply the pattern.
14514static std::optional<CombineResult>
14515canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14516 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14517 const RISCVSubtarget &Subtarget) {
14518
14519 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14520 return std::nullopt;
14521 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14522 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14523 /*RHSExt=*/{ExtKind::ZExt});
14524}
14525
14526 SmallVector<NodeExtensionHelper::CombineToTry>
14527 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14528 SmallVector<CombineToTry> Strategies;
14529 switch (Root->getOpcode()) {
14530 case ISD::ADD:
14531 case ISD::SUB:
14532 case ISD::OR:
14533 case RISCVISD::ADD_VL:
14534 case RISCVISD::SUB_VL:
14535 case RISCVISD::FADD_VL:
14536 case RISCVISD::FSUB_VL:
14537 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14538 Strategies.push_back(canFoldToVWWithSameExtension);
14539 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
14540 Strategies.push_back(canFoldToVW_W);
14541 break;
14542 case RISCVISD::FMUL_VL:
14543 Strategies.push_back(canFoldToVWWithSameExtension);
14544 break;
14545 case ISD::MUL:
14546 case RISCVISD::MUL_VL:
14547 // mul -> vwmul(u)
14548 Strategies.push_back(canFoldToVWWithSameExtension);
14549 // mul -> vwmulsu
14550 Strategies.push_back(canFoldToVW_SU);
14551 break;
14552 case ISD::SHL:
14553 case RISCVISD::SHL_VL:
14554 // shl -> vwsll
14555 Strategies.push_back(canFoldToVWWithZEXT);
14556 break;
14557 case RISCVISD::VWADD_W_VL:
14558 case RISCVISD::VWSUB_W_VL:
14559 // vwadd_w|vwsub_w -> vwadd|vwsub
14560 Strategies.push_back(canFoldToVWWithSEXT);
14561 break;
14562 case RISCVISD::VWADDU_W_VL:
14563 case RISCVISD::VWSUBU_W_VL:
14564 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14565 Strategies.push_back(canFoldToVWWithZEXT);
14566 break;
14567 case RISCVISD::VFWADD_W_VL:
14568 case RISCVISD::VFWSUB_W_VL:
14569 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14570 Strategies.push_back(canFoldToVWWithFPEXT);
14571 break;
14572 default:
14573 llvm_unreachable("Unexpected opcode");
14574 }
14575 return Strategies;
14576}
14577} // End anonymous namespace.
14578
14579/// Combine a binary operation to its equivalent VW or VW_W form.
14580/// The supported combines are:
14581/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14582/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14583/// mul | mul_vl -> vwmul(u) | vwmul_su
14584/// shl | shl_vl -> vwsll
14585/// fadd_vl -> vfwadd | vfwadd_w
14586/// fsub_vl -> vfwsub | vfwsub_w
14587/// fmul_vl -> vfwmul
14588/// vwadd_w(u) -> vwadd(u)
14589/// vwsub_w(u) -> vwsub(u)
14590/// vfwadd_w -> vfwadd
14591/// vfwsub_w -> vfwsub
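/// For example, when both operands of an ADD_VL come from sign extensions of
/// half-width values, the extensions are folded away:
///   (add_vl (vsext_vl a, m, vl), (vsext_vl b, m, vl), passthru, m, vl)
///     -> (vwadd_vl a, b, passthru, m, vl)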
14592static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14593 TargetLowering::DAGCombinerInfo &DCI,
14594 const RISCVSubtarget &Subtarget) {
14595 SelectionDAG &DAG = DCI.DAG;
14596 if (DCI.isBeforeLegalize())
14597 return SDValue();
14598
14599 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14600 return SDValue();
14601
14602 SmallVector<SDNode *> Worklist;
14603 SmallSet<SDNode *, 8> Inserted;
14604 Worklist.push_back(N);
14605 Inserted.insert(N);
14606 SmallVector<CombineResult> CombinesToApply;
14607
14608 while (!Worklist.empty()) {
14609 SDNode *Root = Worklist.pop_back_val();
14610 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14611 return SDValue();
14612
14613 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14614 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14615 auto AppendUsersIfNeeded = [&Worklist,
14616 &Inserted](const NodeExtensionHelper &Op) {
14617 if (Op.needToPromoteOtherUsers()) {
14618 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14619 if (Inserted.insert(TheUse).second)
14620 Worklist.push_back(TheUse);
14621 }
14622 }
14623 };
14624
14625 // Control the compile time by limiting the number of node we look at in
14626 // total.
14627 if (Inserted.size() > ExtensionMaxWebSize)
14628 return SDValue();
14629
14630 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14631 NodeExtensionHelper::getSupportedFoldings(N);
14632
14633 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14634 bool Matched = false;
14635 for (int Attempt = 0;
14636 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14637 ++Attempt) {
14638
14639 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14640 FoldingStrategies) {
14641 std::optional<CombineResult> Res =
14642 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14643 if (Res) {
14644 Matched = true;
14645 CombinesToApply.push_back(*Res);
14646 // All the inputs that are extended need to be folded, otherwise
14647 // we would be leaving the old input (since it may still be used),
14648 // and the new one.
14649 if (Res->LHSExt.has_value())
14650 AppendUsersIfNeeded(LHS);
14651 if (Res->RHSExt.has_value())
14652 AppendUsersIfNeeded(RHS);
14653 break;
14654 }
14655 }
14656 std::swap(LHS, RHS);
14657 }
14658 // Right now we take an all-or-nothing approach.
14659 if (!Matched)
14660 return SDValue();
14661 }
14662 // Store the value for the replacement of the input node separately.
14663 SDValue InputRootReplacement;
14664 // We do the RAUW after we materialize all the combines, because some replaced
14665 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14666 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14667 // yet-to-be-visited CombinesToApply roots.
14668 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14669 ValuesToReplace.reserve(CombinesToApply.size());
14670 for (CombineResult Res : CombinesToApply) {
14671 SDValue NewValue = Res.materialize(DAG, Subtarget);
14672 if (!InputRootReplacement) {
14673 assert(Res.Root == N &&
14674 "First element is expected to be the current node");
14675 InputRootReplacement = NewValue;
14676 } else {
14677 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14678 }
14679 }
14680 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14681 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14682 DCI.AddToWorklist(OldNewValues.second.getNode());
14683 }
14684 return InputRootReplacement;
14685}
14686
14687// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14688// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14689// y will be the Passthru and cond will be the Mask.
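// This is sound because in the resulting masked widening op the inactive
// lanes take the passthru value y, matching y plus the zero that the vmerge
// would have selected.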
14690static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14691 unsigned Opc = N->getOpcode();
14692 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14693 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14694
14695 SDValue Y = N->getOperand(0);
14696 SDValue MergeOp = N->getOperand(1);
14697 unsigned MergeOpc = MergeOp.getOpcode();
14698
14699 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14700 return SDValue();
14701
14702 SDValue X = MergeOp->getOperand(1);
14703
14704 if (!MergeOp.hasOneUse())
14705 return SDValue();
14706
14707 // Passthru should be undef
14708 SDValue Passthru = N->getOperand(2);
14709 if (!Passthru.isUndef())
14710 return SDValue();
14711
14712 // Mask should be all ones
14713 SDValue Mask = N->getOperand(3);
14714 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14715 return SDValue();
14716
14717 // False value of MergeOp should be all zeros
14718 SDValue Z = MergeOp->getOperand(2);
14719
14720 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14721 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14722 Z = Z.getOperand(1);
14723
14724 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14725 return SDValue();
14726
14727 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14728 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14729 N->getFlags());
14730}
14731
14732static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14733 TargetLowering::DAGCombinerInfo &DCI,
14734 const RISCVSubtarget &Subtarget) {
14735 [[maybe_unused]] unsigned Opc = N->getOpcode();
14736 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14737 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14738
14739 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14740 return V;
14741
14742 return combineVWADDSUBWSelect(N, DCI.DAG);
14743}
14744
14745// Helper function for performMemPairCombine.
14746// Try to combine the memory loads/stores LSNode1 and LSNode2
14747// into a single memory pair operation.
14748static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14749 LSBaseSDNode *LSNode2, SDValue BasePtr,
14750 uint64_t Imm) {
14751 SmallPtrSet<const SDNode *, 32> Visited;
14752 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14753
14754 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14755 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14756 return SDValue();
14757
14758 MachineFunction &MF = DAG.getMachineFunction();
14759 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14760
14761 // The new operation has twice the width.
14762 MVT XLenVT = Subtarget.getXLenVT();
14763 EVT MemVT = LSNode1->getMemoryVT();
14764 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14765 MachineMemOperand *MMO = LSNode1->getMemOperand();
14766 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14767 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14768
14769 if (LSNode1->getOpcode() == ISD::LOAD) {
14770 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14771 unsigned Opcode;
14772 if (MemVT == MVT::i32)
14773 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14774 else
14775 Opcode = RISCVISD::TH_LDD;
14776
14777 SDValue Res = DAG.getMemIntrinsicNode(
14778 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14779 {LSNode1->getChain(), BasePtr,
14780 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14781 NewMemVT, NewMMO);
14782
14783 SDValue Node1 =
14784 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14785 SDValue Node2 =
14786 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14787
14788 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14789 return Node1;
14790 } else {
14791 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14792
14793 SDValue Res = DAG.getMemIntrinsicNode(
14794 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14795 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14796 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14797 NewMemVT, NewMMO);
14798
14799 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14800 return Res;
14801 }
14802}
14803
14804// Try to combine two adjacent loads/stores to a single pair instruction from
14805// the XTHeadMemPair vendor extension.
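// For example, two i64 loads from Base+16 and Base+24 satisfy the offset
// constraints checked below (16 is a 2-bit value shifted left by 4) and can
// be merged into a single TH_LDD node.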
14806static SDValue performMemPairCombine(SDNode *N,
14807 TargetLowering::DAGCombinerInfo &DCI) {
14808 SelectionDAG &DAG = DCI.DAG;
14809 MachineFunction &MF = DAG.getMachineFunction();
14810 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14811
14812 // Target does not support load/store pair.
14813 if (!Subtarget.hasVendorXTHeadMemPair())
14814 return SDValue();
14815
14816 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14817 EVT MemVT = LSNode1->getMemoryVT();
14818 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14819
14820 // No volatile, indexed or atomic loads/stores.
14821 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14822 return SDValue();
14823
14824 // Function to get a base + constant representation from a memory value.
14825 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14826 if (Ptr->getOpcode() == ISD::ADD)
14827 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14828 return {Ptr->getOperand(0), C1->getZExtValue()};
14829 return {Ptr, 0};
14830 };
14831
14832 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14833
14834 SDValue Chain = N->getOperand(0);
14835 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14836 UI != UE; ++UI) {
14837 SDUse &Use = UI.getUse();
14838 if (Use.getUser() != N && Use.getResNo() == 0 &&
14839 Use.getUser()->getOpcode() == N->getOpcode()) {
14840 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14841
14842 // No volatile, indexed or atomic loads/stores.
14843 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14844 continue;
14845
14846 // Check if LSNode1 and LSNode2 have the same type and extension.
14847 if (LSNode1->getOpcode() == ISD::LOAD)
14848 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14849 cast<LoadSDNode>(LSNode1)->getExtensionType())
14850 continue;
14851
14852 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14853 continue;
14854
14855 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14856
14857 // Check if the base pointer is the same for both instructions.
14858 if (Base1 != Base2)
14859 continue;
14860
14861 // Check if the offsets match the XTHeadMemPair encoding constraints.
14862 bool Valid = false;
14863 if (MemVT == MVT::i32) {
14864 // Check for adjacent i32 values and a 2-bit index.
14865 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14866 Valid = true;
14867 } else if (MemVT == MVT::i64) {
14868 // Check for adjacent i64 values and a 2-bit index.
14869 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14870 Valid = true;
14871 }
14872
14873 if (!Valid)
14874 continue;
14875
14876 // Try to combine.
14877 if (SDValue Res =
14878 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14879 return Res;
14880 }
14881 }
14882
14883 return SDValue();
14884}
14885
14886// Fold
14887// (fp_to_int (froundeven X)) -> fcvt X, rne
14888// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14889// (fp_to_int (ffloor X)) -> fcvt X, rdn
14890// (fp_to_int (fceil X)) -> fcvt X, rup
14891// (fp_to_int (fround X)) -> fcvt X, rmm
14892// (fp_to_int (frint X)) -> fcvt X
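// For example, on RV64 (fp_to_sint (ffloor X:f64)) becomes an FCVT_X node
// carrying a static RDN rounding-mode operand, which selects to fcvt.l.d
// with rounding mode rdn.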
14893static SDValue performFP_TO_INTCombine(SDNode *N,
14894 TargetLowering::DAGCombinerInfo &DCI,
14895 const RISCVSubtarget &Subtarget) {
14896 SelectionDAG &DAG = DCI.DAG;
14897 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14898 MVT XLenVT = Subtarget.getXLenVT();
14899
14900 SDValue Src = N->getOperand(0);
14901
14902 // Don't do this for strict-fp Src.
14903 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14904 return SDValue();
14905
14906 // Ensure the FP type is legal.
14907 if (!TLI.isTypeLegal(Src.getValueType()))
14908 return SDValue();
14909
14910 // Don't do this for f16 with Zfhmin and not Zfh.
14911 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14912 return SDValue();
14913
14914 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14915 // If the result is invalid, we didn't find a foldable instruction.
14916 if (FRM == RISCVFPRndMode::Invalid)
14917 return SDValue();
14918
14919 SDLoc DL(N);
14920 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14921 EVT VT = N->getValueType(0);
14922
14923 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14924 MVT SrcVT = Src.getSimpleValueType();
14925 MVT SrcContainerVT = SrcVT;
14926 MVT ContainerVT = VT.getSimpleVT();
14927 SDValue XVal = Src.getOperand(0);
14928
14929 // For widening and narrowing conversions we just combine it into a
14930 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14931 // end up getting lowered to their appropriate pseudo instructions based on
14932 // their operand types
14933 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14934 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14935 return SDValue();
14936
14937 // Make fixed-length vectors scalable first
14938 if (SrcVT.isFixedLengthVector()) {
14939 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14940 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14941 ContainerVT =
14942 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14943 }
14944
14945 auto [Mask, VL] =
14946 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14947
14948 SDValue FpToInt;
14949 if (FRM == RISCVFPRndMode::RTZ) {
14950 // Use the dedicated trunc static rounding mode if we're truncating so we
14951 // don't need to generate calls to fsrmi/fsrm
14952 unsigned Opc =
14953 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14954 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14955 } else if (FRM == RISCVFPRndMode::DYN) {
14956 unsigned Opc =
14957 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14958 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14959 } else {
14960 unsigned Opc =
14961 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14962 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14963 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14964 }
14965
14966 // If converted from fixed-length to scalable, convert back
14967 if (VT.isFixedLengthVector())
14968 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14969
14970 return FpToInt;
14971 }
14972
14973 // Only handle XLen or i32 types. Other types narrower than XLen will
14974 // eventually be legalized to XLenVT.
14975 if (VT != MVT::i32 && VT != XLenVT)
14976 return SDValue();
14977
14978 unsigned Opc;
14979 if (VT == XLenVT)
14980 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14981 else
14982 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14983
14984 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14985 DAG.getTargetConstant(FRM, DL, XLenVT));
14986 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14987}
14988
14989// Fold
14990// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14991// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14992// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14993// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14994// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14995// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14996static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14997 TargetLowering::DAGCombinerInfo &DCI,
14998 const RISCVSubtarget &Subtarget) {
14999 SelectionDAG &DAG = DCI.DAG;
15000 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15001 MVT XLenVT = Subtarget.getXLenVT();
15002
15003 // Only handle XLen types. Other types narrower than XLen will eventually be
15004 // legalized to XLenVT.
15005 EVT DstVT = N->getValueType(0);
15006 if (DstVT != XLenVT)
15007 return SDValue();
15008
15009 SDValue Src = N->getOperand(0);
15010
15011 // Don't do this for strict-fp Src.
15012 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15013 return SDValue();
15014
15015 // Ensure the FP type is also legal.
15016 if (!TLI.isTypeLegal(Src.getValueType()))
15017 return SDValue();
15018
15019 // Don't do this for f16 with Zfhmin and not Zfh.
15020 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15021 return SDValue();
15022
15023 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15024
15025 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15026 if (FRM == RISCVFPRndMode::Invalid)
15027 return SDValue();
15028
15029 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15030
15031 unsigned Opc;
15032 if (SatVT == DstVT)
15033 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15034 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15035 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15036 else
15037 return SDValue();
15038 // FIXME: Support other SatVTs by clamping before or after the conversion.
15039
15040 Src = Src.getOperand(0);
15041
15042 SDLoc DL(N);
15043 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15044 DAG.getTargetConstant(FRM, DL, XLenVT));
15045
15046 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15047 // extend.
15048 if (Opc == RISCVISD::FCVT_WU_RV64)
15049 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15050
15051 // RISC-V FP-to-int conversions saturate to the destination register size, but
15052 // don't produce 0 for nan.
15053 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15054 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15055}
15056
15057// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15058// smaller than XLenVT.
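// bswap reverses the byte order and bitreverse reverses all bits, so the
// composition reverses the bits within each byte, which is exactly BREV8.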
15059static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15060 const RISCVSubtarget &Subtarget) {
15061 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15062
15063 SDValue Src = N->getOperand(0);
15064 if (Src.getOpcode() != ISD::BSWAP)
15065 return SDValue();
15066
15067 EVT VT = N->getValueType(0);
15068 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15069 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15070 return SDValue();
15071
15072 SDLoc DL(N);
15073 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15074}
15075
15076// Convert from one FMA opcode to another based on whether we are negating the
15077// multiply result and/or the accumulator.
15078// NOTE: Only supports RVV operations with VL.
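// For example, VFMADD_VL computes (a * b) + c; negating the multiply gives
// -(a * b) + c, i.e. VFNMSUB_VL, while negating the accumulator gives
// (a * b) - c, i.e. VFMSUB_VL.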
15079static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15080 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15081 if (NegMul) {
15082 // clang-format off
15083 switch (Opcode) {
15084 default: llvm_unreachable("Unexpected opcode");
15085 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15086 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15087 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15088 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15089 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15090 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15091 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15092 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15093 }
15094 // clang-format on
15095 }
15096
15097 // Negating the accumulator changes ADD<->SUB.
15098 if (NegAcc) {
15099 // clang-format off
15100 switch (Opcode) {
15101 default: llvm_unreachable("Unexpected opcode");
15102 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15103 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15104 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15105 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15106 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15107 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15108 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15109 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15110 }
15111 // clang-format on
15112 }
15113
15114 return Opcode;
15115}
15116
15117static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15118 // Fold FNEG_VL into FMA opcodes.
15119 // The first operand of strict-fp is chain.
15120 unsigned Offset = N->isTargetStrictFPOpcode();
15121 SDValue A = N->getOperand(0 + Offset);
15122 SDValue B = N->getOperand(1 + Offset);
15123 SDValue C = N->getOperand(2 + Offset);
15124 SDValue Mask = N->getOperand(3 + Offset);
15125 SDValue VL = N->getOperand(4 + Offset);
15126
15127 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15128 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15129 V.getOperand(2) == VL) {
15130 // Return the negated input.
15131 V = V.getOperand(0);
15132 return true;
15133 }
15134
15135 return false;
15136 };
15137
15138 bool NegA = invertIfNegative(A);
15139 bool NegB = invertIfNegative(B);
15140 bool NegC = invertIfNegative(C);
15141
15142 // If no operands are negated, we're done.
15143 if (!NegA && !NegB && !NegC)
15144 return SDValue();
15145
15146 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15147 if (N->isTargetStrictFPOpcode())
15148 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15149 {N->getOperand(0), A, B, C, Mask, VL});
15150 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15151 VL);
15152}
15153
15154static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
15155 const RISCVSubtarget &Subtarget) {
15156 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15157 return V;
15158
15159 if (N->getValueType(0).isScalableVector() &&
15160 N->getValueType(0).getVectorElementType() == MVT::f32 &&
15161 (Subtarget.hasVInstructionsF16Minimal() &&
15162 !Subtarget.hasVInstructionsF16())) {
15163 return SDValue();
15164 }
15165
15166 // FIXME: Ignore strict opcodes for now.
15167 if (N->isTargetStrictFPOpcode())
15168 return SDValue();
15169
15170 // Try to form widening FMA.
15171 SDValue Op0 = N->getOperand(0);
15172 SDValue Op1 = N->getOperand(1);
15173 SDValue Mask = N->getOperand(3);
15174 SDValue VL = N->getOperand(4);
15175
15176 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
15177 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
15178 return SDValue();
15179
15180 // TODO: Refactor to handle more complex cases similar to
15181 // combineBinOp_VLToVWBinOp_VL.
15182 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
15183 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
15184 return SDValue();
15185
15186 // Check the mask and VL are the same.
15187 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15188 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15189 return SDValue();
15190
15191 unsigned NewOpc;
15192 switch (N->getOpcode()) {
15193 default:
15194 llvm_unreachable("Unexpected opcode");
15195 case RISCVISD::VFMADD_VL:
15196 NewOpc = RISCVISD::VFWMADD_VL;
15197 break;
15198 case RISCVISD::VFNMSUB_VL:
15199 NewOpc = RISCVISD::VFWNMSUB_VL;
15200 break;
15201 case RISCVISD::VFNMADD_VL:
15202 NewOpc = RISCVISD::VFWNMADD_VL;
15203 break;
15204 case RISCVISD::VFMSUB_VL:
15205 NewOpc = RISCVISD::VFWMSUB_VL;
15206 break;
15207 }
15208
15209 Op0 = Op0.getOperand(0);
15210 Op1 = Op1.getOperand(0);
15211
15212 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15213 N->getOperand(2), Mask, VL);
15214}
15215
15216static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15217 const RISCVSubtarget &Subtarget) {
15218 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15219
15220 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15221 return SDValue();
15222
15223 if (!isa<ConstantSDNode>(N->getOperand(1)))
15224 return SDValue();
15225 uint64_t ShAmt = N->getConstantOperandVal(1);
15226 if (ShAmt > 32)
15227 return SDValue();
15228
15229 SDValue N0 = N->getOperand(0);
15230
15231 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15232 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15233 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15234 if (ShAmt < 32 &&
15235 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15236 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15237 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15238 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15239 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15240 if (LShAmt < 32) {
15241 SDLoc ShlDL(N0.getOperand(0));
15242 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15243 N0.getOperand(0).getOperand(0),
15244 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15245 SDLoc DL(N);
15246 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15247 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15248 }
15249 }
15250
15251 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15252 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15253 //
15254 // Also try these folds where an add or sub is in the middle.
15255 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15256 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
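 // For example, (sra (shl X, 32), 20) becomes (shl (sext_inreg X, i32), 12).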
15257 SDValue Shl;
15258 ConstantSDNode *AddC = nullptr;
15259
15260 // We might have an ADD or SUB between the SRA and SHL.
15261 bool IsAdd = N0.getOpcode() == ISD::ADD;
15262 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15263 // Other operand needs to be a constant we can modify.
15264 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15265 if (!AddC)
15266 return SDValue();
15267
15268 // AddC needs to have at least 32 trailing zeros.
15269 if (AddC->getAPIntValue().countr_zero() < 32)
15270 return SDValue();
15271
15272 // All users should be a shift by constant less than or equal to 32. This
15273 // ensures we'll do this optimization for each of them to produce an
15274 // add/sub+sext_inreg they can all share.
15275 for (SDNode *U : N0->uses()) {
15276 if (U->getOpcode() != ISD::SRA ||
15277 !isa<ConstantSDNode>(U->getOperand(1)) ||
15278 U->getConstantOperandVal(1) > 32)
15279 return SDValue();
15280 }
15281
15282 Shl = N0.getOperand(IsAdd ? 0 : 1);
15283 } else {
15284 // Not an ADD or SUB.
15285 Shl = N0;
15286 }
15287
15288 // Look for a shift left by 32.
15289 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15290 Shl.getConstantOperandVal(1) != 32)
15291 return SDValue();
15292
15293 // If we didn't look through an add/sub, then the shl should have one use.
15294 // If we did look through an add/sub, the sext_inreg we create is free so
15295 // we're only creating 2 new instructions. It's enough to only remove the
15296 // original sra+add/sub.
15297 if (!AddC && !Shl.hasOneUse())
15298 return SDValue();
15299
15300 SDLoc DL(N);
15301 SDValue In = Shl.getOperand(0);
15302
15303 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15304 // constant.
15305 if (AddC) {
15306 SDValue ShiftedAddC =
15307 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15308 if (IsAdd)
15309 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15310 else
15311 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15312 }
15313
15314 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15315 DAG.getValueType(MVT::i32));
15316 if (ShAmt == 32)
15317 return SExt;
15318
15319 return DAG.getNode(
15320 ISD::SHL, DL, MVT::i64, SExt,
15321 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15322}
15323
15324// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15325// the result is used as the condition of a br_cc or select_cc we can invert,
15326// inverting the setcc is free, and Z is 0/1. Caller will invert the
15327// br_cc/select_cc.
15328static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15329 bool IsAnd = Cond.getOpcode() == ISD::AND;
15330 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15331 return SDValue();
15332
15333 if (!Cond.hasOneUse())
15334 return SDValue();
15335
15336 SDValue Setcc = Cond.getOperand(0);
15337 SDValue Xor = Cond.getOperand(1);
15338 // Canonicalize setcc to LHS.
15339 if (Setcc.getOpcode() != ISD::SETCC)
15340 std::swap(Setcc, Xor);
15341 // LHS should be a setcc and RHS should be an xor.
15342 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15343 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15344 return SDValue();
15345
15346 // If the condition is an And, SimplifyDemandedBits may have changed
15347 // (xor Z, 1) to (not Z).
15348 SDValue Xor1 = Xor.getOperand(1);
15349 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15350 return SDValue();
15351
15352 EVT VT = Cond.getValueType();
15353 SDValue Xor0 = Xor.getOperand(0);
15354
15355 // The LHS of the xor needs to be 0/1.
15356 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15357 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15358 return SDValue();
15359
15360 // We can only invert integer setccs.
15361 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15362 if (!SetCCOpVT.isScalarInteger())
15363 return SDValue();
15364
15365 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15366 if (ISD::isIntEqualitySetCC(CCVal)) {
15367 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15368 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15369 Setcc.getOperand(1), CCVal);
15370 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15371 // Invert (setlt 0, X) by converting to (setlt X, 1).
15372 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15373 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15374 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15375 // Invert (setlt X, 1) by converting to (setlt 0, X).
15376 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15377 DAG.getConstant(0, SDLoc(Setcc), VT),
15378 Setcc.getOperand(0), CCVal);
15379 } else
15380 return SDValue();
15381
15382 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15383 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15384}
15385
15386// Perform common combines for BR_CC and SELECT_CC conditions.
15387static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15388 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15389 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15390
15391 // Since an arithmetic right shift always preserves the sign bit, the
15392 // shift can be omitted when comparing against zero:
15393 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15394 // setge (sra X, N), 0 -> setge X, 0
15395 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15396 LHS.getOpcode() == ISD::SRA) {
15397 LHS = LHS.getOperand(0);
15398 return true;
15399 }
15400
15401 if (!ISD::isIntEqualitySetCC(CCVal))
15402 return false;
15403
15404 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15405 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15406 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15407 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15408 // If we're looking for eq 0 instead of ne 0, we need to invert the
15409 // condition.
15410 bool Invert = CCVal == ISD::SETEQ;
15411 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15412 if (Invert)
15413 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15414
15415 RHS = LHS.getOperand(1);
15416 LHS = LHS.getOperand(0);
15417 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15418
15419 CC = DAG.getCondCode(CCVal);
15420 return true;
15421 }
15422
15423 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15424 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15425 RHS = LHS.getOperand(1);
15426 LHS = LHS.getOperand(0);
15427 return true;
15428 }
15429
15430 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15431 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15432 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15433 SDValue LHS0 = LHS.getOperand(0);
15434 if (LHS0.getOpcode() == ISD::AND &&
15435 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15436 uint64_t Mask = LHS0.getConstantOperandVal(1);
15437 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15438 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15439 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15440 CC = DAG.getCondCode(CCVal);
15441
15442 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15443 LHS = LHS0.getOperand(0);
15444 if (ShAmt != 0)
15445 LHS =
15446 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15447 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15448 return true;
15449 }
15450 }
15451 }
15452
15453 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15454 // This can occur when legalizing some floating point comparisons.
15455 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15456 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15457 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15458 CC = DAG.getCondCode(CCVal);
15459 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15460 return true;
15461 }
15462
15463 if (isNullConstant(RHS)) {
15464 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15465 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15466 CC = DAG.getCondCode(CCVal);
15467 LHS = NewCond;
15468 return true;
15469 }
15470 }
15471
15472 return false;
15473}
15474
15475// Fold
15476// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15477// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15478// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15479// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
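// For example, (select C, (add Y, 8), Y) -> (add Y, (select C, 8, 0)),
// leaving a select whose false operand is the identity value zero.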
15480static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15481 SDValue TrueVal, SDValue FalseVal,
15482 bool Swapped) {
15483 bool Commutative = true;
15484 unsigned Opc = TrueVal.getOpcode();
15485 switch (Opc) {
15486 default:
15487 return SDValue();
15488 case ISD::SHL:
15489 case ISD::SRA:
15490 case ISD::SRL:
15491 case ISD::SUB:
15492 Commutative = false;
15493 break;
15494 case ISD::ADD:
15495 case ISD::OR:
15496 case ISD::XOR:
15497 break;
15498 }
15499
15500 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15501 return SDValue();
15502
15503 unsigned OpToFold;
15504 if (FalseVal == TrueVal.getOperand(0))
15505 OpToFold = 0;
15506 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15507 OpToFold = 1;
15508 else
15509 return SDValue();
15510
15511 EVT VT = N->getValueType(0);
15512 SDLoc DL(N);
15513 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15514 EVT OtherOpVT = OtherOp.getValueType();
15515 SDValue IdentityOperand =
15516 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15517 if (!Commutative)
15518 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15519 assert(IdentityOperand && "No identity operand!");
15520
15521 if (Swapped)
15522 std::swap(OtherOp, IdentityOperand);
15523 SDValue NewSel =
15524 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15525 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15526}
15527
15528// This tries to get rid of `select` and `icmp` that are being used to handle
15529// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
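// For example, with i32: (select (seteq X, 0), 0, (cttz X)) becomes
// (and (cttz X), 31), since cttz(0) is 32 and 32 & 31 == 0.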
15530static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15531 SDValue Cond = N->getOperand(0);
15532
15533 // This represents either CTTZ or CTLZ instruction.
15534 SDValue CountZeroes;
15535
15536 SDValue ValOnZero;
15537
15538 if (Cond.getOpcode() != ISD::SETCC)
15539 return SDValue();
15540
15541 if (!isNullConstant(Cond->getOperand(1)))
15542 return SDValue();
15543
15544 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15545 if (CCVal == ISD::CondCode::SETEQ) {
15546 CountZeroes = N->getOperand(2);
15547 ValOnZero = N->getOperand(1);
15548 } else if (CCVal == ISD::CondCode::SETNE) {
15549 CountZeroes = N->getOperand(1);
15550 ValOnZero = N->getOperand(2);
15551 } else {
15552 return SDValue();
15553 }
15554
15555 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15556 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15557 CountZeroes = CountZeroes.getOperand(0);
15558
15559 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15560 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15561 CountZeroes.getOpcode() != ISD::CTLZ &&
15562 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15563 return SDValue();
15564
15565 if (!isNullConstant(ValOnZero))
15566 return SDValue();
15567
15568 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15569 if (Cond->getOperand(0) != CountZeroesArgument)
15570 return SDValue();
15571
15572 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15573 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15574 CountZeroes.getValueType(), CountZeroesArgument);
15575 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15576 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15577 CountZeroes.getValueType(), CountZeroesArgument);
15578 }
15579
15580 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15581 SDValue BitWidthMinusOne =
15582 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15583
15584 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15585 CountZeroes, BitWidthMinusOne);
15586 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15587}
15588
15589static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15590 const RISCVSubtarget &Subtarget) {
15591 SDValue Cond = N->getOperand(0);
15592 SDValue True = N->getOperand(1);
15593 SDValue False = N->getOperand(2);
15594 SDLoc DL(N);
15595 EVT VT = N->getValueType(0);
15596 EVT CondVT = Cond.getValueType();
15597
15598 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15599 return SDValue();
15600
15601 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15602 // BEXTI, where C is power of 2.
15603 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15604 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15605 SDValue LHS = Cond.getOperand(0);
15606 SDValue RHS = Cond.getOperand(1);
15607 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15608 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15609 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15610 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15611 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15612 return DAG.getSelect(DL, VT,
15613 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15614 False, True);
15615 }
15616 }
15617 return SDValue();
15618}
15619
15620static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15621 const RISCVSubtarget &Subtarget) {
15622 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15623 return Folded;
15624
15625 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15626 return V;
15627
15628 if (Subtarget.hasConditionalMoveFusion())
15629 return SDValue();
15630
15631 SDValue TrueVal = N->getOperand(1);
15632 SDValue FalseVal = N->getOperand(2);
15633 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15634 return V;
15635 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15636}
15637
15638/// If we have a build_vector where each lane is binop X, C, where C
15639/// is a constant (but not necessarily the same constant on all lanes),
15640/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15641/// We assume that materializing a constant build vector will be no more
15642/// expensive than performing O(n) binops.
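/// For example (illustrative):
///   (build_vector (add x1, 1), (add x2, 2), (add x3, 3), (add x4, 4))
///   -> (add (build_vector x1, x2, x3, x4), (build_vector 1, 2, 3, 4))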
15643static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15644 const RISCVSubtarget &Subtarget,
15645 const RISCVTargetLowering &TLI) {
15646 SDLoc DL(N);
15647 EVT VT = N->getValueType(0);
15648
15649 assert(!VT.isScalableVector() && "unexpected build vector");
15650
15651 if (VT.getVectorNumElements() == 1)
15652 return SDValue();
15653
15654 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15655 if (!TLI.isBinOp(Opcode))
15656 return SDValue();
15657
15658 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15659 return SDValue();
15660
15661 // This BUILD_VECTOR involves an implicit truncation, and sinking
15662 // truncates through binops is non-trivial.
15663 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15664 return SDValue();
15665
15666 SmallVector<SDValue> LHSOps;
15667 SmallVector<SDValue> RHSOps;
15668 for (SDValue Op : N->ops()) {
15669 if (Op.isUndef()) {
15670 // We can't form a divide or remainder from undef.
15671 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15672 return SDValue();
15673
15674 LHSOps.push_back(Op);
15675 RHSOps.push_back(Op);
15676 continue;
15677 }
15678
15679 // TODO: We can handle operations which have a neutral rhs value
15680 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15681 // of profit in a more explicit manner.
15682 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15683 return SDValue();
15684
15685 LHSOps.push_back(Op.getOperand(0));
15686 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15687 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15688 return SDValue();
15689 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15690 // have different LHS and RHS types.
15691 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15692 return SDValue();
15693
15694 RHSOps.push_back(Op.getOperand(1));
15695 }
15696
15697 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15698 DAG.getBuildVector(VT, DL, RHSOps));
15699}
15700
15701static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15702 const RISCVSubtarget &Subtarget,
15703 const RISCVTargetLowering &TLI) {
15704 SDValue InVec = N->getOperand(0);
15705 SDValue InVal = N->getOperand(1);
15706 SDValue EltNo = N->getOperand(2);
15707 SDLoc DL(N);
15708
15709 EVT VT = InVec.getValueType();
15710 if (VT.isScalableVector())
15711 return SDValue();
15712
15713 if (!InVec.hasOneUse())
15714 return SDValue();
15715
15716 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15717 // move the insert_vector_elts into the arms of the binop. Note that
15718 // the new RHS must be a constant.
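 // For example (illustrative):
 //   (insert_vector_elt (add v, vc), (add s, c2), idx)
 //   -> (add (insert_vector_elt v, s, idx), (insert_vector_elt vc, c2, idx))
 // keeping the RHS a vector of constants.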
15719 const unsigned InVecOpcode = InVec->getOpcode();
15720 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15721 InVal.hasOneUse()) {
15722 SDValue InVecLHS = InVec->getOperand(0);
15723 SDValue InVecRHS = InVec->getOperand(1);
15724 SDValue InValLHS = InVal->getOperand(0);
15725 SDValue InValRHS = InVal->getOperand(1);
15726
15727 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15728 return SDValue();
15729 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15730 return SDValue();
15731 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15732 // have different LHS and RHS types.
15733 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15734 return SDValue();
15735 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15736 InVecLHS, InValLHS, EltNo);
15737 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15738 InVecRHS, InValRHS, EltNo);
15739 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15740 }
15741
15742 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15743 // move the insert_vector_elt to the source operand of the concat_vector.
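 // For example (illustrative), with 4-element source vectors:
 //   (insert_vector_elt (concat_vectors a, b), x, 6)
 //   -> (concat_vectors a, (insert_vector_elt b, x, 2))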
15744 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15745 return SDValue();
15746
15747 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15748 if (!IndexC)
15749 return SDValue();
15750 unsigned Elt = IndexC->getZExtValue();
15751
15752 EVT ConcatVT = InVec.getOperand(0).getValueType();
15753 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15754 return SDValue();
15755 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15756 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15757
15758 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15759 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15760 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15761 ConcatOp, InVal, NewIdx);
15762
15763 SmallVector<SDValue> ConcatOps;
15764 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15765 ConcatOps[ConcatOpIdx] = ConcatOp;
15766 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15767}
15768
15769// If we're concatenating a series of vector loads like
15770// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15771// Then we can turn this into a strided load by widening the vector elements
15772// vlse32 p, stride=n
15773static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15774 const RISCVSubtarget &Subtarget,
15775 const RISCVTargetLowering &TLI) {
15776 SDLoc DL(N);
15777 EVT VT = N->getValueType(0);
15778
15779 // Only perform this combine on legal MVTs.
15780 if (!TLI.isTypeLegal(VT))
15781 return SDValue();
15782
15783 // TODO: Potentially extend this to scalable vectors
15784 if (VT.isScalableVector())
15785 return SDValue();
15786
15787 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15788 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15789 !SDValue(BaseLd, 0).hasOneUse())
15790 return SDValue();
15791
15792 EVT BaseLdVT = BaseLd->getValueType(0);
15793
15794 // Go through the loads and check that they're strided
15795 SmallVector<LoadSDNode *> Lds;
15796 Lds.push_back(BaseLd);
15797 Align Align = BaseLd->getAlign();
15798 for (SDValue Op : N->ops().drop_front()) {
15799 auto *Ld = dyn_cast<LoadSDNode>(Op);
15800 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15801 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15802 Ld->getValueType(0) != BaseLdVT)
15803 return SDValue();
15804
15805 Lds.push_back(Ld);
15806
15807 // The common alignment is the most restrictive (smallest) of all the loads
15808 Align = std::min(Align, Ld->getAlign());
15809 }
15810
15811 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15812 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15813 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15814 // If the load ptrs can be decomposed into a common (Base + Index) with a
15815 // common constant stride, then return the constant stride.
15816 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15817 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15818 if (BIO1.equalBaseIndex(BIO2, DAG))
15819 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15820
15821 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15822 SDValue P1 = Ld1->getBasePtr();
15823 SDValue P2 = Ld2->getBasePtr();
15824 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15825 return {{P2.getOperand(1), false}};
15826 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15827 return {{P1.getOperand(1), true}};
15828
15829 return std::nullopt;
15830 };
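 // For example (illustrative): loads from p, (add p, 16) and (add p, 32)
 // share a base and give a constant byte stride of 16, while a chain of
 // pointers built as (add prev, s) yields the non-constant stride s, with
 // the bool marking the case where the stride must later be negated.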
15831
15832 // Get the distance between the first and second loads
15833 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15834 if (!BaseDiff)
15835 return SDValue();
15836
15837 // Check all the loads are the same distance apart
15838 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15839 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15840 return SDValue();
15841
15842 // TODO: At this point, we've successfully matched a generalized gather
15843 // load. Maybe we should emit that, and then move the specialized
15844 // matchers above and below into a DAG combine?
15845
15846 // Get the widened scalar type, e.g. v4i8 -> i32
15847 unsigned WideScalarBitWidth =
15848 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15849 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15850
15851 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15852 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15853 if (!TLI.isTypeLegal(WideVecVT))
15854 return SDValue();
15855
15856 // Check that the operation is legal
15857 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15858 return SDValue();
15859
15860 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15861 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15862 ? std::get<SDValue>(StrideVariant)
15863 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15864 Lds[0]->getOffset().getValueType());
15865 if (MustNegateStride)
15866 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15867
15868 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15869 SDValue IntID =
15870 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15871 Subtarget.getXLenVT());
15872
15873 SDValue AllOneMask =
15874 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15875 DAG.getConstant(1, DL, MVT::i1));
15876
15877 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15878 BaseLd->getBasePtr(), Stride, AllOneMask};
15879
15880 uint64_t MemSize;
15881 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15882 ConstStride && ConstStride->getSExtValue() >= 0)
15883 // total size = (elsize * n) + (stride - elsize) * (n-1)
15884 // = elsize + stride * (n-1)
15885 MemSize = WideScalarVT.getSizeInBits() +
15886 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15887 else
15888 // If Stride isn't constant, then we can't know how much it will load
15889 MemSize = MemoryLocation::UnknownSize;
15890
15891 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15892 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15893 Align);
15894
15895 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15896 Ops, WideVecVT, MMO);
15897 for (SDValue Ld : N->ops())
15898 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15899
15900 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15901}
15902
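// A sketch of the fold performed below (operands abridged):
//   (add_vl c, (vwmul_vl a, b, undef, m, vl), undef, m, vl)
//   -> (vwmacc_vl a, b, c, m, vl)
// i.e. a widening multiply feeding an add with matching mask and VL is
// replaced by a single widening multiply-accumulate.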
15903static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15904 const RISCVSubtarget &Subtarget) {
15905
15906 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15907
15908 if (N->getValueType(0).isFixedLengthVector())
15909 return SDValue();
15910
15911 SDValue Addend = N->getOperand(0);
15912 SDValue MulOp = N->getOperand(1);
15913
15914 if (N->getOpcode() == RISCVISD::ADD_VL) {
15915 SDValue AddMergeOp = N->getOperand(2);
15916 if (!AddMergeOp.isUndef())
15917 return SDValue();
15918 }
15919
15920 auto IsVWMulOpc = [](unsigned Opc) {
15921 switch (Opc) {
15922 case RISCVISD::VWMUL_VL:
15923 case RISCVISD::VWMULSU_VL:
15924 case RISCVISD::VWMULU_VL:
15925 return true;
15926 default:
15927 return false;
15928 }
15929 };
15930
15931 if (!IsVWMulOpc(MulOp.getOpcode()))
15932 std::swap(Addend, MulOp);
15933
15934 if (!IsVWMulOpc(MulOp.getOpcode()))
15935 return SDValue();
15936
15937 SDValue MulMergeOp = MulOp.getOperand(2);
15938
15939 if (!MulMergeOp.isUndef())
15940 return SDValue();
15941
15942 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15943 const RISCVSubtarget &Subtarget) {
15944 if (N->getOpcode() == ISD::ADD) {
15945 SDLoc DL(N);
15946 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15947 Subtarget);
15948 }
15949 return std::make_pair(N->getOperand(3), N->getOperand(4));
15950 }(N, DAG, Subtarget);
15951
15952 SDValue MulMask = MulOp.getOperand(3);
15953 SDValue MulVL = MulOp.getOperand(4);
15954
15955 if (AddMask != MulMask || AddVL != MulVL)
15956 return SDValue();
15957
15958 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15959 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15960 "Unexpected opcode after VWMACC_VL");
15961 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15962 "Unexpected opcode after VWMACC_VL!");
15963 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15964 "Unexpected opcode after VWMUL_VL!");
15965 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15966 "Unexpected opcode after VWMUL_VL!");
15967
15968 SDLoc DL(N);
15969 EVT VT = N->getValueType(0);
15970 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15971 AddVL};
15972 return DAG.getNode(Opc, DL, VT, Ops);
15973}
15974
15975static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15976 ISD::MemIndexType &IndexType,
15977 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15978 if (!DCI.isBeforeLegalize())
15979 return false;
15980
15981 SelectionDAG &DAG = DCI.DAG;
15982 const MVT XLenVT =
15983 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15984
15985 const EVT IndexVT = Index.getValueType();
15986
15987 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15988 // mode, so anything else must be manually legalized.
15989 if (!isIndexTypeSigned(IndexType))
15990 return false;
15991
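 // For example, a v4i8 index vector on an XLEN=64 target is sign extended
 // to v4i64 below and then re-tagged as unsigned, since the extended value
 // is what the unsigned-unscaled addressing mode consumes.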
15992 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15993 // Any index legalization should first promote to XLenVT, so we don't lose
15994 // bits when scaling. This may create an illegal index type so we let
15995 // LLVM's legalization take care of the splitting.
15996 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15997 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15998 IndexVT.changeVectorElementType(XLenVT), Index);
15999 }
16000 IndexType = ISD::UNSIGNED_SCALED;
16001 return true;
16002}
16003
16004/// Match the index vector of a scatter or gather node as the shuffle mask
16005/// which performs the rearrangement if possible. Will only match if
16006/// all lanes are touched, and thus replacing the scatter or gather with
16007/// a unit strided access and shuffle is legal.
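/// For example (illustrative), a gather of v4i32 whose index vector holds
/// the byte offsets (4, 0, 12, 8) covers every lane exactly once, so it can
/// be rewritten as a unit-strided load followed by the shuffle <1, 0, 3, 2>.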
16008static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
16009 SmallVector<int> &ShuffleMask) {
16010 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16011 return false;
16012 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16013 return false;
16014
16015 const unsigned ElementSize = VT.getScalarStoreSize();
16016 const unsigned NumElems = VT.getVectorNumElements();
16017
16018 // Create the shuffle mask and check all bits active
16019 assert(ShuffleMask.empty());
16020 BitVector ActiveLanes(NumElems);
16021 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16022 // TODO: We've found an active bit of UB, and could be
16023 // more aggressive here if desired.
16024 if (Index->getOperand(i)->isUndef())
16025 return false;
16026 uint64_t C = Index->getConstantOperandVal(i);
16027 if (C % ElementSize != 0)
16028 return false;
16029 C = C / ElementSize;
16030 if (C >= NumElems)
16031 return false;
16032 ShuffleMask.push_back(C);
16033 ActiveLanes.set(C);
16034 }
16035 return ActiveLanes.all();
16036}
16037
16038/// Match the index of a gather or scatter operation as an operation
16039/// with twice the element width and half the number of elements. This is
16040/// generally profitable (if legal) because these operations are linear
16041/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
16042/// come out ahead.
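/// For example (illustrative), a gather of v8i16 with byte offsets
/// (0, 2, 8, 10, 16, 18, 24, 26) always touches adjacent element pairs, so
/// it can instead be treated as a v4i32 gather with offsets (0, 8, 16, 24).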
16043static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16044 Align BaseAlign, const RISCVSubtarget &ST) {
16045 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16046 return false;
16047 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16048 return false;
16049
16050 // Attempt a doubling. If we can use an element type 4x or 8x in
16051 // size, this will happen via multiple iterations of the transform.
16052 const unsigned NumElems = VT.getVectorNumElements();
16053 if (NumElems % 2 != 0)
16054 return false;
16055
16056 const unsigned ElementSize = VT.getScalarStoreSize();
16057 const unsigned WiderElementSize = ElementSize * 2;
16058 if (WiderElementSize > ST.getELen()/8)
16059 return false;
16060
16061 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16062 return false;
16063
16064 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16065 // TODO: We've found an active bit of UB, and could be
16066 // more aggressive here if desired.
16067 if (Index->getOperand(i)->isUndef())
16068 return false;
16069 // TODO: This offset check is too strict if we support fully
16070 // misaligned memory operations.
16071 uint64_t C = Index->getConstantOperandVal(i);
16072 if (i % 2 == 0) {
16073 if (C % WiderElementSize != 0)
16074 return false;
16075 continue;
16076 }
16077 uint64_t Last = Index->getConstantOperandVal(i-1);
16078 if (C != Last + ElementSize)
16079 return false;
16080 }
16081 return true;
16082}
16083
16084
16085SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
16086 DAGCombinerInfo &DCI) const {
16087 SelectionDAG &DAG = DCI.DAG;
16088 const MVT XLenVT = Subtarget.getXLenVT();
16089 SDLoc DL(N);
16090
16091 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16092 // bits are demanded. N will be added to the Worklist if it was not deleted.
16093 // Caller should return SDValue(N, 0) if this returns true.
16094 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16095 SDValue Op = N->getOperand(OpNo);
16096 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16097 if (!SimplifyDemandedBits(Op, Mask, DCI))
16098 return false;
16099
16100 if (N->getOpcode() != ISD::DELETED_NODE)
16101 DCI.AddToWorklist(N);
16102 return true;
16103 };
16104
16105 switch (N->getOpcode()) {
16106 default:
16107 break;
16108 case RISCVISD::SplitF64: {
16109 SDValue Op0 = N->getOperand(0);
16110 // If the input to SplitF64 is just BuildPairF64 then the operation is
16111 // redundant. Instead, use BuildPairF64's operands directly.
16112 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16113 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16114
16115 if (Op0->isUndef()) {
16116 SDValue Lo = DAG.getUNDEF(MVT::i32);
16117 SDValue Hi = DAG.getUNDEF(MVT::i32);
16118 return DCI.CombineTo(N, Lo, Hi);
16119 }
16120
16121 // It's cheaper to materialise two 32-bit integers than to load a double
16122 // from the constant pool and transfer it to integer registers through the
16123 // stack.
16124 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16125 APInt V = C->getValueAPF().bitcastToAPInt();
16126 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16127 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16128 return DCI.CombineTo(N, Lo, Hi);
16129 }
16130
16131 // This is a target-specific version of a DAGCombine performed in
16132 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16133 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16134 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16135 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16136 !Op0.getNode()->hasOneUse())
16137 break;
16138 SDValue NewSplitF64 =
16139 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16140 Op0.getOperand(0));
16141 SDValue Lo = NewSplitF64.getValue(0);
16142 SDValue Hi = NewSplitF64.getValue(1);
16143 APInt SignBit = APInt::getSignMask(32);
16144 if (Op0.getOpcode() == ISD::FNEG) {
16145 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16146 DAG.getConstant(SignBit, DL, MVT::i32));
16147 return DCI.CombineTo(N, Lo, NewHi);
16148 }
16149 assert(Op0.getOpcode() == ISD::FABS);
16150 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16151 DAG.getConstant(~SignBit, DL, MVT::i32));
16152 return DCI.CombineTo(N, Lo, NewHi);
16153 }
16154 case RISCVISD::SLLW:
16155 case RISCVISD::SRAW:
16156 case RISCVISD::SRLW:
16157 case RISCVISD::RORW:
16158 case RISCVISD::ROLW: {
16159 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16160 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16161 SimplifyDemandedLowBitsHelper(1, 5))
16162 return SDValue(N, 0);
16163
16164 break;
16165 }
16166 case RISCVISD::CLZW:
16167 case RISCVISD::CTZW: {
16168 // Only the lower 32 bits of the first operand are read
16169 if (SimplifyDemandedLowBitsHelper(0, 32))
16170 return SDValue(N, 0);
16171 break;
16172 }
16173 case RISCVISD::FMV_W_X_RV64: {
16174 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16175 // conversion is unnecessary and can be replaced with the
16176 // FMV_X_ANYEXTW_RV64 operand.
16177 SDValue Op0 = N->getOperand(0);
16178 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16179 return Op0.getOperand(0);
16180 break;
16181 }
16182 case RISCVISD::FMV_X_ANYEXTH:
16183 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16184 SDLoc DL(N);
16185 SDValue Op0 = N->getOperand(0);
16186 MVT VT = N->getSimpleValueType(0);
16187 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16188 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16189 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16190 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16191 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16192 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16193 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16194 assert(Op0.getOperand(0).getValueType() == VT &&
16195 "Unexpected value type!");
16196 return Op0.getOperand(0);
16197 }
16198
16199 // This is a target-specific version of a DAGCombine performed in
16200 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16201 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16202 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16203 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16204 !Op0.getNode()->hasOneUse())
16205 break;
16206 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16207 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16208 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16209 if (Op0.getOpcode() == ISD::FNEG)
16210 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16211 DAG.getConstant(SignBit, DL, VT));
16212
16213 assert(Op0.getOpcode() == ISD::FABS);
16214 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16215 DAG.getConstant(~SignBit, DL, VT));
16216 }
16217 case ISD::ABS: {
16218 EVT VT = N->getValueType(0);
16219 SDValue N0 = N->getOperand(0);
16220 // abs (sext) -> zext (abs)
16221 // abs (zext) -> zext (handled elsewhere)
16222 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16223 SDValue Src = N0.getOperand(0);
16224 SDLoc DL(N);
16225 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16226 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16227 }
16228 break;
16229 }
16230 case ISD::ADD: {
16231 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16232 return V;
16233 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16234 return V;
16235 return performADDCombine(N, DCI, Subtarget);
16236 }
16237 case ISD::SUB: {
16238 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16239 return V;
16240 return performSUBCombine(N, DAG, Subtarget);
16241 }
16242 case ISD::AND:
16243 return performANDCombine(N, DCI, Subtarget);
16244 case ISD::OR: {
16245 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16246 return V;
16247 return performORCombine(N, DCI, Subtarget);
16248 }
16249 case ISD::XOR:
16250 return performXORCombine(N, DAG, Subtarget);
16251 case ISD::MUL:
16252 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16253 return V;
16254 return performMULCombine(N, DAG, DCI, Subtarget);
16255 case ISD::SDIV:
16256 case ISD::UDIV:
16257 case ISD::SREM:
16258 case ISD::UREM:
16259 if (SDValue V = combineBinOpOfZExt(N, DAG))
16260 return V;
16261 break;
16262 case ISD::FADD:
16263 case ISD::UMAX:
16264 case ISD::UMIN:
16265 case ISD::SMAX:
16266 case ISD::SMIN:
16267 case ISD::FMAXNUM:
16268 case ISD::FMINNUM: {
16269 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16270 return V;
16271 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16272 return V;
16273 return SDValue();
16274 }
16275 case ISD::SETCC:
16276 return performSETCCCombine(N, DAG, Subtarget);
16277 case ISD::SIGN_EXTEND_INREG:
16278 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16279 case ISD::ZERO_EXTEND:
16280 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16281 // type legalization. This is safe because fp_to_uint produces poison if
16282 // it overflows.
16283 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16284 SDValue Src = N->getOperand(0);
16285 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16286 isTypeLegal(Src.getOperand(0).getValueType()))
16287 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16288 Src.getOperand(0));
16289 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16290 isTypeLegal(Src.getOperand(1).getValueType())) {
16291 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16292 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16293 Src.getOperand(0), Src.getOperand(1));
16294 DCI.CombineTo(N, Res);
16295 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16296 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16297 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16298 }
16299 }
16300 return SDValue();
16301 case RISCVISD::TRUNCATE_VECTOR_VL: {
16302 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16303 // This benefits cases where X and Y are both the same low-precision
16304 // vector value type. Since the truncate would be lowered into
16305 // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16306 // restriction, such a pattern would be expanded into a series of "vsetvli"
16307 // and "vnsrl" instructions later to reach this point.
16308 auto IsTruncNode = [](SDValue V) {
16309 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16310 return false;
16311 SDValue VL = V.getOperand(2);
16312 auto *C = dyn_cast<ConstantSDNode>(VL);
16313 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16314 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16315 (isa<RegisterSDNode>(VL) &&
16316 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16317 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16318 IsVLMAXForVMSET;
16319 };
16320
16321 SDValue Op = N->getOperand(0);
16322
16323 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16324 // to distinguish such pattern.
16325 while (IsTruncNode(Op)) {
16326 if (!Op.hasOneUse())
16327 return SDValue();
16328 Op = Op.getOperand(0);
16329 }
16330
16331 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16332 SDValue N0 = Op.getOperand(0);
16333 SDValue N1 = Op.getOperand(1);
16334 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16335 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16336 SDValue N00 = N0.getOperand(0);
16337 SDValue N10 = N1.getOperand(0);
16338 if (N00.getValueType().isVector() &&
16339 N00.getValueType() == N10.getValueType() &&
16340 N->getValueType(0) == N10.getValueType()) {
16341 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16342 SDValue SMin = DAG.getNode(
16343 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16344 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16345 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16346 }
16347 }
16348 }
16349 break;
16350 }
16351 case ISD::TRUNCATE:
16352 return performTRUNCATECombine(N, DAG, Subtarget);
16353 case ISD::SELECT:
16354 return performSELECTCombine(N, DAG, Subtarget);
16355 case RISCVISD::CZERO_EQZ:
16356 case RISCVISD::CZERO_NEZ: {
16357 SDValue Val = N->getOperand(0);
16358 SDValue Cond = N->getOperand(1);
16359
16360 unsigned Opc = N->getOpcode();
16361
16362 // czero_eqz x, x -> x
16363 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16364 return Val;
16365
16366 unsigned InvOpc =
16367 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16368
16369 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16370 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16371 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16372 SDValue NewCond = Cond.getOperand(0);
16373 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16374 if (DAG.MaskedValueIsZero(NewCond, Mask))
16375 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16376 }
16377 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16378 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16379 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16380 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16381 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16382 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16383 if (ISD::isIntEqualitySetCC(CCVal))
16384 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16385 N->getValueType(0), Val, Cond.getOperand(0));
16386 }
16387 return SDValue();
16388 }
16389 case RISCVISD::SELECT_CC: {
16390 // Transform
16391 SDValue LHS = N->getOperand(0);
16392 SDValue RHS = N->getOperand(1);
16393 SDValue CC = N->getOperand(2);
16394 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16395 SDValue TrueV = N->getOperand(3);
16396 SDValue FalseV = N->getOperand(4);
16397 SDLoc DL(N);
16398 EVT VT = N->getValueType(0);
16399
16400 // If the True and False values are the same, we don't need a select_cc.
16401 if (TrueV == FalseV)
16402 return TrueV;
16403
16404 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16405 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
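 // For example on RV64 (illustrative), (select (setlt x, 0), 3, 7) becomes
 //   srai a1, a0, 63
 //   andi a1, a1, -4
 //   addi a1, a1, 7
 // which stays branchless.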
16406 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16407 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16408 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16409 if (CCVal == ISD::CondCode::SETGE)
16410 std::swap(TrueV, FalseV);
16411
16412 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16413 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16414 // Only handle simm12; if it is not in this range, it can be considered a
16415 // register.
16416 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16417 isInt<12>(TrueSImm - FalseSImm)) {
16418 SDValue SRA =
16419 DAG.getNode(ISD::SRA, DL, VT, LHS,
16420 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16421 SDValue AND =
16422 DAG.getNode(ISD::AND, DL, VT, SRA,
16423 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16424 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16425 }
16426
16427 if (CCVal == ISD::CondCode::SETGE)
16428 std::swap(TrueV, FalseV);
16429 }
16430
16431 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16432 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16433 {LHS, RHS, CC, TrueV, FalseV});
16434
16435 if (!Subtarget.hasConditionalMoveFusion()) {
16436 // (select c, -1, y) -> -c | y
16437 if (isAllOnesConstant(TrueV)) {
16438 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16439 SDValue Neg = DAG.getNegative(C, DL, VT);
16440 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16441 }
16442 // (select c, y, -1) -> -!c | y
16443 if (isAllOnesConstant(FalseV)) {
16444 SDValue C =
16445 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16446 SDValue Neg = DAG.getNegative(C, DL, VT);
16447 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16448 }
16449
16450 // (select c, 0, y) -> -!c & y
16451 if (isNullConstant(TrueV)) {
16452 SDValue C =
16453 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16454 SDValue Neg = DAG.getNegative(C, DL, VT);
16455 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16456 }
16457 // (select c, y, 0) -> -c & y
16458 if (isNullConstant(FalseV)) {
16459 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16460 SDValue Neg = DAG.getNegative(C, DL, VT);
16461 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16462 }
16463 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16464 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16465 if (((isOneConstant(FalseV) && LHS == TrueV &&
16466 CCVal == ISD::CondCode::SETNE) ||
16467 (isOneConstant(TrueV) && LHS == FalseV &&
16468 CCVal == ISD::CondCode::SETEQ)) &&
16469 isNullConstant(RHS)) {
16470 // freeze it to be safe.
16471 LHS = DAG.getFreeze(LHS);
16472 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16473 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16474 }
16475 }
16476
16477 // If both true/false are an xor with 1, pull through the select.
16478 // This can occur after op legalization if both operands are setccs that
16479 // require an xor to invert.
16480 // FIXME: Generalize to other binary ops with identical operand?
16481 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16482 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16483 isOneConstant(TrueV.getOperand(1)) &&
16484 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16485 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16486 TrueV.getOperand(0), FalseV.getOperand(0));
16487 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16488 }
16489
16490 return SDValue();
16491 }
16492 case RISCVISD::BR_CC: {
16493 SDValue LHS = N->getOperand(1);
16494 SDValue RHS = N->getOperand(2);
16495 SDValue CC = N->getOperand(3);
16496 SDLoc DL(N);
16497
16498 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16499 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16500 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16501
16502 return SDValue();
16503 }
16504 case ISD::BITREVERSE:
16505 return performBITREVERSECombine(N, DAG, Subtarget);
16506 case ISD::FP_TO_SINT:
16507 case ISD::FP_TO_UINT:
16508 return performFP_TO_INTCombine(N, DCI, Subtarget);
16509 case ISD::FP_TO_SINT_SAT:
16510 case ISD::FP_TO_UINT_SAT:
16511 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16512 case ISD::FCOPYSIGN: {
16513 EVT VT = N->getValueType(0);
16514 if (!VT.isVector())
16515 break;
16516 // There is a form of VFSGNJ which injects the negated sign of its second
16517 // operand. Try and bubble any FNEG up after the extend/round to produce
16518 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16519 // TRUNC=1.
16520 SDValue In2 = N->getOperand(1);
16521 // Avoid cases where the extend/round has multiple uses, as duplicating
16522 // those is typically more expensive than removing a fneg.
16523 if (!In2.hasOneUse())
16524 break;
16525 if (In2.getOpcode() != ISD::FP_EXTEND &&
16526 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16527 break;
16528 In2 = In2.getOperand(0);
16529 if (In2.getOpcode() != ISD::FNEG)
16530 break;
16531 SDLoc DL(N);
16532 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16533 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16534 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16535 }
16536 case ISD::MGATHER: {
16537 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16538 const EVT VT = N->getValueType(0);
16539 SDValue Index = MGN->getIndex();
16540 SDValue ScaleOp = MGN->getScale();
16541 ISD::MemIndexType IndexType = MGN->getIndexType();
16542 assert(!MGN->isIndexScaled() &&
16543 "Scaled gather/scatter should not be formed");
16544
16545 SDLoc DL(N);
16546 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16547 return DAG.getMaskedGather(
16548 N->getVTList(), MGN->getMemoryVT(), DL,
16549 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16550 MGN->getBasePtr(), Index, ScaleOp},
16551 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16552
16553 if (narrowIndex(Index, IndexType, DAG))
16554 return DAG.getMaskedGather(
16555 N->getVTList(), MGN->getMemoryVT(), DL,
16556 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16557 MGN->getBasePtr(), Index, ScaleOp},
16558 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16559
16560 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16561 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16562 // The sequence will be XLenVT, not the type of Index. Tell
16563 // isSimpleVIDSequence this so we avoid overflow.
16564 if (std::optional<VIDSequence> SimpleVID =
16565 isSimpleVIDSequence(Index, Subtarget.getXLen());
16566 SimpleVID && SimpleVID->StepDenominator == 1) {
16567 const int64_t StepNumerator = SimpleVID->StepNumerator;
16568 const int64_t Addend = SimpleVID->Addend;
16569
16570 // Note: We don't need to check alignment here since (by assumption
16571 // from the existence of the gather), our offsets must be sufficiently
16572 // aligned.
16573
16574 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16575 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16576 assert(IndexType == ISD::UNSIGNED_SCALED);
16577 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16578 DAG.getConstant(Addend, DL, PtrVT));
16579
16580 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16581 SDValue IntID =
16582 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16583 XLenVT);
16584 SDValue Ops[] =
16585 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16586 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16587 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16588 Ops, VT, MGN->getMemOperand());
16589 }
16590 }
16591
16592 SmallVector<int> ShuffleMask;
16593 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16594 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16595 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16596 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16597 MGN->getMask(), DAG.getUNDEF(VT),
16598 MGN->getMemoryVT(), MGN->getMemOperand(),
16599 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16600 SDValue Shuffle =
16601 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16602 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16603 }
16604
16605 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16606 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16607 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16608 SmallVector<SDValue> NewIndices;
16609 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16610 NewIndices.push_back(Index.getOperand(i));
16611 EVT IndexVT = Index.getValueType()
16612 .getHalfNumVectorElementsVT(*DAG.getContext());
16613 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16614
16615 unsigned ElementSize = VT.getScalarStoreSize();
16616 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16617 auto EltCnt = VT.getVectorElementCount();
16618 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16619 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16620 EltCnt.divideCoefficientBy(2));
16621 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16622 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16623 EltCnt.divideCoefficientBy(2));
16624 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16625
16626 SDValue Gather =
16627 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16628 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16629 Index, ScaleOp},
16630 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16631 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16632 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16633 }
16634 break;
16635 }
16636 case ISD::MSCATTER:{
16637 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16638 SDValue Index = MSN->getIndex();
16639 SDValue ScaleOp = MSN->getScale();
16640 ISD::MemIndexType IndexType = MSN->getIndexType();
16641 assert(!MSN->isIndexScaled() &&
16642 "Scaled gather/scatter should not be formed");
16643
16644 SDLoc DL(N);
16645 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16646 return DAG.getMaskedScatter(
16647 N->getVTList(), MSN->getMemoryVT(), DL,
16648 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16649 Index, ScaleOp},
16650 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16651
16652 if (narrowIndex(Index, IndexType, DAG))
16653 return DAG.getMaskedScatter(
16654 N->getVTList(), MSN->getMemoryVT(), DL,
16655 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16656 Index, ScaleOp},
16657 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16658
16659 EVT VT = MSN->getValue()->getValueType(0);
16660 SmallVector<int> ShuffleMask;
16661 if (!MSN->isTruncatingStore() &&
16662 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16663 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16664 DAG.getUNDEF(VT), ShuffleMask);
16665 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16666 DAG.getUNDEF(XLenVT), MSN->getMask(),
16667 MSN->getMemoryVT(), MSN->getMemOperand(),
16668 ISD::UNINDEXED, false);
16669 }
16670 break;
16671 }
16672 case ISD::VP_GATHER: {
16673 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16674 SDValue Index = VPGN->getIndex();
16675 SDValue ScaleOp = VPGN->getScale();
16676 ISD::MemIndexType IndexType = VPGN->getIndexType();
16677 assert(!VPGN->isIndexScaled() &&
16678 "Scaled gather/scatter should not be formed");
16679
16680 SDLoc DL(N);
16681 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16682 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16683 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16684 ScaleOp, VPGN->getMask(),
16685 VPGN->getVectorLength()},
16686 VPGN->getMemOperand(), IndexType);
16687
16688 if (narrowIndex(Index, IndexType, DAG))
16689 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16690 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16691 ScaleOp, VPGN->getMask(),
16692 VPGN->getVectorLength()},
16693 VPGN->getMemOperand(), IndexType);
16694
16695 break;
16696 }
16697 case ISD::VP_SCATTER: {
16698 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16699 SDValue Index = VPSN->getIndex();
16700 SDValue ScaleOp = VPSN->getScale();
16701 ISD::MemIndexType IndexType = VPSN->getIndexType();
16702 assert(!VPSN->isIndexScaled() &&
16703 "Scaled gather/scatter should not be formed");
16704
16705 SDLoc DL(N);
16706 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16707 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16708 {VPSN->getChain(), VPSN->getValue(),
16709 VPSN->getBasePtr(), Index, ScaleOp,
16710 VPSN->getMask(), VPSN->getVectorLength()},
16711 VPSN->getMemOperand(), IndexType);
16712
16713 if (narrowIndex(Index, IndexType, DAG))
16714 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16715 {VPSN->getChain(), VPSN->getValue(),
16716 VPSN->getBasePtr(), Index, ScaleOp,
16717 VPSN->getMask(), VPSN->getVectorLength()},
16718 VPSN->getMemOperand(), IndexType);
16719 break;
16720 }
16721 case RISCVISD::SHL_VL:
16722 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16723 return V;
16724 [[fallthrough]];
16725 case RISCVISD::SRA_VL:
16726 case RISCVISD::SRL_VL: {
16727 SDValue ShAmt = N->getOperand(1);
16728 if (ShAmt.getOpcode() == RISCVISD::VMV_V_X_VL) {
16729 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16730 SDLoc DL(N);
16731 SDValue VL = N->getOperand(4);
16732 EVT VT = N->getValueType(0);
16733 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16734 ShAmt.getOperand(1), VL);
16735 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16736 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16737 }
16738 break;
16739 }
16740 case ISD::SRA:
16741 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16742 return V;
16743 [[fallthrough]];
16744 case ISD::SRL:
16745 case ISD::SHL: {
16746 if (N->getOpcode() == ISD::SHL) {
16747 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16748 return V;
16749 }
16750 SDValue ShAmt = N->getOperand(1);
16751 if (ShAmt.getOpcode() == RISCVISD::VMV_V_X_VL) {
16752 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16753 SDLoc DL(N);
16754 EVT VT = N->getValueType(0);
16755 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16756 ShAmt.getOperand(1),
16757 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16758 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16759 }
16760 break;
16761 }
16762 case RISCVISD::ADD_VL:
16763 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16764 return V;
16765 return combineToVWMACC(N, DAG, Subtarget);
16766 case RISCVISD::VWADD_W_VL:
16767 case RISCVISD::VWADDU_W_VL:
16768 case RISCVISD::VWSUB_W_VL:
16769 case RISCVISD::VWSUBU_W_VL:
16770 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16771 case RISCVISD::SUB_VL:
16772 case RISCVISD::MUL_VL:
16773 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16774 case RISCVISD::VFMADD_VL:
16775 case RISCVISD::VFNMADD_VL:
16776 case RISCVISD::VFMSUB_VL:
16777 case RISCVISD::VFNMSUB_VL:
16778 case RISCVISD::STRICT_VFMADD_VL:
16779 case RISCVISD::STRICT_VFNMADD_VL:
16780 case RISCVISD::STRICT_VFMSUB_VL:
16781 case RISCVISD::STRICT_VFNMSUB_VL:
16782 return performVFMADD_VLCombine(N, DAG, Subtarget);
16783 case RISCVISD::FADD_VL:
16784 case RISCVISD::FSUB_VL:
16785 case RISCVISD::FMUL_VL:
16786 case RISCVISD::VFWADD_W_VL:
16787 case RISCVISD::VFWSUB_W_VL: {
16788 if (N->getValueType(0).isScalableVector() &&
16789 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16790 (Subtarget.hasVInstructionsF16Minimal() &&
16791 !Subtarget.hasVInstructionsF16()))
16792 return SDValue();
16793 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16794 }
16795 case ISD::LOAD:
16796 case ISD::STORE: {
16797 if (DCI.isAfterLegalizeDAG())
16798 if (SDValue V = performMemPairCombine(N, DCI))
16799 return V;
16800
16801 if (N->getOpcode() != ISD::STORE)
16802 break;
16803
16804 auto *Store = cast<StoreSDNode>(N);
16805 SDValue Chain = Store->getChain();
16806 EVT MemVT = Store->getMemoryVT();
16807 SDValue Val = Store->getValue();
16808 SDLoc DL(N);
16809
16810 bool IsScalarizable =
16811 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16812 Store->isSimple() &&
16813 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16814 isPowerOf2_64(MemVT.getSizeInBits()) &&
16815 MemVT.getSizeInBits() <= Subtarget.getXLen();
16816
16817 // If sufficiently aligned we can scalarize stores of constant vectors of
16818 // any power-of-two size up to XLen bits, provided that they aren't too
16819 // expensive to materialize.
16820 // vsetivli zero, 2, e8, m1, ta, ma
16821 // vmv.v.i v8, 4
16822 // vse64.v v8, (a0)
16823 // ->
16824 // li a1, 1028
16825 // sh a1, 0(a0)
16826 if (DCI.isBeforeLegalize() && IsScalarizable &&
16828 // Get the constant vector bits
16829 APInt NewC(Val.getValueSizeInBits(), 0);
16830 uint64_t EltSize = Val.getScalarValueSizeInBits();
16831 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16832 if (Val.getOperand(i).isUndef())
16833 continue;
16834 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16835 i * EltSize);
16836 }
16837 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16838
16839 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16840 true) <= 2 &&
16841 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16842 NewVT, *Store->getMemOperand())) {
16843 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16844 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16845 Store->getPointerInfo(), Store->getOriginalAlign(),
16846 Store->getMemOperand()->getFlags());
16847 }
16848 }
16849
16850 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16851 // vsetivli zero, 2, e16, m1, ta, ma
16852 // vle16.v v8, (a0)
16853 // vse16.v v8, (a1)
16854 if (auto *L = dyn_cast<LoadSDNode>(Val);
16855 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16856 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16857 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16858 L->getMemoryVT() == MemVT) {
16859 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16860 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16861 NewVT, *Store->getMemOperand()) &&
16862 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16863 NewVT, *L->getMemOperand())) {
16864 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16865 L->getPointerInfo(), L->getOriginalAlign(),
16866 L->getMemOperand()->getFlags());
16867 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16868 Store->getPointerInfo(), Store->getOriginalAlign(),
16869 Store->getMemOperand()->getFlags());
16870 }
16871 }
16872
16873 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16874 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16875 // any illegal types.
16876 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16877 (DCI.isAfterLegalizeDAG() &&
16878 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16879 isNullConstant(Val.getOperand(1)))) {
16880 SDValue Src = Val.getOperand(0);
16881 MVT VecVT = Src.getSimpleValueType();
16882 // VecVT should be scalable and memory VT should match the element type.
16883 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16884 MemVT == VecVT.getVectorElementType()) {
16885 SDLoc DL(N);
16886 MVT MaskVT = getMaskTypeFor(VecVT);
16887 return DAG.getStoreVP(
16888 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16889 DAG.getConstant(1, DL, MaskVT),
16890 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16891 Store->getMemOperand(), Store->getAddressingMode(),
16892 Store->isTruncatingStore(), /*IsCompress*/ false);
16893 }
16894 }
16895
16896 break;
16897 }
16898 case ISD::SPLAT_VECTOR: {
16899 EVT VT = N->getValueType(0);
16900 // Only perform this combine on legal MVT types.
16901 if (!isTypeLegal(VT))
16902 break;
16903 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16904 DAG, Subtarget))
16905 return Gather;
16906 break;
16907 }
16908 case ISD::BUILD_VECTOR:
16909 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16910 return V;
16911 break;
16912 case ISD::CONCAT_VECTORS:
16913 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16914 return V;
16915 break;
16916 case ISD::INSERT_VECTOR_ELT:
16917 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16918 return V;
16919 break;
16920 case RISCVISD::VFMV_V_F_VL: {
16921 const MVT VT = N->getSimpleValueType(0);
16922 SDValue Passthru = N->getOperand(0);
16923 SDValue Scalar = N->getOperand(1);
16924 SDValue VL = N->getOperand(2);
16925
16926 // If VL is 1, we can use vfmv.s.f.
16927 if (isOneConstant(VL))
16928 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16929 break;
16930 }
16931 case RISCVISD::VMV_V_X_VL: {
16932 const MVT VT = N->getSimpleValueType(0);
16933 SDValue Passthru = N->getOperand(0);
16934 SDValue Scalar = N->getOperand(1);
16935 SDValue VL = N->getOperand(2);
16936
16937 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16938 // scalar input.
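 // For example, splatting an i64 scalar into a vector of i8 elements only
 // demands the low 8 bits of the scalar, so wider producers of the scalar
 // can often be simplified away.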
16939 unsigned ScalarSize = Scalar.getValueSizeInBits();
16940 unsigned EltWidth = VT.getScalarSizeInBits();
16941 if (ScalarSize > EltWidth && Passthru.isUndef())
16942 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16943 return SDValue(N, 0);
16944
16945 // If VL is 1 and the scalar value won't benefit from immediate, we can
16946 // use vmv.s.x.
16947 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16948 if (isOneConstant(VL) &&
16949 (!Const || Const->isZero() ||
16950 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16951 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16952
16953 break;
16954 }
16955 case RISCVISD::VFMV_S_F_VL: {
16956 SDValue Src = N->getOperand(1);
16957 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16958 // into an undef vector.
16959 // TODO: Could use a vslide or vmv.v.v for non-undef.
16960 if (N->getOperand(0).isUndef() &&
16961 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16962 isNullConstant(Src.getOperand(1)) &&
16963 Src.getOperand(0).getValueType().isScalableVector()) {
16964 EVT VT = N->getValueType(0);
16965 EVT SrcVT = Src.getOperand(0).getValueType();
16967 // Widths match, just return the original vector.
16968 if (SrcVT == VT)
16969 return Src.getOperand(0);
16970 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16971 }
16972 [[fallthrough]];
16973 }
16974 case RISCVISD::VMV_S_X_VL: {
16975 const MVT VT = N->getSimpleValueType(0);
16976 SDValue Passthru = N->getOperand(0);
16977 SDValue Scalar = N->getOperand(1);
16978 SDValue VL = N->getOperand(2);
16979
16980 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
16981 Scalar.getOperand(0).getValueType() == N->getValueType(0))
16982 return Scalar.getOperand(0);
16983
16984 // Use M1 or smaller to avoid over constraining register allocation
16985 const MVT M1VT = getLMUL1VT(VT);
16986 if (M1VT.bitsLT(VT)) {
16987 SDValue M1Passthru =
16988 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16989 DAG.getVectorIdxConstant(0, DL));
16990 SDValue Result =
16991 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16992 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16993 DAG.getVectorIdxConstant(0, DL));
16994 return Result;
16995 }
16996
16997 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16998 // higher would involve overly constraining the register allocator for
16999 // no purpose.
17000 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17001 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17002 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17003 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17004
17005 break;
17006 }
17007 case RISCVISD::VMV_X_S: {
17008 SDValue Vec = N->getOperand(0);
17009 MVT VecVT = N->getOperand(0).getSimpleValueType();
17010 const MVT M1VT = getLMUL1VT(VecVT);
17011 if (M1VT.bitsLT(VecVT)) {
17012 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17013 DAG.getVectorIdxConstant(0, DL));
17014 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17015 }
17016 break;
17017 }
17018 case ISD::INTRINSIC_VOID:
17019 case ISD::INTRINSIC_W_CHAIN:
17020 case ISD::INTRINSIC_WO_CHAIN: {
17021 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17022 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17023 switch (IntNo) {
17024 // By default we do not combine any intrinsic.
17025 default:
17026 return SDValue();
17027 case Intrinsic::riscv_masked_strided_load: {
17028 MVT VT = N->getSimpleValueType(0);
17029 auto *Load = cast<MemIntrinsicSDNode>(N);
17030 SDValue PassThru = N->getOperand(2);
17031 SDValue Base = N->getOperand(3);
17032 SDValue Stride = N->getOperand(4);
17033 SDValue Mask = N->getOperand(5);
17034
17035 // If the stride is equal to the element size in bytes, we can use
17036 // a masked.load.
17037 const unsigned ElementSize = VT.getScalarStoreSize();
17038 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17039 StrideC && StrideC->getZExtValue() == ElementSize)
17040 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
17041 DAG.getUNDEF(XLenVT), Mask, PassThru,
17042 Load->getMemoryVT(), Load->getMemOperand(),
17043 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17044 return SDValue();
17045 }
17046 case Intrinsic::riscv_masked_strided_store: {
17047 auto *Store = cast<MemIntrinsicSDNode>(N);
17048 SDValue Value = N->getOperand(2);
17049 SDValue Base = N->getOperand(3);
17050 SDValue Stride = N->getOperand(4);
17051 SDValue Mask = N->getOperand(5);
17052
17053 // If the stride is equal to the element size in bytes, we can use
17054 // a masked.store.
17055 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
17056 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17057 StrideC && StrideC->getZExtValue() == ElementSize)
17058 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
17059 DAG.getUNDEF(XLenVT), Mask,
17060 Value.getValueType(), Store->getMemOperand(),
17061 ISD::UNINDEXED, false);
17062 return SDValue();
17063 }
17064 case Intrinsic::riscv_vcpop:
17065 case Intrinsic::riscv_vcpop_mask:
17066 case Intrinsic::riscv_vfirst:
17067 case Intrinsic::riscv_vfirst_mask: {
17068 SDValue VL = N->getOperand(2);
17069 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17070 IntNo == Intrinsic::riscv_vfirst_mask)
17071 VL = N->getOperand(3);
17072 if (!isNullConstant(VL))
17073 return SDValue();
17074 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17075 SDLoc DL(N);
17076 EVT VT = N->getValueType(0);
17077 if (IntNo == Intrinsic::riscv_vfirst ||
17078 IntNo == Intrinsic::riscv_vfirst_mask)
17079 return DAG.getConstant(-1, DL, VT);
17080 return DAG.getConstant(0, DL, VT);
17081 }
17082 }
17083 }
17084 case ISD::BITCAST: {
17085 assert(Subtarget.useRVVForFixedLengthVectors());
17086 SDValue N0 = N->getOperand(0);
17087 EVT VT = N->getValueType(0);
17088 EVT SrcVT = N0.getValueType();
17089 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17090 // type, widen both sides to avoid a trip through memory.
17091 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17092 VT.isScalarInteger()) {
17093 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17094 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17095 Ops[0] = N0;
17096 SDLoc DL(N);
17097 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17098 N0 = DAG.getBitcast(MVT::i8, N0);
17099 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17100 }
17101
17102 return SDValue();
17103 }
17104 }
17105
17106 return SDValue();
17107}
17108
17110 EVT XVT, unsigned KeptBits) const {
17111 // For vectors, we don't have a preference.
17112 if (XVT.isVector())
17113 return false;
17114
17115 if (XVT != MVT::i32 && XVT != MVT::i64)
17116 return false;
17117
17118 // We can use sext.w for RV64 or an srai 31 on RV32.
17119 if (KeptBits == 32 || KeptBits == 64)
17120 return true;
17121
17122 // With Zbb we can use sext.h/sext.b.
17123 return Subtarget.hasStdExtZbb() &&
17124 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17125 KeptBits == 16);
17126}
17127
17129 const SDNode *N, CombineLevel Level) const {
17130 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17131 N->getOpcode() == ISD::SRL) &&
17132 "Expected shift op");
17133
17134 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17135 // materialised in fewer instructions than `(OP _, c1)`:
17136 //
17137 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17138 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
17139 SDValue N0 = N->getOperand(0);
17140 EVT Ty = N0.getValueType();
17141 if (Ty.isScalarInteger() &&
17142 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17143 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17144 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17145 if (C1 && C2) {
17146 const APInt &C1Int = C1->getAPIntValue();
17147 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17148
17149 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17150 // and the combine should happen, to potentially allow further combines
17151 // later.
17152 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17153 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17154 return true;
17155
17156 // We can materialise `c1` in an add immediate, so it's "free", and the
17157 // combine should be prevented.
17158 if (C1Int.getSignificantBits() <= 64 &&
17160 return false;
17161
17162 // Neither constant will fit into an immediate, so find materialisation
17163 // costs.
17164 int C1Cost =
17165 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17166 /*CompressionCost*/ true);
17167 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17168 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17169 /*CompressionCost*/ true);
17170
17171 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17172 // combine should be prevented.
17173 if (C1Cost < ShiftedC1Cost)
17174 return false;
17175 }
17176 }
17177 return true;
17178}
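// A small worked example of the cost rule above (plain C++ sketch; isSImm12
// is a hypothetical stand-in for isLegalAddImmediate):
// ```
// #include <cstdint>
//
// static bool isSImm12(int64_t C) { return C >= -2048 && C <= 2047; }
//
// // (shl (add x, 37), 3) -> (add (shl x, 3), 296): 296 still fits an addi
// // immediate, so the commuted form stays free and the fold is desirable.
// // (shl (add x, 2000), 4): 2000 << 4 == 32000 no longer fits, while 2000
// // does, so the fold is rejected and the original constant is kept.
// static bool desirableToCommute(int64_t C1, unsigned C2) {
//   int64_t ShiftedC1 = static_cast<int64_t>(static_cast<uint64_t>(C1) << C2);
//   if (isSImm12(ShiftedC1)) return true;  // shifted constant is free
//   if (isSImm12(C1))        return false; // original constant is free
//   return true; // otherwise fall back to comparing materialisation costs
// }
// ```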
17179
17180bool RISCVTargetLowering::targetShrinkDemandedConstant(
17181 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17182 TargetLoweringOpt &TLO) const {
17183 // Delay this optimization as late as possible.
17184 if (!TLO.LegalOps)
17185 return false;
17186
17187 EVT VT = Op.getValueType();
17188 if (VT.isVector())
17189 return false;
17190
17191 unsigned Opcode = Op.getOpcode();
17192 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17193 return false;
17194
17195 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17196 if (!C)
17197 return false;
17198
17199 const APInt &Mask = C->getAPIntValue();
17200
17201 // Clear all non-demanded bits initially.
17202 APInt ShrunkMask = Mask & DemandedBits;
17203
17204 // Try to make a smaller immediate by setting undemanded bits.
17205
17206 APInt ExpandedMask = Mask | ~DemandedBits;
17207
17208 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17209 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17210 };
17211 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17212 if (NewMask == Mask)
17213 return true;
17214 SDLoc DL(Op);
17215 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17216 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17217 Op.getOperand(0), NewC);
17218 return TLO.CombineTo(Op, NewOp);
17219 };
17220
17221 // If the shrunk mask fits in sign extended 12 bits, let the target
17222 // independent code apply it.
17223 if (ShrunkMask.isSignedIntN(12))
17224 return false;
17225
17226 // AND has a few special cases for zext.
17227 if (Opcode == ISD::AND) {
17228 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17229 // otherwise use SLLI + SRLI.
17230 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17231 if (IsLegalMask(NewMask))
17232 return UseMask(NewMask);
17233
17234 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17235 if (VT == MVT::i64) {
17236 APInt NewMask = APInt(64, 0xffffffff);
17237 if (IsLegalMask(NewMask))
17238 return UseMask(NewMask);
17239 }
17240 }
17241
17242 // For the remaining optimizations, we need to be able to make a negative
17243 // number through a combination of mask and undemanded bits.
17244 if (!ExpandedMask.isNegative())
17245 return false;
17246
17247 // What is the fewest number of bits we need to represent the negative number?
17248 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17249
17250 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17251 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17252 // If we can't create a simm12, we shouldn't change opaque constants.
17253 APInt NewMask = ShrunkMask;
17254 if (MinSignedBits <= 12)
17255 NewMask.setBitsFrom(11);
17256 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17257 NewMask.setBitsFrom(31);
17258 else
17259 return false;
17260
17261 // Check that our new mask is a subset of the demanded mask.
17262 assert(IsLegalMask(NewMask));
17263 return UseMask(NewMask);
17264}
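// For illustration, the invariant behind the mask rewriting above, restated
// over plain 64-bit values (a sketch, not the APInt logic): any NewMask that
// agrees with Mask on every demanded bit computes the same demanded result,
// so the cheapest such mask can be chosen.
// ```
// #include <cstdint>
//
// static bool agreesOnDemandedBits(uint64_t Mask, uint64_t NewMask,
//                                  uint64_t Demanded) {
//   return (Mask & Demanded) == (NewMask & Demanded);
// }
//
// // Example: Mask = 0xff0000000000ffff with only the low 32 bits demanded may
// // be replaced by 0xffff, turning a multi-instruction constant into a single
// // zext.h (or an slli+srli pair).
// ```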
17265
17266static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17267 static const uint64_t GREVMasks[] = {
17268 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17269 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17270
17271 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17272 unsigned Shift = 1 << Stage;
17273 if (ShAmt & Shift) {
17274 uint64_t Mask = GREVMasks[Stage];
17275 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17276 if (IsGORC)
17277 Res |= x;
17278 x = Res;
17279 }
17280 }
17281
17282 return x;
17283}
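// A usage sketch of the helper above (values checked by hand): with a control
// value of 7, it models brev8 and orc.b, which is how the known-bits code
// below uses it.
// ```
// // computeGREVOrGORC(0x0000000000000001, 7, /*IsGORC=*/false) == 0x80
// //   (brev8: the low bit of each byte moves to that byte's high bit)
// // computeGREVOrGORC(0x1000000000000010, 7, /*IsGORC=*/true)
// //     == 0xff000000000000ff
// //   (orc.b: any byte with at least one set bit becomes 0xff)
// ```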
17284
17285void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17286 KnownBits &Known,
17287 const APInt &DemandedElts,
17288 const SelectionDAG &DAG,
17289 unsigned Depth) const {
17290 unsigned BitWidth = Known.getBitWidth();
17291 unsigned Opc = Op.getOpcode();
17292 assert((Opc >= ISD::BUILTIN_OP_END ||
17293 Opc == ISD::INTRINSIC_WO_CHAIN ||
17294 Opc == ISD::INTRINSIC_W_CHAIN ||
17295 Opc == ISD::INTRINSIC_VOID) &&
17296 "Should use MaskedValueIsZero if you don't know whether Op"
17297 " is a target node!");
17298
17299 Known.resetAll();
17300 switch (Opc) {
17301 default: break;
17302 case RISCVISD::SELECT_CC: {
17303 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17304 // If we don't know any bits, early out.
17305 if (Known.isUnknown())
17306 break;
17307 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17308
17309 // Only known if known in both the LHS and RHS.
17310 Known = Known.intersectWith(Known2);
17311 break;
17312 }
17313 case RISCVISD::CZERO_EQZ:
17314 case RISCVISD::CZERO_NEZ:
17315 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17316 // Result is either all zero or operand 0. We can propagate zeros, but not
17317 // ones.
17318 Known.One.clearAllBits();
17319 break;
17320 case RISCVISD::REMUW: {
17321 KnownBits Known2;
17322 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17323 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17324 // We only care about the lower 32 bits.
17325 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17326 // Restore the original width by sign extending.
17327 Known = Known.sext(BitWidth);
17328 break;
17329 }
17330 case RISCVISD::DIVUW: {
17331 KnownBits Known2;
17332 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17333 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17334 // We only care about the lower 32 bits.
17335 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17336 // Restore the original width by sign extending.
17337 Known = Known.sext(BitWidth);
17338 break;
17339 }
17340 case RISCVISD::SLLW: {
17341 KnownBits Known2;
17342 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17343 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17344 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17345 // Restore the original width by sign extending.
17346 Known = Known.sext(BitWidth);
17347 break;
17348 }
17349 case RISCVISD::CTZW: {
17350 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17351 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17352 unsigned LowBits = llvm::bit_width(PossibleTZ);
17353 Known.Zero.setBitsFrom(LowBits);
17354 break;
17355 }
17356 case RISCVISD::CLZW: {
17357 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17358 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17359 unsigned LowBits = llvm::bit_width(PossibleLZ);
17360 Known.Zero.setBitsFrom(LowBits);
17361 break;
17362 }
17363 case RISCVISD::BREV8:
17364 case RISCVISD::ORC_B: {
17365 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17366 // control value of 7 is equivalent to brev8 and orc.b.
17367 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17368 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17369 // To compute zeros, we need to invert the value and invert it back after.
17370 Known.Zero =
17371 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17372 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17373 break;
17374 }
17375 case RISCVISD::READ_VLENB: {
17376 // We can use the minimum and maximum VLEN values to bound VLENB. We
17377 // know VLEN must be a power of two.
17378 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17379 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17380 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17381 Known.Zero.setLowBits(Log2_32(MinVLenB));
17382 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17383 if (MaxVLenB == MinVLenB)
17384 Known.One.setBit(Log2_32(MinVLenB));
17385 break;
17386 }
17387 case RISCVISD::FCLASS: {
17388 // fclass will only set one of the low 10 bits.
17389 Known.Zero.setBitsFrom(10);
17390 break;
17391 }
17392 case ISD::INTRINSIC_W_CHAIN:
17393 case ISD::INTRINSIC_WO_CHAIN: {
17394 unsigned IntNo =
17395 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17396 switch (IntNo) {
17397 default:
17398 // We can't do anything for most intrinsics.
17399 break;
17400 case Intrinsic::riscv_vsetvli:
17401 case Intrinsic::riscv_vsetvlimax: {
17402 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17403 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17404 RISCVII::VLMUL VLMUL =
17405 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17406 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17407 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17408 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17409 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17410
17411 // The result of vsetvli must not be larger than AVL.
17412 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17413 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17414
17415 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17416 if (BitWidth > KnownZeroFirstBit)
17417 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17418 break;
17419 }
17420 }
17421 break;
17422 }
17423 }
17424}
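// For illustration, the VL bound used in the vsetvli case above, as a
// standalone sketch (plain C++ with a hand-checked example, not LLVM code):
// ```
// #include <cstdint>
//
// // E.g. VLENmax = 512, SEW = 32, LMUL = 2 gives VLMAX = 512 / 32 * 2 = 32,
// // so bits [6, XLEN) of the returned VL are known to be zero.
// static unsigned firstKnownZeroBit(uint64_t MaxVLen, uint64_t SEW,
//                                   uint64_t LMul, bool Fractional) {
//   uint64_t MaxVL = MaxVLen / SEW;
//   MaxVL = Fractional ? MaxVL / LMul : MaxVL * LMul;
//   unsigned FloorLog2 = 0;
//   while (MaxVL >> (FloorLog2 + 1))
//     ++FloorLog2;
//   return FloorLog2 + 1; // matches Log2_32(MaxVL) + 1 above
// }
// ```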
17425
17426unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17427 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17428 unsigned Depth) const {
17429 switch (Op.getOpcode()) {
17430 default:
17431 break;
17432 case RISCVISD::SELECT_CC: {
17433 unsigned Tmp =
17434 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17435 if (Tmp == 1) return 1; // Early out.
17436 unsigned Tmp2 =
17437 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17438 return std::min(Tmp, Tmp2);
17439 }
17440 case RISCVISD::CZERO_EQZ:
17441 case RISCVISD::CZERO_NEZ:
17442 // Output is either all zero or operand 0. We can propagate sign bit count
17443 // from operand 0.
17444 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17445 case RISCVISD::ABSW: {
17446 // We expand this at isel to negw+max. The result will have 33 sign bits
17447 // if the input has at least 33 sign bits.
17448 unsigned Tmp =
17449 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17450 if (Tmp < 33) return 1;
17451 return 33;
17452 }
17453 case RISCVISD::SLLW:
17454 case RISCVISD::SRAW:
17455 case RISCVISD::SRLW:
17456 case RISCVISD::DIVW:
17457 case RISCVISD::DIVUW:
17458 case RISCVISD::REMUW:
17459 case RISCVISD::ROLW:
17460 case RISCVISD::RORW:
17461 case RISCVISD::FCVT_W_RV64:
17462 case RISCVISD::FCVT_WU_RV64:
17463 case RISCVISD::STRICT_FCVT_W_RV64:
17464 case RISCVISD::STRICT_FCVT_WU_RV64:
17465 // TODO: As the result is sign-extended, this is conservatively correct. A
17466 // more precise answer could be calculated for SRAW depending on known
17467 // bits in the shift amount.
17468 return 33;
17469 case RISCVISD::VMV_X_S: {
17470 // The number of sign bits of the scalar result is computed by obtaining the
17471 // element type of the input vector operand, subtracting its width from the
17472 // XLEN, and then adding one (sign bit within the element type). If the
17473 // element type is wider than XLen, the least-significant XLEN bits are
17474 // taken.
17475 unsigned XLen = Subtarget.getXLen();
17476 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17477 if (EltBits <= XLen)
17478 return XLen - EltBits + 1;
17479 break;
17480 }
17481 case ISD::INTRINSIC_W_CHAIN: {
17482 unsigned IntNo = Op.getConstantOperandVal(1);
17483 switch (IntNo) {
17484 default:
17485 break;
17486 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17487 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17488 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17489 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17490 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17491 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17492 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17493 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17494 case Intrinsic::riscv_masked_cmpxchg_i64:
17495 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17496 // narrow atomic operation. These are implemented using atomic
17497 // operations at the minimum supported atomicrmw/cmpxchg width whose
17498 // result is then sign extended to XLEN. With +A, the minimum width is
17499 // 32 for both RV64 and RV32.
17500 assert(Subtarget.getXLen() == 64);
17502 assert(Subtarget.hasStdExtA());
17503 return 33;
17504 }
17505 break;
17506 }
17507 }
17508
17509 return 1;
17510}
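// A small numeric illustration of the VMV_X_S rule above (a sketch, not the
// DAG code): the extracted element is sign-extended to XLEN, so reading an i8
// element on RV64 guarantees 64 - 8 + 1 = 57 sign bits.
// ```
// static unsigned vmvXSSignBits(unsigned XLen, unsigned EltBits) {
//   return EltBits <= XLen ? XLen - EltBits + 1 : 1; // 1 means "unknown"
// }
// ```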
17511
17512bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17513 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17514 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17515
17516 // TODO: Add more target nodes.
17517 switch (Op.getOpcode()) {
17518 case RISCVISD::SELECT_CC:
17519 // Integer select_cc cannot create poison.
17520 // TODO: What are the FP poison semantics?
17521 // TODO: This instruction blocks poison from the unselected operand, can
17522 // we do anything with that?
17523 return !Op.getValueType().isInteger();
17524 }
17525 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17526 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17527}
17528
17529const Constant *
17530RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17531 assert(Ld && "Unexpected null LoadSDNode");
17532 if (!ISD::isNormalLoad(Ld))
17533 return nullptr;
17534
17535 SDValue Ptr = Ld->getBasePtr();
17536
17537 // Only constant pools with no offset are supported.
17538 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17539 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17540 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17541 CNode->getOffset() != 0)
17542 return nullptr;
17543
17544 return CNode;
17545 };
17546
17547 // Simple case, LLA.
17548 if (Ptr.getOpcode() == RISCVISD::LLA) {
17549 auto *CNode = GetSupportedConstantPool(Ptr);
17550 if (!CNode || CNode->getTargetFlags() != 0)
17551 return nullptr;
17552
17553 return CNode->getConstVal();
17554 }
17555
17556 // Look for a HI and ADD_LO pair.
17557 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17558 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17559 return nullptr;
17560
17561 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17562 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17563
17564 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17565 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17566 return nullptr;
17567
17568 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17569 return nullptr;
17570
17571 return CNodeLo->getConstVal();
17572}
17573
17574static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17575 MachineBasicBlock *BB) {
17576 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17577
17578 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17579 // Should the count have wrapped while it was being read, we need to try
17580 // again.
17581 // For example:
17582 // ```
17583 // read:
17584 // csrrs x3, counterh # load high word of counter
17585 // csrrs x2, counter # load low word of counter
17586 // csrrs x4, counterh # load high word of counter
17587 // bne x3, x4, read # check if high word reads match, otherwise try again
17588 // ```
17589
17590 MachineFunction &MF = *BB->getParent();
17591 const BasicBlock *LLVMBB = BB->getBasicBlock();
17593
17594 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17595 MF.insert(It, LoopMBB);
17596
17597 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17598 MF.insert(It, DoneMBB);
17599
17600 // Transfer the remainder of BB and its successor edges to DoneMBB.
17601 DoneMBB->splice(DoneMBB->begin(), BB,
17602 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17604
17605 BB->addSuccessor(LoopMBB);
17606
17608 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17609 Register LoReg = MI.getOperand(0).getReg();
17610 Register HiReg = MI.getOperand(1).getReg();
17611 int64_t LoCounter = MI.getOperand(2).getImm();
17612 int64_t HiCounter = MI.getOperand(3).getImm();
17613 DebugLoc DL = MI.getDebugLoc();
17614
17616 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17617 .addImm(HiCounter)
17618 .addReg(RISCV::X0);
17619 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17620 .addImm(LoCounter)
17621 .addReg(RISCV::X0);
17622 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17623 .addImm(HiCounter)
17624 .addReg(RISCV::X0);
17625
17626 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17627 .addReg(HiReg)
17628 .addReg(ReadAgainReg)
17629 .addMBB(LoopMBB);
17630
17631 LoopMBB->addSuccessor(LoopMBB);
17632 LoopMBB->addSuccessor(DoneMBB);
17633
17634 MI.eraseFromParent();
17635
17636 return DoneMBB;
17637}
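// The retry loop above, expressed as a plain C++ sketch (readHi/readLo are
// hypothetical stand-ins for the two CSR reads):
// ```
// #include <cstdint>
//
// static uint64_t readCounter64(uint32_t (*readHi)(), uint32_t (*readLo)()) {
//   uint32_t Hi, Lo;
//   do {
//     Hi = readHi();
//     Lo = readLo();
//   } while (readHi() != Hi); // high half changed: the low word wrapped
//   return (uint64_t(Hi) << 32) | Lo;
// }
// ```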
17638
17639static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17640 MachineBasicBlock *BB,
17641 const RISCVSubtarget &Subtarget) {
17642 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17643
17644 MachineFunction &MF = *BB->getParent();
17645 DebugLoc DL = MI.getDebugLoc();
17648 Register LoReg = MI.getOperand(0).getReg();
17649 Register HiReg = MI.getOperand(1).getReg();
17650 Register SrcReg = MI.getOperand(2).getReg();
17651
17652 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17653 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17654
17655 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17656 RI, Register());
17658 MachineMemOperand *MMOLo =
17662 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17663 .addFrameIndex(FI)
17664 .addImm(0)
17665 .addMemOperand(MMOLo);
17666 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17667 .addFrameIndex(FI)
17668 .addImm(4)
17669 .addMemOperand(MMOHi);
17670 MI.eraseFromParent(); // The pseudo instruction is gone now.
17671 return BB;
17672}
17673
17674static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17675 MachineBasicBlock *BB,
17676 const RISCVSubtarget &Subtarget) {
17677 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17678 "Unexpected instruction");
17679
17680 MachineFunction &MF = *BB->getParent();
17681 DebugLoc DL = MI.getDebugLoc();
17684 Register DstReg = MI.getOperand(0).getReg();
17685 Register LoReg = MI.getOperand(1).getReg();
17686 Register HiReg = MI.getOperand(2).getReg();
17687
17688 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17689 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17690
17692 MachineMemOperand *MMOLo =
17696 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17697 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17698 .addFrameIndex(FI)
17699 .addImm(0)
17700 .addMemOperand(MMOLo);
17701 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17702 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17703 .addFrameIndex(FI)
17704 .addImm(4)
17705 .addMemOperand(MMOHi);
17706 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17707 MI.eraseFromParent(); // The pseudo instruction is gone now.
17708 return BB;
17709}
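// What the stack-slot round trip above computes, as a scalar C++ sketch
// (little-endian layout assumed, matching the 0 and 4 byte offsets):
// ```
// #include <cstdint>
// #include <cstring>
//
// static double buildPairF64(uint32_t Lo, uint32_t Hi) {
//   uint64_t Bits = (uint64_t(Hi) << 32) | Lo;
//   double D;
//   std::memcpy(&D, &Bits, sizeof(D));
//   return D; // SplitF64 is the inverse: copy the double back out as Lo/Hi
// }
// ```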
17710
17711static bool isSelectPseudo(MachineInstr &MI) {
17712 switch (MI.getOpcode()) {
17713 default:
17714 return false;
17715 case RISCV::Select_GPR_Using_CC_GPR:
17716 case RISCV::Select_GPR_Using_CC_Imm:
17717 case RISCV::Select_FPR16_Using_CC_GPR:
17718 case RISCV::Select_FPR16INX_Using_CC_GPR:
17719 case RISCV::Select_FPR32_Using_CC_GPR:
17720 case RISCV::Select_FPR32INX_Using_CC_GPR:
17721 case RISCV::Select_FPR64_Using_CC_GPR:
17722 case RISCV::Select_FPR64INX_Using_CC_GPR:
17723 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17724 return true;
17725 }
17726}
17727
17728static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17729 unsigned RelOpcode, unsigned EqOpcode,
17730 const RISCVSubtarget &Subtarget) {
17731 DebugLoc DL = MI.getDebugLoc();
17732 Register DstReg = MI.getOperand(0).getReg();
17733 Register Src1Reg = MI.getOperand(1).getReg();
17734 Register Src2Reg = MI.getOperand(2).getReg();
17736 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17738
17739 // Save the current FFLAGS.
17740 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17741
17742 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17743 .addReg(Src1Reg)
17744 .addReg(Src2Reg);
17747
17748 // Restore the FFLAGS.
17749 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17750 .addReg(SavedFFlags, RegState::Kill);
17751
17752 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17753 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17754 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17755 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17758
17759 // Erase the pseudoinstruction.
17760 MI.eraseFromParent();
17761 return BB;
17762}
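// A scalar sketch of the quiet-compare semantics being emulated (plain C++
// with <cfenv>; assumes FENV_ACCESS is honoured, so it is illustrative rather
// than a drop-in replacement):
// ```
// #include <cfenv>
//
// static bool quietLessThan(double A, double B) {
//   fexcept_t Saved;
//   std::fegetexceptflag(&Saved, FE_ALL_EXCEPT); // save FFLAGS
//   bool Res = A < B;                    // FLT: raises Invalid for any NaN
//   std::fesetexceptflag(&Saved, FE_ALL_EXCEPT); // restore FFLAGS
//   (void)(A == B);                      // FEQ: raises Invalid only for sNaN
//   return Res;
// }
// ```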
17763
17764static MachineBasicBlock *
17765EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17766 MachineBasicBlock *ThisMBB,
17767 const RISCVSubtarget &Subtarget) {
17768 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17769 // Without this, custom-inserter would have generated:
17770 //
17771 // A
17772 // | \
17773 // | B
17774 // | /
17775 // C
17776 // | \
17777 // | D
17778 // | /
17779 // E
17780 //
17781 // A: X = ...; Y = ...
17782 // B: empty
17783 // C: Z = PHI [X, A], [Y, B]
17784 // D: empty
17785 // E: PHI [X, C], [Z, D]
17786 //
17787 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17788 //
17789 // A
17790 // | \
17791 // | C
17792 // | /|
17793 // |/ |
17794 // | |
17795 // | D
17796 // | /
17797 // E
17798 //
17799 // A: X = ...; Y = ...
17800 // D: empty
17801 // E: PHI [X, A], [X, C], [Y, D]
17802
17803 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17804 const DebugLoc &DL = First.getDebugLoc();
17805 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17806 MachineFunction *F = ThisMBB->getParent();
17807 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17808 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17809 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17810 MachineFunction::iterator It = ++ThisMBB->getIterator();
17811 F->insert(It, FirstMBB);
17812 F->insert(It, SecondMBB);
17813 F->insert(It, SinkMBB);
17814
17815 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17816 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17818 ThisMBB->end());
17819 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17820
17821 // Fallthrough block for ThisMBB.
17822 ThisMBB->addSuccessor(FirstMBB);
17823 // Fallthrough block for FirstMBB.
17824 FirstMBB->addSuccessor(SecondMBB);
17825 ThisMBB->addSuccessor(SinkMBB);
17826 FirstMBB->addSuccessor(SinkMBB);
17827 // This is fallthrough.
17828 SecondMBB->addSuccessor(SinkMBB);
17829
17830 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17831 Register FLHS = First.getOperand(1).getReg();
17832 Register FRHS = First.getOperand(2).getReg();
17833 // Insert appropriate branch.
17834 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17835 .addReg(FLHS)
17836 .addReg(FRHS)
17837 .addMBB(SinkMBB);
17838
17839 Register SLHS = Second.getOperand(1).getReg();
17840 Register SRHS = Second.getOperand(2).getReg();
17841 Register Op1Reg4 = First.getOperand(4).getReg();
17842 Register Op1Reg5 = First.getOperand(5).getReg();
17843
17844 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17845 // Insert appropriate branch.
17846 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17847 .addReg(SLHS)
17848 .addReg(SRHS)
17849 .addMBB(SinkMBB);
17850
17851 Register DestReg = Second.getOperand(0).getReg();
17852 Register Op2Reg4 = Second.getOperand(4).getReg();
17853 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17854 .addReg(Op2Reg4)
17855 .addMBB(ThisMBB)
17856 .addReg(Op1Reg4)
17857 .addMBB(FirstMBB)
17858 .addReg(Op1Reg5)
17859 .addMBB(SecondMBB);
17860
17861 // Now remove the Select_FPRX_s.
17862 First.eraseFromParent();
17863 Second.eraseFromParent();
17864 return SinkMBB;
17865}
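// The source-level shape that produces this cascaded pattern (hypothetical
// C++ example): the inner select feeds the false operand of the outer one and
// shares its true value, so one sink block with a three-way PHI suffices.
// ```
// static double cascadedSelect(bool C1, bool C2, double X, double Y) {
//   return C1 ? X : (C2 ? X : Y); // lowers to two Select_FPRX_ pseudos
// }
// ```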
17866
17867static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17868 MachineBasicBlock *BB,
17869 const RISCVSubtarget &Subtarget) {
17870 // To "insert" Select_* instructions, we actually have to insert the triangle
17871 // control-flow pattern. The incoming instructions know the destination vreg
17872 // to set, the condition code register to branch on, the true/false values to
17873 // select between, and the condcode to use to select the appropriate branch.
17874 //
17875 // We produce the following control flow:
17876 // HeadMBB
17877 // | \
17878 // | IfFalseMBB
17879 // | /
17880 // TailMBB
17881 //
17882 // When we find a sequence of selects we attempt to optimize their emission
17883 // by sharing the control flow. Currently we only handle cases where we have
17884 // multiple selects with the exact same condition (same LHS, RHS and CC).
17885 // The selects may be interleaved with other instructions if the other
17886 // instructions meet some requirements we deem safe:
17887 // - They are not pseudo instructions.
17888 // - They are debug instructions, or otherwise:
17889 // - They do not have side-effects, do not access memory and their inputs do
17890 // not depend on the results of the select pseudo-instructions.
17891 // The TrueV/FalseV operands of the selects cannot depend on the result of
17892 // previous selects in the sequence.
17893 // These conditions could be further relaxed. See the X86 target for a
17894 // related approach and more information.
17895 //
17896 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17897 // is checked here and handled by a separate function -
17898 // EmitLoweredCascadedSelect.
17899 Register LHS = MI.getOperand(1).getReg();
17900 Register RHS;
17901 if (MI.getOperand(2).isReg())
17902 RHS = MI.getOperand(2).getReg();
17903 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17904
17905 SmallVector<MachineInstr *, 4> SelectDebugValues;
17906 SmallSet<Register, 4> SelectDests;
17907 SelectDests.insert(MI.getOperand(0).getReg());
17908
17909 MachineInstr *LastSelectPseudo = &MI;
17910 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17911 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
17912 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
17913 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
17914 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17915 Next->getOperand(5).isKill()) {
17916 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17917 }
17918
17919 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17920 SequenceMBBI != E; ++SequenceMBBI) {
17921 if (SequenceMBBI->isDebugInstr())
17922 continue;
17923 if (isSelectPseudo(*SequenceMBBI)) {
17924 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17925 !SequenceMBBI->getOperand(2).isReg() ||
17926 SequenceMBBI->getOperand(2).getReg() != RHS ||
17927 SequenceMBBI->getOperand(3).getImm() != CC ||
17928 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17929 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17930 break;
17931 LastSelectPseudo = &*SequenceMBBI;
17932 SequenceMBBI->collectDebugValues(SelectDebugValues);
17933 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17934 continue;
17935 }
17936 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17937 SequenceMBBI->mayLoadOrStore() ||
17938 SequenceMBBI->usesCustomInsertionHook())
17939 break;
17940 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17941 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17942 }))
17943 break;
17944 }
17945
17946 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17947 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17948 DebugLoc DL = MI.getDebugLoc();
17950
17951 MachineBasicBlock *HeadMBB = BB;
17952 MachineFunction *F = BB->getParent();
17953 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17954 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17955
17956 F->insert(I, IfFalseMBB);
17957 F->insert(I, TailMBB);
17958
17959 // Transfer debug instructions associated with the selects to TailMBB.
17960 for (MachineInstr *DebugInstr : SelectDebugValues) {
17961 TailMBB->push_back(DebugInstr->removeFromParent());
17962 }
17963
17964 // Move all instructions after the sequence to TailMBB.
17965 TailMBB->splice(TailMBB->end(), HeadMBB,
17966 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17967 // Update machine-CFG edges by transferring all successors of the current
17968 // block to the new block which will contain the Phi nodes for the selects.
17969 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17970 // Set the successors for HeadMBB.
17971 HeadMBB->addSuccessor(IfFalseMBB);
17972 HeadMBB->addSuccessor(TailMBB);
17973
17974 // Insert appropriate branch.
17975 if (MI.getOperand(2).isImm())
17976 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
17977 .addReg(LHS)
17978 .addImm(MI.getOperand(2).getImm())
17979 .addMBB(TailMBB);
17980 else
17981 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17982 .addReg(LHS)
17983 .addReg(RHS)
17984 .addMBB(TailMBB);
17985
17986 // IfFalseMBB just falls through to TailMBB.
17987 IfFalseMBB->addSuccessor(TailMBB);
17988
17989 // Create PHIs for all of the select pseudo-instructions.
17990 auto SelectMBBI = MI.getIterator();
17991 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17992 auto InsertionPoint = TailMBB->begin();
17993 while (SelectMBBI != SelectEnd) {
17994 auto Next = std::next(SelectMBBI);
17995 if (isSelectPseudo(*SelectMBBI)) {
17996 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17997 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17998 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17999 .addReg(SelectMBBI->getOperand(4).getReg())
18000 .addMBB(HeadMBB)
18001 .addReg(SelectMBBI->getOperand(5).getReg())
18002 .addMBB(IfFalseMBB);
18003 SelectMBBI->eraseFromParent();
18004 }
18005 SelectMBBI = Next;
18006 }
18007
18008 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18009 return TailMBB;
18010}
18011
18012// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18013static const RISCV::RISCVMaskedPseudoInfo *
18014lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18016 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18017 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18018 const RISCV::RISCVMaskedPseudoInfo *Masked =
18019 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18020 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18021 return Masked;
18022}
18023
18024static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18025 MachineBasicBlock *BB,
18026 unsigned CVTXOpc) {
18027 DebugLoc DL = MI.getDebugLoc();
18028
18030
18032 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18033
18034 // Save the old value of FFLAGS.
18035 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18036
18037 assert(MI.getNumOperands() == 7);
18038
18039 // Emit a VFCVT_X_F
18040 const TargetRegisterInfo *TRI =
18042 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18043 Register Tmp = MRI.createVirtualRegister(RC);
18044 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18045 .add(MI.getOperand(1))
18046 .add(MI.getOperand(2))
18047 .add(MI.getOperand(3))
18048 .add(MachineOperand::CreateImm(7)) // frm = DYN
18049 .add(MI.getOperand(4))
18050 .add(MI.getOperand(5))
18051 .add(MI.getOperand(6))
18052 .add(MachineOperand::CreateReg(RISCV::FRM,
18053 /*IsDef*/ false,
18054 /*IsImp*/ true));
18055
18056 // Emit a VFCVT_F_X
18057 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18058 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18059 // There is no E8 variant for VFCVT_F_X.
18060 assert(Log2SEW >= 4);
18061 unsigned CVTFOpc =
18062 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18063 ->MaskedPseudo;
18064
18065 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18066 .add(MI.getOperand(0))
18067 .add(MI.getOperand(1))
18068 .addReg(Tmp)
18069 .add(MI.getOperand(3))
18070 .add(MachineOperand::CreateImm(7)) // frm = DYN
18071 .add(MI.getOperand(4))
18072 .add(MI.getOperand(5))
18073 .add(MI.getOperand(6))
18074 .add(MachineOperand::CreateReg(RISCV::FRM,
18075 /*IsDef*/ false,
18076 /*IsImp*/ true));
18077
18078 // Restore FFLAGS.
18079 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18080 .addReg(SavedFFLAGS, RegState::Kill);
18081
18082 // Erase the pseudoinstruction.
18083 MI.eraseFromParent();
18084 return BB;
18085}
18086
18087static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18088 const RISCVSubtarget &Subtarget) {
18089 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18090 const TargetRegisterClass *RC;
18091 switch (MI.getOpcode()) {
18092 default:
18093 llvm_unreachable("Unexpected opcode");
18094 case RISCV::PseudoFROUND_H:
18095 CmpOpc = RISCV::FLT_H;
18096 F2IOpc = RISCV::FCVT_W_H;
18097 I2FOpc = RISCV::FCVT_H_W;
18098 FSGNJOpc = RISCV::FSGNJ_H;
18099 FSGNJXOpc = RISCV::FSGNJX_H;
18100 RC = &RISCV::FPR16RegClass;
18101 break;
18102 case RISCV::PseudoFROUND_H_INX:
18103 CmpOpc = RISCV::FLT_H_INX;
18104 F2IOpc = RISCV::FCVT_W_H_INX;
18105 I2FOpc = RISCV::FCVT_H_W_INX;
18106 FSGNJOpc = RISCV::FSGNJ_H_INX;
18107 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18108 RC = &RISCV::GPRF16RegClass;
18109 break;
18110 case RISCV::PseudoFROUND_S:
18111 CmpOpc = RISCV::FLT_S;
18112 F2IOpc = RISCV::FCVT_W_S;
18113 I2FOpc = RISCV::FCVT_S_W;
18114 FSGNJOpc = RISCV::FSGNJ_S;
18115 FSGNJXOpc = RISCV::FSGNJX_S;
18116 RC = &RISCV::FPR32RegClass;
18117 break;
18118 case RISCV::PseudoFROUND_S_INX:
18119 CmpOpc = RISCV::FLT_S_INX;
18120 F2IOpc = RISCV::FCVT_W_S_INX;
18121 I2FOpc = RISCV::FCVT_S_W_INX;
18122 FSGNJOpc = RISCV::FSGNJ_S_INX;
18123 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18124 RC = &RISCV::GPRF32RegClass;
18125 break;
18126 case RISCV::PseudoFROUND_D:
18127 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18128 CmpOpc = RISCV::FLT_D;
18129 F2IOpc = RISCV::FCVT_L_D;
18130 I2FOpc = RISCV::FCVT_D_L;
18131 FSGNJOpc = RISCV::FSGNJ_D;
18132 FSGNJXOpc = RISCV::FSGNJX_D;
18133 RC = &RISCV::FPR64RegClass;
18134 break;
18135 case RISCV::PseudoFROUND_D_INX:
18136 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18137 CmpOpc = RISCV::FLT_D_INX;
18138 F2IOpc = RISCV::FCVT_L_D_INX;
18139 I2FOpc = RISCV::FCVT_D_L_INX;
18140 FSGNJOpc = RISCV::FSGNJ_D_INX;
18141 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18142 RC = &RISCV::GPRRegClass;
18143 break;
18144 }
18145
18146 const BasicBlock *BB = MBB->getBasicBlock();
18147 DebugLoc DL = MI.getDebugLoc();
18149
18151 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18152 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18153
18154 F->insert(I, CvtMBB);
18155 F->insert(I, DoneMBB);
18156 // Move all instructions after the sequence to DoneMBB.
18157 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18158 MBB->end());
18159 // Update machine-CFG edges by transferring all successors of the current
18160 // block to the new block which will contain the Phi nodes for the selects.
18162 // Set the successors for MBB.
18163 MBB->addSuccessor(CvtMBB);
18164 MBB->addSuccessor(DoneMBB);
18165
18166 Register DstReg = MI.getOperand(0).getReg();
18167 Register SrcReg = MI.getOperand(1).getReg();
18168 Register MaxReg = MI.getOperand(2).getReg();
18169 int64_t FRM = MI.getOperand(3).getImm();
18170
18171 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18173
18174 Register FabsReg = MRI.createVirtualRegister(RC);
18175 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18176
18177 // Compare the FP value to the max value.
18178 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18179 auto MIB =
18180 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18183
18184 // Insert branch.
18185 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18186 .addReg(CmpReg)
18187 .addReg(RISCV::X0)
18188 .addMBB(DoneMBB);
18189
18190 CvtMBB->addSuccessor(DoneMBB);
18191
18192 // Convert to integer.
18193 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18194 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18197
18198 // Convert back to FP.
18199 Register I2FReg = MRI.createVirtualRegister(RC);
18200 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18203
18204 // Restore the sign bit.
18205 Register CvtReg = MRI.createVirtualRegister(RC);
18206 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18207
18208 // Merge the results.
18209 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18210 .addReg(SrcReg)
18211 .addMBB(MBB)
18212 .addReg(CvtReg)
18213 .addMBB(CvtMBB);
18214
18215 MI.eraseFromParent();
18216 return DoneMBB;
18217}
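// A scalar sketch of the expansion above (plain C++; 0x1p23f is the float
// threshold above which every value is already integral):
// ```
// #include <cmath>
//
// static float froundSketch(float X) {
//   const float MaxIntegralMagnitude = 0x1p23f;
//   if (std::fabs(X) < MaxIntegralMagnitude)    // the FLT against the max
//     X = std::copysign(std::nearbyintf(X), X); // convert, then restore sign
//   return X;                                   // otherwise already integral
// }
// ```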
18218
18219MachineBasicBlock *
18220RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18221 MachineBasicBlock *BB) const {
18222 switch (MI.getOpcode()) {
18223 default:
18224 llvm_unreachable("Unexpected instr type to insert");
18225 case RISCV::ReadCounterWide:
18226 assert(!Subtarget.is64Bit() &&
18227 "ReadCounterWide is only to be used on riscv32");
18228 return emitReadCounterWidePseudo(MI, BB);
18229 case RISCV::Select_GPR_Using_CC_GPR:
18230 case RISCV::Select_GPR_Using_CC_Imm:
18231 case RISCV::Select_FPR16_Using_CC_GPR:
18232 case RISCV::Select_FPR16INX_Using_CC_GPR:
18233 case RISCV::Select_FPR32_Using_CC_GPR:
18234 case RISCV::Select_FPR32INX_Using_CC_GPR:
18235 case RISCV::Select_FPR64_Using_CC_GPR:
18236 case RISCV::Select_FPR64INX_Using_CC_GPR:
18237 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18238 return emitSelectPseudo(MI, BB, Subtarget);
18239 case RISCV::BuildPairF64Pseudo:
18240 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18241 case RISCV::SplitF64Pseudo:
18242 return emitSplitF64Pseudo(MI, BB, Subtarget);
18243 case RISCV::PseudoQuietFLE_H:
18244 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18245 case RISCV::PseudoQuietFLE_H_INX:
18246 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18247 case RISCV::PseudoQuietFLT_H:
18248 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18249 case RISCV::PseudoQuietFLT_H_INX:
18250 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18251 case RISCV::PseudoQuietFLE_S:
18252 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18253 case RISCV::PseudoQuietFLE_S_INX:
18254 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18255 case RISCV::PseudoQuietFLT_S:
18256 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18257 case RISCV::PseudoQuietFLT_S_INX:
18258 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18259 case RISCV::PseudoQuietFLE_D:
18260 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18261 case RISCV::PseudoQuietFLE_D_INX:
18262 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18263 case RISCV::PseudoQuietFLE_D_IN32X:
18264 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18265 Subtarget);
18266 case RISCV::PseudoQuietFLT_D:
18267 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18268 case RISCV::PseudoQuietFLT_D_INX:
18269 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18270 case RISCV::PseudoQuietFLT_D_IN32X:
18271 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18272 Subtarget);
18273
18274 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18275 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18276 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18277 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18278 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18279 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18280 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18281 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18282 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18283 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18284 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18285 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18286 case RISCV::PseudoFROUND_H:
18287 case RISCV::PseudoFROUND_H_INX:
18288 case RISCV::PseudoFROUND_S:
18289 case RISCV::PseudoFROUND_S_INX:
18290 case RISCV::PseudoFROUND_D:
18291 case RISCV::PseudoFROUND_D_INX:
18292 case RISCV::PseudoFROUND_D_IN32X:
18293 return emitFROUND(MI, BB, Subtarget);
18294 case TargetOpcode::STATEPOINT:
18295 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18296 // while the jal call instruction (to which the statepoint is lowered at the
18297 // end) has an implicit def. This def is early-clobber as it will be set at
18298 // the moment of the call and earlier than any use is read.
18299 // Add this implicit dead def here as a workaround.
18300 MI.addOperand(*MI.getMF(),
18302 RISCV::X1, /*isDef*/ true,
18303 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18304 /*isUndef*/ false, /*isEarlyClobber*/ true));
18305 [[fallthrough]];
18306 case TargetOpcode::STACKMAP:
18307 case TargetOpcode::PATCHPOINT:
18308 if (!Subtarget.is64Bit())
18309 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18310 "supported on 64-bit targets");
18311 return emitPatchPoint(MI, BB);
18312 }
18313}
18314
18315void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18316 SDNode *Node) const {
18317 // Add FRM dependency to any instructions with dynamic rounding mode.
18318 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18319 if (Idx < 0) {
18320 // Vector pseudos have FRM index indicated by TSFlags.
18321 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18322 if (Idx < 0)
18323 return;
18324 }
18325 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18326 return;
18327 // If the instruction already reads FRM, don't add another read.
18328 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18329 return;
18330 MI.addOperand(
18331 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18332}
18333
18334// Calling Convention Implementation.
18335// The expectations for frontend ABI lowering vary from target to target.
18336// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18337// details, but this is a longer term goal. For now, we simply try to keep the
18338// role of the frontend as simple and well-defined as possible. The rules can
18339// be summarised as:
18340// * Never split up large scalar arguments. We handle them here.
18341// * If a hardfloat calling convention is being used, and the struct may be
18342// passed in a pair of registers (fp+fp, int+fp), and both registers are
18343// available, then pass as two separate arguments. If either the GPRs or FPRs
18344// are exhausted, then pass according to the rule below.
18345// * If a struct could never be passed in registers or directly in a stack
18346// slot (as it is larger than 2*XLEN and the floating point rules don't
18347// apply), then pass it using a pointer with the byval attribute.
18348// * If a struct is less than 2*XLEN, then coerce to either a two-element
18349// word-sized array or a 2*XLEN scalar (depending on alignment).
18350// * The frontend can determine whether a struct is returned by reference or
18351// not based on its size and fields. If it will be returned by reference, the
18352// frontend must modify the prototype so a pointer with the sret annotation is
18353// passed as the first argument. This is not necessary for large scalar
18354// returns.
18355// * Struct return values and varargs should be coerced to structs containing
18356// register-size fields in the same situations they would be for fixed
18357// arguments.
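// A hypothetical example of the rules above, assuming the ilp32d hard-float
// ABI with argument registers still available:
// ```
// struct P { int32_t I; double D; };  // int+fp pair: passed as two separate
//                                     // arguments, one GPR and one FPR
// struct R { int32_t A; int32_t B; }; // <= 2*XLEN: coerced to a two-element
//                                     // word-sized array (or a 2*XLEN scalar)
// struct Q { int64_t A; int64_t B; }; // 16 bytes > 2*XLEN on RV32: passed
//                                     // indirectly via a pointer with byval
// ```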
18358
18359static const MCPhysReg ArgFPR16s[] = {
18360 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18361 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18362};
18363static const MCPhysReg ArgFPR32s[] = {
18364 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18365 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18366};
18367static const MCPhysReg ArgFPR64s[] = {
18368 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18369 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18370};
18371// This is an interim calling convention and it may be changed in the future.
18372static const MCPhysReg ArgVRs[] = {
18373 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18374 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18375 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18376static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18377 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18378 RISCV::V20M2, RISCV::V22M2};
18379static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18380 RISCV::V20M4};
18381static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18382
18383static ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI) {
18384 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18385 // the ILP32E ABI.
18386 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18387 RISCV::X13, RISCV::X14, RISCV::X15,
18388 RISCV::X16, RISCV::X17};
18389 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18390 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18391 RISCV::X13, RISCV::X14, RISCV::X15};
18392
18393 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18394 return ArrayRef(ArgEGPRs);
18395
18396 return ArrayRef(ArgIGPRs);
18397}
18398
18399static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18400 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used
18401 // by the save-restore libcalls, so we don't use them.
18402 static const MCPhysReg FastCCIGPRs[] = {
18403 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18404 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18405 RISCV::X29, RISCV::X30, RISCV::X31};
18406
18407 // The GPRs used for passing arguments in the FastCC when using the ILP32E/LP64E ABIs.
18408 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18409 RISCV::X13, RISCV::X14, RISCV::X15,
18410 RISCV::X7};
18411
18412 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18413 return ArrayRef(FastCCEGPRs);
18414
18415 return ArrayRef(FastCCIGPRs);
18416}
18417
18418// Pass a 2*XLEN argument that has been split into two XLEN values through
18419// registers or the stack as necessary.
18420static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18421 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18422 MVT ValVT2, MVT LocVT2,
18423 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18424 unsigned XLenInBytes = XLen / 8;
18425 const RISCVSubtarget &STI =
18428
18429 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18430 // At least one half can be passed via register.
18431 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18432 VA1.getLocVT(), CCValAssign::Full));
18433 } else {
18434 // Both halves must be passed on the stack, with proper alignment.
18435 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18436 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18437 Align StackAlign(XLenInBytes);
18438 if (!EABI || XLen != 32)
18439 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18440 State.addLoc(
18442 State.AllocateStack(XLenInBytes, StackAlign),
18443 VA1.getLocVT(), CCValAssign::Full));
18445 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18446 LocVT2, CCValAssign::Full));
18447 return false;
18448 }
18449
18450 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18451 // The second half can also be passed via register.
18452 State.addLoc(
18453 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18454 } else {
18455 // The second half is passed via the stack, without additional alignment.
18457 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18458 LocVT2, CCValAssign::Full));
18459 }
18460
18461 return false;
18462}
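// A worked example of the rule above on RV32 (XLen == 32), as a sketch: an
// i64 argument legalises into the two halves below; with one free GPR the low
// half takes the register and the high half a 4-byte stack slot, and with no
// free GPRs both halves go to the stack (8-byte aligned, 4-byte under ILP32E).
// ```
// #include <cstdint>
//
// static void splitToXLenHalves(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
//   Lo = static_cast<uint32_t>(V);
//   Hi = static_cast<uint32_t>(V >> 32);
// }
// ```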
18463
18464// Implements the RISC-V calling convention. Returns true upon failure.
18465bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18466 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18467 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18468 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18469 RVVArgDispatcher &RVVDispatcher) {
18470 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18471 assert(XLen == 32 || XLen == 64);
18472 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18473
18474 // Static chain parameter must not be passed in normal argument registers,
18476 // so we assign t2 for it as is done in GCC's __builtin_call_with_static_chain.
18476 if (ArgFlags.isNest()) {
18477 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18478 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18479 return false;
18480 }
18481 }
18482
18483 // Any return value split into more than two values can't be returned
18484 // directly. Vectors are returned via the available vector registers.
18485 if (!LocVT.isVector() && IsRet && ValNo > 1)
18486 return true;
18487
18488 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing a
18489 // variadic argument, or if no F16/F32 argument registers are available.
18490 bool UseGPRForF16_F32 = true;
18491 // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18492 // variadic argument, or if no F64 argument registers are available.
18493 bool UseGPRForF64 = true;
18494
18495 switch (ABI) {
18496 default:
18497 llvm_unreachable("Unexpected ABI");
18500 case RISCVABI::ABI_LP64:
18502 break;
18505 UseGPRForF16_F32 = !IsFixed;
18506 break;
18509 UseGPRForF16_F32 = !IsFixed;
18510 UseGPRForF64 = !IsFixed;
18511 break;
18512 }
18513
18514 // FPR16, FPR32, and FPR64 alias each other.
18515 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18516 UseGPRForF16_F32 = true;
18517 UseGPRForF64 = true;
18518 }
18519
18520 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18521 // similar local variables rather than directly checking against the target
18522 // ABI.
18523
18524 if (UseGPRForF16_F32 &&
18525 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18526 LocVT = XLenVT;
18527 LocInfo = CCValAssign::BCvt;
18528 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18529 LocVT = MVT::i64;
18530 LocInfo = CCValAssign::BCvt;
18531 }
18532
18534
18535 // If this is a variadic argument, the RISC-V calling convention requires
18536 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18537 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18538 // be used regardless of whether the original argument was split during
18539 // legalisation or not. The argument will not be passed by registers if the
18540 // original type is larger than 2*XLEN, so the register alignment rule does
18541 // not apply.
18542 // TODO: To be compatible with GCC's behaviors, we don't align registers
18543 // currently if we are using ILP32E calling convention. This behavior may be
18544 // changed when RV32E/ILP32E is ratified.
18545 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18546 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18547 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18548 ABI != RISCVABI::ABI_ILP32E) {
18549 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18550 // Skip 'odd' register if necessary.
18551 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18552 State.AllocateReg(ArgGPRs);
18553 }
18554
18555 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18556 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18557 State.getPendingArgFlags();
18558
18559 assert(PendingLocs.size() == PendingArgFlags.size() &&
18560 "PendingLocs and PendingArgFlags out of sync");
18561
18562 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18563 // registers are exhausted.
18564 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18565 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18566 // Depending on available argument GPRS, f64 may be passed in a pair of
18567 // GPRs, split between a GPR and the stack, or passed completely on the
18568 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18569 // cases.
18570 Register Reg = State.AllocateReg(ArgGPRs);
18571 if (!Reg) {
18572 unsigned StackOffset = State.AllocateStack(8, Align(8));
18573 State.addLoc(
18574 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18575 return false;
18576 }
18577 LocVT = MVT::i32;
18578 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18579 Register HiReg = State.AllocateReg(ArgGPRs);
18580 if (HiReg) {
18581 State.addLoc(
18582 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18583 } else {
18584 unsigned StackOffset = State.AllocateStack(4, Align(4));
18585 State.addLoc(
18586 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18587 }
18588 return false;
18589 }
18590
18591 // Fixed-length vectors are located in the corresponding scalable-vector
18592 // container types.
18593 if (ValVT.isFixedLengthVector())
18594 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18595
18596 // Split arguments might be passed indirectly, so keep track of the pending
18597 // values. Split vectors are passed via a mix of registers and indirectly, so
18598 // treat them as we would any other argument.
18599 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18600 LocVT = XLenVT;
18601 LocInfo = CCValAssign::Indirect;
18602 PendingLocs.push_back(
18603 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18604 PendingArgFlags.push_back(ArgFlags);
18605 if (!ArgFlags.isSplitEnd()) {
18606 return false;
18607 }
18608 }
18609
18610 // If the split argument only had two elements, it should be passed directly
18611 // in registers or on the stack.
18612 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18613 PendingLocs.size() <= 2) {
18614 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18615 // Apply the normal calling convention rules to the first half of the
18616 // split argument.
18617 CCValAssign VA = PendingLocs[0];
18618 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18619 PendingLocs.clear();
18620 PendingArgFlags.clear();
18621 return CC_RISCVAssign2XLen(
18622 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18623 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18624 }
18625
18626 // Allocate to a register if possible, or else a stack slot.
18627 Register Reg;
18628 unsigned StoreSizeBytes = XLen / 8;
18629 Align StackAlign = Align(XLen / 8);
18630
18631 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18632 Reg = State.AllocateReg(ArgFPR16s);
18633 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18634 Reg = State.AllocateReg(ArgFPR32s);
18635 else if (ValVT == MVT::f64 && !UseGPRForF64)
18636 Reg = State.AllocateReg(ArgFPR64s);
18637 else if (ValVT.isVector()) {
18638 Reg = RVVDispatcher.getNextPhysReg();
18639 if (!Reg) {
18640 // For return values, the vector must be passed fully via registers or
18641 // via the stack.
18642 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18643 // but we're using all of them.
18644 if (IsRet)
18645 return true;
18646 // Try using a GPR to pass the address
18647 if ((Reg = State.AllocateReg(ArgGPRs))) {
18648 LocVT = XLenVT;
18649 LocInfo = CCValAssign::Indirect;
18650 } else if (ValVT.isScalableVector()) {
18651 LocVT = XLenVT;
18652 LocInfo = CCValAssign::Indirect;
18653 } else {
18654 // Pass fixed-length vectors on the stack.
18655 LocVT = ValVT;
18656 StoreSizeBytes = ValVT.getStoreSize();
18657 // Align vectors to their element sizes, being careful for vXi1
18658 // vectors.
18659 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18660 }
18661 }
18662 } else {
18663 Reg = State.AllocateReg(ArgGPRs);
18664 }
18665
18666 unsigned StackOffset =
18667 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18668
18669 // If we reach this point and PendingLocs is non-empty, we must be at the
18670 // end of a split argument that must be passed indirectly.
18671 if (!PendingLocs.empty()) {
18672 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18673 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18674
18675 for (auto &It : PendingLocs) {
18676 if (Reg)
18677 It.convertToReg(Reg);
18678 else
18679 It.convertToMem(StackOffset);
18680 State.addLoc(It);
18681 }
18682 PendingLocs.clear();
18683 PendingArgFlags.clear();
18684 return false;
18685 }
18686
18687 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18688 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18689 "Expected an XLenVT or vector types at this stage");
18690
18691 if (Reg) {
18692 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18693 return false;
18694 }
18695
18696 // When a scalar floating-point value is passed on the stack, no
18697 // bit-conversion is needed.
18698 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18699 assert(!ValVT.isVector());
18700 LocVT = ValVT;
18701 LocInfo = CCValAssign::Full;
18702 }
18703 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18704 return false;
18705}
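// Rough illustration of how the rules above combine (assuming the LP64D ABI
// with FPR argument registers available; not an exhaustive description):
//
//   void f(int a, double b, int c);
//     a -> a0, b -> fa0 (FPR64), c -> a1
//
//   void g(__int128 x);
//     x -> split into two XLen halves passed in a0/a1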
18706
18707template <typename ArgTy>
18708static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18709 for (const auto &ArgIdx : enumerate(Args)) {
18710 MVT ArgVT = ArgIdx.value().VT;
18711 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18712 return ArgIdx.index();
18713 }
18714 return std::nullopt;
18715}
18716
18717void RISCVTargetLowering::analyzeInputArgs(
18718 MachineFunction &MF, CCState &CCInfo,
18719 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18720 RISCVCCAssignFn Fn) const {
18721 unsigned NumArgs = Ins.size();
18722  FunctionType *FType = MF.getFunction().getFunctionType();
18723
18724 RVVArgDispatcher Dispatcher;
18725 if (IsRet) {
18726 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18727 } else {
18728 SmallVector<Type *, 4> TypeList;
18729 for (const Argument &Arg : MF.getFunction().args())
18730 TypeList.push_back(Arg.getType());
18731 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18732 }
18733
18734 for (unsigned i = 0; i != NumArgs; ++i) {
18735 MVT ArgVT = Ins[i].VT;
18736 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18737
18738 Type *ArgTy = nullptr;
18739 if (IsRet)
18740 ArgTy = FType->getReturnType();
18741 else if (Ins[i].isOrigArg())
18742 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18743
18744    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18745    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18746 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18747 Dispatcher)) {
18748 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18749 << ArgVT << '\n');
18750 llvm_unreachable(nullptr);
18751 }
18752 }
18753}
18754
18755void RISCVTargetLowering::analyzeOutputArgs(
18756 MachineFunction &MF, CCState &CCInfo,
18757 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18758 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18759 unsigned NumArgs = Outs.size();
18760
18761 SmallVector<Type *, 4> TypeList;
18762 if (IsRet)
18763 TypeList.push_back(MF.getFunction().getReturnType());
18764 else if (CLI)
18765 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18766 TypeList.push_back(Arg.Ty);
18767 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18768
18769 for (unsigned i = 0; i != NumArgs; i++) {
18770 MVT ArgVT = Outs[i].VT;
18771 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18772 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18773
18774    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18775    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18776 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18777 Dispatcher)) {
18778 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18779 << ArgVT << "\n");
18780 llvm_unreachable(nullptr);
18781 }
18782 }
18783}
18784
18785// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18786// values.
18787static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18788                                   const CCValAssign &VA, const SDLoc &DL,
18789 const RISCVSubtarget &Subtarget) {
18790 switch (VA.getLocInfo()) {
18791 default:
18792 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18793 case CCValAssign::Full:
18794    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18795      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18796 break;
18797 case CCValAssign::BCvt:
18798 if (VA.getLocVT().isInteger() &&
18799 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18800 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18801 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18802 if (RV64LegalI32) {
18803 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18804 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18805 } else {
18806 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18807 }
18808 } else {
18809 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18810 }
18811 break;
18812 }
18813 return Val;
18814}
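// For example (illustrative): with an RV64 soft-float ABI an f32 value is
// passed bit-converted in the low 32 bits of an i64 GPR, so the BCvt case
// above recovers it with RISCVISD::FMV_W_X_RV64 (or TRUNCATE+BITCAST when
// RV64LegalI32 is enabled), and an f16/bf16 value is recovered from a GPR
// with RISCVISD::FMV_H_X.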
18815
18816// The caller is responsible for loading the full value if the argument is
18817// passed with CCValAssign::Indirect.
18818static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18819                                const CCValAssign &VA, const SDLoc &DL,
18820 const ISD::InputArg &In,
18821 const RISCVTargetLowering &TLI) {
18822  MachineFunction &MF = DAG.getMachineFunction();
18823  MachineRegisterInfo &RegInfo = MF.getRegInfo();
18824  EVT LocVT = VA.getLocVT();
18825 SDValue Val;
18826 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18827 Register VReg = RegInfo.createVirtualRegister(RC);
18828 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18829 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18830
18831 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18832 if (In.isOrigArg()) {
18833 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18834 if (OrigArg->getType()->isIntegerTy()) {
18835 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18836 // An input zero extended from i31 can also be considered sign extended.
18837 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18838 (BitWidth < 32 && In.Flags.isZExt())) {
18839        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18840        RVFI->addSExt32Register(VReg);
18841 }
18842 }
18843 }
18844
18845  if (VA.getLocInfo() == CCValAssign::Indirect)
18846    return Val;
18847
18848 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18849}
18850
18851static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18852                                   const CCValAssign &VA, const SDLoc &DL,
18853 const RISCVSubtarget &Subtarget) {
18854 EVT LocVT = VA.getLocVT();
18855
18856 switch (VA.getLocInfo()) {
18857 default:
18858 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18859 case CCValAssign::Full:
18860 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18861 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18862 break;
18863 case CCValAssign::BCvt:
18864 if (LocVT.isInteger() &&
18865 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18866 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18867 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18868 if (RV64LegalI32) {
18869 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18870 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18871 } else {
18872 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18873 }
18874 } else {
18875 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18876 }
18877 break;
18878 }
18879 return Val;
18880}
18881
18882// The caller is responsible for loading the full value if the argument is
18883// passed with CCValAssign::Indirect.
18884static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18885                                const CCValAssign &VA, const SDLoc &DL) {
18886  MachineFunction &MF = DAG.getMachineFunction();
18887  MachineFrameInfo &MFI = MF.getFrameInfo();
18888 EVT LocVT = VA.getLocVT();
18889 EVT ValVT = VA.getValVT();
18891 if (ValVT.isScalableVector()) {
18892    // When the value is a scalable vector, the stack slot holds a pointer to
18893    // the scalable vector value rather than the vector itself, so ValVT is the
18894    // pointer type instead of the scalable vector type.
18895 ValVT = LocVT;
18896 }
18897 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18898 /*IsImmutable=*/true);
18899 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18900 SDValue Val;
18901
18902 ISD::LoadExtType ExtType;
18903 switch (VA.getLocInfo()) {
18904 default:
18905 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18906 case CCValAssign::Full:
18908 case CCValAssign::BCvt:
18909 ExtType = ISD::NON_EXTLOAD;
18910 break;
18911 }
18912 Val = DAG.getExtLoad(
18913 ExtType, DL, LocVT, Chain, FIN,
18915 return Val;
18916}
18917
18918static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18919                                       const CCValAssign &VA,
18920 const CCValAssign &HiVA,
18921 const SDLoc &DL) {
18922 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18923 "Unexpected VA");
18924  MachineFunction &MF = DAG.getMachineFunction();
18925  MachineFrameInfo &MFI = MF.getFrameInfo();
18926  MachineRegisterInfo &RegInfo = MF.getRegInfo();
18927
18928 assert(VA.isRegLoc() && "Expected register VA assignment");
18929
18930 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18931 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18932 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18933 SDValue Hi;
18934 if (HiVA.isMemLoc()) {
18935 // Second half of f64 is passed on the stack.
18936 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18937 /*IsImmutable=*/true);
18938 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18939 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18941 } else {
18942 // Second half of f64 is passed in another GPR.
18943 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18944 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18945 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18946 }
18947 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18948}
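// Illustrative example (assuming the ILP32 soft-double ABI, i.e. f64 passed in
// GPR pairs): for `double f(double x);` the argument arrives as lo(x) in a0
// and hi(x) in a1 (or on the stack if only one GPR was left), and the halves
// are reassembled with RISCVISD::BuildPairF64 as above.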
18949
18950// FastCC yields less than a 1% performance improvement on some particular
18951// benchmarks, but it may theoretically benefit other cases.
18952bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18953                            unsigned ValNo, MVT ValVT, MVT LocVT,
18954 CCValAssign::LocInfo LocInfo,
18955 ISD::ArgFlagsTy ArgFlags, CCState &State,
18956 bool IsFixed, bool IsRet, Type *OrigTy,
18957 const RISCVTargetLowering &TLI,
18958 RVVArgDispatcher &RVVDispatcher) {
18959 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18960 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18961 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18962 return false;
18963 }
18964 }
18965
18966 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18967
18968 if (LocVT == MVT::f16 &&
18969 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18970 static const MCPhysReg FPR16List[] = {
18971 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18972 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18973 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18974 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18975 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18976 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18977 return false;
18978 }
18979 }
18980
18981 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18982 static const MCPhysReg FPR32List[] = {
18983 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18984 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18985 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18986 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18987 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18988 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18989 return false;
18990 }
18991 }
18992
18993 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18994 static const MCPhysReg FPR64List[] = {
18995 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18996 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18997 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18998 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18999 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19000 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19001 return false;
19002 }
19003 }
19004
19005 // Check if there is an available GPR before hitting the stack.
19006 if ((LocVT == MVT::f16 &&
19007 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19008 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19009 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19010 Subtarget.hasStdExtZdinx())) {
19011 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19012 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19013 return false;
19014 }
19015 }
19016
19017 if (LocVT == MVT::f16) {
19018 unsigned Offset2 = State.AllocateStack(2, Align(2));
19019 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19020 return false;
19021 }
19022
19023 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19024 unsigned Offset4 = State.AllocateStack(4, Align(4));
19025 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19026 return false;
19027 }
19028
19029 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19030 unsigned Offset5 = State.AllocateStack(8, Align(8));
19031 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19032 return false;
19033 }
19034
19035 if (LocVT.isVector()) {
19036 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19037 if (AllocatedVReg) {
19038 // Fixed-length vectors are located in the corresponding scalable-vector
19039 // container types.
19040 if (ValVT.isFixedLengthVector())
19041 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19042 State.addLoc(
19043 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19044 } else {
19045 // Try and pass the address via a "fast" GPR.
19046 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19047 LocInfo = CCValAssign::Indirect;
19048 LocVT = TLI.getSubtarget().getXLenVT();
19049 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19050 } else if (ValVT.isFixedLengthVector()) {
19051 auto StackAlign =
19053 unsigned StackOffset =
19054 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19055 State.addLoc(
19056 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19057 } else {
19058 // Can't pass scalable vectors on the stack.
19059 return true;
19060 }
19061 }
19062
19063 return false;
19064 }
19065
19066 return true; // CC didn't match.
19067}
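// Rough sketch of the FastCC scheme above (illustrative): in addition to the
// usual argument registers, caller-saved temporary FPRs (ft0-ft7, ft8-ft11)
// are used for f16/f32/f64 values, vector values go through the RVV register
// dispatcher or are passed indirectly, and anything left over is placed in
// naturally aligned 2/4/8-byte stack slots.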
19068
19069bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19070 CCValAssign::LocInfo LocInfo,
19071 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19072 if (ArgFlags.isNest()) {
19074 "Attribute 'nest' is not supported in GHC calling convention");
19075 }
19076
19077 static const MCPhysReg GPRList[] = {
19078 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19079 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19080
19081 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19082 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19083 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19084 if (unsigned Reg = State.AllocateReg(GPRList)) {
19085 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19086 return false;
19087 }
19088 }
19089
19090 const RISCVSubtarget &Subtarget =
19092
19093 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19094 // Pass in STG registers: F1, ..., F6
19095 // fs0 ... fs5
19096 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19097 RISCV::F18_F, RISCV::F19_F,
19098 RISCV::F20_F, RISCV::F21_F};
19099 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19100 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19101 return false;
19102 }
19103 }
19104
19105 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19106 // Pass in STG registers: D1, ..., D6
19107 // fs6 ... fs11
19108 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19109 RISCV::F24_D, RISCV::F25_D,
19110 RISCV::F26_D, RISCV::F27_D};
19111 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19112 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19113 return false;
19114 }
19115 }
19116
19117 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19118 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19119 Subtarget.is64Bit())) {
19120 if (unsigned Reg = State.AllocateReg(GPRList)) {
19121 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19122 return false;
19123 }
19124 }
19125
19126 report_fatal_error("No registers left in GHC calling convention");
19127 return true;
19128}
19129
19130// Transform physical registers into virtual registers.
19131SDValue RISCVTargetLowering::LowerFormalArguments(
19132    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19133 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19134 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19135
19136  MachineFunction &MF = DAG.getMachineFunction();
19137
19138 switch (CallConv) {
19139 default:
19140 report_fatal_error("Unsupported calling convention");
19141 case CallingConv::C:
19142 case CallingConv::Fast:
19144 case CallingConv::GRAAL:
19146 break;
19147 case CallingConv::GHC:
19148 if (Subtarget.hasStdExtE())
19149 report_fatal_error("GHC calling convention is not supported on RVE!");
19150 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19151 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19152 "(Zdinx/D) instruction set extensions");
19153 }
19154
19155 const Function &Func = MF.getFunction();
19156 if (Func.hasFnAttribute("interrupt")) {
19157 if (!Func.arg_empty())
19159 "Functions with the interrupt attribute cannot have arguments!");
19160
19161 StringRef Kind =
19162 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19163
19164 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19166 "Function interrupt attribute argument not supported!");
19167 }
19168
19169 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19170 MVT XLenVT = Subtarget.getXLenVT();
19171 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19172  // Used with varargs to accumulate store chains.
19173 std::vector<SDValue> OutChains;
19174
19175 // Assign locations to all of the incoming arguments.
19177 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19178
19179 if (CallConv == CallingConv::GHC)
19181 else
19182 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19184 : RISCV::CC_RISCV);
19185
19186 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19187 CCValAssign &VA = ArgLocs[i];
19188 SDValue ArgValue;
19189 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19190 // case.
19191 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19192 assert(VA.needsCustom());
19193 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19194 } else if (VA.isRegLoc())
19195 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19196 else
19197 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19198
19199 if (VA.getLocInfo() == CCValAssign::Indirect) {
19200 // If the original argument was split and passed by reference (e.g. i128
19201 // on RV32), we need to load all parts of it here (using the same
19202 // address). Vectors may be partly split to registers and partly to the
19203 // stack, in which case the base address is partly offset and subsequent
19204 // stores are relative to that.
19205 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19207 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19208 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19209 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19210 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19211 CCValAssign &PartVA = ArgLocs[i + 1];
19212 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19213 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19214 if (PartVA.getValVT().isScalableVector())
19215 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19216 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19217 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19219 ++i;
19220 ++InsIdx;
19221 }
19222 continue;
19223 }
19224 InVals.push_back(ArgValue);
19225 }
19226
19227 if (any_of(ArgLocs,
19228 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19229 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19230
19231 if (IsVarArg) {
19232 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19233 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19234 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19235 MachineFrameInfo &MFI = MF.getFrameInfo();
19236 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19238
19239 // Size of the vararg save area. For now, the varargs save area is either
19240 // zero or large enough to hold a0-a7.
19241 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19242 int FI;
19243
19244 // If all registers are allocated, then all varargs must be passed on the
19245 // stack and we don't need to save any argregs.
19246 if (VarArgsSaveSize == 0) {
19247 int VaArgOffset = CCInfo.getStackSize();
19248 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19249 } else {
19250 int VaArgOffset = -VarArgsSaveSize;
19251 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19252
19253      // If saving an odd number of registers, create an extra stack slot to
19254      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19255      // offsets to even-numbered registers remain 2*XLEN-aligned.
19256 if (Idx % 2) {
19258 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19259 VarArgsSaveSize += XLenInBytes;
19260 }
19261
19262 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19263
19264 // Copy the integer registers that may have been used for passing varargs
19265 // to the vararg save area.
19266 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19267 const Register Reg = RegInfo.createVirtualRegister(RC);
19268 RegInfo.addLiveIn(ArgRegs[I], Reg);
19269 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19270 SDValue Store = DAG.getStore(
19271 Chain, DL, ArgValue, FIN,
19272 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19273 OutChains.push_back(Store);
19274 FIN =
19275 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19276 }
19277 }
19278
19279    // Record the frame index of the first variable argument,
19280    // which is needed by VASTART.
19281 RVFI->setVarArgsFrameIndex(FI);
19282 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19283 }
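  // Illustrative layout of the vararg save area built above (assuming LP64
  // with three named arguments consuming a0-a2): a3-a7 are stored at negative
  // offsets from the incoming stack pointer, an extra XLEN-sized slot keeps
  // the area 2*XLEN-aligned because an odd number (5) of registers is saved,
  // and VarArgsFrameIndex records where va_start should begin.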
19284
19285  // All stores are grouped into one TokenFactor node so that the sizes of Ins
19286  // and InVals still match. This only happens for vararg functions.
19287 if (!OutChains.empty()) {
19288 OutChains.push_back(Chain);
19289 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19290 }
19291
19292 return Chain;
19293}
19294
19295/// isEligibleForTailCallOptimization - Check whether the call is eligible
19296/// for tail call optimization.
19297/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19298bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19299 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19300 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19301
19302 auto CalleeCC = CLI.CallConv;
19303 auto &Outs = CLI.Outs;
19304 auto &Caller = MF.getFunction();
19305 auto CallerCC = Caller.getCallingConv();
19306
19307 // Exception-handling functions need a special set of instructions to
19308 // indicate a return to the hardware. Tail-calling another function would
19309 // probably break this.
19310 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19311 // should be expanded as new function attributes are introduced.
19312 if (Caller.hasFnAttribute("interrupt"))
19313 return false;
19314
19315 // Do not tail call opt if the stack is used to pass parameters.
19316 if (CCInfo.getStackSize() != 0)
19317 return false;
19318
19319 // Do not tail call opt if any parameters need to be passed indirectly.
19320  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19321  // passed indirectly: the address of the value is passed in a register, or,
19322  // if no register is available, it is placed on the stack. Passing
19323  // indirectly often requires allocating stack space to hold the value, so
19324  // the CCInfo.getStackSize() != 0 check above is not sufficient on its own;
19325  // we also need to check whether any CCValAssign in ArgLocs is passed with
19326  // CCValAssign::Indirect.
19327 for (auto &VA : ArgLocs)
19328 if (VA.getLocInfo() == CCValAssign::Indirect)
19329 return false;
19330
19331 // Do not tail call opt if either caller or callee uses struct return
19332 // semantics.
19333 auto IsCallerStructRet = Caller.hasStructRetAttr();
19334 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19335 if (IsCallerStructRet || IsCalleeStructRet)
19336 return false;
19337
19338 // The callee has to preserve all registers the caller needs to preserve.
19339 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19340 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19341 if (CalleeCC != CallerCC) {
19342 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19343 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19344 return false;
19345 }
19346
19347 // Byval parameters hand the function a pointer directly into the stack area
19348 // we want to reuse during a tail call. Working around this *is* possible
19349 // but less efficient and uglier in LowerCall.
19350 for (auto &Arg : Outs)
19351 if (Arg.Flags.isByVal())
19352 return false;
19353
19354 return true;
19355}
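// For example (illustrative): `return g(a, b);` where all of g's arguments fit
// in registers, neither caller nor callee uses sret or byval, and the callee
// preserves at least the caller's preserved registers can be lowered as a tail
// call; any stack-passed or indirectly-passed argument (e.g. i128 on RV32)
// disqualifies it.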
19356
19358 return DAG.getDataLayout().getPrefTypeAlign(
19359 VT.getTypeForEVT(*DAG.getContext()));
19360}
19361
19362// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19363// and output parameter nodes.
19364SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19365                                       SmallVectorImpl<SDValue> &InVals) const {
19366 SelectionDAG &DAG = CLI.DAG;
19367 SDLoc &DL = CLI.DL;
19369 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19371 SDValue Chain = CLI.Chain;
19372 SDValue Callee = CLI.Callee;
19373 bool &IsTailCall = CLI.IsTailCall;
19374 CallingConv::ID CallConv = CLI.CallConv;
19375 bool IsVarArg = CLI.IsVarArg;
19376 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19377 MVT XLenVT = Subtarget.getXLenVT();
19378
19379  MachineFunction &MF = DAG.getMachineFunction();
19380
19381 // Analyze the operands of the call, assigning locations to each operand.
19383 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19384
19385 if (CallConv == CallingConv::GHC) {
19386 if (Subtarget.hasStdExtE())
19387 report_fatal_error("GHC calling convention is not supported on RVE!");
19389 } else
19390 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19392 : RISCV::CC_RISCV);
19393
19394 // Check if it's really possible to do a tail call.
19395 if (IsTailCall)
19396 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19397
19398 if (IsTailCall)
19399 ++NumTailCalls;
19400 else if (CLI.CB && CLI.CB->isMustTailCall())
19401 report_fatal_error("failed to perform tail call elimination on a call "
19402 "site marked musttail");
19403
19404 // Get a count of how many bytes are to be pushed on the stack.
19405 unsigned NumBytes = ArgCCInfo.getStackSize();
19406
19407 // Create local copies for byval args
19408 SmallVector<SDValue, 8> ByValArgs;
19409 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19410 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19411 if (!Flags.isByVal())
19412 continue;
19413
19414 SDValue Arg = OutVals[i];
19415 unsigned Size = Flags.getByValSize();
19416 Align Alignment = Flags.getNonZeroByValAlign();
19417
19418 int FI =
19419 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19420 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19421 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19422
19423 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19424 /*IsVolatile=*/false,
19425 /*AlwaysInline=*/false, IsTailCall,
19427 ByValArgs.push_back(FIPtr);
19428 }
19429
19430 if (!IsTailCall)
19431 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19432
19433 // Copy argument values to their designated locations.
19435 SmallVector<SDValue, 8> MemOpChains;
19436 SDValue StackPtr;
19437 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19438 ++i, ++OutIdx) {
19439 CCValAssign &VA = ArgLocs[i];
19440 SDValue ArgValue = OutVals[OutIdx];
19441 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19442
19443 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19444 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19445 assert(VA.isRegLoc() && "Expected register VA assignment");
19446 assert(VA.needsCustom());
19447 SDValue SplitF64 = DAG.getNode(
19448 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19449 SDValue Lo = SplitF64.getValue(0);
19450 SDValue Hi = SplitF64.getValue(1);
19451
19452 Register RegLo = VA.getLocReg();
19453 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19454
19455 // Get the CCValAssign for the Hi part.
19456 CCValAssign &HiVA = ArgLocs[++i];
19457
19458 if (HiVA.isMemLoc()) {
19459 // Second half of f64 is passed on the stack.
19460 if (!StackPtr.getNode())
19461 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19463 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19464 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19465 // Emit the store.
19466 MemOpChains.push_back(
19467 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19468 } else {
19469 // Second half of f64 is passed in another GPR.
19470 Register RegHigh = HiVA.getLocReg();
19471 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19472 }
19473 continue;
19474 }
19475
19476 // Promote the value if needed.
19477 // For now, only handle fully promoted and indirect arguments.
19478 if (VA.getLocInfo() == CCValAssign::Indirect) {
19479 // Store the argument in a stack slot and pass its address.
19480 Align StackAlign =
19481 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19482 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19483 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19484 // If the original argument was split (e.g. i128), we need
19485 // to store the required parts of it here (and pass just one address).
19486 // Vectors may be partly split to registers and partly to the stack, in
19487 // which case the base address is partly offset and subsequent stores are
19488 // relative to that.
19489 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19490 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19491 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19492      // Calculate the total size to store. The only way to know exactly what is
19493      // being stored is to walk the remaining parts in the loop below and
19494      // accumulate the size and alignment info as we go.
19496 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19497 SDValue PartValue = OutVals[OutIdx + 1];
19498 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19499 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19500 EVT PartVT = PartValue.getValueType();
19501 if (PartVT.isScalableVector())
19502 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19503 StoredSize += PartVT.getStoreSize();
19504 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19505 Parts.push_back(std::make_pair(PartValue, Offset));
19506 ++i;
19507 ++OutIdx;
19508 }
19509 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19510 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19511 MemOpChains.push_back(
19512 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19514 for (const auto &Part : Parts) {
19515 SDValue PartValue = Part.first;
19516 SDValue PartOffset = Part.second;
19518 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19519 MemOpChains.push_back(
19520 DAG.getStore(Chain, DL, PartValue, Address,
19522 }
19523 ArgValue = SpillSlot;
19524 } else {
19525 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19526 }
19527
19528 // Use local copy if it is a byval arg.
19529 if (Flags.isByVal())
19530 ArgValue = ByValArgs[j++];
19531
19532 if (VA.isRegLoc()) {
19533 // Queue up the argument copies and emit them at the end.
19534 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19535 } else {
19536 assert(VA.isMemLoc() && "Argument not register or memory");
19537 assert(!IsTailCall && "Tail call not allowed if stack is used "
19538 "for passing parameters");
19539
19540 // Work out the address of the stack slot.
19541 if (!StackPtr.getNode())
19542 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19544 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19546
19547 // Emit the store.
19548 MemOpChains.push_back(
19549 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19550 }
19551 }
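  // Illustrative example of the indirect path above (assuming RV32): lowering
  // a call to `void h(__int128 x)` creates an aligned stack temporary, stores
  // the four i32 parts of x into it, and passes only the temporary's address
  // to the callee (in a GPR, or on the stack if no GPR is free).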
19552
19553 // Join the stores, which are independent of one another.
19554 if (!MemOpChains.empty())
19555 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19556
19557 SDValue Glue;
19558
19559 // Build a sequence of copy-to-reg nodes, chained and glued together.
19560 for (auto &Reg : RegsToPass) {
19561 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19562 Glue = Chain.getValue(1);
19563 }
19564
19565  // Validate that none of the argument registers have been marked as
19566  // reserved by the user; if so, report an error. Do the same for the return
19567  // address register if this is not a tail call.
19568 validateCCReservedRegs(RegsToPass, MF);
19569 if (!IsTailCall &&
19572 MF.getFunction(),
19573 "Return address register required, but has been reserved."});
19574
19575 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19576 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19577 // split it and then direct call can be matched by PseudoCALL.
19578 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19579 const GlobalValue *GV = S->getGlobal();
19580 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19581 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19582 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19583 }
19584
19585 // The first call operand is the chain and the second is the target address.
19587 Ops.push_back(Chain);
19588 Ops.push_back(Callee);
19589
19590 // Add argument registers to the end of the list so that they are
19591 // known live into the call.
19592 for (auto &Reg : RegsToPass)
19593 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19594
19595 if (!IsTailCall) {
19596 // Add a register mask operand representing the call-preserved registers.
19597 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19598 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19599 assert(Mask && "Missing call preserved mask for calling convention");
19600 Ops.push_back(DAG.getRegisterMask(Mask));
19601 }
19602
19603 // Glue the call to the argument copies, if any.
19604 if (Glue.getNode())
19605 Ops.push_back(Glue);
19606
19607 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19608 "Unexpected CFI type for a direct call");
19609
19610 // Emit the call.
19611 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19612
19613 if (IsTailCall) {
19615 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19616 if (CLI.CFIType)
19617 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19618 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19619 return Ret;
19620 }
19621
19622 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19623 if (CLI.CFIType)
19624 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19625 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19626 Glue = Chain.getValue(1);
19627
19628 // Mark the end of the call, which is glued to the call itself.
19629 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19630 Glue = Chain.getValue(1);
19631
19632 // Assign locations to each value returned by this call.
19634 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19635 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19636
19637 // Copy all of the result registers out of their specified physreg.
19638 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19639 auto &VA = RVLocs[i];
19640 // Copy the value out
19641 SDValue RetValue =
19642 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19643 // Glue the RetValue to the end of the call sequence
19644 Chain = RetValue.getValue(1);
19645 Glue = RetValue.getValue(2);
19646
19647 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19648 assert(VA.needsCustom());
19649 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19650 MVT::i32, Glue);
19651 Chain = RetValue2.getValue(1);
19652 Glue = RetValue2.getValue(2);
19653 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19654 RetValue2);
19655 }
19656
19657 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19658
19659 InVals.push_back(RetValue);
19660 }
19661
19662 return Chain;
19663}
19664
19665bool RISCVTargetLowering::CanLowerReturn(
19666    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19667 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19669 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19670
19671 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19672
19673 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19674 MVT VT = Outs[i].VT;
19675 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19676 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19677 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19678 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19679 nullptr, *this, Dispatcher))
19680 return false;
19681 }
19682 return true;
19683}
19684
19685SDValue
19686RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19687                                 bool IsVarArg,
19688                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
19689 const SmallVectorImpl<SDValue> &OutVals,
19690 const SDLoc &DL, SelectionDAG &DAG) const {
19691  MachineFunction &MF = DAG.getMachineFunction();
19692  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19693
19694 // Stores the assignment of the return value to a location.
19696
19697 // Info about the registers and stack slot.
19698 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19699 *DAG.getContext());
19700
19701 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19702 nullptr, RISCV::CC_RISCV);
19703
19704 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19705 report_fatal_error("GHC functions return void only");
19706
19707 SDValue Glue;
19708 SmallVector<SDValue, 4> RetOps(1, Chain);
19709
19710 // Copy the result values into the output registers.
19711 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19712 SDValue Val = OutVals[OutIdx];
19713 CCValAssign &VA = RVLocs[i];
19714 assert(VA.isRegLoc() && "Can only return in registers!");
19715
19716 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19717 // Handle returning f64 on RV32D with a soft float ABI.
19718 assert(VA.isRegLoc() && "Expected return via registers");
19719 assert(VA.needsCustom());
19720 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19721 DAG.getVTList(MVT::i32, MVT::i32), Val);
19722 SDValue Lo = SplitF64.getValue(0);
19723 SDValue Hi = SplitF64.getValue(1);
19724 Register RegLo = VA.getLocReg();
19725 Register RegHi = RVLocs[++i].getLocReg();
19726
19727 if (STI.isRegisterReservedByUser(RegLo) ||
19728 STI.isRegisterReservedByUser(RegHi))
19730 MF.getFunction(),
19731 "Return value register required, but has been reserved."});
19732
19733 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19734 Glue = Chain.getValue(1);
19735 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19736 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19737 Glue = Chain.getValue(1);
19738 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19739 } else {
19740 // Handle a 'normal' return.
19741 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19742 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19743
19744 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19746 MF.getFunction(),
19747 "Return value register required, but has been reserved."});
19748
19749 // Guarantee that all emitted copies are stuck together.
19750 Glue = Chain.getValue(1);
19751 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19752 }
19753 }
19754
19755 RetOps[0] = Chain; // Update chain.
19756
19757 // Add the glue node if we have it.
19758 if (Glue.getNode()) {
19759 RetOps.push_back(Glue);
19760 }
19761
19762 if (any_of(RVLocs,
19763 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19764 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19765
19766 unsigned RetOpc = RISCVISD::RET_GLUE;
19767 // Interrupt service routines use different return instructions.
19768 const Function &Func = DAG.getMachineFunction().getFunction();
19769 if (Func.hasFnAttribute("interrupt")) {
19770 if (!Func.getReturnType()->isVoidTy())
19772 "Functions with the interrupt attribute must have void return type!");
19773
19775 StringRef Kind =
19776 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19777
19778 if (Kind == "supervisor")
19779 RetOpc = RISCVISD::SRET_GLUE;
19780 else
19781 RetOpc = RISCVISD::MRET_GLUE;
19782 }
19783
19784 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19785}
19786
19787void RISCVTargetLowering::validateCCReservedRegs(
19788 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19789 MachineFunction &MF) const {
19790 const Function &F = MF.getFunction();
19791 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19792
19793 if (llvm::any_of(Regs, [&STI](auto Reg) {
19794 return STI.isRegisterReservedByUser(Reg.first);
19795 }))
19796 F.getContext().diagnose(DiagnosticInfoUnsupported{
19797 F, "Argument register required, but has been reserved."});
19798}
19799
19800// Check if the result of the node is only used as a return value, as
19801// otherwise we can't perform a tail-call.
19802bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19803  if (N->getNumValues() != 1)
19804 return false;
19805 if (!N->hasNUsesOfValue(1, 0))
19806 return false;
19807
19808 SDNode *Copy = *N->use_begin();
19809
19810 if (Copy->getOpcode() == ISD::BITCAST) {
19811 return isUsedByReturnOnly(Copy, Chain);
19812 }
19813
19814 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19815 // with soft float ABIs.
19816 if (Copy->getOpcode() != ISD::CopyToReg) {
19817 return false;
19818 }
19819
19820 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19821 // isn't safe to perform a tail call.
19822 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19823 return false;
19824
19825 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19826 bool HasRet = false;
19827 for (SDNode *Node : Copy->uses()) {
19828 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19829 return false;
19830 HasRet = true;
19831 }
19832 if (!HasRet)
19833 return false;
19834
19835 Chain = Copy->getOperand(0);
19836 return true;
19837}
19838
19839bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19840  return CI->isTailCall();
19841}
19842
19843const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19844#define NODE_NAME_CASE(NODE) \
19845 case RISCVISD::NODE: \
19846 return "RISCVISD::" #NODE;
19847 // clang-format off
19848 switch ((RISCVISD::NodeType)Opcode) {
19850 break;
19851 NODE_NAME_CASE(RET_GLUE)
19852 NODE_NAME_CASE(SRET_GLUE)
19853 NODE_NAME_CASE(MRET_GLUE)
19854 NODE_NAME_CASE(CALL)
19855 NODE_NAME_CASE(SELECT_CC)
19856 NODE_NAME_CASE(BR_CC)
19857 NODE_NAME_CASE(BuildPairF64)
19858 NODE_NAME_CASE(SplitF64)
19859 NODE_NAME_CASE(TAIL)
19860 NODE_NAME_CASE(ADD_LO)
19861 NODE_NAME_CASE(HI)
19862 NODE_NAME_CASE(LLA)
19863 NODE_NAME_CASE(ADD_TPREL)
19864 NODE_NAME_CASE(MULHSU)
19865 NODE_NAME_CASE(SHL_ADD)
19866 NODE_NAME_CASE(SLLW)
19867 NODE_NAME_CASE(SRAW)
19868 NODE_NAME_CASE(SRLW)
19869 NODE_NAME_CASE(DIVW)
19870 NODE_NAME_CASE(DIVUW)
19871 NODE_NAME_CASE(REMUW)
19872 NODE_NAME_CASE(ROLW)
19873 NODE_NAME_CASE(RORW)
19874 NODE_NAME_CASE(CLZW)
19875 NODE_NAME_CASE(CTZW)
19876 NODE_NAME_CASE(ABSW)
19877 NODE_NAME_CASE(FMV_H_X)
19878 NODE_NAME_CASE(FMV_X_ANYEXTH)
19879 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19880 NODE_NAME_CASE(FMV_W_X_RV64)
19881 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19882 NODE_NAME_CASE(FCVT_X)
19883 NODE_NAME_CASE(FCVT_XU)
19884 NODE_NAME_CASE(FCVT_W_RV64)
19885 NODE_NAME_CASE(FCVT_WU_RV64)
19886 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19887 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19888 NODE_NAME_CASE(FP_ROUND_BF16)
19889 NODE_NAME_CASE(FP_EXTEND_BF16)
19890 NODE_NAME_CASE(FROUND)
19891 NODE_NAME_CASE(FCLASS)
19892 NODE_NAME_CASE(FMAX)
19893 NODE_NAME_CASE(FMIN)
19894 NODE_NAME_CASE(READ_COUNTER_WIDE)
19895 NODE_NAME_CASE(BREV8)
19896 NODE_NAME_CASE(ORC_B)
19897 NODE_NAME_CASE(ZIP)
19898 NODE_NAME_CASE(UNZIP)
19899 NODE_NAME_CASE(CLMUL)
19900 NODE_NAME_CASE(CLMULH)
19901 NODE_NAME_CASE(CLMULR)
19902 NODE_NAME_CASE(MOPR)
19903 NODE_NAME_CASE(MOPRR)
19904 NODE_NAME_CASE(SHA256SIG0)
19905 NODE_NAME_CASE(SHA256SIG1)
19906 NODE_NAME_CASE(SHA256SUM0)
19907 NODE_NAME_CASE(SHA256SUM1)
19908 NODE_NAME_CASE(SM4KS)
19909 NODE_NAME_CASE(SM4ED)
19910 NODE_NAME_CASE(SM3P0)
19911 NODE_NAME_CASE(SM3P1)
19912 NODE_NAME_CASE(TH_LWD)
19913 NODE_NAME_CASE(TH_LWUD)
19914 NODE_NAME_CASE(TH_LDD)
19915 NODE_NAME_CASE(TH_SWD)
19916 NODE_NAME_CASE(TH_SDD)
19917 NODE_NAME_CASE(VMV_V_V_VL)
19918 NODE_NAME_CASE(VMV_V_X_VL)
19919 NODE_NAME_CASE(VFMV_V_F_VL)
19920 NODE_NAME_CASE(VMV_X_S)
19921 NODE_NAME_CASE(VMV_S_X_VL)
19922 NODE_NAME_CASE(VFMV_S_F_VL)
19923 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19924 NODE_NAME_CASE(READ_VLENB)
19925 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19926 NODE_NAME_CASE(VSLIDEUP_VL)
19927 NODE_NAME_CASE(VSLIDE1UP_VL)
19928 NODE_NAME_CASE(VSLIDEDOWN_VL)
19929 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19930 NODE_NAME_CASE(VFSLIDE1UP_VL)
19931 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19932 NODE_NAME_CASE(VID_VL)
19933 NODE_NAME_CASE(VFNCVT_ROD_VL)
19934 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19935 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19936 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19937 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19938 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19939 NODE_NAME_CASE(VECREDUCE_AND_VL)
19940 NODE_NAME_CASE(VECREDUCE_OR_VL)
19941 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19942 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19943 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19944 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19945 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19946 NODE_NAME_CASE(ADD_VL)
19947 NODE_NAME_CASE(AND_VL)
19948 NODE_NAME_CASE(MUL_VL)
19949 NODE_NAME_CASE(OR_VL)
19950 NODE_NAME_CASE(SDIV_VL)
19951 NODE_NAME_CASE(SHL_VL)
19952 NODE_NAME_CASE(SREM_VL)
19953 NODE_NAME_CASE(SRA_VL)
19954 NODE_NAME_CASE(SRL_VL)
19955 NODE_NAME_CASE(ROTL_VL)
19956 NODE_NAME_CASE(ROTR_VL)
19957 NODE_NAME_CASE(SUB_VL)
19958 NODE_NAME_CASE(UDIV_VL)
19959 NODE_NAME_CASE(UREM_VL)
19960 NODE_NAME_CASE(XOR_VL)
19961 NODE_NAME_CASE(AVGFLOORU_VL)
19962 NODE_NAME_CASE(AVGCEILU_VL)
19963 NODE_NAME_CASE(SADDSAT_VL)
19964 NODE_NAME_CASE(UADDSAT_VL)
19965 NODE_NAME_CASE(SSUBSAT_VL)
19966 NODE_NAME_CASE(USUBSAT_VL)
19967 NODE_NAME_CASE(FADD_VL)
19968 NODE_NAME_CASE(FSUB_VL)
19969 NODE_NAME_CASE(FMUL_VL)
19970 NODE_NAME_CASE(FDIV_VL)
19971 NODE_NAME_CASE(FNEG_VL)
19972 NODE_NAME_CASE(FABS_VL)
19973 NODE_NAME_CASE(FSQRT_VL)
19974 NODE_NAME_CASE(FCLASS_VL)
19975 NODE_NAME_CASE(VFMADD_VL)
19976 NODE_NAME_CASE(VFNMADD_VL)
19977 NODE_NAME_CASE(VFMSUB_VL)
19978 NODE_NAME_CASE(VFNMSUB_VL)
19979 NODE_NAME_CASE(VFWMADD_VL)
19980 NODE_NAME_CASE(VFWNMADD_VL)
19981 NODE_NAME_CASE(VFWMSUB_VL)
19982 NODE_NAME_CASE(VFWNMSUB_VL)
19983 NODE_NAME_CASE(FCOPYSIGN_VL)
19984 NODE_NAME_CASE(SMIN_VL)
19985 NODE_NAME_CASE(SMAX_VL)
19986 NODE_NAME_CASE(UMIN_VL)
19987 NODE_NAME_CASE(UMAX_VL)
19988 NODE_NAME_CASE(BITREVERSE_VL)
19989 NODE_NAME_CASE(BSWAP_VL)
19990 NODE_NAME_CASE(CTLZ_VL)
19991 NODE_NAME_CASE(CTTZ_VL)
19992 NODE_NAME_CASE(CTPOP_VL)
19993 NODE_NAME_CASE(VFMIN_VL)
19994 NODE_NAME_CASE(VFMAX_VL)
19995 NODE_NAME_CASE(MULHS_VL)
19996 NODE_NAME_CASE(MULHU_VL)
19997 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19998 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19999 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20000 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20001 NODE_NAME_CASE(VFCVT_X_F_VL)
20002 NODE_NAME_CASE(VFCVT_XU_F_VL)
20003 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20004 NODE_NAME_CASE(SINT_TO_FP_VL)
20005 NODE_NAME_CASE(UINT_TO_FP_VL)
20006 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20007 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20008 NODE_NAME_CASE(FP_EXTEND_VL)
20009 NODE_NAME_CASE(FP_ROUND_VL)
20010 NODE_NAME_CASE(STRICT_FADD_VL)
20011 NODE_NAME_CASE(STRICT_FSUB_VL)
20012 NODE_NAME_CASE(STRICT_FMUL_VL)
20013 NODE_NAME_CASE(STRICT_FDIV_VL)
20014 NODE_NAME_CASE(STRICT_FSQRT_VL)
20015 NODE_NAME_CASE(STRICT_VFMADD_VL)
20016 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20017 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20018 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20019 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20020 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20021 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20022 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20023 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20024 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20025 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20026 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20027 NODE_NAME_CASE(STRICT_FSETCC_VL)
20028 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20029 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20030 NODE_NAME_CASE(VWMUL_VL)
20031 NODE_NAME_CASE(VWMULU_VL)
20032 NODE_NAME_CASE(VWMULSU_VL)
20033 NODE_NAME_CASE(VWADD_VL)
20034 NODE_NAME_CASE(VWADDU_VL)
20035 NODE_NAME_CASE(VWSUB_VL)
20036 NODE_NAME_CASE(VWSUBU_VL)
20037 NODE_NAME_CASE(VWADD_W_VL)
20038 NODE_NAME_CASE(VWADDU_W_VL)
20039 NODE_NAME_CASE(VWSUB_W_VL)
20040 NODE_NAME_CASE(VWSUBU_W_VL)
20041 NODE_NAME_CASE(VWSLL_VL)
20042 NODE_NAME_CASE(VFWMUL_VL)
20043 NODE_NAME_CASE(VFWADD_VL)
20044 NODE_NAME_CASE(VFWSUB_VL)
20045 NODE_NAME_CASE(VFWADD_W_VL)
20046 NODE_NAME_CASE(VFWSUB_W_VL)
20047 NODE_NAME_CASE(VWMACC_VL)
20048 NODE_NAME_CASE(VWMACCU_VL)
20049 NODE_NAME_CASE(VWMACCSU_VL)
20050 NODE_NAME_CASE(VNSRL_VL)
20051 NODE_NAME_CASE(SETCC_VL)
20052 NODE_NAME_CASE(VMERGE_VL)
20053 NODE_NAME_CASE(VMAND_VL)
20054 NODE_NAME_CASE(VMOR_VL)
20055 NODE_NAME_CASE(VMXOR_VL)
20056 NODE_NAME_CASE(VMCLR_VL)
20057 NODE_NAME_CASE(VMSET_VL)
20058 NODE_NAME_CASE(VRGATHER_VX_VL)
20059 NODE_NAME_CASE(VRGATHER_VV_VL)
20060 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20061 NODE_NAME_CASE(VSEXT_VL)
20062 NODE_NAME_CASE(VZEXT_VL)
20063 NODE_NAME_CASE(VCPOP_VL)
20064 NODE_NAME_CASE(VFIRST_VL)
20065 NODE_NAME_CASE(READ_CSR)
20066 NODE_NAME_CASE(WRITE_CSR)
20067 NODE_NAME_CASE(SWAP_CSR)
20068 NODE_NAME_CASE(CZERO_EQZ)
20069 NODE_NAME_CASE(CZERO_NEZ)
20070 NODE_NAME_CASE(SW_GUARDED_BRIND)
20071 NODE_NAME_CASE(SF_VC_XV_SE)
20072 NODE_NAME_CASE(SF_VC_IV_SE)
20073 NODE_NAME_CASE(SF_VC_VV_SE)
20074 NODE_NAME_CASE(SF_VC_FV_SE)
20075 NODE_NAME_CASE(SF_VC_XVV_SE)
20076 NODE_NAME_CASE(SF_VC_IVV_SE)
20077 NODE_NAME_CASE(SF_VC_VVV_SE)
20078 NODE_NAME_CASE(SF_VC_FVV_SE)
20079 NODE_NAME_CASE(SF_VC_XVW_SE)
20080 NODE_NAME_CASE(SF_VC_IVW_SE)
20081 NODE_NAME_CASE(SF_VC_VVW_SE)
20082 NODE_NAME_CASE(SF_VC_FVW_SE)
20083 NODE_NAME_CASE(SF_VC_V_X_SE)
20084 NODE_NAME_CASE(SF_VC_V_I_SE)
20085 NODE_NAME_CASE(SF_VC_V_XV_SE)
20086 NODE_NAME_CASE(SF_VC_V_IV_SE)
20087 NODE_NAME_CASE(SF_VC_V_VV_SE)
20088 NODE_NAME_CASE(SF_VC_V_FV_SE)
20089 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20090 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20091 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20092 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20093 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20094 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20095 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20096 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20097 }
20098 // clang-format on
20099 return nullptr;
20100#undef NODE_NAME_CASE
20101}
20102
20103/// getConstraintType - Given a constraint letter, return the type of
20104/// constraint it is for this target.
20105RISCVTargetLowering::ConstraintType
20106RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20107  if (Constraint.size() == 1) {
20108 switch (Constraint[0]) {
20109 default:
20110 break;
20111 case 'f':
20112 return C_RegisterClass;
20113 case 'I':
20114 case 'J':
20115 case 'K':
20116 return C_Immediate;
20117 case 'A':
20118 return C_Memory;
20119 case 's':
20120 case 'S': // A symbolic address
20121 return C_Other;
20122 }
20123 } else {
20124 if (Constraint == "vr" || Constraint == "vm")
20125 return C_RegisterClass;
20126 }
20127 return TargetLowering::getConstraintType(Constraint);
20128}
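// Illustrative (hypothetical) uses of these constraints from C inline asm:
//
//   int res;
//   asm volatile("addi %0, %1, %2" : "=r"(res) : "r"(x), "I"(16));
//   asm volatile("fadd.s %0, %1, %2" : "=f"(fr) : "f"(fa), "f"(fb));
//
// Here 'I' accepts a 12-bit signed immediate, 'f' selects an FP register, 'A'
// is used for address operands held in a register (e.g. for atomics), and
// "vr"/"vm" select vector data and mask registers.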
20129
20130std::pair<unsigned, const TargetRegisterClass *>
20131RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20132                                                  StringRef Constraint,
20133 MVT VT) const {
20134 // First, see if this is a constraint that directly corresponds to a RISC-V
20135 // register class.
20136 if (Constraint.size() == 1) {
20137 switch (Constraint[0]) {
20138 case 'r':
20139 // TODO: Support fixed vectors up to XLen for P extension?
20140 if (VT.isVector())
20141 break;
20142 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20143 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20144 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20145 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20146 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20147 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20148 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20149 case 'f':
20150 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20151 return std::make_pair(0U, &RISCV::FPR16RegClass);
20152 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20153 return std::make_pair(0U, &RISCV::FPR32RegClass);
20154 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20155 return std::make_pair(0U, &RISCV::FPR64RegClass);
20156 break;
20157 default:
20158 break;
20159 }
20160 } else if (Constraint == "vr") {
20161 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20162 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20163 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20164 return std::make_pair(0U, RC);
20165 }
20166 } else if (Constraint == "vm") {
20167 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20168 return std::make_pair(0U, &RISCV::VMV0RegClass);
20169 }
20170
20171 // Clang will correctly decode the usage of register name aliases into their
20172 // official names. However, other frontends like `rustc` do not. This allows
20173 // users of these frontends to use the ABI names for registers in LLVM-style
20174 // register constraints.
20175 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20176 .Case("{zero}", RISCV::X0)
20177 .Case("{ra}", RISCV::X1)
20178 .Case("{sp}", RISCV::X2)
20179 .Case("{gp}", RISCV::X3)
20180 .Case("{tp}", RISCV::X4)
20181 .Case("{t0}", RISCV::X5)
20182 .Case("{t1}", RISCV::X6)
20183 .Case("{t2}", RISCV::X7)
20184 .Cases("{s0}", "{fp}", RISCV::X8)
20185 .Case("{s1}", RISCV::X9)
20186 .Case("{a0}", RISCV::X10)
20187 .Case("{a1}", RISCV::X11)
20188 .Case("{a2}", RISCV::X12)
20189 .Case("{a3}", RISCV::X13)
20190 .Case("{a4}", RISCV::X14)
20191 .Case("{a5}", RISCV::X15)
20192 .Case("{a6}", RISCV::X16)
20193 .Case("{a7}", RISCV::X17)
20194 .Case("{s2}", RISCV::X18)
20195 .Case("{s3}", RISCV::X19)
20196 .Case("{s4}", RISCV::X20)
20197 .Case("{s5}", RISCV::X21)
20198 .Case("{s6}", RISCV::X22)
20199 .Case("{s7}", RISCV::X23)
20200 .Case("{s8}", RISCV::X24)
20201 .Case("{s9}", RISCV::X25)
20202 .Case("{s10}", RISCV::X26)
20203 .Case("{s11}", RISCV::X27)
20204 .Case("{t3}", RISCV::X28)
20205 .Case("{t4}", RISCV::X29)
20206 .Case("{t5}", RISCV::X30)
20207 .Case("{t6}", RISCV::X31)
20208 .Default(RISCV::NoRegister);
20209 if (XRegFromAlias != RISCV::NoRegister)
20210 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20211
20212  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20213  // TableGen record rather than the AsmName to choose registers for InlineAsm
20214  // constraints, and we want to match those names to the widest floating-point
20215  // register type available, we manually select floating-point registers here.
20216 //
20217 // The second case is the ABI name of the register, so that frontends can also
20218 // use the ABI names in register constraint lists.
20219 if (Subtarget.hasStdExtF()) {
20220 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20221 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20222 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20223 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20224 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20225 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20226 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20227 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20228 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20229 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20230 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20231 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20232 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20233 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20234 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20235 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20236 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20237 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20238 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20239 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20240 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20241 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20242 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20243 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20244 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20245 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20246 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20247 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20248 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20249 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20250 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20251 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20252 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20253 .Default(RISCV::NoRegister);
20254 if (FReg != RISCV::NoRegister) {
20255 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20256 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20257 unsigned RegNo = FReg - RISCV::F0_F;
20258 unsigned DReg = RISCV::F0_D + RegNo;
20259 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20260 }
20261 if (VT == MVT::f32 || VT == MVT::Other)
20262 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20263 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20264 unsigned RegNo = FReg - RISCV::F0_F;
20265 unsigned HReg = RISCV::F0_H + RegNo;
20266 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20267 }
20268 }
20269 }
20270
20271 if (Subtarget.hasVInstructions()) {
20272 Register VReg = StringSwitch<Register>(Constraint.lower())
20273 .Case("{v0}", RISCV::V0)
20274 .Case("{v1}", RISCV::V1)
20275 .Case("{v2}", RISCV::V2)
20276 .Case("{v3}", RISCV::V3)
20277 .Case("{v4}", RISCV::V4)
20278 .Case("{v5}", RISCV::V5)
20279 .Case("{v6}", RISCV::V6)
20280 .Case("{v7}", RISCV::V7)
20281 .Case("{v8}", RISCV::V8)
20282 .Case("{v9}", RISCV::V9)
20283 .Case("{v10}", RISCV::V10)
20284 .Case("{v11}", RISCV::V11)
20285 .Case("{v12}", RISCV::V12)
20286 .Case("{v13}", RISCV::V13)
20287 .Case("{v14}", RISCV::V14)
20288 .Case("{v15}", RISCV::V15)
20289 .Case("{v16}", RISCV::V16)
20290 .Case("{v17}", RISCV::V17)
20291 .Case("{v18}", RISCV::V18)
20292 .Case("{v19}", RISCV::V19)
20293 .Case("{v20}", RISCV::V20)
20294 .Case("{v21}", RISCV::V21)
20295 .Case("{v22}", RISCV::V22)
20296 .Case("{v23}", RISCV::V23)
20297 .Case("{v24}", RISCV::V24)
20298 .Case("{v25}", RISCV::V25)
20299 .Case("{v26}", RISCV::V26)
20300 .Case("{v27}", RISCV::V27)
20301 .Case("{v28}", RISCV::V28)
20302 .Case("{v29}", RISCV::V29)
20303 .Case("{v30}", RISCV::V30)
20304 .Case("{v31}", RISCV::V31)
20305 .Default(RISCV::NoRegister);
20306 if (VReg != RISCV::NoRegister) {
20307 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20308 return std::make_pair(VReg, &RISCV::VMRegClass);
20309 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20310 return std::make_pair(VReg, &RISCV::VRRegClass);
20311 for (const auto *RC :
20312 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20313 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20314 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20315 return std::make_pair(VReg, RC);
20316 }
20317 }
20318 }
20319 }
20320
20321 std::pair<Register, const TargetRegisterClass *> Res =
20322 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20323
20324 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20325 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20326 // Subtarget into account.
20327 if (Res.second == &RISCV::GPRF16RegClass ||
20328 Res.second == &RISCV::GPRF32RegClass ||
20329 Res.second == &RISCV::GPRPairRegClass)
20330 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20331
20332 return Res;
20333}
20334
20335InlineAsm::ConstraintCode
20336RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20337 // Currently only support length 1 constraints.
20338 if (ConstraintCode.size() == 1) {
20339 switch (ConstraintCode[0]) {
20340 case 'A':
20341 return InlineAsm::ConstraintCode::A;
20342 default:
20343 break;
20344 }
20345 }
20346
20347 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20348}
20349
20350void RISCVTargetLowering::LowerAsmOperandForConstraint(
20351 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20352 SelectionDAG &DAG) const {
20353 // Currently only support length 1 constraints.
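// For example (illustrative): the "I" constraint accepts any constant that fits
// a signed 12-bit immediate, so from C one might write
//   asm volatile("addi %0, %1, %2" : "=r"(d) : "r"(s), "I"(42));
// while "K" would only accept an unsigned 5-bit value such as a shift amount.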
20354 if (Constraint.size() == 1) {
20355 switch (Constraint[0]) {
20356 case 'I':
20357 // Validate & create a 12-bit signed immediate operand.
20358 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20359 uint64_t CVal = C->getSExtValue();
20360 if (isInt<12>(CVal))
20361 Ops.push_back(
20362 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20363 }
20364 return;
20365 case 'J':
20366 // Validate & create an integer zero operand.
20367 if (isNullConstant(Op))
20368 Ops.push_back(
20369 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20370 return;
20371 case 'K':
20372 // Validate & create a 5-bit unsigned immediate operand.
20373 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20374 uint64_t CVal = C->getZExtValue();
20375 if (isUInt<5>(CVal))
20376 Ops.push_back(
20377 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20378 }
20379 return;
20380 case 'S':
20381 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20382 return;
20383 default:
20384 break;
20385 }
20386 }
20387 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20388}
20389
20390Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20391 Instruction *Inst,
20392 AtomicOrdering Ord) const {
20393 if (Subtarget.hasStdExtZtso()) {
20394 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20395 return Builder.CreateFence(Ord);
20396 return nullptr;
20397 }
20398
20399 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20400 return Builder.CreateFence(Ord);
20401 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20402 return Builder.CreateFence(AtomicOrdering::Release);
20403 return nullptr;
20404}
20405
20406Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20407 Instruction *Inst,
20408 AtomicOrdering Ord) const {
20409 if (Subtarget.hasStdExtZtso()) {
20410 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20411 return Builder.CreateFence(Ord);
20412 return nullptr;
20413 }
20414
20415 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20416 return Builder.CreateFence(AtomicOrdering::Acquire);
20417 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20418 Ord == AtomicOrdering::SequentiallyConsistent)
20419 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20420 return nullptr;
20421}
20422
20423TargetLowering::AtomicExpansionKind
20424RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20425 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20426 // point operations can't be used in an lr/sc sequence without breaking the
20427 // forward-progress guarantee.
20428 if (AI->isFloatingPointOperation() ||
20429 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20430 AI->getOperation() == AtomicRMWInst::UDecWrap)
20431 return AtomicExpansionKind::CmpXChg;
20432
20433 // Don't expand forced atomics, we want to have __sync libcalls instead.
20434 if (Subtarget.hasForcedAtomics())
20435 return AtomicExpansionKind::None;
20436
20437 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20438 if (AI->getOperation() == AtomicRMWInst::Nand) {
20439 if (Subtarget.hasStdExtZacas() &&
20440 (Size >= 32 || Subtarget.hasStdExtZabha()))
20441 return AtomicExpansionKind::CmpXChg;
20442 if (Size < 32)
20443 return AtomicExpansionKind::MaskedIntrinsic;
20444 }
20445
20446 if (Size < 32 && !Subtarget.hasStdExtZabha())
20447 return AtomicExpansionKind::MaskedIntrinsic;
20448
20449 return AtomicExpansionKind::None;
20450}
20451
20452static Intrinsic::ID
20453getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20454 if (XLen == 32) {
20455 switch (BinOp) {
20456 default:
20457 llvm_unreachable("Unexpected AtomicRMW BinOp");
20458 case AtomicRMWInst::Xchg:
20459 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20460 case AtomicRMWInst::Add:
20461 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20462 case AtomicRMWInst::Sub:
20463 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20464 case AtomicRMWInst::Nand:
20465 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20466 case AtomicRMWInst::Max:
20467 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20468 case AtomicRMWInst::Min:
20469 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20470 case AtomicRMWInst::UMax:
20471 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20472 case AtomicRMWInst::UMin:
20473 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20474 }
20475 }
20476
20477 if (XLen == 64) {
20478 switch (BinOp) {
20479 default:
20480 llvm_unreachable("Unexpected AtomicRMW BinOp");
20481 case AtomicRMWInst::Xchg:
20482 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20483 case AtomicRMWInst::Add:
20484 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20485 case AtomicRMWInst::Sub:
20486 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20487 case AtomicRMWInst::Nand:
20488 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20489 case AtomicRMWInst::Max:
20490 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20491 case AtomicRMWInst::Min:
20492 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20493 case AtomicRMWInst::UMax:
20494 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20495 case AtomicRMWInst::UMin:
20496 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20497 }
20498 }
20499
20500 llvm_unreachable("Unexpected XLen\n");
20501}
20502
20503Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20504 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20505 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20506 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20507 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20508 // mask, as this produces better code than the LR/SC loop emitted by
20509 // int_riscv_masked_atomicrmw_xchg.
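// For example (illustrative): an i8 "atomicrmw xchg ptr %p, i8 0" becomes an
// AtomicRMW And of the aligned word with ~Mask, clearing only the addressed
// byte, while "xchg ... i8 -1" becomes an Or with Mask that sets it.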
20510 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20511 isa<ConstantInt>(AI->getValOperand())) {
20512 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20513 if (CVal->isZero())
20514 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20515 Builder.CreateNot(Mask, "Inv_Mask"),
20516 AI->getAlign(), Ord);
20517 if (CVal->isMinusOne())
20518 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20519 AI->getAlign(), Ord);
20520 }
20521
20522 unsigned XLen = Subtarget.getXLen();
20523 Value *Ordering =
20524 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20525 Type *Tys[] = {AlignedAddr->getType()};
20526 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20527 AI->getModule(),
20528 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20529
20530 if (XLen == 64) {
20531 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20532 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20533 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20534 }
20535
20536 Value *Result;
20537
20538 // Must pass the shift amount needed to sign extend the loaded value prior
20539 // to performing a signed comparison for min/max. ShiftAmt is the number of
20540 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20541 // is the number of bits to left+right shift the value in order to
20542 // sign-extend.
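// Worked example (illustrative): on RV32, an i8 atomicrmw min whose byte sits
// at bit offset 16 of the aligned word has ShiftAmt = 16 and ValWidth = 8, so
// SextShamt = 32 - 8 - 16 = 8 left/right shifts re-sign-extend the loaded byte.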
20543 if (AI->getOperation() == AtomicRMWInst::Min ||
20544 AI->getOperation() == AtomicRMWInst::Max) {
20545 const DataLayout &DL = AI->getModule()->getDataLayout();
20546 unsigned ValWidth =
20547 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20548 Value *SextShamt =
20549 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20550 Result = Builder.CreateCall(LrwOpScwLoop,
20551 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20552 } else {
20553 Result =
20554 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20555 }
20556
20557 if (XLen == 64)
20558 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20559 return Result;
20560}
20561
20562TargetLowering::AtomicExpansionKind
20563RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20564 AtomicCmpXchgInst *CI) const {
20565 // Don't expand forced atomics, we want to have __sync libcalls instead.
20566 if (Subtarget.hasForcedAtomics())
20567 return AtomicExpansionKind::None;
20568
20569 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20570 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20571 (Size == 8 || Size == 16))
20572 return AtomicExpansionKind::MaskedIntrinsic;
20573 return AtomicExpansionKind::None;
20574}
20575
20576Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20577 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20578 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20579 unsigned XLen = Subtarget.getXLen();
20580 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20581 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20582 if (XLen == 64) {
20583 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20584 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20585 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20586 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20587 }
20588 Type *Tys[] = {AlignedAddr->getType()};
20589 Function *MaskedCmpXchg =
20590 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20591 Value *Result = Builder.CreateCall(
20592 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20593 if (XLen == 64)
20594 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20595 return Result;
20596}
20597
20598bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20599 EVT DataVT) const {
20600 // We have indexed loads for all supported EEW types. Indices are always
20601 // zero extended.
20602 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20603 isTypeLegal(Extend.getValueType()) &&
20604 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20605 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20606}
20607
20608bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20609 EVT VT) const {
20610 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20611 return false;
20612
20613 switch (FPVT.getSimpleVT().SimpleTy) {
20614 case MVT::f16:
20615 return Subtarget.hasStdExtZfhmin();
20616 case MVT::f32:
20617 return Subtarget.hasStdExtF();
20618 case MVT::f64:
20619 return Subtarget.hasStdExtD();
20620 default:
20621 return false;
20622 }
20623}
20624
20625unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20626 // If we are using the small code model, we can reduce size of jump table
20627 // entry to 4 bytes.
20628 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20629 getTargetMachine().getCodeModel() == CodeModel::Small) {
20630 return MachineJumpTableInfo::EK_Custom32;
20631 }
20632 return TargetLowering::getJumpTableEncoding();
20633}
20634
20635const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20636 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20637 unsigned uid, MCContext &Ctx) const {
20638 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20639 getTargetMachine().getCodeModel() == CodeModel::Small);
20640 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20641}
20642
20643bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20644 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20645 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20646 // a power of two as well.
20647 // FIXME: This doesn't work for zve32, but that's already broken
20648 // elsewhere for the same reason.
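// For example, VLEN = 128 gives vscale = 128 / 64 = 2, and VLEN = 256 gives 4.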
20649 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20650 static_assert(RISCV::RVVBitsPerBlock == 64,
20651 "RVVBitsPerBlock changed, audit needed");
20652 return true;
20653}
20654
20655bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20656 SDValue &Offset,
20657 ISD::MemIndexedMode &AM,
20658 SelectionDAG &DAG) const {
20659 // Target does not support indexed loads.
20660 if (!Subtarget.hasVendorXTHeadMemIdx())
20661 return false;
20662
20663 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20664 return false;
20665
20666 Base = Op->getOperand(0);
20667 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20668 int64_t RHSC = RHS->getSExtValue();
20669 if (Op->getOpcode() == ISD::SUB)
20670 RHSC = -(uint64_t)RHSC;
20671
20672 // The constants that can be encoded in the THeadMemIdx instructions
20673 // are of the form (sign_extend(imm5) << imm2).
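// For example (illustrative): an offset of 60 is encodable as 15 << 2, while 17
// cannot be expressed as a shifted 5-bit signed immediate and is rejected.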
20674 bool isLegalIndexedOffset = false;
20675 for (unsigned i = 0; i < 4; i++)
20676 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20677 isLegalIndexedOffset = true;
20678 break;
20679 }
20680
20681 if (!isLegalIndexedOffset)
20682 return false;
20683
20684 Offset = Op->getOperand(1);
20685 return true;
20686 }
20687
20688 return false;
20689}
20690
20691bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20692 SDValue &Offset,
20693 ISD::MemIndexedMode &AM,
20694 SelectionDAG &DAG) const {
20695 EVT VT;
20696 SDValue Ptr;
20697 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20698 VT = LD->getMemoryVT();
20699 Ptr = LD->getBasePtr();
20700 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20701 VT = ST->getMemoryVT();
20702 Ptr = ST->getBasePtr();
20703 } else
20704 return false;
20705
20706 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20707 return false;
20708
20709 AM = ISD::PRE_INC;
20710 return true;
20711}
20712
20713bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDValue Op,
20714 SDValue &Base,
20715 SDValue &Offset,
20716 ISD::MemIndexedMode &AM,
20717 SelectionDAG &DAG) const {
20718 EVT VT;
20719 SDValue Ptr;
20720 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20721 VT = LD->getMemoryVT();
20722 Ptr = LD->getBasePtr();
20723 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20724 VT = ST->getMemoryVT();
20725 Ptr = ST->getBasePtr();
20726 } else
20727 return false;
20728
20729 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20730 return false;
20731 // Post-indexing updates the base, so it's not a valid transform
20732 // if that's not the same as the load's pointer.
20733 if (Ptr != Base)
20734 return false;
20735
20736 AM = ISD::POST_INC;
20737 return true;
20738}
20739
20740bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20741 EVT VT) const {
20742 EVT SVT = VT.getScalarType();
20743
20744 if (!SVT.isSimple())
20745 return false;
20746
20747 switch (SVT.getSimpleVT().SimpleTy) {
20748 case MVT::f16:
20749 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20750 : Subtarget.hasStdExtZfhOrZhinx();
20751 case MVT::f32:
20752 return Subtarget.hasStdExtFOrZfinx();
20753 case MVT::f64:
20754 return Subtarget.hasStdExtDOrZdinx();
20755 default:
20756 break;
20757 }
20758
20759 return false;
20760}
20761
20762ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20763 // Zacas will use amocas.w which does not require extension.
20764 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20765}
20766
20767Register RISCVTargetLowering::getExceptionPointerRegister(
20768 const Constant *PersonalityFn) const {
20769 return RISCV::X10;
20770}
20771
20772Register RISCVTargetLowering::getExceptionSelectorRegister(
20773 const Constant *PersonalityFn) const {
20774 return RISCV::X11;
20775}
20776
20777bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20778 // Return false to suppress the unnecessary extensions if the LibCall
20779 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20780 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20781 Type.getSizeInBits() < Subtarget.getXLen()))
20782 return false;
20783
20784 return true;
20785}
20786
20787bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
20788 if (Subtarget.is64Bit() && Type == MVT::i32)
20789 return true;
20790
20791 return IsSigned;
20792}
20793
20794bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20795 SDValue C) const {
20796 // Check integral scalar types.
20797 const bool HasExtMOrZmmul =
20798 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20799 if (!VT.isScalarInteger())
20800 return false;
20801
20802 // Omit the optimization if the sub target has the M extension and the data
20803 // size exceeds XLen.
20804 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20805 return false;
20806
20807 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20808 // Break the MUL to a SLLI and an ADD/SUB.
20809 const APInt &Imm = ConstNode->getAPIntValue();
20810 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20811 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20812 return true;
20813
20814 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20815 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20816 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20817 (Imm - 8).isPowerOf2()))
20818 return true;
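// For example (illustrative): Imm = 4100 = 4096 + 4 is not simm12, but
// (Imm - 4) is a power of two, so x * 4100 can become sh2add(x, slli(x, 12)).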
20819
20820 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20821 // a pair of LUI/ADDI.
20822 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20823 ConstNode->hasOneUse()) {
20824 APInt ImmS = Imm.ashr(Imm.countr_zero());
20825 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20826 (1 - ImmS).isPowerOf2())
20827 return true;
20828 }
20829 }
20830
20831 return false;
20832}
20833
20834bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20835 SDValue ConstNode) const {
20836 // Let the DAGCombiner decide for vectors.
20837 EVT VT = AddNode.getValueType();
20838 if (VT.isVector())
20839 return true;
20840
20841 // Let the DAGCombiner decide for larger types.
20842 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20843 return true;
20844
20845 // It is worse if c1 is simm12 while c1*c2 is not.
20846 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20847 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20848 const APInt &C1 = C1Node->getAPIntValue();
20849 const APInt &C2 = C2Node->getAPIntValue();
20850 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20851 return false;
20852
20853 // Default to true and let the DAGCombiner decide.
20854 return true;
20855}
20856
20857bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20858 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20859 unsigned *Fast) const {
20860 if (!VT.isVector()) {
20861 if (Fast)
20862 *Fast = Subtarget.enableUnalignedScalarMem();
20863 return Subtarget.enableUnalignedScalarMem();
20864 }
20865
20866 // All vector implementations must support element alignment
20867 EVT ElemVT = VT.getVectorElementType();
20868 if (Alignment >= ElemVT.getStoreSize()) {
20869 if (Fast)
20870 *Fast = 1;
20871 return true;
20872 }
20873
20874 // Note: We lower an unmasked unaligned vector access to an equally sized
20875 // e8 element type access. Given this, we effectively support all unmasked
20876 // misaligned accesses. TODO: Work through the codegen implications of
20877 // allowing such accesses to be formed, and considered fast.
20878 if (Fast)
20879 *Fast = Subtarget.enableUnalignedVectorMem();
20880 return Subtarget.enableUnalignedVectorMem();
20881}
20882
20883
20884MVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20885 const AttributeList &FuncAttributes) const {
20886 if (!Subtarget.hasVInstructions())
20887 return MVT::Other;
20888
20889 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20890 return MVT::Other;
20891
20892 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20893 // has an expansion threshold, and we want the number of hardware memory
20894 // operations to correspond roughly to that threshold. LMUL>1 operations
20895 // are typically expanded linearly internally, and thus correspond to more
20896 // than one actual memory operation. Note that store merging and load
20897 // combining will typically form larger LMUL operations from the LMUL1
20898 // operations emitted here, and that's okay because combining isn't
20899 // introducing new memory operations; it's just merging existing ones.
20900 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20901 if (Op.size() < MinVLenInBytes)
20902 // TODO: Figure out short memops. For the moment, do the default thing
20903 // which ends up using scalar sequences.
20904 return MVT::Other;
20905
20906 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20907 // a large scalar constant and instead use vmv.v.x/i to do the
20908 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20909 // maximize the chance we can encode the size in the vsetvli.
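// For example (illustrative): with a 128-bit minimum VLEN and ELEN=64, a memcpy
// is typed as v2i64 below, while a non-zero memset uses v16i8.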
20910 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20911 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20912
20913 // Do we have sufficient alignment for our preferred VT? If not, revert
20914 // to largest size allowed by our alignment criteria.
20915 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20916 Align RequiredAlign(PreferredVT.getStoreSize());
20917 if (Op.isFixedDstAlign())
20918 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20919 if (Op.isMemcpy())
20920 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20921 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20922 }
20923 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20924}
20925
20926bool RISCVTargetLowering::splitValueIntoRegisterParts(
20927 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20928 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20929 bool IsABIRegCopy = CC.has_value();
20930 EVT ValueVT = Val.getValueType();
20931 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20932 PartVT == MVT::f32) {
20933 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20934 // nan, and cast to f32.
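// For example (illustrative): a bf16 argument with bits 0x3F80 (1.0) is passed
// in an FPR as an f32 whose bits are 0xFFFF3F80.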
20935 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20936 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20937 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20938 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20939 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20940 Parts[0] = Val;
20941 return true;
20942 }
20943
20944 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20945 LLVMContext &Context = *DAG.getContext();
20946 EVT ValueEltVT = ValueVT.getVectorElementType();
20947 EVT PartEltVT = PartVT.getVectorElementType();
20948 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20949 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20950 if (PartVTBitSize % ValueVTBitSize == 0) {
20951 assert(PartVTBitSize >= ValueVTBitSize);
20952 // If the element types are different, bitcast to the same element type of
20953 // PartVT first.
20954 // For example, to copy a <vscale x 1 x i8> value to
20955 // <vscale x 4 x i16>.
20956 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
20957 // subvector, then we can bitcast to <vscale x 4 x i16>.
20958 if (ValueEltVT != PartEltVT) {
20959 if (PartVTBitSize > ValueVTBitSize) {
20960 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20961 assert(Count != 0 && "The number of element should not be zero.");
20962 EVT SameEltTypeVT =
20963 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20964 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20965 DAG.getUNDEF(SameEltTypeVT), Val,
20966 DAG.getVectorIdxConstant(0, DL));
20967 }
20968 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20969 } else {
20970 Val =
20971 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20972 Val, DAG.getVectorIdxConstant(0, DL));
20973 }
20974 Parts[0] = Val;
20975 return true;
20976 }
20977 }
20978 return false;
20979}
20980
20981SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20982 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20983 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20984 bool IsABIRegCopy = CC.has_value();
20985 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20986 PartVT == MVT::f32) {
20987 SDValue Val = Parts[0];
20988
20989 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20990 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20991 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20992 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20993 return Val;
20994 }
20995
20996 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20997 LLVMContext &Context = *DAG.getContext();
20998 SDValue Val = Parts[0];
20999 EVT ValueEltVT = ValueVT.getVectorElementType();
21000 EVT PartEltVT = PartVT.getVectorElementType();
21001 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21002 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21003 if (PartVTBitSize % ValueVTBitSize == 0) {
21004 assert(PartVTBitSize >= ValueVTBitSize);
21005 EVT SameEltTypeVT = ValueVT;
21006 // If the element types are different, convert it to the same element type
21007 // of PartVT.
21008 // For example, to copy a <vscale x 1 x i8> value from
21009 // <vscale x 4 x i16>.
21010 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
21011 // then we can extract <vscale x 1 x i8>.
21012 if (ValueEltVT != PartEltVT) {
21013 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21014 assert(Count != 0 && "The number of element should not be zero.");
21015 SameEltTypeVT =
21016 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21017 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21018 }
21019 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21020 DAG.getVectorIdxConstant(0, DL));
21021 return Val;
21022 }
21023 }
21024 return SDValue();
21025}
21026
21027bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21028 // When aggressively optimizing for code size, we prefer to use a div
21029 // instruction, as it is usually smaller than the alternative sequence.
21030 // TODO: Add vector division?
21031 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21032 return OptSize && !VT.isVector();
21033}
21034
21035bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21036 // Scalarizing zero_ext and sign_ext might stop them from matching a widening
21037 // instruction in some situations.
21038 unsigned Opc = N->getOpcode();
21039 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21040 return false;
21041 return true;
21042}
21043
21044static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21045 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21046 Function *ThreadPointerFunc =
21047 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21048 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21049 IRB.CreateCall(ThreadPointerFunc), Offset);
21050}
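// Illustrative IR produced by useTpOffset(IRB, -0x10):
//   %tp = call ptr @llvm.thread_pointer()
//   %slot = getelementptr i8, ptr %tp, i32 -16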
21051
21052Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21053 // Fuchsia provides a fixed TLS slot for the stack cookie.
21054 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21055 if (Subtarget.isTargetFuchsia())
21056 return useTpOffset(IRB, -0x10);
21057
21058 // Android provides a fixed TLS slot for the stack cookie. See the definition
21059 // of TLS_SLOT_STACK_GUARD in
21060 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21061 if (Subtarget.isTargetAndroid())
21062 return useTpOffset(IRB, -0x18);
21063
21064 return TargetLowering::getIRStackGuard(IRB);
21065}
21066
21067bool RISCVTargetLowering::isLegalInterleavedAccessType(
21068 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21069 const DataLayout &DL) const {
21070 EVT VT = getValueType(DL, VTy);
21071 // Don't lower vlseg/vsseg for vector types that can't be split.
21072 if (!isTypeLegal(VT))
21073 return false;
21074
21075 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
21076 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21077 Alignment))
21078 return false;
21079
21080 MVT ContainerVT = VT.getSimpleVT();
21081
21082 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21083 if (!Subtarget.useRVVForFixedLengthVectors())
21084 return false;
21085 // Sometimes the interleaved access pass picks up splats as interleaves of
21086 // one element. Don't lower these.
21087 if (FVTy->getNumElements() < 2)
21088 return false;
21089
21090 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
21091 } else {
21092 // The intrinsics for scalable vectors are not overloaded on pointer type
21093 // and can only handle the default address space.
21094 if (AddrSpace)
21095 return false;
21096 }
21097
21098 // Need to make sure that EMUL * NFIELDS ≤ 8
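// For example (illustrative): a factor-4 segment access at LMUL=2 uses 8
// registers and is accepted, while factor 8 at LMUL=2 would need 16 and is not.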
21099 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21100 if (Fractional)
21101 return true;
21102 return Factor * LMUL <= 8;
21103}
21104
21105bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
21106 Align Alignment) const {
21107 if (!Subtarget.hasVInstructions())
21108 return false;
21109
21110 // Only support fixed vectors if we know the minimum vector size.
21111 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21112 return false;
21113
21114 EVT ScalarType = DataType.getScalarType();
21115 if (!isLegalElementTypeForRVV(ScalarType))
21116 return false;
21117
21118 if (!Subtarget.enableUnalignedVectorMem() &&
21119 Alignment < ScalarType.getStoreSize())
21120 return false;
21121
21122 return true;
21123}
21124
21125static const Intrinsic::ID FixedVlsegIntrIds[] = {
21126 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21127 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21128 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21129 Intrinsic::riscv_seg8_load};
21130
21131/// Lower an interleaved load into a vlsegN intrinsic.
21132///
21133/// E.g. Lower an interleaved load (Factor = 2):
21134/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21135/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21136/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21137///
21138/// Into:
21139/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21140/// %ptr, i64 4)
21141/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21142/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21143bool RISCVTargetLowering::lowerInterleavedLoad(
21144 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21145 ArrayRef<unsigned> Indices, unsigned Factor) const {
21146 IRBuilder<> Builder(LI);
21147
21148 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21149 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21150 LI->getPointerAddressSpace(),
21151 LI->getModule()->getDataLayout()))
21152 return false;
21153
21154 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21155
21156 Function *VlsegNFunc =
21157 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21158 {VTy, LI->getPointerOperandType(), XLenTy});
21159
21160 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21161
21162 CallInst *VlsegN =
21163 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21164
21165 for (unsigned i = 0; i < Shuffles.size(); i++) {
21166 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21167 Shuffles[i]->replaceAllUsesWith(SubVec);
21168 }
21169
21170 return true;
21171}
21172
21173static const Intrinsic::ID FixedVssegIntrIds[] = {
21174 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21175 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21176 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21177 Intrinsic::riscv_seg8_store};
21178
21179/// Lower an interleaved store into a vssegN intrinsic.
21180///
21181/// E.g. Lower an interleaved store (Factor = 3):
21182/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21183/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21184/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21185///
21186/// Into:
21187/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21188/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21189/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21190/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21191/// %ptr, i32 4)
21192///
21193/// Note that the new shufflevectors will be removed and we'll only generate one
21194/// vsseg3 instruction in CodeGen.
21195bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21196 ShuffleVectorInst *SVI,
21197 unsigned Factor) const {
21198 IRBuilder<> Builder(SI);
21199 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21200 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21201 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21202 ShuffleVTy->getNumElements() / Factor);
21203 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21204 SI->getPointerAddressSpace(),
21205 SI->getModule()->getDataLayout()))
21206 return false;
21207
21208 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21209
21210 Function *VssegNFunc =
21211 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21212 {VTy, SI->getPointerOperandType(), XLenTy});
21213
21214 auto Mask = SVI->getShuffleMask();
21215 SmallVector<Value *, 10> Ops;
21216
21217 for (unsigned i = 0; i < Factor; i++) {
21218 Value *Shuffle = Builder.CreateShuffleVector(
21219 SVI->getOperand(0), SVI->getOperand(1),
21220 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21221 Ops.push_back(Shuffle);
21222 }
21223 // This VL should be OK (should be executable in one vsseg instruction,
21224 // potentially under larger LMULs) because we checked that the fixed vector
21225 // type fits in isLegalInterleavedAccessType
21226 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21227 Ops.append({SI->getPointerOperand(), VL});
21228
21229 Builder.CreateCall(VssegNFunc, Ops);
21230
21231 return true;
21232}
21233
21234bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21235 LoadInst *LI) const {
21236 assert(LI->isSimple());
21237 IRBuilder<> Builder(LI);
21238
21239 // Only deinterleave2 supported at present.
21240 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21241 return false;
21242
21243 unsigned Factor = 2;
21244
21245 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21246 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21247
21248 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21250 LI->getModule()->getDataLayout()))
21251 return false;
21252
21253 Function *VlsegNFunc;
21254 Value *VL;
21255 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21256 SmallVector<Value *, 10> Ops;
21257
21258 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21259 VlsegNFunc = Intrinsic::getDeclaration(
21260 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21261 {ResVTy, LI->getPointerOperandType(), XLenTy});
21262 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21263 } else {
21264 static const Intrinsic::ID IntrIds[] = {
21265 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21266 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21267 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21268 Intrinsic::riscv_vlseg8};
21269
21270 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21271 {ResVTy, XLenTy});
21272 VL = Constant::getAllOnesValue(XLenTy);
21273 Ops.append(Factor, PoisonValue::get(ResVTy));
21274 }
21275
21276 Ops.append({LI->getPointerOperand(), VL});
21277
21278 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21279 DI->replaceAllUsesWith(Vlseg);
21280
21281 return true;
21282}
21283
21284bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21285 StoreInst *SI) const {
21286 assert(SI->isSimple());
21287 IRBuilder<> Builder(SI);
21288
21289 // Only interleave2 supported at present.
21290 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21291 return false;
21292
21293 unsigned Factor = 2;
21294
21295 VectorType *VTy = cast<VectorType>(II->getType());
21296 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21297
21298 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21299 SI->getPointerAddressSpace(),
21300 SI->getModule()->getDataLayout()))
21301 return false;
21302
21303 Function *VssegNFunc;
21304 Value *VL;
21305 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21306
21307 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21308 VssegNFunc = Intrinsic::getDeclaration(
21309 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21310 {InVTy, SI->getPointerOperandType(), XLenTy});
21311 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21312 } else {
21313 static const Intrinsic::ID IntrIds[] = {
21314 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21315 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21316 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21317 Intrinsic::riscv_vsseg8};
21318
21319 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21320 {InVTy, XLenTy});
21321 VL = Constant::getAllOnesValue(XLenTy);
21322 }
21323
21324 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21325 SI->getPointerOperand(), VL});
21326
21327 return true;
21328}
21329
21330MachineInstr *
21331RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21332 MachineBasicBlock::iterator &MBBI,
21333 const TargetInstrInfo *TII) const {
21334 assert(MBBI->isCall() && MBBI->getCFIType() &&
21335 "Invalid call instruction for a KCFI check");
21336 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21337 MBBI->getOpcode()));
21338
21339 MachineOperand &Target = MBBI->getOperand(0);
21340 Target.setIsRenamable(false);
21341
21342 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21343 .addReg(Target.getReg())
21344 .addImm(MBBI->getCFIType())
21345 .getInstr();
21346}
21347
21348#define GET_REGISTER_MATCHER
21349#include "RISCVGenAsmMatcher.inc"
21350
21351Register
21352RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21353 const MachineFunction &MF) const {
21354 Register Reg = MatchRegisterAltName(RegName);
21355 if (Reg == RISCV::NoRegister)
21356 Reg = MatchRegisterName(RegName);
21357 if (Reg == RISCV::NoRegister)
21358 report_fatal_error(
21359 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21360 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21361 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21362 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21363 StringRef(RegName) + "\"."));
21364 return Reg;
21365}
21366
21367MachineMemOperand::Flags
21368RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21369 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21370
21371 if (NontemporalInfo == nullptr)
21372 return MachineMemOperand::MONone;
21373
21374 // 1 (the default value) works as __RISCV_NTLH_ALL
21375 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21376 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21377 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21378 // 5 -> __RISCV_NTLH_ALL
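// For example (illustrative IR), a store tagged
//   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
// with !1 = !{i32 3} selects the ALL_PRIVATE domain.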
21379 int NontemporalLevel = 5;
21380 const MDNode *RISCVNontemporalInfo =
21381 I.getMetadata("riscv-nontemporal-domain");
21382 if (RISCVNontemporalInfo != nullptr)
21383 NontemporalLevel =
21384 cast<ConstantInt>(
21385 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21386 ->getValue())
21387 ->getZExtValue();
21388
21389 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21390 "RISC-V target doesn't support this non-temporal domain.");
21391
21392 NontemporalLevel -= 2;
21393 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21394 if (NontemporalLevel & 0b1)
21395 Flags |= MONontemporalBit0;
21396 if (NontemporalLevel & 0b10)
21397 Flags |= MONontemporalBit1;
21398
21399 return Flags;
21400}
21401
21404
21405 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21406 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21407 TargetFlags |= (NodeFlags & MONontemporalBit0);
21408 TargetFlags |= (NodeFlags & MONontemporalBit1);
21409 return TargetFlags;
21410}
21411
21412bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21413 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21414 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21415}
21416
21417bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21418 if (VT.isScalableVector())
21419 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21420 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21421 return true;
21422 return Subtarget.hasStdExtZbb() &&
21423 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21424}
21425
21426unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21427 ISD::CondCode Cond) const {
21428 return isCtpopFast(VT) ? 0 : 1;
21429}
21430
21431bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21432
21433 // GISel support is in progress or complete for these opcodes.
21434 unsigned Op = Inst.getOpcode();
21435 if (Op == Instruction::Add || Op == Instruction::Sub ||
21436 Op == Instruction::And || Op == Instruction::Or ||
21437 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21438 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21439 return false;
21440
21441 if (Inst.getType()->isScalableTy())
21442 return true;
21443
21444 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21445 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21446 !isa<ReturnInst>(&Inst))
21447 return true;
21448
21449 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21450 if (AI->getAllocatedType()->isScalableTy())
21451 return true;
21452 }
21453
21454 return false;
21455}
21456
21457SDValue
21458RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21459 SelectionDAG &DAG,
21460 SmallVectorImpl<SDNode *> &Created) const {
21461 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21462 if (isIntDivCheap(N->getValueType(0), Attr))
21463 return SDValue(N, 0); // Lower SDIV as SDIV
21464
21465 // Only perform this transform if short forward branch opt is supported.
21466 if (!Subtarget.hasShortForwardBranchOpt())
21467 return SDValue();
21468 EVT VT = N->getValueType(0);
21469 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21470 return SDValue();
21471
21472 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
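// For example (illustrative), a divide by 512 only needs the bias 511, which
// fits in a 12-bit immediate.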
21473 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21474 return SDValue();
21475 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21476}
21477
21478bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21479 EVT VT, const APInt &AndMask) const {
21480 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21481 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21482 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21483}
21484
21485unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21486 return Subtarget.getMinimumJumpTableEntries();
21487}
21488
21489// Handle single arg such as return value.
21490template <typename Arg>
21491void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21492 // This lambda determines whether an array of types are constructed by
21493 // homogeneous vector types.
21494 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21495 // First, extract the first element in the argument type.
21496 auto It = ArgList.begin();
21497 MVT FirstArgRegType = It->VT;
21498
21499 // Return if there is no return or the type needs split.
21500 if (It == ArgList.end() || It->Flags.isSplit())
21501 return false;
21502
21503 ++It;
21504
21505 // Return if this argument type contains only 1 element, or it's not a
21506 // vector type.
21507 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21508 return false;
21509
21510 // Second, check if the following elements in this argument type are all the
21511 // same.
21512 for (; It != ArgList.end(); ++It)
21513 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21514 return false;
21515
21516 return true;
21517 };
21518
21519 if (isHomogeneousScalableVectorType(ArgList)) {
21520 // Handle as tuple type
21521 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21522 } else {
21523 // Handle as normal vector type
21524 bool FirstVMaskAssigned = false;
21525 for (const auto &OutArg : ArgList) {
21526 MVT RegisterVT = OutArg.VT;
21527
21528 // Skip non-RVV register type
21529 if (!RegisterVT.isVector())
21530 continue;
21531
21532 if (RegisterVT.isFixedLengthVector())
21533 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21534
21535 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21536 RVVArgInfos.push_back({1, RegisterVT, true});
21537 FirstVMaskAssigned = true;
21538 continue;
21539 }
21540
21541 RVVArgInfos.push_back({1, RegisterVT, false});
21542 }
21543 }
21544}
21545
21546// Handle multiple args.
21547template <>
21548void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21549 const DataLayout &DL = MF->getDataLayout();
21550 const Function &F = MF->getFunction();
21551 LLVMContext &Context = F.getContext();
21552
21553 bool FirstVMaskAssigned = false;
21554 for (Type *Ty : TypeList) {
21555 StructType *STy = dyn_cast<StructType>(Ty);
21556 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21557 Type *ElemTy = STy->getTypeAtIndex(0U);
21558 EVT VT = TLI->getValueType(DL, ElemTy);
21559 MVT RegisterVT =
21560 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21561 unsigned NumRegs =
21562 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21563
21564 RVVArgInfos.push_back(
21565 {NumRegs * STy->getNumElements(), RegisterVT, false});
21566 } else {
21567 SmallVector<EVT, 4> ValueVTs;
21568 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21569
21570 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21571 ++Value) {
21572 EVT VT = ValueVTs[Value];
21573 MVT RegisterVT =
21574 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21575 unsigned NumRegs =
21576 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21577
21578 // Skip non-RVV register type
21579 if (!RegisterVT.isVector())
21580 continue;
21581
21582 if (RegisterVT.isFixedLengthVector())
21583 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21584
21585 if (!FirstVMaskAssigned &&
21586 RegisterVT.getVectorElementType() == MVT::i1) {
21587 RVVArgInfos.push_back({1, RegisterVT, true});
21588 FirstVMaskAssigned = true;
21589 --NumRegs;
21590 }
21591
21592 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21593 }
21594 }
21595 }
21596}
21597
21598void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21599 unsigned StartReg) {
21600 assert((StartReg % LMul) == 0 &&
21601 "Start register number should be multiple of lmul");
21602 const MCPhysReg *VRArrays;
21603 switch (LMul) {
21604 default:
21605 report_fatal_error("Invalid lmul");
21606 case 1:
21607 VRArrays = ArgVRs;
21608 break;
21609 case 2:
21610 VRArrays = ArgVRM2s;
21611 break;
21612 case 4:
21613 VRArrays = ArgVRM4s;
21614 break;
21615 case 8:
21616 VRArrays = ArgVRM8s;
21617 break;
21618 }
21619
21620 for (unsigned i = 0; i < NF; ++i)
21621 if (StartReg)
21622 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21623 else
21624 AllocatedPhysRegs.push_back(MCPhysReg());
21625}
21626
21627/// This function determines if each RVV argument is passed by register, if the
21628/// argument can be assigned to a VR, then give it a specific register.
21629/// Otherwise, assign the argument to 0, which is an invalid MCPhysReg.
21630void RVVArgDispatcher::compute() {
21631 uint32_t AssignedMap = 0;
21632 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21633 // Allocate first vector mask argument to V0.
21634 if (ArgInfo.FirstVMask) {
21635 AllocatedPhysRegs.push_back(RISCV::V0);
21636 return;
21637 }
21638
21639 unsigned RegsNeeded = divideCeil(
21640 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21641 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21642 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21643 StartReg += RegsNeeded) {
21644 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21645 if ((AssignedMap & Map) == 0) {
21646 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21647 AssignedMap |= Map;
21648 return;
21649 }
21650 }
21651
21652 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21653 };
21654
21655 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21656 allocate(RVVArgInfos[i]);
21657}
21658
21659MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21660 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21661 return AllocatedPhysRegs[CurIdx++];
21662}
21663
21664SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
21665 SDValue Value, SDValue Addr,
21666 int JTI,
21667 SelectionDAG &DAG) const {
21668 if (Subtarget.hasStdExtZicfilp()) {
21669 // When Zicfilp enabled, we need to use software guarded branch for jump
21670 // table branch.
21671 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
21672 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
21673 Addr);
21674 }
21675 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
21676}
21677
21679
21680#define GET_RISCVVIntrinsicsTable_IMPL
21681#include "RISCVGenSearchableTables.inc"
21682
21683} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target-specific op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target-specific VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
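A hedged sketch of the mapping this kind of helper performs, turning a scalar binary opcode into the matching ISD::VECREDUCE_* opcode; the exact set of cases handled by the real helper may differ.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Illustrative opcode mapping: binop -> vector reduction of the same operation.
static unsigned getVecReduceOpcodeSketch(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unhandled binary opcode");
  case ISD::ADD:  return ISD::VECREDUCE_ADD;
  case ISD::AND:  return ISD::VECREDUCE_AND;
  case ISD::OR:   return ISD::VECREDUCE_OR;
  case ISD::XOR:  return ISD::VECREDUCE_XOR;
  case ISD::SMAX: return ISD::VECREDUCE_SMAX;
  case ISD::SMIN: return ISD::VECREDUCE_SMIN;
  case ISD::UMAX: return ISD::VECREDUCE_UMAX;
  case ISD::UMIN: return ISD::VECREDUCE_UMIN;
  }
}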
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target-specific op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
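The entry above names a pure bit-manipulation helper. Below is a small, self-contained sketch of how a generalized bit-reverse (GREV) / generalized OR-combine (GORC) of a 64-bit value is commonly computed with six butterfly stages; it is illustrative and not a quote of this file's implementation.

#include <cstdint>

// For each set bit of ShAmt, swap adjacent bit groups of that width; for GORC,
// OR the swapped groups back into the original value instead of replacing it.
static uint64_t computeGREVOrGORCSketch(uint64_t X, unsigned ShAmt, bool IsGORC) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = Masks[Stage];
      uint64_t Res = ((X & Mask) << Shift) | ((X >> Shift) & Mask);
      if (IsGORC)
        Res |= X;
      X = Res;
    }
  }
  return X;
}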
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1489
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
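Since many of the APInt members listed above appear throughout the lowering code, here is a small, self-contained usage sketch; the values are arbitrary illustrations.

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintExamples() {
  APInt V(64, 0x00FF0000ULL);               // 64-bit value with bits 16..23 set
  unsigned TZ = V.countr_zero();            // 16 trailing zero bits
  bool Pow2 = V.isPowerOf2();               // false: more than one bit is set
  APInt Lo = APInt::getLowBitsSet(64, 8);   // 0xFF
  APInt Sign = APInt::getSignMask(64);      // only bit 63 set
  APInt Narrow = V.trunc(32);               // keep the low 32 bits
  APInt Wide = Narrow.sext(64);             // sign-extend back to 64 bits
  (void)TZ; (void)Pow2; (void)Lo; (void)Sign; (void)Wide;
}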
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
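ArrayRef views like the one above are used pervasively for shuffle masks and register lists in this file; a short usage sketch of the listed members:

#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

void arrayRefExample() {
  int Storage[] = {0, 1, 2, 3, 4};
  ArrayRef<int> A(Storage);            // non-owning view over the array
  ArrayRef<int> Tail = A.slice(2, 3);  // elements {2, 3, 4}
  size_t N = A.size();                 // 5
  for (int V : Tail)                   // begin()/end() make it range-iterable
    (void)V;
  (void)N;
}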
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
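The two wrapping operations above are the least self-explanatory entries in this enum; a plain, non-atomic C++ sketch of their usual reference semantics (as described in the LLVM LangRef, ignoring atomicity) might read:

#include <cstdint>

// Single-threaded reference semantics sketch for uinc_wrap / udec_wrap on a
// 32-bit location; the real instructions perform these updates atomically.
uint32_t uincWrap(uint32_t Old, uint32_t V) {
  return (Old >= V) ? 0 : Old + 1;            // increment, wrapping to 0 at bound V
}
uint32_t udecWrap(uint32_t Old, uint32_t V) {
  return (Old == 0 || Old > V) ? V : Old - 1; // decrement, wrapping back to V
}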
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
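The CCState/CCValAssign pair above is the machinery driven by the argument-lowering entries on this page. The following is a hedged sketch of the usual "register first, then stack" assignment pattern; the helper name, the ArgGPRs list, and the XLen-based slot size are illustrative, not this file's calling-convention code.

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Illustrative only: assign one value to a GPR if available, otherwise to an
// XLen/8-byte stack slot. Returning false follows the CCAssignFn convention
// that the value has been handled.
static bool assignOneArgSketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo, CCState &State,
                               ArrayRef<MCPhysReg> ArgGPRs, unsigned XLen) {
  if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }
  int64_t Offset = State.AllocateStack(XLen / 8, Align(XLen / 8));
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return false;
}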
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
iterator_range< arg_iterator > args()
Definition: Function.h:842
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Argument * getArg(unsigned i) const
Definition: Function.h:836
Helper struct to store a base, index and offset that form an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:304
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:528
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
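Several of the atomic-lowering hooks listed further below are handed an IRBuilderBase. A small hedged sketch of using the members above to emit a fence plus an atomic read-modify-write; the helper name, types, and orderings are chosen arbitrarily for illustration.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Illustrative: emit "fence release; old = atomicrmw add ptr, 1 monotonic;
// trunc old to i8" at the builder's current insertion point.
Value *emitExampleAtomicAdd(IRBuilderBase &Builder, Value *Ptr) {
  Builder.CreateFence(AtomicOrdering::Release);
  Value *One = Builder.getIntN(32, 1);
  Value *Old = Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, One,
                                       MaybeAlign(4), AtomicOrdering::Monotonic);
  return Builder.CreateTrunc(Old, Builder.getInt8Ty());
}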
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
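A short usage sketch of the MVT queries listed above, contrasting a fixed-length and a scalable vector type; values are illustrative.

#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

void mvtExamples() {
  MVT Fixed = MVT::getVectorVT(MVT::i32, 4);             // v4i32
  MVT Scalable = MVT::getScalableVectorVT(MVT::i32, 2);  // nxv2i32
  bool IsFixed = Fixed.isFixedLengthVector();            // true
  bool IsScalable = Scalable.isScalableVector();         // true
  ElementCount EC = Scalable.getVectorElementCount();    // scalable x 2
  MVT F32 = MVT::f32;
  MVT AsInt = F32.changeTypeToInteger();                 // i32
  (void)IsFixed; (void)IsScalable; (void)EC; (void)AsInt;
}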
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
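The MachineInstrBuilder helpers above are typically driven through BuildMI; the following is a hedged sketch of the pattern used by custom-inserter code. The helper name and register operands are placeholders; RISCV::ADDI is a real opcode name but requires the target's generated opcode enum to be visible (as it is inside this file).

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Illustrative: materialize "DstReg = ADDI SrcReg, 0" immediately before MI,
// reusing MI's debug location.
void emitCopyLikeAddi(MachineBasicBlock &MBB, MachineInstr &MI,
                      const TargetInstrInfo *TII, Register DstReg,
                      Register SrcReg) {
  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::ADDI), DstReg)
      .addReg(SrcReg)
      .addImm(0);
}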
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:398
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true (by value) and sets the base pointer, offset pointer, and addressing mode (by reference) if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a strided load/store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
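To make the role of these SelectionDAG helpers concrete, here is a minimal sketch, not taken from this file, that combines getNOT and getNode to build (add X, (xor Y, -1)); the helper function, operand names, and the choice of i64 are hypothetical, and DAG, DL, X and Y are assumed to come from surrounding lowering code.

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Sketch only: build (add X, (xor Y, -1)) as an i64 expression, assuming
  // X and Y are existing i64 SDValues provided by the caller.
  static SDValue buildAddOfNot(SelectionDAG &DAG, const SDLoc &DL,
                               SDValue X, SDValue Y) {
    EVT VT = MVT::i64;
    SDValue NotY = DAG.getNOT(DL, Y, VT);          // (xor Y, -1)
    return DAG.getNode(ISD::ADD, DL, VT, X, NotY); // gets or creates the node
  }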
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
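For the shuffle-mask classifiers above, a small self-contained sketch follows; the mask values are chosen purely for illustration of the documented conventions and are not taken from this file.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instructions.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    // <3,2,1,0> reverses a single 4-element source.
    int Rev[] = {3, 2, 1, 0};
    assert(ShuffleVectorInst::isReverseMask(Rev, /*NumSrcElts=*/4));

    // <0,4,1,5,2,6,3,7> interleaves two 4-element sources with Factor = 2;
    // the 8 input elements are indexed as one concatenated range.
    int Ileave[] = {0, 4, 1, 5, 2, 6, 3, 7};
    SmallVector<unsigned> Starts;
    assert(ShuffleVectorInst::isInterleaveMask(Ileave, /*Factor=*/2,
                                               /*NumInputElts=*/8, Starts));
    return 0;
  }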
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
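A minimal, self-contained usage sketch for SmallSet; the element type and values are arbitrary.

  #include "llvm/ADT/SmallSet.h"
  #include <cassert>

  int main() {
    llvm::SmallSet<int, 4> Seen;            // up to 4 elements stored inline
    bool Inserted = Seen.insert(7).second;  // true: 7 was not present yet
    Seen.insert(7);                         // duplicate insert is a no-op
    assert(Inserted && Seen.count(7) == 1);
    return 0;
  }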
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
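Likewise, a short sketch of the SmallVector operations listed above; the inline size and the values are arbitrary.

  #include "llvm/ADT/SmallVector.h"
  #include <cassert>
  #include <iterator>

  int main() {
    llvm::SmallVector<int, 8> V;  // first 8 elements avoid heap allocation
    V.reserve(4);
    V.push_back(1);
    V.emplace_back(2);
    int More[] = {3, 4};
    V.append(std::begin(More), std::end(More));
    assert(!V.empty() && V.size() == 4);
    return 0;
  }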
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
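StringSwitch is widely used in the backends to map strings to values; a hedged sketch is below, where the classifyExt helper, its cases, and its return codes are hypothetical and not part of this file.

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"
  #include <cassert>

  // Hypothetical helper: map an extension name to an arbitrary code.
  static int classifyExt(llvm::StringRef Name) {
    return llvm::StringSwitch<int>(Name)
        .Case("f", 1)
        .Cases("d", "zdinx", 2)
        .Default(0);
  }

  int main() {
    assert(classifyExt("d") == 2 && classifyExt("x") == 0);
    return 0;
  }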
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
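These TargetLoweringBase setters are normally called from a target's TargetLowering constructor. The sketch below shows the pattern only: the ExampleTLI class is hypothetical, and the actions chosen are illustrative rather than a transcript of this file's configuration.

  #include "llvm/CodeGen/TargetLowering.h"
  using namespace llvm;

  // Hypothetical subclass, shown only to give the protected setters a legal
  // context; a real target forwards its TargetMachine and subtarget here.
  class ExampleTLI : public TargetLowering {
  public:
    explicit ExampleTLI(const TargetMachine &TM) : TargetLowering(TM) {
      // Illustrative legalization choices:
      setOperationAction(ISD::BSWAP, MVT::i32, Expand);     // expand to shifts/ors
      setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); // LowerOperation would handle it
      setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
      setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    }
  };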
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1346
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1235
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1412
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:886
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1195
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ STRICT_LROUND
Definition: ISDOpcodes.h:432
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:587
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns the current rounding mode: -1 = Undefined; 0 = Round to 0; 1 = Round to nearest, ties to even; 2 = Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1218
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ STRICT_LRINT
Definition: ISDOpcodes.h:434
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:592
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:430
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:637
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:435
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:613
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ STRICT_LLROUND
Definition: ISDOpcodes.h:433
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:581
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
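A small sketch of the condition-code helpers above; building it assumes the LLVM headers and the SelectionDAG library (where these helpers are defined) are available.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/ValueTypes.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    // Inverting (X < Y) gives (X >= Y) for integer types.
    assert(ISD::getSetCCInverse(ISD::SETLT, MVT::i32) == ISD::SETGE);
    // Swapping the operands of (X < Y) gives (Y > X).
    assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
    return 0;
  }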
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1497
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1497
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1484
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1418
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1535
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1515
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1580
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1521
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
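The bit-manipulation helpers above are easiest to read with concrete values; a small self-checking sketch, with all numbers chosen purely for illustration.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
static void bitHelperExamples() {
  assert(llvm::isPowerOf2_64(64) && !llvm::isPowerOf2_64(48));
  assert(llvm::Log2_64(64) == 6);        // floor log base 2
  assert(llvm::PowerOf2Ceil(48) == 64);  // round up to the next power of two
  assert(llvm::countr_zero(0x50u) == 4); // 0b1010000 has four trailing zeros
  assert(llvm::bit_width(0x50u) == 7);   // seven bits are needed for 0x50
}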
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
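A short sketch combining llvm::transform and llvm::any_of; the container and predicate are illustrative.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
// Doubles every element, then asks whether any doubled value exceeds 100.
static bool anyDoubledElementLarge(const llvm::SmallVector<int, 8> &In) {
  llvm::SmallVector<int, 8> Doubled(In.size());
  llvm::transform(In, Doubled.begin(), [](int V) { return V * 2; });
  return llvm::any_of(Doubled, [](int V) { return V > 100; });
}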
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
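A sketch of ComputeValueVTs splitting an IR type into the EVTs used for argument and return lowering; the simpler overload without MemVTs/Offsets is used, and the wrapper function is illustrative.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
// For an aggregate such as {i64, float} this returns one EVT per scalar piece.
static unsigned numLoweredPieces(const llvm::TargetLowering &TLI,
                                 const llvm::DataLayout &DL, llvm::Type *Ty) {
  llvm::SmallVector<llvm::EVT, 4> ValueVTs;
  llvm::ComputeValueVTs(TLI, DL, Ty, ValueVTs);
  return ValueVTs.size();
}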
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
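A sketch of llvm::count_if and llvm::is_contained applied to a shuffle mask; treating -1 as an undef lane is just for illustration.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
// Number of undef (-1) lanes in a shuffle mask.
static auto countUndefLanes(llvm::ArrayRef<int> Mask) {
  return llvm::count_if(Mask, [](int M) { return M < 0; });
}
// Whether a specific lane index appears anywhere in the mask.
static bool maskUsesLane(llvm::ArrayRef<int> Mask, int Lane) {
  return llvm::is_contained(Mask, Lane);
}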
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
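A sketch of SignExtend64 decoding a 12-bit immediate field as a signed value; the 12-bit width is only an example.
#include "llvm/Support/MathExtras.h"
#include <cstdint>
// Sign-extends bit 11 upward: decodeImm12(0xfff) == -1, decodeImm12(0x7ff) == 2047.
static int64_t decodeImm12(uint64_t EncodedBits) {
  return llvm::SignExtend64<12>(EncodedBits & 0xfff);
}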
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
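For instance, the following sketch produces the mask <2, 3, 4, 5, -1, -1>, i.e. four sequential lanes starting at 2 padded with two undef lanes; the wrapper function is illustrative.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
static llvm::SmallVector<int, 16> fourLanesFromTwo() {
  return llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
}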
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
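A sketch using the SDValue constant predicates listed above (isNullConstant, isOneConstant, isAllOnesConstant); the classification helper itself is hypothetical.
#include "llvm/CodeGen/SelectionDAGNodes.h"
// True for operands that are the constant 0, 1, or -1 (all bits set),
// which are the cases many combines treat specially.
static bool isSimpleImmOperand(llvm::SDValue Op) {
  return llvm::isNullConstant(Op) || llvm::isOneConstant(Op) ||
         llvm::isAllOnesConstant(Op);
}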
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
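A sketch exercising several of the EVT queries above: halve the element count of a vector type while keeping its element type, for both fixed and scalable vectors; the helper is illustrative.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
static llvm::EVT halveVectorLength(llvm::LLVMContext &Ctx, llvm::EVT VT) {
  assert(VT.isVector() && "expected a vector EVT");
  llvm::ElementCount EC = VT.getVectorElementCount();
  // getVectorVT also has an overload taking an ElementCount directly.
  return llvm::EVT::getVectorVT(Ctx, VT.getVectorElementType(),
                                EC.divideCoefficientBy(2));
}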
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
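A sketch of the KnownBits interface described above: merge the facts proven along two paths and bound how wide the unsigned value can be; the helper and the 32-bit threshold are illustrative.
#include "llvm/Support/KnownBits.h"
// intersectWith keeps only bits known on *both* paths; countMaxActiveBits()
// then gives the widest the unsigned value could possibly be.
static bool knownToFitIn32Bits(const llvm::KnownBits &A, const llvm::KnownBits &B) {
  llvm::KnownBits Common = A.intersectWith(B);
  return Common.countMaxActiveBits() <= 32;
}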
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
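A sketch of the MachinePointerInfo helpers above: describe the second 64-bit word of a fixed stack slot for alias analysis; the 8-byte offset and the helper are illustrative.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
static llvm::MachinePointerInfo secondWordOfSlot(llvm::MachineFunction &MF, int FI) {
  return llvm::MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(8);
}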
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)