1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
67static cl::opt<unsigned> NumRepeatedDivisors(
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73 static cl::opt<int>
74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
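// For example, with an ELEN of 32 (Zve32x/Zve32f) this gives
// MinElts = 64 / 32 = 2, so the nxv1 fractional-LMUL types (nxv1i8,
// nxv1i16, nxv1i32, nxv1f32, ...) are skipped and never registered as legal.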
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
174 if (Size <= RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32bit case is efficient on 64bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (getTargetMachine().getTargetTriple().isOSLinux()) {
666 // Custom lowering of llvm.clear_cache.
668 }
669
670 if (Subtarget.hasVInstructions()) {
672
674 if (RV64LegalI32 && Subtarget.is64Bit())
676
677 // RVV intrinsics may have illegal operands.
678 // We also need to custom legalize vmv.x.s.
681 {MVT::i8, MVT::i16}, Custom);
682 if (Subtarget.is64Bit())
684 MVT::i32, Custom);
685 else
687 MVT::i64, Custom);
688
690 MVT::Other, Custom);
691
692 static const unsigned IntegerVPOps[] = {
693 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
694 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
695 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
696 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
697 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
698 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
699 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
700 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
701 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
702 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
703 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
704 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
705 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
706 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
707
708 static const unsigned FloatingPointVPOps[] = {
709 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
710 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
711 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
712 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
713 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
714 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
715 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
716 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
717 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
718 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
719 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
720 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
721 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
722 ISD::VP_REDUCE_FMAXIMUM};
723
724 static const unsigned IntegerVecReduceOps[] = {
728
729 static const unsigned FloatingPointVecReduceOps[] = {
732
733 if (!Subtarget.is64Bit()) {
734 // We must custom-lower certain vXi64 operations on RV32 due to the vector
735 // element type being illegal.
737 MVT::i64, Custom);
738
739 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
740
741 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
742 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
743 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
744 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
745 MVT::i64, Custom);
746 }
747
748 for (MVT VT : BoolVecVTs) {
749 if (!isTypeLegal(VT))
750 continue;
751
753
754 // Mask VTs are custom-expanded into a series of standard nodes
758 VT, Custom);
759
761 Custom);
762
765 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
766 Expand);
767
768 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
769 Custom);
770
771 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
772
775 Custom);
776
778 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
779 Custom);
780
781 // RVV has native int->float & float->int conversions where the
782 // element type sizes are within one power-of-two of each other. Any
783 // wider distances between type sizes have to be lowered as sequences
784 // which progressively narrow the gap in stages.
789 VT, Custom);
791 Custom);
792
793 // Expand all extending loads to types larger than this, and truncating
794 // stores from types larger than this.
796 setTruncStoreAction(VT, OtherVT, Expand);
798 OtherVT, Expand);
799 }
800
801 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
802 ISD::VP_TRUNCATE, ISD::VP_SETCC},
803 VT, Custom);
804
807
809
810 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
811 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
812
815 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
816 }
817
818 for (MVT VT : IntVecVTs) {
819 if (!isTypeLegal(VT))
820 continue;
821
824
825 // Vectors implement MULHS/MULHU.
827
828 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
829 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
831
833 Legal);
834
836
837 // Custom-lower extensions and truncations from/to mask types.
839 VT, Custom);
840
841 // RVV has native int->float & float->int conversions where the
842 // element type sizes are within one power-of-two of each other. Any
843 // wider distances between type sizes have to be lowered as sequences
844 // which progressively narrow the gap in stages.
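// e.g. an nxv2i8 <-> nxv2f64 conversion is an 8x element-size change, so it
// is emitted as a staged sequence (one possible lowering: extend i8 -> i32,
// then widening-convert i32 -> f64), keeping each step within one
// power-of-two of element size.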
849 VT, Custom);
851 Custom);
855 VT, Legal);
856
857 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
858 // nodes which truncate by one power of two at a time.
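// e.g. a truncate from nxv2i64 to nxv2i8 becomes three TRUNCATE_VECTOR_VL
// steps (i64 -> i32 -> i16 -> i8), each selected to a narrowing shift
// (vnsrl) by zero.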
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
924 // in the range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933 }
934
935 // Expand various CCs to best match the RVV ISA, which natively supports UNE
936 // but no other unordered comparisons, and supports all ordered comparisons
937 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
938 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
939 // and we pattern-match those back to the "original", swapping operands once
940 // more. This way we catch both operations and both "vf" and "fv" forms with
941 // fewer patterns.
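// e.g. (setogt x, y) is expanded to (setolt y, x); isel then matches that
// either as vmflt.vv, or, when one operand is a scalar splat, by swapping
// operands back to use vmfgt.vf / vmflt.vf as appropriate.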
942 static const ISD::CondCode VFPCCToExpand[] = {
946 };
947
948 // TODO: support more ops.
949 static const unsigned ZvfhminPromoteOps[] = {
957
958 // TODO: support more vp ops.
959 static const unsigned ZvfhminPromoteVPOps[] = {
960 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
961 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
962 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
963 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
964 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
965 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
966 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
967 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
968 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
969
970 // Sets common operation actions on RVV floating-point vector types.
971 const auto SetCommonVFPActions = [&](MVT VT) {
973 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
974 // sizes are within one power-of-two of each other. Therefore conversions
975 // between vXf16 and vXf64 must be lowered as sequences which convert via
976 // vXf32.
979 // Custom-lower insert/extract operations to simplify patterns.
981 Custom);
982 // Expand various condition codes (explained above).
983 setCondCodeAction(VFPCCToExpand, VT, Expand);
984
987
991 VT, Custom);
992
993 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
994
995 // Expand FP operations that need libcalls.
1007
1009
1011
1013 VT, Custom);
1014
1016 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1017 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1018 VT, Custom);
1019
1022
1025 VT, Custom);
1026
1029
1031
1032 setOperationAction(FloatingPointVPOps, VT, Custom);
1033
1035 Custom);
1038 VT, Legal);
1043 VT, Custom);
1044 };
1045
1046 // Sets common extload/truncstore actions on RVV floating-point vector
1047 // types.
1048 const auto SetCommonVFPExtLoadTruncStoreActions =
1049 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1050 for (auto SmallVT : SmallerVTs) {
1051 setTruncStoreAction(VT, SmallVT, Expand);
1052 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1053 }
1054 };
1055
1056 if (Subtarget.hasVInstructionsF16()) {
1057 for (MVT VT : F16VecVTs) {
1058 if (!isTypeLegal(VT))
1059 continue;
1060 SetCommonVFPActions(VT);
1061 }
1062 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1063 for (MVT VT : F16VecVTs) {
1064 if (!isTypeLegal(VT))
1065 continue;
1068 Custom);
1069 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1070 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1071 Custom);
1074 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1075 VT, Custom);
1078 VT, Custom);
1079 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1081 // load/store
1083
1084 // Custom split nxv32f16 since nxv32f32 is not legal.
1085 if (VT == MVT::nxv32f16) {
1086 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1087 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1088 continue;
1089 }
1090 // Add more promote ops.
1091 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1092 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1093 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1094 }
1095 }
1096
1097 // TODO: Could we merge some code with zvfhmin?
1098 if (Subtarget.hasVInstructionsBF16()) {
1099 for (MVT VT : BF16VecVTs) {
1100 if (!isTypeLegal(VT))
1101 continue;
1103 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1105 Custom);
1108 VT, Custom);
1110 if (Subtarget.hasStdExtZfbfmin())
1112 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1113 Custom);
1115 // TODO: Promote to fp32.
1116 }
1117 }
1118
1119 if (Subtarget.hasVInstructionsF32()) {
1120 for (MVT VT : F32VecVTs) {
1121 if (!isTypeLegal(VT))
1122 continue;
1123 SetCommonVFPActions(VT);
1124 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1125 }
1126 }
1127
1128 if (Subtarget.hasVInstructionsF64()) {
1129 for (MVT VT : F64VecVTs) {
1130 if (!isTypeLegal(VT))
1131 continue;
1132 SetCommonVFPActions(VT);
1133 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1134 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1135 }
1136 }
1137
1138 if (Subtarget.useRVVForFixedLengthVectors()) {
1140 if (!useRVVForFixedLengthVectorVT(VT))
1141 continue;
1142
1143 // By default everything must be expanded.
1144 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1147 setTruncStoreAction(VT, OtherVT, Expand);
1149 OtherVT, Expand);
1150 }
1151
1152 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1153 // expansion to a build_vector of 0s.
1155
1156 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1158 Custom);
1159
1161 Custom);
1162
1164 VT, Custom);
1165
1167
1169
1171
1173
1175
1177
1180 Custom);
1181
1183 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1184 Custom);
1185
1187 {
1196 },
1197 VT, Custom);
1199 Custom);
1200
1202
1203 // Operations below differ between mask vectors and other vectors.
1204 if (VT.getVectorElementType() == MVT::i1) {
1205 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1206 ISD::OR, ISD::XOR},
1207 VT, Custom);
1208
1209 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1210 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1211 VT, Custom);
1212
1213 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1214 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1215 continue;
1216 }
1217
1218 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1219 // it before type legalization for i64 vectors on RV32. It will then be
1220 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1221 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1222 // improvements first.
1223 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1226 }
1227
1230
1231 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1232 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1233 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1234 ISD::VP_SCATTER},
1235 VT, Custom);
1236
1240 VT, Custom);
1241
1244
1246
1247 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1248 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1250
1254 VT, Custom);
1255
1258
1261
1262 // Custom-lower reduction operations to set up the corresponding custom
1263 // nodes' operands.
1267 VT, Custom);
1268
1269 setOperationAction(IntegerVPOps, VT, Custom);
1270
1271 if (Subtarget.hasStdExtZvkb())
1273
1274 if (Subtarget.hasStdExtZvbb()) {
1277 VT, Custom);
1278 } else {
1279 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1280 // in the range of f32.
1281 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1282 if (isTypeLegal(FloatVT))
1285 Custom);
1286 }
1287 }
1288
1290 // There are no extending loads or truncating stores.
1291 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1292 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1293 setTruncStoreAction(VT, InnerVT, Expand);
1294 }
1295
1296 if (!useRVVForFixedLengthVectorVT(VT))
1297 continue;
1298
1299 // By default everything must be expanded.
1300 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1302
1303 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1304 // expansion to a build_vector of 0s.
1306
1307 if (VT.getVectorElementType() == MVT::f16 &&
1308 !Subtarget.hasVInstructionsF16()) {
1311 Custom);
1312 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1314 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1315 Custom);
1317 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1318 VT, Custom);
1321 VT, Custom);
1324 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1325 // Don't promote f16 vector operations to f32 if f32 vector type is
1326 // not legal.
1327 // TODO: could split the f16 vector into two vectors and do promotion.
1328 if (!isTypeLegal(F32VecVT))
1329 continue;
1330 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1331 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1332 continue;
1333 }
1334
1335 if (VT.getVectorElementType() == MVT::bf16) {
1337 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1339 Custom);
1342 VT, Custom);
1344 if (Subtarget.hasStdExtZfbfmin())
1347 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1348 Custom);
1349 // TODO: Promote to fp32.
1350 continue;
1351 }
1352
1353 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1355 Custom);
1356
1360 VT, Custom);
1361
1364 VT, Custom);
1365
1366 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1367 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1368 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1369 ISD::VP_SCATTER},
1370 VT, Custom);
1371
1376 VT, Custom);
1377
1379
1382 VT, Custom);
1383
1384 setCondCodeAction(VFPCCToExpand, VT, Expand);
1385
1389
1391
1392 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1393
1394 setOperationAction(FloatingPointVPOps, VT, Custom);
1395
1397 Custom);
1404 VT, Custom);
1405 }
1406
1407 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1408 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1409 Custom);
1410 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1412 if (Subtarget.hasStdExtFOrZfinx())
1414 if (Subtarget.hasStdExtDOrZdinx())
1416 }
1417 }
1418
1419 if (Subtarget.hasStdExtA()) {
1421 if (RV64LegalI32 && Subtarget.is64Bit())
1423 }
1424
1425 if (Subtarget.hasForcedAtomics()) {
1426 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1432 XLenVT, LibCall);
1433 }
1434
1435 if (Subtarget.hasVendorXTHeadMemIdx()) {
1436 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1437 setIndexedLoadAction(im, MVT::i8, Legal);
1438 setIndexedStoreAction(im, MVT::i8, Legal);
1439 setIndexedLoadAction(im, MVT::i16, Legal);
1440 setIndexedStoreAction(im, MVT::i16, Legal);
1441 setIndexedLoadAction(im, MVT::i32, Legal);
1442 setIndexedStoreAction(im, MVT::i32, Legal);
1443
1444 if (Subtarget.is64Bit()) {
1445 setIndexedLoadAction(im, MVT::i64, Legal);
1446 setIndexedStoreAction(im, MVT::i64, Legal);
1447 }
1448 }
1449 }
1450
1451 if (Subtarget.hasVendorXCVmem()) {
1455
1459 }
1460
1461 // Function alignments.
1462 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1463 setMinFunctionAlignment(FunctionAlignment);
1464 // Set preferred alignments.
1467
1471 if (Subtarget.is64Bit())
1473
1474 if (Subtarget.hasStdExtFOrZfinx())
1476
1477 if (Subtarget.hasStdExtZbb())
1479
1480 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1481 Subtarget.hasStdExtV())
1483
1484 if (Subtarget.hasStdExtZbkb())
1486 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1488 if (Subtarget.hasStdExtFOrZfinx())
1491 if (Subtarget.hasVInstructions())
1493 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1496 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1499 if (Subtarget.hasVendorXTHeadMemPair())
1501 if (Subtarget.useRVVForFixedLengthVectors())
1503
1504 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1505 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1506
1507 // Disable strict node mutation.
1508 IsStrictFPEnabled = true;
1509}
1510
1511EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1512 LLVMContext &Context,
1513 EVT VT) const {
1514 if (!VT.isVector())
1515 return getPointerTy(DL);
1516 if (Subtarget.hasVInstructions() &&
1517 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1518 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1519 return VT.changeVectorElementTypeToInteger();
1520}
1521
1522MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1523 return Subtarget.getXLenVT();
1524}
1525
1526// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1527bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1528 unsigned VF,
1529 bool IsScalable) const {
1530 if (!Subtarget.hasVInstructions())
1531 return true;
1532
1533 if (!IsScalable)
1534 return true;
1535
1536 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1537 return true;
1538
1539 // Don't allow VF=1 if those types aren't legal.
1540 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1541 return true;
1542
1543 // VLEN=32 support is incomplete.
1544 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1545 return true;
1546
1547 // The maximum VF is for the smallest element width with LMUL=8.
1548 // VF must be a power of 2.
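// e.g. with RVVBitsPerBlock = 64 this is MaxVF = (64 / 8) * 8 = 64: 64 e8
// elements fit in an LMUL=8 register group even at the minimum guaranteed VLEN.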
1549 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1550 return VF > MaxVF || !isPowerOf2_32(VF);
1551}
1552
1553bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1554 return !Subtarget.hasVInstructions() ||
1555 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1556}
1557
1558bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1559 const CallInst &I,
1560 MachineFunction &MF,
1561 unsigned Intrinsic) const {
1562 auto &DL = I.getDataLayout();
1563
1564 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1565 bool IsUnitStrided, bool UsePtrVal = false) {
1567 // We can't use ptrVal if the intrinsic can access memory before the
1568 // pointer. This means we can't use it for strided or indexed intrinsics.
1569 if (UsePtrVal)
1570 Info.ptrVal = I.getArgOperand(PtrOp);
1571 else
1572 Info.fallbackAddressSpace =
1573 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1574 Type *MemTy;
1575 if (IsStore) {
1576 // Store value is the first operand.
1577 MemTy = I.getArgOperand(0)->getType();
1578 } else {
1579 // Use the return type. If it's a segment load, the return type is a struct.
1580 MemTy = I.getType();
1581 if (MemTy->isStructTy())
1582 MemTy = MemTy->getStructElementType(0);
1583 }
1584 if (!IsUnitStrided)
1585 MemTy = MemTy->getScalarType();
1586
1587 Info.memVT = getValueType(DL, MemTy);
1588 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1590 Info.flags |=
1591 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1592 return true;
1593 };
1594
1595 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1596 Info.flags |= MachineMemOperand::MONonTemporal;
1597
1599 switch (Intrinsic) {
1600 default:
1601 return false;
1602 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1603 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1604 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1605 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1606 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1607 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1608 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1609 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1610 case Intrinsic::riscv_masked_cmpxchg_i32:
1611 Info.opc = ISD::INTRINSIC_W_CHAIN;
1612 Info.memVT = MVT::i32;
1613 Info.ptrVal = I.getArgOperand(0);
1614 Info.offset = 0;
1615 Info.align = Align(4);
1618 return true;
1619 case Intrinsic::riscv_masked_strided_load:
1620 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1621 /*IsUnitStrided*/ false);
1622 case Intrinsic::riscv_masked_strided_store:
1623 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1624 /*IsUnitStrided*/ false);
1625 case Intrinsic::riscv_seg2_load:
1626 case Intrinsic::riscv_seg3_load:
1627 case Intrinsic::riscv_seg4_load:
1628 case Intrinsic::riscv_seg5_load:
1629 case Intrinsic::riscv_seg6_load:
1630 case Intrinsic::riscv_seg7_load:
1631 case Intrinsic::riscv_seg8_load:
1632 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1633 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1634 case Intrinsic::riscv_seg2_store:
1635 case Intrinsic::riscv_seg3_store:
1636 case Intrinsic::riscv_seg4_store:
1637 case Intrinsic::riscv_seg5_store:
1638 case Intrinsic::riscv_seg6_store:
1639 case Intrinsic::riscv_seg7_store:
1640 case Intrinsic::riscv_seg8_store:
1641 // Operands are (vec, ..., vec, ptr, vl)
1642 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1643 /*IsStore*/ true,
1644 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1645 case Intrinsic::riscv_vle:
1646 case Intrinsic::riscv_vle_mask:
1647 case Intrinsic::riscv_vleff:
1648 case Intrinsic::riscv_vleff_mask:
1649 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1650 /*IsStore*/ false,
1651 /*IsUnitStrided*/ true,
1652 /*UsePtrVal*/ true);
1653 case Intrinsic::riscv_vse:
1654 case Intrinsic::riscv_vse_mask:
1655 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1656 /*IsStore*/ true,
1657 /*IsUnitStrided*/ true,
1658 /*UsePtrVal*/ true);
1659 case Intrinsic::riscv_vlse:
1660 case Intrinsic::riscv_vlse_mask:
1661 case Intrinsic::riscv_vloxei:
1662 case Intrinsic::riscv_vloxei_mask:
1663 case Intrinsic::riscv_vluxei:
1664 case Intrinsic::riscv_vluxei_mask:
1665 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1666 /*IsStore*/ false,
1667 /*IsUnitStrided*/ false);
1668 case Intrinsic::riscv_vsse:
1669 case Intrinsic::riscv_vsse_mask:
1670 case Intrinsic::riscv_vsoxei:
1671 case Intrinsic::riscv_vsoxei_mask:
1672 case Intrinsic::riscv_vsuxei:
1673 case Intrinsic::riscv_vsuxei_mask:
1674 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1675 /*IsStore*/ true,
1676 /*IsUnitStrided*/ false);
1677 case Intrinsic::riscv_vlseg2:
1678 case Intrinsic::riscv_vlseg3:
1679 case Intrinsic::riscv_vlseg4:
1680 case Intrinsic::riscv_vlseg5:
1681 case Intrinsic::riscv_vlseg6:
1682 case Intrinsic::riscv_vlseg7:
1683 case Intrinsic::riscv_vlseg8:
1684 case Intrinsic::riscv_vlseg2ff:
1685 case Intrinsic::riscv_vlseg3ff:
1686 case Intrinsic::riscv_vlseg4ff:
1687 case Intrinsic::riscv_vlseg5ff:
1688 case Intrinsic::riscv_vlseg6ff:
1689 case Intrinsic::riscv_vlseg7ff:
1690 case Intrinsic::riscv_vlseg8ff:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1694 case Intrinsic::riscv_vlseg2_mask:
1695 case Intrinsic::riscv_vlseg3_mask:
1696 case Intrinsic::riscv_vlseg4_mask:
1697 case Intrinsic::riscv_vlseg5_mask:
1698 case Intrinsic::riscv_vlseg6_mask:
1699 case Intrinsic::riscv_vlseg7_mask:
1700 case Intrinsic::riscv_vlseg8_mask:
1701 case Intrinsic::riscv_vlseg2ff_mask:
1702 case Intrinsic::riscv_vlseg3ff_mask:
1703 case Intrinsic::riscv_vlseg4ff_mask:
1704 case Intrinsic::riscv_vlseg5ff_mask:
1705 case Intrinsic::riscv_vlseg6ff_mask:
1706 case Intrinsic::riscv_vlseg7ff_mask:
1707 case Intrinsic::riscv_vlseg8ff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1711 case Intrinsic::riscv_vlsseg2:
1712 case Intrinsic::riscv_vlsseg3:
1713 case Intrinsic::riscv_vlsseg4:
1714 case Intrinsic::riscv_vlsseg5:
1715 case Intrinsic::riscv_vlsseg6:
1716 case Intrinsic::riscv_vlsseg7:
1717 case Intrinsic::riscv_vlsseg8:
1718 case Intrinsic::riscv_vloxseg2:
1719 case Intrinsic::riscv_vloxseg3:
1720 case Intrinsic::riscv_vloxseg4:
1721 case Intrinsic::riscv_vloxseg5:
1722 case Intrinsic::riscv_vloxseg6:
1723 case Intrinsic::riscv_vloxseg7:
1724 case Intrinsic::riscv_vloxseg8:
1725 case Intrinsic::riscv_vluxseg2:
1726 case Intrinsic::riscv_vluxseg3:
1727 case Intrinsic::riscv_vluxseg4:
1728 case Intrinsic::riscv_vluxseg5:
1729 case Intrinsic::riscv_vluxseg6:
1730 case Intrinsic::riscv_vluxseg7:
1731 case Intrinsic::riscv_vluxseg8:
1732 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1733 /*IsStore*/ false,
1734 /*IsUnitStrided*/ false);
1735 case Intrinsic::riscv_vlsseg2_mask:
1736 case Intrinsic::riscv_vlsseg3_mask:
1737 case Intrinsic::riscv_vlsseg4_mask:
1738 case Intrinsic::riscv_vlsseg5_mask:
1739 case Intrinsic::riscv_vlsseg6_mask:
1740 case Intrinsic::riscv_vlsseg7_mask:
1741 case Intrinsic::riscv_vlsseg8_mask:
1742 case Intrinsic::riscv_vloxseg2_mask:
1743 case Intrinsic::riscv_vloxseg3_mask:
1744 case Intrinsic::riscv_vloxseg4_mask:
1745 case Intrinsic::riscv_vloxseg5_mask:
1746 case Intrinsic::riscv_vloxseg6_mask:
1747 case Intrinsic::riscv_vloxseg7_mask:
1748 case Intrinsic::riscv_vloxseg8_mask:
1749 case Intrinsic::riscv_vluxseg2_mask:
1750 case Intrinsic::riscv_vluxseg3_mask:
1751 case Intrinsic::riscv_vluxseg4_mask:
1752 case Intrinsic::riscv_vluxseg5_mask:
1753 case Intrinsic::riscv_vluxseg6_mask:
1754 case Intrinsic::riscv_vluxseg7_mask:
1755 case Intrinsic::riscv_vluxseg8_mask:
1756 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1757 /*IsStore*/ false,
1758 /*IsUnitStrided*/ false);
1759 case Intrinsic::riscv_vsseg2:
1760 case Intrinsic::riscv_vsseg3:
1761 case Intrinsic::riscv_vsseg4:
1762 case Intrinsic::riscv_vsseg5:
1763 case Intrinsic::riscv_vsseg6:
1764 case Intrinsic::riscv_vsseg7:
1765 case Intrinsic::riscv_vsseg8:
1766 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1767 /*IsStore*/ true,
1768 /*IsUnitStrided*/ false);
1769 case Intrinsic::riscv_vsseg2_mask:
1770 case Intrinsic::riscv_vsseg3_mask:
1771 case Intrinsic::riscv_vsseg4_mask:
1772 case Intrinsic::riscv_vsseg5_mask:
1773 case Intrinsic::riscv_vsseg6_mask:
1774 case Intrinsic::riscv_vsseg7_mask:
1775 case Intrinsic::riscv_vsseg8_mask:
1776 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1777 /*IsStore*/ true,
1778 /*IsUnitStrided*/ false);
1779 case Intrinsic::riscv_vssseg2:
1780 case Intrinsic::riscv_vssseg3:
1781 case Intrinsic::riscv_vssseg4:
1782 case Intrinsic::riscv_vssseg5:
1783 case Intrinsic::riscv_vssseg6:
1784 case Intrinsic::riscv_vssseg7:
1785 case Intrinsic::riscv_vssseg8:
1786 case Intrinsic::riscv_vsoxseg2:
1787 case Intrinsic::riscv_vsoxseg3:
1788 case Intrinsic::riscv_vsoxseg4:
1789 case Intrinsic::riscv_vsoxseg5:
1790 case Intrinsic::riscv_vsoxseg6:
1791 case Intrinsic::riscv_vsoxseg7:
1792 case Intrinsic::riscv_vsoxseg8:
1793 case Intrinsic::riscv_vsuxseg2:
1794 case Intrinsic::riscv_vsuxseg3:
1795 case Intrinsic::riscv_vsuxseg4:
1796 case Intrinsic::riscv_vsuxseg5:
1797 case Intrinsic::riscv_vsuxseg6:
1798 case Intrinsic::riscv_vsuxseg7:
1799 case Intrinsic::riscv_vsuxseg8:
1800 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1801 /*IsStore*/ true,
1802 /*IsUnitStrided*/ false);
1803 case Intrinsic::riscv_vssseg2_mask:
1804 case Intrinsic::riscv_vssseg3_mask:
1805 case Intrinsic::riscv_vssseg4_mask:
1806 case Intrinsic::riscv_vssseg5_mask:
1807 case Intrinsic::riscv_vssseg6_mask:
1808 case Intrinsic::riscv_vssseg7_mask:
1809 case Intrinsic::riscv_vssseg8_mask:
1810 case Intrinsic::riscv_vsoxseg2_mask:
1811 case Intrinsic::riscv_vsoxseg3_mask:
1812 case Intrinsic::riscv_vsoxseg4_mask:
1813 case Intrinsic::riscv_vsoxseg5_mask:
1814 case Intrinsic::riscv_vsoxseg6_mask:
1815 case Intrinsic::riscv_vsoxseg7_mask:
1816 case Intrinsic::riscv_vsoxseg8_mask:
1817 case Intrinsic::riscv_vsuxseg2_mask:
1818 case Intrinsic::riscv_vsuxseg3_mask:
1819 case Intrinsic::riscv_vsuxseg4_mask:
1820 case Intrinsic::riscv_vsuxseg5_mask:
1821 case Intrinsic::riscv_vsuxseg6_mask:
1822 case Intrinsic::riscv_vsuxseg7_mask:
1823 case Intrinsic::riscv_vsuxseg8_mask:
1824 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1825 /*IsStore*/ true,
1826 /*IsUnitStrided*/ false);
1827 }
1828}
1829
1830bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1831 const AddrMode &AM, Type *Ty,
1832 unsigned AS,
1833 Instruction *I) const {
1834 // No global is ever allowed as a base.
1835 if (AM.BaseGV)
1836 return false;
1837
1838 // RVV instructions only support register addressing.
1839 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1840 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1841
1842 // Require a 12-bit signed offset.
1843 if (!isInt<12>(AM.BaseOffs))
1844 return false;
1845
1846 switch (AM.Scale) {
1847 case 0: // "r+i" or just "i", depending on HasBaseReg.
1848 break;
1849 case 1:
1850 if (!AM.HasBaseReg) // allow "r+i".
1851 break;
1852 return false; // disallow "r+r" or "r+r+i".
1853 default:
1854 return false;
1855 }
1856
1857 return true;
1858}
1859
1860bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1861 return isInt<12>(Imm);
1862}
1863
1864bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1865 return isInt<12>(Imm);
1866}
1867
1868// On RV32, 64-bit integers are split into their high and low parts and held
1869// in two different registers, so the trunc is free since the low register can
1870// just be used.
1871// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1872// isTruncateFree?
1873bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1874 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1875 return false;
1876 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1877 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1878 return (SrcBits == 64 && DestBits == 32);
1879}
1880
1881bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1882 // We consider i64->i32 free on RV64 since we have good selection of W
1883 // instructions that make promoting operations back to i64 free in many cases.
1884 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1885 !DstVT.isInteger())
1886 return false;
1887 unsigned SrcBits = SrcVT.getSizeInBits();
1888 unsigned DestBits = DstVT.getSizeInBits();
1889 return (SrcBits == 64 && DestBits == 32);
1890}
1891
1892bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1893 // Zexts are free if they can be combined with a load.
1894 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1895 // poorly with type legalization of compares preferring sext.
1896 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1897 EVT MemVT = LD->getMemoryVT();
1898 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1899 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1900 LD->getExtensionType() == ISD::ZEXTLOAD))
1901 return true;
1902 }
1903
1904 return TargetLowering::isZExtFree(Val, VT2);
1905}
1906
1907bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1908 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1909}
1910
1911bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1912 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1913}
1914
1915bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1916 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1917}
1918
1919bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1920 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1921 Subtarget.hasVendorXCVbitmanip();
1922}
1923
1924bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1925 const Instruction &AndI) const {
1926 // We expect to be able to match a bit extraction instruction if the Zbs
1927 // extension is supported and the mask is a power of two. However, we
1928 // conservatively return false if the mask would fit in an ANDI instruction,
1929 // on the basis that it's possible the sinking+duplication of the AND in
1930 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1931 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1932 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1933 return false;
1934 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1935 if (!Mask)
1936 return false;
1937 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1938}
1939
1940bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1941 EVT VT = Y.getValueType();
1942
1943 // FIXME: Support vectors once we have tests.
1944 if (VT.isVector())
1945 return false;
1946
1947 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1948 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1949}
1950
1951bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1952 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1953 if (Subtarget.hasStdExtZbs())
1954 return X.getValueType().isScalarInteger();
1955 auto *C = dyn_cast<ConstantSDNode>(Y);
1956 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1957 if (Subtarget.hasVendorXTHeadBs())
1958 return C != nullptr;
1959 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
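// The ule(10) bound keeps the mask (1 << Y) within ANDI's 12-bit signed
// immediate: 1 << 10 = 1024 is representable, 1 << 11 = 2048 is not.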
1960 return C && C->getAPIntValue().ule(10);
1961}
1962
1963bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1964 EVT VT) const {
1965 // Only enable for rvv.
1966 if (!VT.isVector() || !Subtarget.hasVInstructions())
1967 return false;
1968
1969 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1970 return false;
1971
1972 return true;
1973}
1974
1975bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1976 Type *Ty) const {
1977 assert(Ty->isIntegerTy());
1978
1979 unsigned BitSize = Ty->getIntegerBitWidth();
1980 if (BitSize > Subtarget.getXLen())
1981 return false;
1982
1983 // Fast path, assume 32-bit immediates are cheap.
1984 int64_t Val = Imm.getSExtValue();
1985 if (isInt<32>(Val))
1986 return true;
1987
1988 // A constant pool entry may be more aligned than the load we're trying to
1989 // replace. If we don't support unaligned scalar mem, prefer the constant
1990 // pool.
1991 // TODO: Can the caller pass down the alignment?
1992 if (!Subtarget.enableUnalignedScalarMem())
1993 return true;
1994
1995 // Prefer to keep the load if it would require many instructions.
1996 // This uses the same threshold we use for constant pools but doesn't
1997 // check useConstantPoolForLargeInts.
1998 // TODO: Should we keep the load only when we're definitely going to emit a
1999 // constant pool?
2000
2001 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2002 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2003}
2004
2008 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2009 SelectionDAG &DAG) const {
2010 // One interesting pattern that we'd want to form is 'bit extract':
2011 // ((1 >> Y) & 1) ==/!= 0
2012 // But we also need to be careful not to try to reverse that fold.
2013
2014 // Is this '((1 >> Y) & 1)'?
2015 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2016 return false; // Keep the 'bit extract' pattern.
2017
2018 // Will this be '((1 >> Y) & 1)' after the transform?
2019 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2020 return true; // Do form the 'bit extract' pattern.
2021
2022 // If 'X' is a constant, and we transform, then we will immediately
2023 // try to undo the fold, thus causing endless combine loop.
2024 // So only do the transform if X is not a constant. This matches the default
2025 // implementation of this function.
2026 return !XC;
2027}
2028
2029bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2030 switch (Opcode) {
2031 case Instruction::Add:
2032 case Instruction::Sub:
2033 case Instruction::Mul:
2034 case Instruction::And:
2035 case Instruction::Or:
2036 case Instruction::Xor:
2037 case Instruction::FAdd:
2038 case Instruction::FSub:
2039 case Instruction::FMul:
2040 case Instruction::FDiv:
2041 case Instruction::ICmp:
2042 case Instruction::FCmp:
2043 return true;
2044 case Instruction::Shl:
2045 case Instruction::LShr:
2046 case Instruction::AShr:
2047 case Instruction::UDiv:
2048 case Instruction::SDiv:
2049 case Instruction::URem:
2050 case Instruction::SRem:
2051 case Instruction::Select:
2052 return Operand == 1;
2053 default:
2054 return false;
2055 }
2056}
2057
2058
2059bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
2060 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2061 return false;
2062
2063 if (canSplatOperand(I->getOpcode(), Operand))
2064 return true;
2065
2066 auto *II = dyn_cast<IntrinsicInst>(I);
2067 if (!II)
2068 return false;
2069
2070 switch (II->getIntrinsicID()) {
2071 case Intrinsic::fma:
2072 case Intrinsic::vp_fma:
2073 return Operand == 0 || Operand == 1;
2074 case Intrinsic::vp_shl:
2075 case Intrinsic::vp_lshr:
2076 case Intrinsic::vp_ashr:
2077 case Intrinsic::vp_udiv:
2078 case Intrinsic::vp_sdiv:
2079 case Intrinsic::vp_urem:
2080 case Intrinsic::vp_srem:
2081 case Intrinsic::ssub_sat:
2082 case Intrinsic::vp_ssub_sat:
2083 case Intrinsic::usub_sat:
2084 case Intrinsic::vp_usub_sat:
2085 return Operand == 1;
2086 // These intrinsics are commutative.
2087 case Intrinsic::vp_add:
2088 case Intrinsic::vp_mul:
2089 case Intrinsic::vp_and:
2090 case Intrinsic::vp_or:
2091 case Intrinsic::vp_xor:
2092 case Intrinsic::vp_fadd:
2093 case Intrinsic::vp_fmul:
2094 case Intrinsic::vp_icmp:
2095 case Intrinsic::vp_fcmp:
2096 case Intrinsic::smin:
2097 case Intrinsic::vp_smin:
2098 case Intrinsic::umin:
2099 case Intrinsic::vp_umin:
2100 case Intrinsic::smax:
2101 case Intrinsic::vp_smax:
2102 case Intrinsic::umax:
2103 case Intrinsic::vp_umax:
2104 case Intrinsic::sadd_sat:
2105 case Intrinsic::vp_sadd_sat:
2106 case Intrinsic::uadd_sat:
2107 case Intrinsic::vp_uadd_sat:
2108 // These intrinsics have 'vr' versions.
2109 case Intrinsic::vp_sub:
2110 case Intrinsic::vp_fsub:
2111 case Intrinsic::vp_fdiv:
2112 return Operand == 0 || Operand == 1;
2113 default:
2114 return false;
2115 }
2116}
2117
2118/// Check if sinking \p I's operands to I's basic block is profitable, because
2119/// the operands can be folded into a target instruction, e.g.
2120/// splats of scalars can fold into vector instructions.
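///
/// e.g. given
///   %head = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
///   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison,
///            <vscale x 4 x i32> zeroinitializer
///   %r = add <vscale x 4 x i32> %v, %splat
/// sinking %splat (and %head) next to %r lets isel select a single vadd.vx.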
2121bool RISCVTargetLowering::shouldSinkOperands(
2122 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2123 using namespace llvm::PatternMatch;
2124
2125 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2126 return false;
2127
2128 // Don't sink splat operands if the target prefers not to. Some targets
2129 // require S2V transfer buffers, and we can run out of them copying the same
2130 // value repeatedly.
2131 // FIXME: It could still be worth doing if it would improve vector register
2132 // pressure and prevent a vector spill.
2133 if (!Subtarget.sinkSplatOperands())
2134 return false;
2135
2136 for (auto OpIdx : enumerate(I->operands())) {
2137 if (!canSplatOperand(I, OpIdx.index()))
2138 continue;
2139
2140 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2141 // Make sure we are not already sinking this operand
2142 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2143 continue;
2144
2145 // We are looking for a splat that can be sunk.
2147 m_Undef(), m_ZeroMask())))
2148 continue;
2149
2150 // Don't sink i1 splats.
2151 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2152 continue;
2153
2154 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2155 // and vector registers
2156 for (Use &U : Op->uses()) {
2157 Instruction *Insn = cast<Instruction>(U.getUser());
2158 if (!canSplatOperand(Insn, U.getOperandNo()))
2159 return false;
2160 }
2161
2162 Ops.push_back(&Op->getOperandUse(0));
2163 Ops.push_back(&OpIdx.value());
2164 }
2165 return true;
2166}
2167
2168bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2169 unsigned Opc = VecOp.getOpcode();
2170
2171 // Assume target opcodes can't be scalarized.
2172 // TODO - do we have any exceptions?
2173 if (Opc >= ISD::BUILTIN_OP_END)
2174 return false;
2175
2176 // If the vector op is not supported, try to convert to scalar.
2177 EVT VecVT = VecOp.getValueType();
2178 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2179 return true;
2180
2181 // If the vector op is supported, but the scalar op is not, the transform may
2182 // not be worthwhile.
2183 // Permit converting a vector binary operation to a scalar binary operation
2184 // that is custom lowered for an illegal type.
2185 EVT ScalarVT = VecVT.getScalarType();
2186 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2187 isOperationCustom(Opc, ScalarVT);
2188}
2189
2190bool RISCVTargetLowering::isOffsetFoldingLegal(
2191 const GlobalAddressSDNode *GA) const {
2192 // In order to maximise the opportunity for common subexpression elimination,
2193 // keep a separate ADD node for the global address offset instead of folding
2194 // it in the global address node. Later peephole optimisations may choose to
2195 // fold it back in when profitable.
2196 return false;
2197}
2198
2199// Return one of the followings:
2200// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2201// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2202// positive counterpart, which will be materialized from the first returned
2203// element. The second returned element indicates that an FNEG should be
2204// emitted afterwards.
2205// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
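// e.g. Imm = -2.0: -2.0 itself is not an FLI-encodable value, but +2.0 is, so
// this returns {index of +2.0, true} and the caller emits fli followed by
// fneg (-1.0 is the only negative value the FLI table encodes directly).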
2206std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2207 EVT VT) const {
2208 if (!Subtarget.hasStdExtZfa())
2209 return std::make_pair(-1, false);
2210
2211 bool IsSupportedVT = false;
2212 if (VT == MVT::f16) {
2213 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2214 } else if (VT == MVT::f32) {
2215 IsSupportedVT = true;
2216 } else if (VT == MVT::f64) {
2217 assert(Subtarget.hasStdExtD() && "Expect D extension");
2218 IsSupportedVT = true;
2219 }
2220
2221 if (!IsSupportedVT)
2222 return std::make_pair(-1, false);
2223
2224 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2225 if (Index < 0 && Imm.isNegative())
2226 // Try the combination of its positive counterpart + FNEG.
2227 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2228 else
2229 return std::make_pair(Index, false);
2230}
2231
2233 bool ForCodeSize) const {
2234 bool IsLegalVT = false;
2235 if (VT == MVT::f16)
2236 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2237 else if (VT == MVT::f32)
2238 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2239 else if (VT == MVT::f64)
2240 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2241 else if (VT == MVT::bf16)
2242 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2243
2244 if (!IsLegalVT)
2245 return false;
2246
2247 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2248 return true;
2249
2250 // Cannot create a 64-bit floating-point immediate value for RV32.
2251 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2252 // td can handle +0.0 or -0.0 already.
2253 // -0.0 can be created by fmv + fneg.
2254 return Imm.isZero();
2255 }
2256
2257 // Special case: fmv + fneg
2258 if (Imm.isNegZero())
2259 return true;
2260
2261 // Building an integer and then converting requires a fmv at the end of
2262 // the integer sequence.
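  // For example, with the default fpimm cost threshold of 2 this accepts an
  // immediate only if its bit pattern can be built with a single integer
  // instruction (e.g. one lui or addi) plus the final fmv.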
2263 const int Cost =
2264 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2265 Subtarget);
2266 return Cost <= FPImmCost;
2267}
2268
2269// TODO: This is very conservative.
2271 unsigned Index) const {
2273 return false;
2274
2275 // Only support extracting a fixed vector from a fixed vector for now.
2276 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2277 return false;
2278
2279 EVT EltVT = ResVT.getVectorElementType();
2280 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2281
2282 // The smallest type we can slide is i8.
2283 // TODO: We can extract index 0 from a mask vector without a slide.
2284 if (EltVT == MVT::i1)
2285 return false;
2286
2287 unsigned ResElts = ResVT.getVectorNumElements();
2288 unsigned SrcElts = SrcVT.getVectorNumElements();
2289
2290 unsigned MinVLen = Subtarget.getRealMinVLen();
2291 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2292
2293 // If we're extracting only data from the first VLEN bits of the source
2294 // then we can always do this with an m1 vslidedown.vx. Restricting the
2295 // Index ensures we can use a vslidedown.vi.
2296 // TODO: We can generalize this when the exact VLEN is known.
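  // For example, with a minimum VLEN of 128 and i32 elements, MinVLMAX is 4,
  // so extracting a v2i32 at index 0, 1 or 2 stays within the first vector
  // register and is treated as cheap.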
2297 if (Index + ResElts <= MinVLMAX && Index < 31)
2298 return true;
2299
2300 // Conservatively only handle extracting half of a vector.
2301 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2302 // a cheap extract. However, this case is important in practice for
2303 // shuffled extracts of longer vectors. How should this be resolved?
2304 if ((ResElts * 2) != SrcElts)
2305 return false;
2306
2307 // Slides can support an arbitrary index, but we only treat vslidedown.vi
2308 // as cheap.
2309 if (Index >= 32)
2310 return false;
2311
2312 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2313 // the upper half of a vector until we have more test coverage.
2314 return Index == 0 || Index == ResElts;
2315}
2316
2319 EVT VT) const {
2320 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2321 // We might still end up using a GPR but that will be decided based on ABI.
2322 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2323 !Subtarget.hasStdExtZfhminOrZhinxmin())
2324 return MVT::f32;
2325
2327
2328 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2329 return MVT::i64;
2330
2331 return PartVT;
2332}
2333
2336 EVT VT) const {
2337 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2338 // We might still end up using a GPR but that will be decided based on ABI.
2339 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2340 !Subtarget.hasStdExtZfhminOrZhinxmin())
2341 return 1;
2342
2344}
2345
2347 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2348 unsigned &NumIntermediates, MVT &RegisterVT) const {
2350 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2351
2352 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2353 IntermediateVT = MVT::i64;
2354
2355 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2356 RegisterVT = MVT::i64;
2357
2358 return NumRegs;
2359}
2360
2361// Changes the condition code and swaps operands if necessary, so the SetCC
2362// operation matches one of the comparisons supported directly by branches
2363// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2364// with 1/-1.
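// For example, (setgt X, -1) becomes (setge X, 0), and (setgt X, Y) is handled
// as (setlt Y, X) by swapping the operands, since the ISA only provides
// branches for eq/ne/lt/ge (signed and unsigned).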
2365static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2366 ISD::CondCode &CC, SelectionDAG &DAG) {
2367 // If this is a single bit test that can't be handled by ANDI, shift the
2368 // bit to be tested to the MSB and perform a signed compare with 0.
2369 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2370 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2371 isa<ConstantSDNode>(LHS.getOperand(1))) {
2372 uint64_t Mask = LHS.getConstantOperandVal(1);
2373 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2374 unsigned ShAmt = 0;
2375 if (isPowerOf2_64(Mask)) {
2377 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2378 } else {
2379 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2380 }
2381
2382 LHS = LHS.getOperand(0);
2383 if (ShAmt != 0)
2384 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2385 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2386 return;
2387 }
2388 }
2389
2390 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2391 int64_t C = RHSC->getSExtValue();
2392 switch (CC) {
2393 default: break;
2394 case ISD::SETGT:
2395 // Convert X > -1 to X >= 0.
2396 if (C == -1) {
2397 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2398 CC = ISD::SETGE;
2399 return;
2400 }
2401 break;
2402 case ISD::SETLT:
2403 // Convert X < 1 to 0 >= X.
2404 if (C == 1) {
2405 RHS = LHS;
2406 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2407 CC = ISD::SETGE;
2408 return;
2409 }
2410 break;
2411 }
2412 }
2413
2414 switch (CC) {
2415 default:
2416 break;
2417 case ISD::SETGT:
2418 case ISD::SETLE:
2419 case ISD::SETUGT:
2420 case ISD::SETULE:
2422 std::swap(LHS, RHS);
2423 break;
2424 }
2425}
2426
2428 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2429 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2430 if (VT.getVectorElementType() == MVT::i1)
2431 KnownSize *= 8;
2432
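  // For example, with 64-bit RVV blocks an nxv2i32 (64 known bits) maps to
  // LMUL_1 and an nxv4i32 (128 known bits) maps to LMUL_2; i1 element types are
  // scaled by 8 above, so an nxv8i1 is likewise treated as 64 bits.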
2433 switch (KnownSize) {
2434 default:
2435 llvm_unreachable("Invalid LMUL.");
2436 case 8:
2438 case 16:
2440 case 32:
2442 case 64:
2444 case 128:
2446 case 256:
2448 case 512:
2450 }
2451}
2452
2454 switch (LMul) {
2455 default:
2456 llvm_unreachable("Invalid LMUL.");
2461 return RISCV::VRRegClassID;
2463 return RISCV::VRM2RegClassID;
2465 return RISCV::VRM4RegClassID;
2467 return RISCV::VRM8RegClassID;
2468 }
2469}
2470
2472 RISCVII::VLMUL LMUL = getLMUL(VT);
2473 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2474 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2475 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2476 LMUL == RISCVII::VLMUL::LMUL_1) {
2477 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2478 "Unexpected subreg numbering");
2479 return RISCV::sub_vrm1_0 + Index;
2480 }
2481 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2482 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2483 "Unexpected subreg numbering");
2484 return RISCV::sub_vrm2_0 + Index;
2485 }
2486 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2487 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2488 "Unexpected subreg numbering");
2489 return RISCV::sub_vrm4_0 + Index;
2490 }
2491 llvm_unreachable("Invalid vector type.");
2492}
2493
2495 if (VT.getVectorElementType() == MVT::i1)
2496 return RISCV::VRRegClassID;
2497 return getRegClassIDForLMUL(getLMUL(VT));
2498}
2499
2500// Attempt to decompose a subvector insert/extract between VecVT and
2501// SubVecVT via subregister indices. Returns the subregister index that
2502// can perform the subvector insert/extract with the given element index, as
2503// well as the index corresponding to any leftover subvectors that must be
2504// further inserted/extracted within the register class for SubVecVT.
2505std::pair<unsigned, unsigned>
2507 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2508 const RISCVRegisterInfo *TRI) {
2509 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2510 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2511 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2512 "Register classes not ordered");
2513 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2514 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2515 // Try to compose a subregister index that takes us from the incoming
2516 // LMUL>1 register class down to the outgoing one. At each step we half
2517 // the LMUL:
2518 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2519 // Note that this is not guaranteed to find a subregister index, such as
2520 // when we are extracting from one VR type to another.
2521 unsigned SubRegIdx = RISCV::NoSubRegister;
2522 for (const unsigned RCID :
2523 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2524 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2525 VecVT = VecVT.getHalfNumVectorElementsVT();
2526 bool IsHi =
2527 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2528 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2529 getSubregIndexByMVT(VecVT, IsHi));
2530 if (IsHi)
2531 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2532 }
2533 return {SubRegIdx, InsertExtractIdx};
2534}
2535
2536// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2537// stores for those types.
2538bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2539 return !Subtarget.useRVVForFixedLengthVectors() ||
2540 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2541}
2542
2544 if (!ScalarTy.isSimple())
2545 return false;
2546 switch (ScalarTy.getSimpleVT().SimpleTy) {
2547 case MVT::iPTR:
2548 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2549 case MVT::i8:
2550 case MVT::i16:
2551 case MVT::i32:
2552 return true;
2553 case MVT::i64:
2554 return Subtarget.hasVInstructionsI64();
2555 case MVT::f16:
2556 return Subtarget.hasVInstructionsF16();
2557 case MVT::f32:
2558 return Subtarget.hasVInstructionsF32();
2559 case MVT::f64:
2560 return Subtarget.hasVInstructionsF64();
2561 default:
2562 return false;
2563 }
2564}
2565
2566
2567unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2568 return NumRepeatedDivisors;
2569}
2570
2572 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2573 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2574 "Unexpected opcode");
2575 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2576 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2578 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2579 if (!II)
2580 return SDValue();
2581 return Op.getOperand(II->VLOperand + 1 + HasChain);
2582}
2583
2585 const RISCVSubtarget &Subtarget) {
2586 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2587 if (!Subtarget.useRVVForFixedLengthVectors())
2588 return false;
2589
2590 // We only support a set of vector types with a consistent maximum fixed size
2591 // across all supported vector element types to avoid legalization issues.
2592 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2593 // fixed-length vector type we support is 1024 bytes.
2594 if (VT.getFixedSizeInBits() > 1024 * 8)
2595 return false;
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598
2599 MVT EltVT = VT.getVectorElementType();
2600
2601 // Don't use RVV for vectors we cannot scalarize if required.
2602 switch (EltVT.SimpleTy) {
2603 // i1 is supported but has different rules.
2604 default:
2605 return false;
2606 case MVT::i1:
2607 // Masks can only use a single register.
2608 if (VT.getVectorNumElements() > MinVLen)
2609 return false;
2610 MinVLen /= 8;
2611 break;
2612 case MVT::i8:
2613 case MVT::i16:
2614 case MVT::i32:
2615 break;
2616 case MVT::i64:
2617 if (!Subtarget.hasVInstructionsI64())
2618 return false;
2619 break;
2620 case MVT::f16:
2621 if (!Subtarget.hasVInstructionsF16Minimal())
2622 return false;
2623 break;
2624 case MVT::bf16:
2625 if (!Subtarget.hasVInstructionsBF16())
2626 return false;
2627 break;
2628 case MVT::f32:
2629 if (!Subtarget.hasVInstructionsF32())
2630 return false;
2631 break;
2632 case MVT::f64:
2633 if (!Subtarget.hasVInstructionsF64())
2634 return false;
2635 break;
2636 }
2637
2638 // Reject elements larger than ELEN.
2639 if (EltVT.getSizeInBits() > Subtarget.getELen())
2640 return false;
2641
2642 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2643 // Don't use RVV for types that don't fit.
2644 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2645 return false;
2646
2647 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2648 // the base fixed length RVV support in place.
2649 if (!VT.isPow2VectorType())
2650 return false;
2651
2652 return true;
2653}
2654
2655bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2656 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2657}
2658
2659// Return the largest legal scalable vector type that matches VT's element type.
2661 const RISCVSubtarget &Subtarget) {
2662 // This may be called before legal types are setup.
2663 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2664 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2665 "Expected legal fixed length vector!");
2666
2667 unsigned MinVLen = Subtarget.getRealMinVLen();
2668 unsigned MaxELen = Subtarget.getELen();
2669
2670 MVT EltVT = VT.getVectorElementType();
2671 switch (EltVT.SimpleTy) {
2672 default:
2673 llvm_unreachable("unexpected element type for RVV container");
2674 case MVT::i1:
2675 case MVT::i8:
2676 case MVT::i16:
2677 case MVT::i32:
2678 case MVT::i64:
2679 case MVT::bf16:
2680 case MVT::f16:
2681 case MVT::f32:
2682 case MVT::f64: {
2683 // We prefer to use LMUL=1 for VLEN-sized types. Use fractional LMULs for
2684 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2685 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
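    // For example, with a minimum VLEN of 128 a fixed-length v4i32 becomes an
    // nxv2i32 container (a single LMUL_1 register), while a v16i32 becomes an
    // nxv8i32 container (LMUL_4).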
2686 unsigned NumElts =
2688 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2689 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2690 return MVT::getScalableVectorVT(EltVT, NumElts);
2691 }
2692 }
2693}
2694
2696 const RISCVSubtarget &Subtarget) {
2698 Subtarget);
2699}
2700
2702 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2703}
2704
2705// Grow V to consume an entire RVV register.
2707 const RISCVSubtarget &Subtarget) {
2708 assert(VT.isScalableVector() &&
2709 "Expected to convert into a scalable vector!");
2710 assert(V.getValueType().isFixedLengthVector() &&
2711 "Expected a fixed length vector operand!");
2712 SDLoc DL(V);
2713 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2714 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2715}
2716
2717// Shrink V so it's just big enough to maintain a VT's worth of data.
2719 const RISCVSubtarget &Subtarget) {
2721 "Expected to convert into a fixed length vector!");
2722 assert(V.getValueType().isScalableVector() &&
2723 "Expected a scalable vector operand!");
2724 SDLoc DL(V);
2725 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2726 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2727}
2728
2729/// Return the mask type suitable for masking the provided vector type. This
2730/// is simply an i1-element vector type of the same (possibly scalable)
2731/// length.
2732static MVT getMaskTypeFor(MVT VecVT) {
2733 assert(VecVT.isVector());
2735 return MVT::getVectorVT(MVT::i1, EC);
2736}
2737
2738/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2739/// vector length VL.
2740static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2741 SelectionDAG &DAG) {
2742 MVT MaskVT = getMaskTypeFor(VecVT);
2743 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2744}
2745
2746static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2747 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2748 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2749 // canonicalize the representation. InsertVSETVLI will pick the immediate
2750 // encoding later if profitable.
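  // For example, if the exact VLEN is known to be 128, an nxv4i16 container
  // has VLMAX == 8, so a fixed-length v8i16 operation gets X0 (the VLMAX
  // encoding) as its VL rather than the literal constant 8.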
2751 const auto [MinVLMAX, MaxVLMAX] =
2752 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2753 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2754 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2755
2756 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2757}
2758
2759static std::pair<SDValue, SDValue>
2761 const RISCVSubtarget &Subtarget) {
2762 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2763 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2764 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2765 return {Mask, VL};
2766}
2767
2768static std::pair<SDValue, SDValue>
2769getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2770 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2771 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2772 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2773 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2774 return {Mask, VL};
2775}
2776
2777// Gets the two common "VL" operands: an all-ones mask and the vector length.
2778// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2779// fixed-length, ContainerVT is the scalable vector type it is contained in;
2780// otherwise ContainerVT should be the same as VecVT.
2781static std::pair<SDValue, SDValue>
2782getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2783 const RISCVSubtarget &Subtarget) {
2784 if (VecVT.isFixedLengthVector())
2785 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2786 Subtarget);
2787 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2788 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2789}
2790
2792 SelectionDAG &DAG) const {
2793 assert(VecVT.isScalableVector() && "Expected scalable vector");
2794 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2795 VecVT.getVectorElementCount());
2796}
2797
2798std::pair<unsigned, unsigned>
2800 const RISCVSubtarget &Subtarget) {
2801 assert(VecVT.isScalableVector() && "Expected scalable vector");
2802
2803 unsigned EltSize = VecVT.getScalarSizeInBits();
2804 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2805
2806 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2807 unsigned MaxVLMAX =
2808 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2809
2810 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2811 unsigned MinVLMAX =
2812 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2813
2814 return std::make_pair(MinVLMAX, MaxVLMAX);
2815}
2816
2817// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2818// of either are (currently) supported. This can get us into an infinite loop
2819// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2820// as a ..., etc.
2821// Until either (or both) of these can reliably lower any node, reporting that
2822// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2823// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2824// which is not desirable.
2826 EVT VT, unsigned DefinedValues) const {
2827 return false;
2828}
2829
2831 // TODO: Here we assume the reciprocal throughput of an LMUL_1 operation is
2832 // 1; in reality it is implementation-defined.
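  // For example, with DLenFactor == 1 an LMUL_2 type costs 2 and a fractional
  // LMUL_F2 type costs 1; with DLenFactor == 2 (DLEN == VLEN/2) even an LMUL_1
  // type costs 2 under this model.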
2833 if (!VT.isVector())
2835 unsigned DLenFactor = Subtarget.getDLenFactor();
2836 unsigned Cost;
2837 if (VT.isScalableVector()) {
2838 unsigned LMul;
2839 bool Fractional;
2840 std::tie(LMul, Fractional) =
2842 if (Fractional)
2843 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2844 else
2845 Cost = (LMul * DLenFactor);
2846 } else {
2847 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2848 }
2849 return Cost;
2850}
2851
2852
2853/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2854/// is generally quadratic in the number of vregs implied by LMUL. Note that
2855/// the operands (index and possibly mask) are handled separately.
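/// For example, assuming DLEN == VLEN, a gather on an LMUL_2 type is modeled
/// as 2 * 2 = 4 LMUL_1-equivalent operations.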
2857 return getLMULCost(VT) * getLMULCost(VT);
2858}
2859
2860/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2861/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2862/// or may track the vrgather.vv cost. It is implementation-dependent.
2864 return getLMULCost(VT);
2865}
2866
2867/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2868/// for the type VT. (This does not cover the vslide1up or vslide1down
2869/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2870/// or may track the vrgather.vv cost. It is implementation-dependent.
2872 return getLMULCost(VT);
2873}
2874
2875/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2876/// for the type VT. (This does not cover the vslide1up or vslide1down
2877/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2878/// or may track the vrgather.vv cost. It is implementation-dependent.
2880 return getLMULCost(VT);
2881}
2882
2884 const RISCVSubtarget &Subtarget) {
2885 // RISC-V FP-to-int conversions saturate to the destination register size, but
2886 // don't produce 0 for nan. We can use a conversion instruction and fix the
2887 // nan case with a compare and a select.
2888 SDValue Src = Op.getOperand(0);
2889
2890 MVT DstVT = Op.getSimpleValueType();
2891 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2892
2893 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2894
2895 if (!DstVT.isVector()) {
2896 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2897 // the result.
2898 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2899 Src.getValueType() == MVT::bf16) {
2900 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2901 }
2902
2903 unsigned Opc;
2904 if (SatVT == DstVT)
2905 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2906 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2908 else
2909 return SDValue();
2910 // FIXME: Support other SatVTs by clamping before or after the conversion.
2911
2912 SDLoc DL(Op);
2913 SDValue FpToInt = DAG.getNode(
2914 Opc, DL, DstVT, Src,
2916
2917 if (Opc == RISCVISD::FCVT_WU_RV64)
2918 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2919
2920 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2921 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2923 }
2924
2925 // Vectors.
2926
2927 MVT DstEltVT = DstVT.getVectorElementType();
2928 MVT SrcVT = Src.getSimpleValueType();
2929 MVT SrcEltVT = SrcVT.getVectorElementType();
2930 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2931 unsigned DstEltSize = DstEltVT.getSizeInBits();
2932
2933 // Only handle saturating to the destination type.
2934 if (SatVT != DstEltVT)
2935 return SDValue();
2936
2937 // FIXME: Don't support narrowing by more than 1 step for now.
2938 if (SrcEltSize > (2 * DstEltSize))
2939 return SDValue();
2940
2941 MVT DstContainerVT = DstVT;
2942 MVT SrcContainerVT = SrcVT;
2943 if (DstVT.isFixedLengthVector()) {
2944 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2945 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2946 assert(DstContainerVT.getVectorElementCount() ==
2947 SrcContainerVT.getVectorElementCount() &&
2948 "Expected same element count");
2949 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2950 }
2951
2952 SDLoc DL(Op);
2953
2954 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2955
2956 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2957 {Src, Src, DAG.getCondCode(ISD::SETNE),
2958 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2959
2960 // Need to widen by more than 1 step, promote the FP type, then do a widening
2961 // convert.
2962 if (DstEltSize > (2 * SrcEltSize)) {
2963 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2964 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2965 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2966 }
2967
2968 unsigned RVVOpc =
2970 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2971
2972 SDValue SplatZero = DAG.getNode(
2973 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2974 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2975 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2976 Res, DAG.getUNDEF(DstContainerVT), VL);
2977
2978 if (DstVT.isFixedLengthVector())
2979 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2980
2981 return Res;
2982}
2983
2985 switch (Opc) {
2986 case ISD::FROUNDEVEN:
2988 case ISD::VP_FROUNDEVEN:
2989 return RISCVFPRndMode::RNE;
2990 case ISD::FTRUNC:
2991 case ISD::STRICT_FTRUNC:
2992 case ISD::VP_FROUNDTOZERO:
2993 return RISCVFPRndMode::RTZ;
2994 case ISD::FFLOOR:
2995 case ISD::STRICT_FFLOOR:
2996 case ISD::VP_FFLOOR:
2997 return RISCVFPRndMode::RDN;
2998 case ISD::FCEIL:
2999 case ISD::STRICT_FCEIL:
3000 case ISD::VP_FCEIL:
3001 return RISCVFPRndMode::RUP;
3002 case ISD::FROUND:
3003 case ISD::STRICT_FROUND:
3004 case ISD::VP_FROUND:
3005 return RISCVFPRndMode::RMM;
3006 case ISD::FRINT:
3007 return RISCVFPRndMode::DYN;
3008 }
3009
3011}
3012
3013// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3014// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3015// the integer domain and back, taking care to avoid converting values that are
3016// NaN or already correct.
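// For example, FFLOOR on an f32 vector: lanes whose magnitude is at least 2^23,
// and NaN lanes, are left untouched; the remaining lanes are converted to
// integer with RDN rounding and back, and the original sign is restored so that
// -0.0 is preserved.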
3017static SDValue
3019 const RISCVSubtarget &Subtarget) {
3020 MVT VT = Op.getSimpleValueType();
3021 assert(VT.isVector() && "Unexpected type");
3022
3023 SDLoc DL(Op);
3024
3025 SDValue Src = Op.getOperand(0);
3026
3027 MVT ContainerVT = VT;
3028 if (VT.isFixedLengthVector()) {
3029 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3030 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3031 }
3032
3033 SDValue Mask, VL;
3034 if (Op->isVPOpcode()) {
3035 Mask = Op.getOperand(1);
3036 if (VT.isFixedLengthVector())
3037 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3038 Subtarget);
3039 VL = Op.getOperand(2);
3040 } else {
3041 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3042 }
3043
3044 // Freeze the source since we are increasing the number of uses.
3045 Src = DAG.getFreeze(Src);
3046
3047 // We do the conversion on the absolute value and fix the sign at the end.
3048 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3049
3050 // Determine the largest integer that can be represented exactly. This and
3051 // values larger than it don't have any fractional bits so don't need to
3052 // be converted.
3053 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3054 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3055 APFloat MaxVal = APFloat(FltSem);
3056 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3057 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3058 SDValue MaxValNode =
3059 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3060 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3061 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3062
3063 // If abs(Src) was larger than MaxVal or nan, keep it.
3064 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3065 Mask =
3066 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3067 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3068 Mask, Mask, VL});
3069
3070 // Truncate to integer and convert back to FP.
3071 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3072 MVT XLenVT = Subtarget.getXLenVT();
3073 SDValue Truncated;
3074
3075 switch (Op.getOpcode()) {
3076 default:
3077 llvm_unreachable("Unexpected opcode");
3078 case ISD::FCEIL:
3079 case ISD::VP_FCEIL:
3080 case ISD::FFLOOR:
3081 case ISD::VP_FFLOOR:
3082 case ISD::FROUND:
3083 case ISD::FROUNDEVEN:
3084 case ISD::VP_FROUND:
3085 case ISD::VP_FROUNDEVEN:
3086 case ISD::VP_FROUNDTOZERO: {
3089 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3090 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3091 break;
3092 }
3093 case ISD::FTRUNC:
3094 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3095 Mask, VL);
3096 break;
3097 case ISD::FRINT:
3098 case ISD::VP_FRINT:
3099 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3100 break;
3101 case ISD::FNEARBYINT:
3102 case ISD::VP_FNEARBYINT:
3103 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3104 Mask, VL);
3105 break;
3106 }
3107
3108 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3109 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3110 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3111 Mask, VL);
3112
3113 // Restore the original sign so that -0.0 is preserved.
3114 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3115 Src, Src, Mask, VL);
3116
3117 if (!VT.isFixedLengthVector())
3118 return Truncated;
3119
3120 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3121}
3122
3123// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3124// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3125// to a qNaN and then converting the new source to integer and back to FP.
3126static SDValue
3128 const RISCVSubtarget &Subtarget) {
3129 SDLoc DL(Op);
3130 MVT VT = Op.getSimpleValueType();
3131 SDValue Chain = Op.getOperand(0);
3132 SDValue Src = Op.getOperand(1);
3133
3134 MVT ContainerVT = VT;
3135 if (VT.isFixedLengthVector()) {
3136 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3137 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3138 }
3139
3140 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3141
3142 // Freeze the source since we are increasing the number of uses.
3143 Src = DAG.getFreeze(Src);
3144
3145 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3146 MVT MaskVT = Mask.getSimpleValueType();
3148 DAG.getVTList(MaskVT, MVT::Other),
3149 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3150 DAG.getUNDEF(MaskVT), Mask, VL});
3151 Chain = Unorder.getValue(1);
3153 DAG.getVTList(ContainerVT, MVT::Other),
3154 {Chain, Src, Src, Src, Unorder, VL});
3155 Chain = Src.getValue(1);
3156
3157 // We do the conversion on the absolute value and fix the sign at the end.
3158 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3159
3160 // Determine the largest integer that can be represented exactly. This and
3161 // values larger than it don't have any fractional bits so don't need to
3162 // be converted.
3163 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3164 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3165 APFloat MaxVal = APFloat(FltSem);
3166 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3167 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3168 SDValue MaxValNode =
3169 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3170 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3171 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3172
3173 // If abs(Src) was larger than MaxVal or nan, keep it.
3174 Mask = DAG.getNode(
3175 RISCVISD::SETCC_VL, DL, MaskVT,
3176 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3177
3178 // Truncate to integer and convert back to FP.
3179 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3180 MVT XLenVT = Subtarget.getXLenVT();
3181 SDValue Truncated;
3182
3183 switch (Op.getOpcode()) {
3184 default:
3185 llvm_unreachable("Unexpected opcode");
3186 case ISD::STRICT_FCEIL:
3187 case ISD::STRICT_FFLOOR:
3188 case ISD::STRICT_FROUND:
3192 Truncated = DAG.getNode(
3193 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3194 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3195 break;
3196 }
3197 case ISD::STRICT_FTRUNC:
3198 Truncated =
3200 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3201 break;
3204 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3205 Mask, VL);
3206 break;
3207 }
3208 Chain = Truncated.getValue(1);
3209
3210 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3211 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3212 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3213 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3214 Truncated, Mask, VL);
3215 Chain = Truncated.getValue(1);
3216 }
3217
3218 // Restore the original sign so that -0.0 is preserved.
3219 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3220 Src, Src, Mask, VL);
3221
3222 if (VT.isFixedLengthVector())
3223 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3224 return DAG.getMergeValues({Truncated, Chain}, DL);
3225}
3226
3227static SDValue
3229 const RISCVSubtarget &Subtarget) {
3230 MVT VT = Op.getSimpleValueType();
3231 if (VT.isVector())
3232 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3233
3234 if (DAG.shouldOptForSize())
3235 return SDValue();
3236
3237 SDLoc DL(Op);
3238 SDValue Src = Op.getOperand(0);
3239
3240 // Create an integer the size of the mantissa with the MSB set. This and all
3241 // values larger than it don't have any fractional bits so don't need to be
3242 // converted.
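  // For f32 this value is 2^23 and for f64 it is 2^52; anything with a
  // magnitude at least that large is already an integer.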
3243 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3244 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3245 APFloat MaxVal = APFloat(FltSem);
3246 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3247 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3248 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3249
3251 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3252 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3253}
3254
3255// Expand vector LRINT and LLRINT by converting to the integer domain.
3257 const RISCVSubtarget &Subtarget) {
3258 MVT VT = Op.getSimpleValueType();
3259 assert(VT.isVector() && "Unexpected type");
3260
3261 SDLoc DL(Op);
3262 SDValue Src = Op.getOperand(0);
3263 MVT ContainerVT = VT;
3264
3265 if (VT.isFixedLengthVector()) {
3266 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3267 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3268 }
3269
3270 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3271 SDValue Truncated =
3272 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3273
3274 if (!VT.isFixedLengthVector())
3275 return Truncated;
3276
3277 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3278}
3279
3280static SDValue
3282 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3283 SDValue Offset, SDValue Mask, SDValue VL,
3285 if (Merge.isUndef())
3287 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3288 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3289 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3290}
3291
3292static SDValue
3293getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3295 SDValue VL,
3297 if (Merge.isUndef())
3299 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3300 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3301 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3302}
3303
3304static MVT getLMUL1VT(MVT VT) {
3306 "Unexpected vector MVT");
3310}
3311
3315 int64_t Addend;
3316};
3317
3318static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3320 // We will use a SINT_TO_FP to materialize this constant so we should use a
3321 // signed APSInt here.
3322 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3323 // We use an arbitrary rounding mode here. If a floating-point value is an
3324 // exact integer (e.g., 1.0), the rounding mode does not affect the output. If
3325 // the rounding mode changes the output value, then it is not an exact
3326 // integer.
3328 bool IsExact;
3329 // If it is out of signed integer range, it will return an invalid operation.
3330 // If it is not an exact integer, IsExact is false.
3331 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3333 !IsExact)
3334 return std::nullopt;
3335 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3336}
3337
3338// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3339// to the (non-zero) step S and start value X. This can then be lowered as the
3340// RVV sequence (VID * S) + X, for example.
3341// The step S is represented as an integer numerator divided by a positive
3342// denominator. Note that the implementation currently only identifies
3343// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3344// cannot detect 2/3, for example.
3345// Note that this method will also match potentially unappealing index
3346// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3347// determine whether this is worth generating code for.
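// For example, <i32 1, i32 3, i32 5, i32 7> matches with StepNumerator 2,
// StepDenominator 1 and Addend 1 (i.e. VID * 2 + 1), while
// <i32 0, i32 0, i32 1, i32 1> matches with a 1/2 step and Addend 0.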
3348static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3349 unsigned EltSizeInBits) {
3350 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3351 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3352 return std::nullopt;
3353 bool IsInteger = Op.getValueType().isInteger();
3354
3355 std::optional<unsigned> SeqStepDenom;
3356 std::optional<int64_t> SeqStepNum, SeqAddend;
3357 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3358 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3359
3360 // First extract the ops into a list of constant integer values. This may not
3361 // be possible for floats if they're not all representable as integers.
3363 const unsigned OpSize = Op.getScalarValueSizeInBits();
3364 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3365 if (Elt.isUndef()) {
3366 Elts[Idx] = std::nullopt;
3367 continue;
3368 }
3369 if (IsInteger) {
3370 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3371 } else {
3372 auto ExactInteger =
3373 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3374 if (!ExactInteger)
3375 return std::nullopt;
3376 Elts[Idx] = *ExactInteger;
3377 }
3378 }
3379
3380 for (auto [Idx, Elt] : enumerate(Elts)) {
3381 // Assume undef elements match the sequence; we just have to be careful
3382 // when interpolating across them.
3383 if (!Elt)
3384 continue;
3385
3386 if (PrevElt) {
3387 // Calculate the step since the last non-undef element, and ensure
3388 // it's consistent across the entire sequence.
3389 unsigned IdxDiff = Idx - PrevElt->second;
3390 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3391
3392 // A zero value difference means that we're somewhere in the middle
3393 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3394 // step change before evaluating the sequence.
3395 if (ValDiff == 0)
3396 continue;
3397
3398 int64_t Remainder = ValDiff % IdxDiff;
3399 // Normalize the step if it's greater than 1.
3400 if (Remainder != ValDiff) {
3401 // The difference must cleanly divide the element span.
3402 if (Remainder != 0)
3403 return std::nullopt;
3404 ValDiff /= IdxDiff;
3405 IdxDiff = 1;
3406 }
3407
3408 if (!SeqStepNum)
3409 SeqStepNum = ValDiff;
3410 else if (ValDiff != SeqStepNum)
3411 return std::nullopt;
3412
3413 if (!SeqStepDenom)
3414 SeqStepDenom = IdxDiff;
3415 else if (IdxDiff != *SeqStepDenom)
3416 return std::nullopt;
3417 }
3418
3419 // Record this non-undef element for later.
3420 if (!PrevElt || PrevElt->first != *Elt)
3421 PrevElt = std::make_pair(*Elt, Idx);
3422 }
3423
3424 // We need to have logged a step for this to count as a legal index sequence.
3425 if (!SeqStepNum || !SeqStepDenom)
3426 return std::nullopt;
3427
3428 // Loop back through the sequence and validate elements we might have skipped
3429 // while waiting for a valid step. While doing this, log any sequence addend.
3430 for (auto [Idx, Elt] : enumerate(Elts)) {
3431 if (!Elt)
3432 continue;
3433 uint64_t ExpectedVal =
3434 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3435 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3436 if (!SeqAddend)
3437 SeqAddend = Addend;
3438 else if (Addend != SeqAddend)
3439 return std::nullopt;
3440 }
3441
3442 assert(SeqAddend && "Must have an addend if we have a step");
3443
3444 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3445}
3446
3447// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3448// and lower it as a VRGATHER_VX_VL from the source vector.
3449static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3450 SelectionDAG &DAG,
3451 const RISCVSubtarget &Subtarget) {
3452 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3453 return SDValue();
3454 SDValue Vec = SplatVal.getOperand(0);
3455 // Only perform this optimization on vectors of the same size for simplicity.
3456 // Don't perform this optimization for i1 vectors.
3457 // FIXME: Support i1 vectors, maybe by promoting to i8?
3458 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3459 return SDValue();
3460 SDValue Idx = SplatVal.getOperand(1);
3461 // The index must be a legal type.
3462 if (Idx.getValueType() != Subtarget.getXLenVT())
3463 return SDValue();
3464
3465 MVT ContainerVT = VT;
3466 if (VT.isFixedLengthVector()) {
3467 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3468 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3469 }
3470
3471 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3472
3473 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3474 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3475
3476 if (!VT.isFixedLengthVector())
3477 return Gather;
3478
3479 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3480}
3481
3482
3483/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3484/// which constitute a large proportion of the elements. In such cases we can
3485/// splat a vector with the dominant element and make up the shortfall with
3486/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3487/// Note that this includes vectors of 2 elements by association. The
3488/// upper-most element is the "dominant" one, allowing us to use a splat to
3489/// "insert" the upper element, and an insert of the lower element at position
3490/// 0, which improves codegen.
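/// For example, <i32 7, i32 7, i32 7, i32 3> can be lowered by splatting 7 and
/// then patching in the single 3 (here via v(f)slide1down, since it is the last
/// element).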
3492 const RISCVSubtarget &Subtarget) {
3493 MVT VT = Op.getSimpleValueType();
3494 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3495
3496 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3497
3498 SDLoc DL(Op);
3499 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3500
3501 MVT XLenVT = Subtarget.getXLenVT();
3502 unsigned NumElts = Op.getNumOperands();
3503
3504 SDValue DominantValue;
3505 unsigned MostCommonCount = 0;
3506 DenseMap<SDValue, unsigned> ValueCounts;
3507 unsigned NumUndefElts =
3508 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3509
3510 // Track the number of scalar loads we know we'd be inserting, estimated as
3511 // one for each non-zero floating-point constant. Other kinds of element are
3512 // either already in registers or are materialized on demand. The threshold
3513 // at which a vector load is more desirable than several scalar
3514 // materialization and vector-insertion instructions is not known.
3515 unsigned NumScalarLoads = 0;
3516
3517 for (SDValue V : Op->op_values()) {
3518 if (V.isUndef())
3519 continue;
3520
3521 ValueCounts.insert(std::make_pair(V, 0));
3522 unsigned &Count = ValueCounts[V];
3523 if (0 == Count)
3524 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3525 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3526
3527 // Is this value dominant? In case of a tie, prefer the highest element as
3528 // it's cheaper to insert near the beginning of a vector than it is at the
3529 // end.
3530 if (++Count >= MostCommonCount) {
3531 DominantValue = V;
3532 MostCommonCount = Count;
3533 }
3534 }
3535
3536 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3537 unsigned NumDefElts = NumElts - NumUndefElts;
3538 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3539
3540 // Don't perform this optimization when optimizing for size, since
3541 // materializing elements and inserting them tends to cause code bloat.
3542 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3543 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3544 ((MostCommonCount > DominantValueCountThreshold) ||
3545 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3546 // Start by splatting the most common element.
3547 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3548
3549 DenseSet<SDValue> Processed{DominantValue};
3550
3551 // We can handle an insert into the last element (of a splat) via
3552 // v(f)slide1down. This is slightly better than the vslideup insert
3553 // lowering as it avoids the need for a vector group temporary. It
3554 // is also better than using vmerge.vx as it avoids the need to
3555 // materialize the mask in a vector register.
3556 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3557 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3558 LastOp != DominantValue) {
3559 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3560 auto OpCode =
3562 if (!VT.isFloatingPoint())
3563 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3564 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3565 LastOp, Mask, VL);
3566 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3567 Processed.insert(LastOp);
3568 }
3569
3570 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3571 for (const auto &OpIdx : enumerate(Op->ops())) {
3572 const SDValue &V = OpIdx.value();
3573 if (V.isUndef() || !Processed.insert(V).second)
3574 continue;
3575 if (ValueCounts[V] == 1) {
3576 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3577 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3578 } else {
3579 // Blend in all instances of this value using a VSELECT, using a
3580 // mask where each bit signals whether that element is the one
3581 // we're after.
3583 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3584 return DAG.getConstant(V == V1, DL, XLenVT);
3585 });
3586 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3587 DAG.getBuildVector(SelMaskTy, DL, Ops),
3588 DAG.getSplatBuildVector(VT, DL, V), Vec);
3589 }
3590 }
3591
3592 return Vec;
3593 }
3594
3595 return SDValue();
3596}
3597
3599 const RISCVSubtarget &Subtarget) {
3600 MVT VT = Op.getSimpleValueType();
3601 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3602
3603 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3604
3605 SDLoc DL(Op);
3606 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3607
3608 MVT XLenVT = Subtarget.getXLenVT();
3609 unsigned NumElts = Op.getNumOperands();
3610
3611 if (VT.getVectorElementType() == MVT::i1) {
3612 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3613 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3614 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3615 }
3616
3617 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3618 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3619 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3620 }
3621
3622 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3623 // scalar integer chunks whose bit-width depends on the number of mask
3624 // bits and XLEN.
3625 // First, determine the most appropriate scalar integer type to use. This
3626 // is at most XLenVT, but may be shrunk to a smaller vector element type
3627 // according to the size of the final vector - use i8 chunks rather than
3628 // XLenVT if we're producing a v8i1. This results in more consistent
3629 // codegen across RV32 and RV64.
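    // For example, a constant v8i1 mask is built as a v1i8 whose single i8
    // element holds the eight mask bits, and is then bitcast back to v8i1.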
3630 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3631 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3632 // If we have to use more than one INSERT_VECTOR_ELT then this
3633 // optimization is likely to increase code size; avoid performing it in
3634 // that case, and use a load from a constant pool instead.
3635 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3636 return SDValue();
3637 // Now we can create our integer vector type. Note that it may be larger
3638 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3639 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3640 MVT IntegerViaVecVT =
3641 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3642 IntegerViaVecElts);
3643
3644 uint64_t Bits = 0;
3645 unsigned BitPos = 0, IntegerEltIdx = 0;
3646 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3647
3648 for (unsigned I = 0; I < NumElts;) {
3649 SDValue V = Op.getOperand(I);
3650 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3651 Bits |= ((uint64_t)BitValue << BitPos);
3652 ++BitPos;
3653 ++I;
3654
3655 // Once we accumulate enough bits to fill our scalar type or process the
3656 // last element, insert into our vector and clear our accumulated data.
3657 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3658 if (NumViaIntegerBits <= 32)
3659 Bits = SignExtend64<32>(Bits);
3660 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3661 Elts[IntegerEltIdx] = Elt;
3662 Bits = 0;
3663 BitPos = 0;
3664 IntegerEltIdx++;
3665 }
3666 }
3667
3668 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3669
3670 if (NumElts < NumViaIntegerBits) {
3671 // If we're producing a smaller vector than our minimum legal integer
3672 // type, bitcast to the equivalent (known-legal) mask type, and extract
3673 // our final mask.
3674 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3675 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3676 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3677 DAG.getConstant(0, DL, XLenVT));
3678 } else {
3679 // Else we must have produced an integer type with the same size as the
3680 // mask type; bitcast for the final result.
3681 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3682 Vec = DAG.getBitcast(VT, Vec);
3683 }
3684
3685 return Vec;
3686 }
3687
3688 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3689 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3691 if (!VT.isFloatingPoint())
3692 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3693 Splat =
3694 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3695 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3696 }
3697
3698 // Try and match index sequences, which we can lower to the vid instruction
3699 // with optional modifications. An all-undef vector is matched by
3700 // getSplatValue, above.
3701 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3702 int64_t StepNumerator = SimpleVID->StepNumerator;
3703 unsigned StepDenominator = SimpleVID->StepDenominator;
3704 int64_t Addend = SimpleVID->Addend;
3705
3706 assert(StepNumerator != 0 && "Invalid step");
3707 bool Negate = false;
3708 int64_t SplatStepVal = StepNumerator;
3709 unsigned StepOpcode = ISD::MUL;
3710 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3711 // anyway as the shift of 63 won't fit in uimm5.
3712 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3713 isPowerOf2_64(std::abs(StepNumerator))) {
3714 Negate = StepNumerator < 0;
3715 StepOpcode = ISD::SHL;
3716 SplatStepVal = Log2_64(std::abs(StepNumerator));
3717 }
3718
3719 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3720 // threshold since it's the immediate value many RVV instructions accept.
3721 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3722 // in a single addi instruction.
3723 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3724 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3725 isPowerOf2_32(StepDenominator) &&
3726 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3727 MVT VIDVT =
3729 MVT VIDContainerVT =
3730 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3731 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3732 // Convert right out of the scalable type so we can use standard ISD
3733 // nodes for the rest of the computation. If we used scalable types with
3734 // these, we'd lose the fixed-length vector info and generate worse
3735 // vsetvli code.
3736 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3737 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3738 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3739 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3740 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3741 }
3742 if (StepDenominator != 1) {
3743 SDValue SplatStep =
3744 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3745 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3746 }
3747 if (Addend != 0 || Negate) {
3748 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3749 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3750 VID);
3751 }
3752 if (VT.isFloatingPoint()) {
3753 // TODO: Use vfwcvt to reduce register pressure.
3754 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3755 }
3756 return VID;
3757 }
3758 }
3759
3760 // For very small build_vectors, use a single scalar insert of a constant.
3761 // TODO: Base this on constant rematerialization cost, not size.
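  // For example, a constant v4i8 <1, 2, 3, 4> is materialized as the single
  // i32 constant 0x04030201, inserted as a scalar element, and bitcast back to
  // v4i8.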
3762 const unsigned EltBitSize = VT.getScalarSizeInBits();
3763 if (VT.getSizeInBits() <= 32 &&
3765 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3766 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3767 "Unexpected sequence type");
3768 // If we can use the original VL with the modified element type, this
3769 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3770 // be moved into InsertVSETVLI?
3771 unsigned ViaVecLen =
3772 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3773 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3774
3775 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3776 uint64_t SplatValue = 0;
3777 // Construct the amalgamated value at this larger vector type.
3778 for (const auto &OpIdx : enumerate(Op->op_values())) {
3779 const auto &SeqV = OpIdx.value();
3780 if (!SeqV.isUndef())
3781 SplatValue |=
3782 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3783 }
3784
3785 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3786 // achieve better constant materialization.
3787 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3788 SplatValue = SignExtend64<32>(SplatValue);
3789
3790 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3791 DAG.getUNDEF(ViaVecVT),
3792 DAG.getConstant(SplatValue, DL, XLenVT),
3793 DAG.getVectorIdxConstant(0, DL));
3794 if (ViaVecLen != 1)
3796 MVT::getVectorVT(ViaIntVT, 1), Vec,
3797 DAG.getConstant(0, DL, XLenVT));
3798 return DAG.getBitcast(VT, Vec);
3799 }
3800
3801
3802 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3803 // when re-interpreted as a vector with a larger element type. For example,
3804 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3805 // could be instead splat as
3806 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3807 // TODO: This optimization could also work on non-constant splats, but it
3808 // would require bit-manipulation instructions to construct the splat value.
3809 SmallVector<SDValue> Sequence;
3810 const auto *BV = cast<BuildVectorSDNode>(Op);
3811 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3813 BV->getRepeatedSequence(Sequence) &&
3814 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3815 unsigned SeqLen = Sequence.size();
3816 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3817 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3818 ViaIntVT == MVT::i64) &&
3819 "Unexpected sequence type");
3820
3821 // If we can use the original VL with the modified element type, this
3822 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3823 // be moved into InsertVSETVLI?
3824 const unsigned RequiredVL = NumElts / SeqLen;
3825 const unsigned ViaVecLen =
3826 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3827 NumElts : RequiredVL;
3828 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3829
3830 unsigned EltIdx = 0;
3831 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3832 uint64_t SplatValue = 0;
3833 // Construct the amalgamated value which can be splatted as this larger
3834 // vector type.
3835 for (const auto &SeqV : Sequence) {
3836 if (!SeqV.isUndef())
3837 SplatValue |=
3838 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3839 EltIdx++;
3840 }
3841
3842 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3843 // achieve better constant materialization.
3844 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3845 SplatValue = SignExtend64<32>(SplatValue);
3846
3847 // Since we can't introduce illegal i64 types at this stage, we can only
3848 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3849 // way we can use RVV instructions to splat.
3850 assert((ViaIntVT.bitsLE(XLenVT) ||
3851 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3852 "Unexpected bitcast sequence");
3853 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3854 SDValue ViaVL =
3855 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3856 MVT ViaContainerVT =
3857 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3858 SDValue Splat =
3859 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3860 DAG.getUNDEF(ViaContainerVT),
3861 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3862 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3863 if (ViaVecLen != RequiredVL)
3864 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3865 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3866 DAG.getConstant(0, DL, XLenVT));
3867 return DAG.getBitcast(VT, Splat);
3868 }
3869 }
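// Illustrative example of the hidden-splat path above: v8i8
// <0, 1, 0, 1, 0, 1, 0, 1> has the repeated sequence {0, 1}, which packs
// into the i16 value 0x0100; the build_vector is then emitted as a v4i16
// vmv.v.x splat of 0x0100 and bitcast back to v8i8.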
3870
3871 // If the number of signbits allows, see if we can lower as a <N x i8>.
3872 // Our main goal here is to reduce LMUL (and thus work) required to
3873 // build the constant, but we will also narrow if the resulting
3874 // narrow vector is known to materialize cheaply.
3875 // TODO: We really should be costing the smaller vector. There are
3876 // profitable cases this misses.
3877 if (EltBitSize > 8 && VT.isInteger() &&
3878 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3879 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3880 if (EltBitSize - SignBits < 8) {
3881 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3882 DL, Op->ops());
3883 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3884 Source, DAG, Subtarget);
3885 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3886 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3887 }
3888 }
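// Illustrative example of the narrowing above: v4i16 <1, -1, 2, -2> has
// more than 8 sign bits per element, so it is first built as a v4i8
// constant and then sign-extended back to v4i16 (roughly a vsext.vf2),
// keeping the constant materialization at a smaller LMUL.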
3889
3890 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3891 return Res;
3892
3893 // For constant vectors, use generic constant pool lowering. Otherwise,
3894 // we'd have to materialize constants in GPRs just to move them into the
3895 // vector.
3896 return SDValue();
3897}
3898
3899 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3900 const RISCVSubtarget &Subtarget) {
3901 MVT VT = Op.getSimpleValueType();
3902 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3903
3904 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3905 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3906 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3907
3908 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3909
3910 SDLoc DL(Op);
3911 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3912
3913 MVT XLenVT = Subtarget.getXLenVT();
3914
3915 if (VT.getVectorElementType() == MVT::i1) {
3916 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3917 // vector type, we have a legal equivalently-sized i8 type, so we can use
3918 // that.
3919 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3920 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3921
3922 SDValue WideVec;
3923 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3924 // For a splat, perform a scalar truncate before creating the wider
3925 // vector.
3926 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3927 DAG.getConstant(1, DL, Splat.getValueType()));
3928 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3929 } else {
3930 SmallVector<SDValue, 8> Ops(Op->op_values());
3931 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3932 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3933 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3934 }
3935
3936 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3937 }
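// Illustrative example of the i1 path above: v4i1 <1, 0, 1, 1> is built as
// the v4i8 vector <1, 0, 1, 1> (each scalar masked with AND 1) and turned
// back into a mask with a setcc against zero, i.e. roughly vmsne.vi.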
3938
3939 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3940 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3941 return Gather;
3942 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3943 : RISCVISD::VMV_V_X_VL;
3944 if (!VT.isFloatingPoint())
3945 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3946 Splat =
3947 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3948 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3949 }
3950
3951 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3952 return Res;
3953
3954 // If we're compiling for an exact VLEN value, we can split our work per
3955 // register in the register group.
3956 if (const auto VLen = Subtarget.getRealVLen();
3957 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3958 MVT ElemVT = VT.getVectorElementType();
3959 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3960 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3961 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3962 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3963 assert(M1VT == getLMUL1VT(M1VT));
3964
3965 // The following semantically builds up a fixed length concat_vector
3966 // of the component build_vectors. We eagerly lower to scalable and
3967 // insert_subvector here to avoid DAG combining it back to a large
3968 // build_vector.
3969 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3970 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3971 SDValue Vec = DAG.getUNDEF(ContainerVT);
3972 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3973 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3974 SDValue SubBV =
3975 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3976 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3977 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3978 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3979 DAG.getVectorIdxConstant(InsertIdx, DL));
3980 }
3981 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3982 }
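// Illustrative example of the exact-VLEN split above: with VLEN=128, a
// v16i32 build_vector (LMUL=4) is assembled from four independent v4i32
// build_vectors, each lowered on its own and placed into its register of
// the group via insert_subvector, instead of one long vslide1down chain.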
3983
3984 // For m1 vectors, if we have non-undef values in both halves of our vector,
3985 // split the vector into low and high halves, build them separately, then
3986 // use a vselect to combine them. For long vectors, this cuts the critical
3987 // path of the vslide1down sequence in half, and gives us an opportunity
3988 // to special case each half independently. Note that we don't change the
3989 // length of the sub-vectors here, so if both fall back to the generic
3990 // vslide1down path, we should be able to fold the vselect into the final
3991 // vslidedown (for the undef tail) for the first half w/ masking.
3992 unsigned NumElts = VT.getVectorNumElements();
3993 unsigned NumUndefElts =
3994 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3995 unsigned NumDefElts = NumElts - NumUndefElts;
3996 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3997 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3998 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3999 SmallVector<SDValue> MaskVals;
4000 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4001 SubVecAOps.reserve(NumElts);
4002 SubVecBOps.reserve(NumElts);
4003 for (unsigned i = 0; i < NumElts; i++) {
4004 SDValue Elem = Op->getOperand(i);
4005 if (i < NumElts / 2) {
4006 SubVecAOps.push_back(Elem);
4007 SubVecBOps.push_back(UndefElem);
4008 } else {
4009 SubVecAOps.push_back(UndefElem);
4010 SubVecBOps.push_back(Elem);
4011 }
4012 bool SelectMaskVal = (i < NumElts / 2);
4013 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4014 }
4015 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4016 MaskVals.size() == NumElts);
4017
4018 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4019 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4020 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4021 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4022 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4023 }
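// Illustrative example of the split above: a fully defined v16i8
// build_vector becomes <e0..e7, undef x 8> and <undef x 8, e8..e15>; the
// two halves are built independently and recombined with a single vselect
// whose mask is 1 for the low half and 0 for the high half.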
4024
4025 // Cap the cost at a value linear to the number of elements in the vector.
4026 // The default lowering is to use the stack. The vector store + scalar loads
4027 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4028 // being (at least) linear in LMUL. As a result, using the vslidedown
4029 // lowering for every element ends up being VL*LMUL.
4030 // TODO: Should we be directly costing the stack alternative? Doing so might
4031 // give us a more accurate upper bound.
4032 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4033
4034 // TODO: unify with TTI getSlideCost.
4035 InstructionCost PerSlideCost = 1;
4036 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4037 default: break;
4038 case RISCVII::VLMUL::LMUL_2:
4039 PerSlideCost = 2;
4040 break;
4041 case RISCVII::VLMUL::LMUL_4:
4042 PerSlideCost = 4;
4043 break;
4044 case RISCVII::VLMUL::LMUL_8:
4045 PerSlideCost = 8;
4046 break;
4047 }
4048
4049 // TODO: Should we be using the build instseq then cost + evaluate scheme
4050 // we use for integer constants here?
4051 unsigned UndefCount = 0;
4052 for (const SDValue &V : Op->ops()) {
4053 if (V.isUndef()) {
4054 UndefCount++;
4055 continue;
4056 }
4057 if (UndefCount) {
4058 LinearBudget -= PerSlideCost;
4059 UndefCount = 0;
4060 }
4061 LinearBudget -= PerSlideCost;
4062 }
4063 if (UndefCount) {
4064 LinearBudget -= PerSlideCost;
4065 }
4066
4067 if (LinearBudget < 0)
4068 return SDValue();
4069
4070 assert((!VT.isFloatingPoint() ||
4071 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4072 "Illegal type which will result in reserved encoding");
4073
4074 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4075
4076 SDValue Vec;
4077 UndefCount = 0;
4078 for (SDValue V : Op->ops()) {
4079 if (V.isUndef()) {
4080 UndefCount++;
4081 continue;
4082 }
4083
4084 // Start our sequence with a TA splat in the hopes that hardware is able to
4085 // recognize there's no dependency on the prior value of our temporary
4086 // register.
4087 if (!Vec) {
4088 Vec = DAG.getSplatVector(VT, DL, V);
4089 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4090 UndefCount = 0;
4091 continue;
4092 }
4093
4094 if (UndefCount) {
4095 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4096 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4097 Vec, Offset, Mask, VL, Policy);
4098 UndefCount = 0;
4099 }
4100 auto OpCode =
4101 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4102 if (!VT.isFloatingPoint())
4103 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4104 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4105 V, Mask, VL);
4106 }
4107 if (UndefCount) {
4108 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4109 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4110 Vec, Offset, Mask, VL, Policy);
4111 }
4112 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4113}
4114
4115static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4116 SDValue Lo, SDValue Hi, SDValue VL,
4117 SelectionDAG &DAG) {
4118 if (!Passthru)
4119 Passthru = DAG.getUNDEF(VT);
4120 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4121 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4122 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4123 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4124 // node in order to try and match RVV vector/scalar instructions.
4125 if ((LoC >> 31) == HiC)
4126 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4127
4128 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4129 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4130 // vlmax vsetvli or vsetivli to change the VL.
4131 // FIXME: Support larger constants?
4132 // FIXME: Support non-constant VLs by saturating?
4133 if (LoC == HiC) {
4134 SDValue NewVL;
4135 if (isAllOnesConstant(VL) ||
4136 (isa<RegisterSDNode>(VL) &&
4137 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4138 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4139 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4140 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4141
4142 if (NewVL) {
4143 MVT InterVT =
4144 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4145 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4146 DAG.getUNDEF(InterVT), Lo, NewVL);
4147 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4148 }
4149 }
4150 }
4151
4152 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4153 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4154 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4155 Hi.getConstantOperandVal(1) == 31)
4156 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4157
4158 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4159 // even if it might be sign extended.
4160 if (Hi.isUndef())
4161 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4162
4163 // Fall back to a stack store and stride x0 vector load.
4164 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4165 Hi, VL);
4166}
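// Illustrative examples of the cases above on RV32: splatting the i64
// constant 0xFFFFFFFF_FFFFFFFE has Hi equal to the sign bits of Lo, so it
// folds to a plain vmv.v.x of -2, while 0x00000001_00000002 matches none of
// the special cases and takes the SPLAT_VECTOR_SPLIT_I64_VL stack fallback.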
4167
4168// Called by type legalization to handle splat of i64 on RV32.
4169// FIXME: We can optimize this when the type has sign or zero bits in one
4170// of the halves.
4171static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4172 SDValue Scalar, SDValue VL,
4173 SelectionDAG &DAG) {
4174 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4175 SDValue Lo, Hi;
4176 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4177 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4178}
4179
4180// This function lowers a splat of a scalar operand Splat with the vector
4181// length VL. It ensures the final sequence is type legal, which is useful when
4182// lowering a splat after type legalization.
4183static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4184 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4185 const RISCVSubtarget &Subtarget) {
4186 bool HasPassthru = Passthru && !Passthru.isUndef();
4187 if (!HasPassthru && !Passthru)
4188 Passthru = DAG.getUNDEF(VT);
4189 if (VT.isFloatingPoint())
4190 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4191
4192 MVT XLenVT = Subtarget.getXLenVT();
4193
4194 // Simplest case is that the operand needs to be promoted to XLenVT.
4195 if (Scalar.getValueType().bitsLE(XLenVT)) {
4196 // If the operand is a constant, sign extend to increase our chances
4197 // of being able to use a .vi instruction. ANY_EXTEND would become a
4198 // zero extend and the simm5 check in isel would fail.
4199 // FIXME: Should we ignore the upper bits in isel instead?
4200 unsigned ExtOpc =
4201 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4202 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4203 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4204 }
4205
4206 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4207 "Unexpected scalar for splat lowering!");
4208
4209 if (isOneConstant(VL) && isNullConstant(Scalar))
4210 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4211 DAG.getConstant(0, DL, XLenVT), VL);
4212
4213 // Otherwise use the more complicated splatting algorithm.
4214 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4215}
4216
4217// This function lowers an insert of a scalar operand Scalar into lane
4218// 0 of the vector regardless of the value of VL. The contents of the
4219// remaining lanes of the result vector are unspecified. VL is assumed
4220// to be non-zero.
4221 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4222 const SDLoc &DL, SelectionDAG &DAG,
4223 const RISCVSubtarget &Subtarget) {
4224 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4225
4226 const MVT XLenVT = Subtarget.getXLenVT();
4227 SDValue Passthru = DAG.getUNDEF(VT);
4228
4229 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4230 isNullConstant(Scalar.getOperand(1))) {
4231 SDValue ExtractedVal = Scalar.getOperand(0);
4232 // The element types must be the same.
4233 if (ExtractedVal.getValueType().getVectorElementType() ==
4234 VT.getVectorElementType()) {
4235 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4236 MVT ExtractedContainerVT = ExtractedVT;
4237 if (ExtractedContainerVT.isFixedLengthVector()) {
4238 ExtractedContainerVT = getContainerForFixedLengthVector(
4239 DAG, ExtractedContainerVT, Subtarget);
4240 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4241 ExtractedVal, DAG, Subtarget);
4242 }
4243 if (ExtractedContainerVT.bitsLE(VT))
4244 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4245 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4246 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4247 DAG.getVectorIdxConstant(0, DL));
4248 }
4249 }
4250
4251
4252 if (VT.isFloatingPoint())
4253 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4254 DAG.getUNDEF(VT), Scalar, VL);
4255
4256 // Avoid the tricky legalization cases by falling back to using the
4257 // splat code which already handles it gracefully.
4258 if (!Scalar.getValueType().bitsLE(XLenVT))
4259 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4260 DAG.getConstant(1, DL, XLenVT),
4261 VT, DL, DAG, Subtarget);
4262
4263 // If the operand is a constant, sign extend to increase our chances
4264 // of being able to use a .vi instruction. ANY_EXTEND would become a
4265 // zero extend and the simm5 check in isel would fail.
4266 // FIXME: Should we ignore the upper bits in isel instead?
4267 unsigned ExtOpc =
4268 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4269 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4270 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4271 DAG.getUNDEF(VT), Scalar, VL);
4272}
4273
4274 // Is this a shuffle that extracts either the even or odd elements of a vector?
4275// That is, specifically, either (a) or (b) below.
4276// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4277// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4278// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4279// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4280 // Returns {Src Vector, Even Elements} on success.
4281static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4282 SDValue V2, ArrayRef<int> Mask,
4283 const RISCVSubtarget &Subtarget) {
4284 // Need to be able to widen the vector.
4285 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4286 return false;
4287
4288 // Both inputs must be extracts.
4289 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4290 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4291 return false;
4292
4293 // Extracting from the same source.
4294 SDValue Src = V1.getOperand(0);
4295 if (Src != V2.getOperand(0))
4296 return false;
4297
4298 // Src needs to have twice the number of elements.
4299 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4300 return false;
4301
4302 // The extracts must extract the two halves of the source.
4303 if (V1.getConstantOperandVal(1) != 0 ||
4304 V2.getConstantOperandVal(1) != Mask.size())
4305 return false;
4306
4307 // First index must be the first even or odd element from V1.
4308 if (Mask[0] != 0 && Mask[0] != 1)
4309 return false;
4310
4311 // The others must increase by 2 each time.
4312 // TODO: Support undef elements?
4313 for (unsigned i = 1; i != Mask.size(); ++i)
4314 if (Mask[i] != Mask[i - 1] + 2)
4315 return false;
4316
4317 return true;
4318}
4319
4320/// Is this shuffle interleaving contiguous elements from one vector into the
4321/// even elements and contiguous elements from another vector into the odd
4322/// elements. \p EvenSrc will contain the element that should be in the first
4323/// even element. \p OddSrc will contain the element that should be in the first
4324/// odd element. These can be the first element in a source or the element half
4325/// way through the source.
4326static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4327 int &OddSrc, const RISCVSubtarget &Subtarget) {
4328 // We need to be able to widen elements to the next larger integer type.
4329 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4330 return false;
4331
4332 int Size = Mask.size();
4333 int NumElts = VT.getVectorNumElements();
4334 assert(Size == (int)NumElts && "Unexpected mask size");
4335
4336 SmallVector<unsigned, 2> StartIndexes;
4337 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4338 return false;
4339
4340 EvenSrc = StartIndexes[0];
4341 OddSrc = StartIndexes[1];
4342
4343 // One source should be low half of first vector.
4344 if (EvenSrc != 0 && OddSrc != 0)
4345 return false;
4346
4347 // Subvectors will be extracted from either the start of the two input
4348 // vectors, or at the start and middle of the first vector if it's a unary
4349 // interleave.
4350 // In both cases, HalfNumElts will be extracted.
4351 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4352 // we'll create an illegal extract_subvector.
4353 // FIXME: We could support other values using a slidedown first.
4354 int HalfNumElts = NumElts / 2;
4355 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4356}
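// Illustrative mask for the check above: with two v8i16 sources, the mask
// <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low half of V1 with the low
// half of V2, giving EvenSrc == 0 and OddSrc == 8; both are multiples of
// HalfNumElts (4), so the shuffle is accepted.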
4357
4358/// Match shuffles that concatenate two vectors, rotate the concatenation,
4359/// and then extract the original number of elements from the rotated result.
4360/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4361/// returned rotation amount is for a rotate right, where elements move from
4362/// higher elements to lower elements. \p LoSrc indicates the first source
4363/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4364/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4365/// 0 or 1 if a rotation is found.
4366///
4367/// NOTE: We talk about rotate to the right which matches how bit shift and
4368/// rotate instructions are described where LSBs are on the right, but LLVM IR
4369/// and the table below write vectors with the lowest elements on the left.
4370static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4371 int Size = Mask.size();
4372
4373 // We need to detect various ways of spelling a rotation:
4374 // [11, 12, 13, 14, 15, 0, 1, 2]
4375 // [-1, 12, 13, 14, -1, -1, 1, -1]
4376 // [-1, -1, -1, -1, -1, -1, 1, 2]
4377 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4378 // [-1, 4, 5, 6, -1, -1, 9, -1]
4379 // [-1, 4, 5, 6, -1, -1, -1, -1]
4380 int Rotation = 0;
4381 LoSrc = -1;
4382 HiSrc = -1;
4383 for (int i = 0; i != Size; ++i) {
4384 int M = Mask[i];
4385 if (M < 0)
4386 continue;
4387
4388 // Determine where a rotate vector would have started.
4389 int StartIdx = i - (M % Size);
4390 // The identity rotation isn't interesting, stop.
4391 if (StartIdx == 0)
4392 return -1;
4393
4394 // If we found the tail of a vector the rotation must be the missing
4395 // front. If we found the head of a vector, it must be how much of the
4396 // head.
4397 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4398
4399 if (Rotation == 0)
4400 Rotation = CandidateRotation;
4401 else if (Rotation != CandidateRotation)
4402 // The rotations don't match, so we can't match this mask.
4403 return -1;
4404
4405 // Compute which value this mask is pointing at.
4406 int MaskSrc = M < Size ? 0 : 1;
4407
4408 // Compute which of the two target values this index should be assigned to.
4409 // This reflects whether the high elements are remaining or the low elements
4410 // are remaining.
4411 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4412
4413 // Either set up this value if we've not encountered it before, or check
4414 // that it remains consistent.
4415 if (TargetSrc < 0)
4416 TargetSrc = MaskSrc;
4417 else if (TargetSrc != MaskSrc)
4418 // This may be a rotation, but it pulls from the inputs in some
4419 // unsupported interleaving.
4420 return -1;
4421 }
4422
4423 // Check that we successfully analyzed the mask, and normalize the results.
4424 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4425 assert((LoSrc >= 0 || HiSrc >= 0) &&
4426 "Failed to find a rotated input vector!");
4427
4428 return Rotation;
4429}
4430
4431// Lower a deinterleave shuffle to vnsrl.
4432// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4433// -> [p, q, r, s] (EvenElts == false)
4434// VT is the type of the vector to return, <[vscale x ]n x ty>
4435// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4436 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4437 bool EvenElts,
4438 const RISCVSubtarget &Subtarget,
4439 SelectionDAG &DAG) {
4440 // The result is a vector of type <m x n x ty>
4441 MVT ContainerVT = VT;
4442 // Convert fixed vectors to scalable if needed
4443 if (ContainerVT.isFixedLengthVector()) {
4444 assert(Src.getSimpleValueType().isFixedLengthVector());
4445 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4446
4447 // The source is a vector of type <m x n*2 x ty>
4448 MVT SrcContainerVT =
4449 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4450 ContainerVT.getVectorElementCount() * 2);
4451 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4452 }
4453
4454 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4455
4456 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4457 // This also converts FP to int.
4458 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4459 MVT WideSrcContainerVT = MVT::getVectorVT(
4460 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4461 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4462
4463 // The integer version of the container type.
4464 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4465
4466 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4467 // the original element size.
4468 unsigned Shift = EvenElts ? 0 : EltBits;
4469 SDValue SplatShift = DAG.getNode(
4470 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4471 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4472 SDValue Res =
4473 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4474 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4475 // Cast back to FP if needed.
4476 Res = DAG.getBitcast(ContainerVT, Res);
4477
4478 if (VT.isFixedLengthVector())
4479 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4480 return Res;
4481}
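// Illustrative example of the lowering above: deinterleaving a v8i16 source
// into a v4i16 result bitcasts the source to v4i32 and issues a narrowing
// shift right (vnsrl) by 0 for the even elements or by 16 for the odd
// elements.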
4482
4483// Lower the following shuffle to vslidedown.
4484// a)
4485// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4486// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4487// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4488// b)
4489// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4490// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4491// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4492// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4493// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4494// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4495 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4496 SDValue V1, SDValue V2,
4497 ArrayRef<int> Mask,
4498 const RISCVSubtarget &Subtarget,
4499 SelectionDAG &DAG) {
4500 auto findNonEXTRACT_SUBVECTORParent =
4501 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4502 uint64_t Offset = 0;
4503 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4504 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4505 // a scalable vector. But we don't want to match the case.
4506 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4507 Offset += Parent.getConstantOperandVal(1);
4508 Parent = Parent.getOperand(0);
4509 }
4510 return std::make_pair(Parent, Offset);
4511 };
4512
4513 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4514 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4515
4516 // Extracting from the same source.
4517 SDValue Src = V1Src;
4518 if (Src != V2Src)
4519 return SDValue();
4520
4521 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4522 SmallVector<int, 16> NewMask(Mask);
4523 for (size_t i = 0; i != NewMask.size(); ++i) {
4524 if (NewMask[i] == -1)
4525 continue;
4526
4527 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4528 NewMask[i] = NewMask[i] + V1IndexOffset;
4529 } else {
4530 // Minus NewMask.size() is needed. Otherwise, the b case would be
4531 // <5,6,7,12> instead of <5,6,7,8>.
4532 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4533 }
4534 }
4535
4536 // First index must be known and non-zero. It will be used as the slidedown
4537 // amount.
4538 if (NewMask[0] <= 0)
4539 return SDValue();
4540
4541 // NewMask must also be contiguous.
4542 for (unsigned i = 1; i != NewMask.size(); ++i)
4543 if (NewMask[i - 1] + 1 != NewMask[i])
4544 return SDValue();
4545
4546 MVT XLenVT = Subtarget.getXLenVT();
4547 MVT SrcVT = Src.getSimpleValueType();
4548 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4549 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4550 SDValue Slidedown =
4551 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4552 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4553 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4554 return DAG.getNode(
4555 ISD::EXTRACT_SUBVECTOR, DL, VT,
4556 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4557 DAG.getConstant(0, DL, XLenVT));
4558}
4559
4560// Because vslideup leaves the destination elements at the start intact, we can
4561// use it to perform shuffles that insert subvectors:
4562//
4563// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4564// ->
4565// vsetvli zero, 8, e8, mf2, ta, ma
4566// vslideup.vi v8, v9, 4
4567//
4568// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4569// ->
4570// vsetvli zero, 5, e8, mf2, tu, ma
4571 // vslideup.vi v8, v9, 2
4572 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4573 SDValue V1, SDValue V2,
4574 ArrayRef<int> Mask,
4575 const RISCVSubtarget &Subtarget,
4576 SelectionDAG &DAG) {
4577 unsigned NumElts = VT.getVectorNumElements();
4578 int NumSubElts, Index;
4579 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4580 Index))
4581 return SDValue();
4582
4583 bool OpsSwapped = Mask[Index] < (int)NumElts;
4584 SDValue InPlace = OpsSwapped ? V2 : V1;
4585 SDValue ToInsert = OpsSwapped ? V1 : V2;
4586
4587 MVT XLenVT = Subtarget.getXLenVT();
4588 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4589 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4590 // We slide up by the index that the subvector is being inserted at, and set
4591 // VL to the index + the number of elements being inserted.
4592 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4593 // If we're adding a suffix to the in place vector, i.e. inserting right
4594 // up to the very end of it, then we don't actually care about the tail.
4595 if (NumSubElts + Index >= (int)NumElts)
4596 Policy |= RISCVII::TAIL_AGNOSTIC;
4597
4598 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4599 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4600 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4601
4602 SDValue Res;
4603 // If we're inserting into the lowest elements, use a tail undisturbed
4604 // vmv.v.v.
4605 if (Index == 0)
4606 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4607 VL);
4608 else
4609 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4610 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4611 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4612}
4613
4614/// Match v(f)slide1up/down idioms. These operations involve sliding
4615/// N-1 elements to make room for an inserted scalar at one end.
4616 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4617 SDValue V1, SDValue V2,
4618 ArrayRef<int> Mask,
4619 const RISCVSubtarget &Subtarget,
4620 SelectionDAG &DAG) {
4621 bool OpsSwapped = false;
4622 if (!isa<BuildVectorSDNode>(V1)) {
4623 if (!isa<BuildVectorSDNode>(V2))
4624 return SDValue();
4625 std::swap(V1, V2);
4626 OpsSwapped = true;
4627 }
4628 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4629 if (!Splat)
4630 return SDValue();
4631
4632 // Return true if the mask could describe a slide of Mask.size() - 1
4633 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4634 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4635 const unsigned S = (Offset > 0) ? 0 : -Offset;
4636 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4637 for (unsigned i = S; i != E; ++i)
4638 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4639 return false;
4640 return true;
4641 };
4642
4643 const unsigned NumElts = VT.getVectorNumElements();
4644 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4645 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4646 return SDValue();
4647
4648 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4649 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4650 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4651 return SDValue();
4652
4653 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4654 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4655 auto OpCode = IsVSlidedown ?
4656 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4657 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4658 if (!VT.isFloatingPoint())
4659 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4660 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4661 DAG.getUNDEF(ContainerVT),
4662 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4663 Splat, TrueMask, VL);
4664 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4665}
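// Illustrative match for the helper above: shuffling v4i32 V1 against a
// splat build_vector V2 with mask <1, 2, 3, 4> keeps elements 1..3 of V1
// and appends V2's scalar, which is exactly vslide1down.vx (vfslide1down.vf
// for FP) of V1 by that scalar.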
4666
4667// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4668// to create an interleaved vector of <[vscale x] n*2 x ty>.
4669// This requires that the size of ty is less than the subtarget's maximum ELEN.
4670 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4671 const SDLoc &DL, SelectionDAG &DAG,
4672 const RISCVSubtarget &Subtarget) {
4673 MVT VecVT = EvenV.getSimpleValueType();
4674 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4675 // Convert fixed vectors to scalable if needed
4676 if (VecContainerVT.isFixedLengthVector()) {
4677 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4678 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4679 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4680 }
4681
4682 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4683
4684 // We're working with a vector of the same size as the resulting
4685 // interleaved vector, but with half the number of elements and
4686 // twice the SEW (Hence the restriction on not using the maximum
4687 // ELEN)
4688 MVT WideVT =
4689 MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4690 VecVT.getVectorElementCount());
4691 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4692 if (WideContainerVT.isFixedLengthVector())
4693 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4694
4695 // Bitcast the input vectors to integers in case they are FP
4696 VecContainerVT = VecContainerVT.changeTypeToInteger();
4697 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4698 OddV = DAG.getBitcast(VecContainerVT, OddV);
4699
4700 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4701 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4702
4703 SDValue Interleaved;
4704 if (OddV.isUndef()) {
4705 // If OddV is undef, this is a zero extend.
4706 // FIXME: Not only does this optimize the code, it fixes some correctness
4707 // issues because MIR does not have freeze.
4708 Interleaved =
4709 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4710 } else if (Subtarget.hasStdExtZvbb()) {
4711 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4712 SDValue OffsetVec =
4713 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4714 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4715 OffsetVec, Passthru, Mask, VL);
4716 if (!EvenV.isUndef())
4717 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4718 Interleaved, EvenV, Passthru, Mask, VL);
4719 } else if (EvenV.isUndef()) {
4720 Interleaved =
4721 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4722
4723 SDValue OffsetVec =
4724 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4725 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4726 Interleaved, OffsetVec, Passthru, Mask, VL);
4727 } else {
4728 // FIXME: We should freeze the odd vector here. We already handled the case
4729 // of provably undef/poison above.
4730
4731 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4732 // vwaddu.vv
4733 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4734 OddV, Passthru, Mask, VL);
4735
4736 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones.
4737 SDValue AllOnesVec = DAG.getSplatVector(
4738 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4739 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4740 OddV, AllOnesVec, Passthru, Mask, VL);
4741
4742 // Add the two together so we get
4743 // (OddV * 0xff...ff) + (OddV + EvenV)
4744 // = (OddV * 0x100...00) + EvenV
4745 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4746 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4747 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4748 Interleaved, OddsMul, Passthru, Mask, VL);
4749 }
4750
4751 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4752 MVT ResultContainerVT = MVT::getVectorVT(
4753 VecVT.getVectorElementType(), // Make sure to use original type
4754 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4755 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4756
4757 // Convert back to a fixed vector if needed
4758 MVT ResultVT =
4759 MVT::getVectorVT(VecVT.getVectorElementType(),
4760 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4761 if (ResultVT.isFixedLengthVector())
4762 Interleaved =
4763 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4764
4765 return Interleaved;
4766}
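// Illustrative arithmetic for the generic case above, with SEW=8:
//   zext(Even) + zext(Odd) + zext(Odd) * 0xFF == zext(Even) + zext(Odd) * 256
// so each widened i16 element holds the even byte in its low half and the
// odd byte in its high half, which reads back as the interleaved pair after
// the bitcast to the narrow element type.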
4767
4768// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4769// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4770 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4771 SelectionDAG &DAG,
4772 const RISCVSubtarget &Subtarget) {
4773 SDLoc DL(SVN);
4774 MVT VT = SVN->getSimpleValueType(0);
4775 SDValue V = SVN->getOperand(0);
4776 unsigned NumElts = VT.getVectorNumElements();
4777
4778 assert(VT.getVectorElementType() == MVT::i1);
4779
4780 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4781 SVN->getMask().size()) ||
4782 !SVN->getOperand(1).isUndef())
4783 return SDValue();
4784
4785 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4786 EVT ViaVT = EVT::getVectorVT(
4787 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4788 EVT ViaBitVT =
4789 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4790
4791 // If we don't have zvbb or the larger element type > ELEN, the operation will
4792 // be illegal.
4793 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4794 ViaVT) ||
4795 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4796 return SDValue();
4797
4798 // If the bit vector doesn't fit exactly into the larger element type, we need
4799 // to insert it into the larger vector and then shift up the reversed bits
4800 // afterwards to get rid of the gap introduced.
4801 if (ViaEltSize > NumElts)
4802 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4803 V, DAG.getVectorIdxConstant(0, DL));
4804
4805 SDValue Res =
4806 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4807
4808 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4809 // element type.
4810 if (ViaEltSize > NumElts)
4811 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4812 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4813
4814 Res = DAG.getBitcast(ViaBitVT, Res);
4815
4816 if (ViaEltSize > NumElts)
4817 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4818 DAG.getVectorIdxConstant(0, DL));
4819 return Res;
4820}
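// Illustrative example of the lowering above: reversing a v4i1 mask rounds
// the bit count up to 8, inserts the bits into a v8i1, bitreverses it as a
// single i8 element, shifts the result right by 4 to remove the introduced
// gap, and extracts the original v4i1.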
4821
4822 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4823 SelectionDAG &DAG,
4824 const RISCVSubtarget &Subtarget,
4825 MVT &RotateVT, unsigned &RotateAmt) {
4826 SDLoc DL(SVN);
4827
4828 EVT VT = SVN->getValueType(0);
4829 unsigned NumElts = VT.getVectorNumElements();
4830 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4831 unsigned NumSubElts;
4832 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4833 NumElts, NumSubElts, RotateAmt))
4834 return false;
4835 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4836 NumElts / NumSubElts);
4837
4838 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4839 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4840}
4841
4842// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4843// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4844// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4845 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4846 SelectionDAG &DAG,
4847 const RISCVSubtarget &Subtarget) {
4848 SDLoc DL(SVN);
4849
4850 EVT VT = SVN->getValueType(0);
4851 unsigned RotateAmt;
4852 MVT RotateVT;
4853 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4854 return SDValue();
4855
4856 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4857
4858 SDValue Rotate;
4859 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4860 // so canonicalize to vrev8.
4861 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4862 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4863 else
4864 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4865 DAG.getConstant(RotateAmt, DL, RotateVT));
4866
4867 return DAG.getBitcast(VT, Rotate);
4868}
4869
4870// If compiling with an exactly known VLEN, see if we can split a
4871// shuffle on m2 or larger into a small number of m1 sized shuffles
4872 // which write each destination register exactly once.
4873 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4874 SelectionDAG &DAG,
4875 const RISCVSubtarget &Subtarget) {
4876 SDLoc DL(SVN);
4877 MVT VT = SVN->getSimpleValueType(0);
4878 SDValue V1 = SVN->getOperand(0);
4879 SDValue V2 = SVN->getOperand(1);
4880 ArrayRef<int> Mask = SVN->getMask();
4881 unsigned NumElts = VT.getVectorNumElements();
4882
4883 // If we don't know exact data layout, not much we can do. If this
4884 // is already m1 or smaller, no point in splitting further.
4885 const auto VLen = Subtarget.getRealVLen();
4886 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4887 return SDValue();
4888
4889 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4890 // expansion for.
4891 unsigned RotateAmt;
4892 MVT RotateVT;
4893 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4894 return SDValue();
4895
4896 MVT ElemVT = VT.getVectorElementType();
4897 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4898 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4899
4900 SmallVector<std::pair<int, SmallVector<int>>>
4901 OutMasks(VRegsPerSrc, {-1, {}});
4902
4903 // Check if our mask can be done as a 1-to-1 mapping from source
4904 // to destination registers in the group without needing to
4905 // write each destination more than once.
4906 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4907 int DstVecIdx = DstIdx / ElemsPerVReg;
4908 int DstSubIdx = DstIdx % ElemsPerVReg;
4909 int SrcIdx = Mask[DstIdx];
4910 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4911 continue;
4912 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4913 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4914 if (OutMasks[DstVecIdx].first == -1)
4915 OutMasks[DstVecIdx].first = SrcVecIdx;
4916 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4917 // Note: This case could easily be handled by keeping track of a chain
4918 // of source values and generating two element shuffles below. This is
4919 // less an implementation question, and more a profitability one.
4920 return SDValue();
4921
4922 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4923 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4924 }
4925
4926 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4927 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4928 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4929 assert(M1VT == getLMUL1VT(M1VT));
4930 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4931 SDValue Vec = DAG.getUNDEF(ContainerVT);
4932 // The following semantically builds up a fixed length concat_vector
4933 // of the component shuffle_vectors. We eagerly lower to scalable here
4934 // to avoid DAG combining it back to a large shuffle_vector again.
4935 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4936 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4937 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4938 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4939 if (SrcVecIdx == -1)
4940 continue;
4941 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4942 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4943 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4944 DAG.getVectorIdxConstant(ExtractIdx, DL));
4945 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4946 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4947 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4948 unsigned InsertIdx = DstVecIdx * NumOpElts;
4949 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4950 DAG.getVectorIdxConstant(InsertIdx, DL));
4951 }
4952 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4953}
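// Illustrative example of the splitting above: with VLEN=128, an m2 v8i32
// shuffle with mask <3, 2, 1, 0, 11, 10, 9, 8> writes each destination
// register from exactly one source register, so it is rewritten as two
// independent m1 v4i32 reverse shuffles inserted into the result group.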
4954
4955 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4956 const RISCVSubtarget &Subtarget) {
4957 SDValue V1 = Op.getOperand(0);
4958 SDValue V2 = Op.getOperand(1);
4959 SDLoc DL(Op);
4960 MVT XLenVT = Subtarget.getXLenVT();
4961 MVT VT = Op.getSimpleValueType();
4962 unsigned NumElts = VT.getVectorNumElements();
4963 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4964
4965 if (VT.getVectorElementType() == MVT::i1) {
4966 // Lower to a vror.vi of a larger element type if possible before we promote
4967 // i1s to i8s.
4968 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4969 return V;
4970 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4971 return V;
4972
4973 // Promote i1 shuffle to i8 shuffle.
4974 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4975 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4976 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4977 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4978 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4979 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4980 ISD::SETNE);
4981 }
4982
4983 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4984
4985 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4986
4987 if (SVN->isSplat()) {
4988 const int Lane = SVN->getSplatIndex();
4989 if (Lane >= 0) {
4990 MVT SVT = VT.getVectorElementType();
4991
4992 // Turn splatted vector load into a strided load with an X0 stride.
4993 SDValue V = V1;
4994 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4995 // with undef.
4996 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4997 int Offset = Lane;
4998 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4999 int OpElements =
5000 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5001 V = V.getOperand(Offset / OpElements);
5002 Offset %= OpElements;
5003 }
5004
5005 // We need to ensure the load isn't atomic or volatile.
5006 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5007 auto *Ld = cast<LoadSDNode>(V);
5008 Offset *= SVT.getStoreSize();
5009 SDValue NewAddr = DAG.getMemBasePlusOffset(
5010 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5011
5012 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5013 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5014 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5015 SDValue IntID =
5016 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5017 SDValue Ops[] = {Ld->getChain(),
5018 IntID,
5019 DAG.getUNDEF(ContainerVT),
5020 NewAddr,
5021 DAG.getRegister(RISCV::X0, XLenVT),
5022 VL};
5023 SDValue NewLoad = DAG.getMemIntrinsicNode(
5024 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5025 DAG.getMachineFunction().getMachineMemOperand(
5026 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5027 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5028 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5029 }
5030
5031 // Otherwise use a scalar load and splat. This will give the best
5032 // opportunity to fold a splat into the operation. ISel can turn it into
5033 // the x0 strided load if we aren't able to fold away the select.
5034 if (SVT.isFloatingPoint())
5035 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5036 Ld->getPointerInfo().getWithOffset(Offset),
5037 Ld->getOriginalAlign(),
5038 Ld->getMemOperand()->getFlags());
5039 else
5040 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5041 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5042 Ld->getOriginalAlign(),
5043 Ld->getMemOperand()->getFlags());
5044 DAG.makeEquivalentMemoryOrdering(Ld, V);
5045
5046 unsigned Opc =
5047 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
5048 SDValue Splat =
5049 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
5050 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5051 }
5052
5053 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5054 assert(Lane < (int)NumElts && "Unexpected lane!");
5055 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5056 V1, DAG.getConstant(Lane, DL, XLenVT),
5057 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5058 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5059 }
5060 }
5061
5062 // For exact VLEN m2 or greater, try to split to m1 operations if we
5063 // can split cleanly.
5064 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5065 return V;
5066
5067 ArrayRef<int> Mask = SVN->getMask();
5068
5069 if (SDValue V =
5070 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5071 return V;
5072
5073 if (SDValue V =
5074 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5075 return V;
5076
5077 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5078 // available.
5079 if (Subtarget.hasStdExtZvkb())
5080 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5081 return V;
5082
5083 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5084 // be undef which can be handled with a single SLIDEDOWN/UP.
5085 int LoSrc, HiSrc;
5086 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5087 if (Rotation > 0) {
5088 SDValue LoV, HiV;
5089 if (LoSrc >= 0) {
5090 LoV = LoSrc == 0 ? V1 : V2;
5091 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5092 }
5093 if (HiSrc >= 0) {
5094 HiV = HiSrc == 0 ? V1 : V2;
5095 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5096 }
5097
5098 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5099 // to slide LoV up by (NumElts - Rotation).
5100 unsigned InvRotate = NumElts - Rotation;
5101
5102 SDValue Res = DAG.getUNDEF(ContainerVT);
5103 if (HiV) {
5104 // Even though we could use a smaller VL, don't, to avoid a vsetivli
5105 // toggle.
5106 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5107 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5108 }
5109 if (LoV)
5110 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5111 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5112 RISCVII::TAIL_AGNOSTIC);
5113
5114 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5115 }
5116
5117 // If this is a deinterleave and we can widen the vector, then we can use
5118 // vnsrl to deinterleave.
5119 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5120 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5121 Subtarget, DAG);
5122 }
5123
5124 if (SDValue V =
5125 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5126 return V;
5127
5128 // Detect an interleave shuffle and lower to
5129 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5130 int EvenSrc, OddSrc;
5131 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5132 // Extract the halves of the vectors.
5133 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5134
5135 int Size = Mask.size();
5136 SDValue EvenV, OddV;
5137 assert(EvenSrc >= 0 && "Undef source?");
5138 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5139 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5140 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5141
5142 assert(OddSrc >= 0 && "Undef source?");
5143 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5144 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5145 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5146
5147 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5148 }
5149
5150
5151 // Handle any remaining single source shuffles
5152 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5153 if (V2.isUndef()) {
5154 // We might be able to express the shuffle as a bitrotate. But even if we
5155 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5156 // shifts and a vor will have a higher throughput than a vrgather.
5157 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5158 return V;
5159
5160 if (VT.getScalarSizeInBits() == 8 &&
5161 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5162 // On such a vector we're unable to use i8 as the index type.
5163 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5164 // may involve vector splitting if we're already at LMUL=8, or our
5165 // user-supplied maximum fixed-length LMUL.
5166 return SDValue();
5167 }
5168
5169 // Base case for the two operand recursion below - handle the worst case
5170 // single source shuffle.
5171 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5172 MVT IndexVT = VT.changeTypeToInteger();
5173 // Since we can't introduce illegal index types at this stage, use i16 and
5174 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5175 // than XLenVT.
5176 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5177 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5178 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5179 }
5180
5181 // If the mask allows, we can do all the index computation in 16 bits. This
5182 // requires less work and less register pressure at high LMUL, and creates
5183 // smaller constants which may be cheaper to materialize.
5184 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5185 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5186 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5187 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5188 }
5189
5190 MVT IndexContainerVT =
5191 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5192
5193 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5194 SmallVector<SDValue> GatherIndicesLHS;
5195 for (int MaskIndex : Mask) {
5196 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5197 GatherIndicesLHS.push_back(IsLHSIndex
5198 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5199 : DAG.getUNDEF(XLenVT));
5200 }
5201 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5202 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5203 Subtarget);
5204 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5205 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5206 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5207 }
5208
5209 // By default we preserve the original operand order, and use a mask to
5210 // select LHS as true and RHS as false. However, since RVV vector selects may
5211 // feature splats but only on the LHS, we may choose to invert our mask and
5212 // instead select between RHS and LHS.
5213 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5214
5215 // Detect shuffles which can be re-expressed as vector selects; these are
5216 // shuffles in which each element in the destination is taken from an element
5217 // at the corresponding index in either source vector.
5218 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5219 int MaskIndex = MaskIdx.value();
5220 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5221 });
5222 if (IsSelect) {
5223 // Now construct the mask that will be used by the vselect operation.
5224 SmallVector<SDValue> MaskVals;
5225 for (int MaskIndex : Mask) {
5226 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5227 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5228 }
5229
5230 if (SwapOps)
5231 std::swap(V1, V2);
5232
5233 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5234 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5235 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5236 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5237 }
5238
5239 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5240 // merged with a second vrgather.
5241 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5242 SmallVector<SDValue> MaskVals;
5243
5244 // Now construct the mask that will be used by the blended vrgather operation,
5245 // and construct the appropriate indices into each vector.
5246 for (int MaskIndex : Mask) {
5247 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5248 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5249 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5250 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5251 ? MaskIndex : -1);
5252 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5253 }
5254
5255 if (SwapOps) {
5256 std::swap(V1, V2);
5257 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5258 }
5259
5260 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5261 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5262 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5263
5264 // Recursively invoke lowering for each operand if we had two
5265 // independent single source shuffles, and then combine the result via a
5266 // vselect. Note that the vselect will likely be folded back into the
5267 // second permute (vrgather, or other) by the post-isel combine.
5268 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5269 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5270 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5271}
5272
5273bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5274 // Support splats for any type. These should type legalize well.
5275 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5276 return true;
5277
5278 // Only support legal VTs for other shuffles for now.
5279 if (!isTypeLegal(VT))
5280 return false;
5281
5282 MVT SVT = VT.getSimpleVT();
5283
5284 // Not for i1 vectors.
5285 if (SVT.getScalarType() == MVT::i1)
5286 return false;
5287
5288 int Dummy1, Dummy2;
5289 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5290 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5291}
5292
5293// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5294// the exponent.
5295SDValue
5296RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5297 SelectionDAG &DAG) const {
5298 MVT VT = Op.getSimpleValueType();
5299 unsigned EltSize = VT.getScalarSizeInBits();
5300 SDValue Src = Op.getOperand(0);
5301 SDLoc DL(Op);
5302 MVT ContainerVT = VT;
5303
5304 SDValue Mask, VL;
5305 if (Op->isVPOpcode()) {
5306 Mask = Op.getOperand(1);
5307 if (VT.isFixedLengthVector())
5308 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5309 Subtarget);
5310 VL = Op.getOperand(2);
5311 }
5312
5313 // We choose an FP type that can represent the value exactly if possible.
5314 // Otherwise, we use a round-toward-zero conversion so the exponent of the
5315 // TODO: Use f16 for i8 when possible?
5316 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5317 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5318 FloatEltVT = MVT::f32;
5319 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5320
5321 // Legal types should have been checked in the RISCVTargetLowering
5322 // constructor.
5323 // TODO: Splitting may make sense in some cases.
5324 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5325 "Expected legal float type!");
5326
5327 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5328 // The trailing zero count is equal to log2 of this single bit value.
5329 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5330 SDValue Neg = DAG.getNegative(Src, DL, VT);
5331 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5332 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5333 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5334 Src, Mask, VL);
5335 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5336 }
5337
5338 // We have a legal FP type, convert to it.
5339 SDValue FloatVal;
5340 if (FloatVT.bitsGT(VT)) {
5341 if (Op->isVPOpcode())
5342 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5343 else
5344 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5345 } else {
5346 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5347 if (VT.isFixedLengthVector()) {
5348 ContainerVT = getContainerForFixedLengthVector(VT);
5349 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5350 }
5351 if (!Op->isVPOpcode())
5352 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5353 SDValue RTZRM =
5354 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5355 MVT ContainerFloatVT =
5356 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5357 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5358 Src, Mask, RTZRM, VL);
5359 if (VT.isFixedLengthVector())
5360 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5361 }
5362 // Bitcast to integer and shift the exponent to the LSB.
5363 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5364 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
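// 52 and 23 are the mantissa widths of f64 and f32 respectively; shifting the
// bit pattern right by the mantissa width leaves the biased exponent in the
// low bits.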
5365 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5366
5367 SDValue Exp;
5368 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5369 if (Op->isVPOpcode()) {
5370 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5371 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5372 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5373 } else {
5374 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5375 DAG.getConstant(ShiftAmt, DL, IntVT));
5376 if (IntVT.bitsLT(VT))
5377 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5378 else if (IntVT.bitsGT(VT))
5379 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5380 }
5381
5382 // The exponent contains log2 of the value in biased form.
5383 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5384 // For trailing zeros, we just need to subtract the bias.
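// For example, an i32 input of 8 (bit 3 set) converts to an f32 whose
// exponent field holds 127 + 3, so subtracting the bias of 127 recovers
// cttz = 3.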
5385 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5386 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5387 DAG.getConstant(ExponentBias, DL, VT));
5388 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5389 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5390 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5391
5392 // For leading zeros, we need to remove the bias and convert from log2 to
5393 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
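// For example, an i32 input of 1 has an f32 exponent field of 127, and
// (127 + 31) - 127 = 31, the correct number of leading zeros.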
5394 unsigned Adjust = ExponentBias + (EltSize - 1);
5395 SDValue Res;
5396 if (Op->isVPOpcode())
5397 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5398 Mask, VL);
5399 else
5400 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5401
5402 // For a zero input, the result above equals Adjust, which is greater than
5403 // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5404 if (Op.getOpcode() == ISD::CTLZ)
5405 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5406 else if (Op.getOpcode() == ISD::VP_CTLZ)
5407 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5408 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5409 return Res;
5410}
5411
5412SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5413 SelectionDAG &DAG) const {
5414 SDLoc DL(Op);
5415 MVT XLenVT = Subtarget.getXLenVT();
5416 SDValue Source = Op->getOperand(0);
5417 MVT SrcVT = Source.getSimpleValueType();
5418 SDValue Mask = Op->getOperand(1);
5419 SDValue EVL = Op->getOperand(2);
5420
5421 if (SrcVT.isFixedLengthVector()) {
5422 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5423 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5424 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5425 Subtarget);
5426 SrcVT = ContainerVT;
5427 }
5428
5429 // Convert to boolean vector.
5430 if (SrcVT.getScalarType() != MVT::i1) {
5431 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5432 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5433 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5434 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5435 DAG.getUNDEF(SrcVT), Mask, EVL});
5436 }
5437
5438 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5439 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5440 // In this case, we can interpret poison as -1, so there is nothing further to do.
5441 return Res;
5442
5443 // Convert -1 to VL.
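// RISCVISD::VFIRST_VL (vfirst.m) returns -1 when no mask element is set; for
// the non-ZERO_UNDEF form that case must be mapped to the explicit vector
// length below.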
5444 SDValue SetCC =
5445 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5446 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5447 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5448}
5449
5450// While RVV has alignment restrictions, we should always be able to load as a
5451// legal equivalently-sized byte-typed vector instead. This method is
5452// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5453// the load is already correctly-aligned, it returns SDValue().
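// For example, a <vscale x 2 x i32> load that is only byte-aligned can be
// re-expressed as a <vscale x 8 x i8> load of the same bytes plus a bitcast
// back to the original type.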
5454SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5455 SelectionDAG &DAG) const {
5456 auto *Load = cast<LoadSDNode>(Op);
5457 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5458
5459 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5460 Load->getMemoryVT(),
5461 *Load->getMemOperand()))
5462 return SDValue();
5463
5464 SDLoc DL(Op);
5465 MVT VT = Op.getSimpleValueType();
5466 unsigned EltSizeBits = VT.getScalarSizeInBits();
5467 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5468 "Unexpected unaligned RVV load type");
5469 MVT NewVT =
5470 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5471 assert(NewVT.isValid() &&
5472 "Expecting equally-sized RVV vector types to be legal");
5473 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5474 Load->getPointerInfo(), Load->getOriginalAlign(),
5475 Load->getMemOperand()->getFlags());
5476 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5477}
5478
5479// While RVV has alignment restrictions, we should always be able to store as a
5480// legal equivalently-sized byte-typed vector instead. This method is
5481// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5482// returns SDValue() if the store is already correctly aligned.
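// This mirrors expandUnalignedRVVLoad: e.g. a misaligned <vscale x 2 x i32>
// store becomes a bitcast to <vscale x 8 x i8> followed by a byte-typed store.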
5483SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5484 SelectionDAG &DAG) const {
5485 auto *Store = cast<StoreSDNode>(Op);
5486 assert(Store && Store->getValue().getValueType().isVector() &&
5487 "Expected vector store");
5488
5489 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5490 Store->getMemoryVT(),
5491 *Store->getMemOperand()))
5492 return SDValue();
5493
5494 SDLoc DL(Op);
5495 SDValue StoredVal = Store->getValue();
5496 MVT VT = StoredVal.getSimpleValueType();
5497 unsigned EltSizeBits = VT.getScalarSizeInBits();
5498 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5499 "Unexpected unaligned RVV store type");
5500 MVT NewVT =
5501 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5502 assert(NewVT.isValid() &&
5503 "Expecting equally-sized RVV vector types to be legal");
5504 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5505 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5506 Store->getPointerInfo(), Store->getOriginalAlign(),
5507 Store->getMemOperand()->getFlags());
5508}
5509
5510static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5511 const RISCVSubtarget &Subtarget) {
5512 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5513
5514 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5515
5516 // All simm32 constants should be handled by isel.
5517 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5518 // this check redundant, but small immediates are common, so checking them
5519 // early here keeps compile time down.
5520 if (isInt<32>(Imm))
5521 return Op;
5522
5523 // We only need to cost the immediate, if constant pool lowering is enabled.
5524 if (!Subtarget.useConstantPoolForLargeInts())
5525 return Op;
5526
5527 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5528 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5529 return Op;
5530
5531 // Optimizations below are disabled for opt size. If we're optimizing for
5532 // size, use a constant pool.
5533 if (DAG.shouldOptForSize())
5534 return SDValue();
5535
5536 // Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
5537 // that if it will avoid a constant pool.
5538 // It will require an extra temporary register though.
5539 // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)) to handle cases where the
5540 // low and high 32 bits are the same and bits 31 and 63 are set.
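// For instance, 0x0000000100000001 can typically be built as X = 1 followed by
// (ADD (SLLI X, 32), X), and 0x8000000080000000 (identical halves with bits
// 31/63 set) fits the Zba (ADD_UW X, (SLLI X, 32)) form, since add.uw
// zero-extends the low 32 bits of X.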
5541 unsigned ShiftAmt, AddOpc;
5542 RISCVMatInt::InstSeq SeqLo =
5543 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5544 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5545 return Op;
5546
5547 return SDValue();
5548}
5549
5550static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5551 const RISCVSubtarget &Subtarget) {
5552 SDLoc dl(Op);
5553 AtomicOrdering FenceOrdering =
5554 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5555 SyncScope::ID FenceSSID =
5556 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5557
5558 if (Subtarget.hasStdExtZtso()) {
5559 // The only fence that needs an instruction is a sequentially-consistent
5560 // cross-thread fence.
5561 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5562 FenceSSID == SyncScope::System)
5563 return Op;
5564
5565 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5566 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5567 }
5568
5569 // singlethread fences only synchronize with signal handlers on the same
5570 // thread and thus only need to preserve instruction order, not actually
5571 // enforce memory ordering.
5572 if (FenceSSID == SyncScope::SingleThread)
5573 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5574 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5575
5576 return Op;
5577}
5578
5579static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5580 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5581 "Unexpected custom legalisation");
5582
5583 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
5584 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5585 SDLoc DL(Op);
5586 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5587 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5588 SDValue Result =
5589 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5590
5591 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5592 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5593 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5594 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5595 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5596 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5597 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5598}
5599
5600static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5601 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5602 "Unexpected custom legalisation");
5603
5604 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5605 // sign extend allows overflow of the lower 32 bits to be detected on
5606 // the promoted size.
5607 SDLoc DL(Op);
5608 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5609 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5610 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5611 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5612}
5613
5614// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5615static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5616 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5617 "Unexpected custom legalisation");
5618 if (isa<ConstantSDNode>(Op.getOperand(1)))
5619 return SDValue();
5620
5621 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5622 SDLoc DL(Op);
5623 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5624 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5625 SDValue WideOp =
5626 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5627 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5628 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5629 DAG.getValueType(MVT::i32));
5630 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5631 ISD::SETNE);
5632 return DAG.getMergeValues({Res, Ovf}, DL);
5633}
5634
5635// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5636static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5637 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5638 "Unexpected custom legalisation");
5639 SDLoc DL(Op);
5640 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5641 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5642 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5643 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5644 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5645 DAG.getValueType(MVT::i32));
5646 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5647 ISD::SETNE);
5648 return DAG.getMergeValues({Res, Ovf}, DL);
5649}
5650
5651SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5652 SelectionDAG &DAG) const {
5653 SDLoc DL(Op);
5654 MVT VT = Op.getSimpleValueType();
5655 MVT XLenVT = Subtarget.getXLenVT();
5656 unsigned Check = Op.getConstantOperandVal(1);
5657 unsigned TDCMask = 0;
5658 if (Check & fcSNan)
5659 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5660 if (Check & fcQNan)
5661 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5662 if (Check & fcPosInf)
5663 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5664 if (Check & fcNegInf)
5665 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5666 if (Check & fcPosNormal)
5667 TDCMask |= RISCV::FPMASK_Positive_Normal;
5668 if (Check & fcNegNormal)
5669 TDCMask |= RISCV::FPMASK_Negative_Normal;
5670 if (Check & fcPosSubnormal)
5671 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5672 if (Check & fcNegSubnormal)
5673 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5674 if (Check & fcPosZero)
5675 TDCMask |= RISCV::FPMASK_Positive_Zero;
5676 if (Check & fcNegZero)
5677 TDCMask |= RISCV::FPMASK_Negative_Zero;
5678
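// TDCMask now has one bit per requested class, matching the per-class result
// layout of the scalar fclass.{s,d,h} instructions and of the vector
// vfclass.v used below, so a single AND tests all requested classes at once.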
5679 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5680
5681 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5682
5683 if (VT.isVector()) {
5684 SDValue Op0 = Op.getOperand(0);
5685 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5686
5687 if (VT.isScalableVector()) {
5688 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5689 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5690 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5691 Mask = Op.getOperand(2);
5692 VL = Op.getOperand(3);
5693 }
5694 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5695 VL, Op->getFlags());
5696 if (IsOneBitMask)
5697 return DAG.getSetCC(DL, VT, FPCLASS,
5698 DAG.getConstant(TDCMask, DL, DstVT),
5699 ISD::SETEQ);
5700 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5701 DAG.getConstant(TDCMask, DL, DstVT));
5702 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5703 ISD::SETNE);
5704 }
5705
5706 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5707 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5708 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5709 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5710 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5711 Mask = Op.getOperand(2);
5712 MVT MaskContainerVT =
5713 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5714 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5715 VL = Op.getOperand(3);
5716 }
5717 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5718
5719 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5720 Mask, VL, Op->getFlags());
5721
5722 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5723 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5724 if (IsOneBitMask) {
5725 SDValue VMSEQ =
5726 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5727 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5728 DAG.getUNDEF(ContainerVT), Mask, VL});
5729 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5730 }
5731 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5732 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5733
5734 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5735 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5736 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5737
5738 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5739 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5740 DAG.getUNDEF(ContainerVT), Mask, VL});
5741 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5742 }
5743
5744 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5745 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5746 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5747 ISD::SETNE);
5748 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5749}
5750
5751// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5752// operations propagate nans.
5754 const RISCVSubtarget &Subtarget) {
5755 SDLoc DL(Op);
5756 MVT VT = Op.getSimpleValueType();
5757
5758 SDValue X = Op.getOperand(0);
5759 SDValue Y = Op.getOperand(1);
5760
5761 if (!VT.isVector()) {
5762 MVT XLenVT = Subtarget.getXLenVT();
5763
5764 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5765 // ensures that when one input is a nan, the other will also be a nan
5766 // allowing the nan to propagate. If both inputs are nan, this will swap the
5767 // inputs which is harmless.
5768
5769 SDValue NewY = Y;
5770 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5771 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5772 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5773 }
5774
5775 SDValue NewX = X;
5776 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5777 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5778 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5779 }
5780
5781 unsigned Opc =
5782 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5783 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5784 }
5785
5786 // Check for NaNs before converting the fixed-length vectors to scalable ones.
5787 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5788 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5789
5790 MVT ContainerVT = VT;
5791 if (VT.isFixedLengthVector()) {
5792 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5793 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5794 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5795 }
5796
5797 SDValue Mask, VL;
5798 if (Op->isVPOpcode()) {
5799 Mask = Op.getOperand(2);
5800 if (VT.isFixedLengthVector())
5801 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5802 Subtarget);
5803 VL = Op.getOperand(3);
5804 } else {
5805 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5806 }
5807
5808 SDValue NewY = Y;
5809 if (!XIsNeverNan) {
5810 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5811 {X, X, DAG.getCondCode(ISD::SETOEQ),
5812 DAG.getUNDEF(ContainerVT), Mask, VL});
5813 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5814 DAG.getUNDEF(ContainerVT), VL);
5815 }
5816
5817 SDValue NewX = X;
5818 if (!YIsNeverNan) {
5819 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5820 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5821 DAG.getUNDEF(ContainerVT), Mask, VL});
5822 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5823 DAG.getUNDEF(ContainerVT), VL);
5824 }
5825
5826 unsigned Opc =
5827 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5828 ? RISCVISD::VFMAX_VL
5829 : RISCVISD::VFMIN_VL;
5830 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5831 DAG.getUNDEF(ContainerVT), Mask, VL);
5832 if (VT.isFixedLengthVector())
5833 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5834 return Res;
5835}
5836
5837/// Get a RISC-V target specified VL op for a given SDNode.
5838static unsigned getRISCVVLOp(SDValue Op) {
5839#define OP_CASE(NODE) \
5840 case ISD::NODE: \
5841 return RISCVISD::NODE##_VL;
5842#define VP_CASE(NODE) \
5843 case ISD::VP_##NODE: \
5844 return RISCVISD::NODE##_VL;
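// For example, OP_CASE(ADD) expands to "case ISD::ADD: return
// RISCVISD::ADD_VL;" and VP_CASE(ADD) produces the matching ISD::VP_ADD
// mapping.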
5845 // clang-format off
5846 switch (Op.getOpcode()) {
5847 default:
5848 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5849 OP_CASE(ADD)
5850 OP_CASE(SUB)
5851 OP_CASE(MUL)
5852 OP_CASE(MULHS)
5853 OP_CASE(MULHU)
5854 OP_CASE(SDIV)
5855 OP_CASE(SREM)
5856 OP_CASE(UDIV)
5857 OP_CASE(UREM)
5858 OP_CASE(SHL)
5859 OP_CASE(SRA)
5860 OP_CASE(SRL)
5861 OP_CASE(ROTL)
5862 OP_CASE(ROTR)
5863 OP_CASE(BSWAP)
5864 OP_CASE(CTTZ)
5865 OP_CASE(CTLZ)
5866 OP_CASE(CTPOP)
5867 OP_CASE(BITREVERSE)
5868 OP_CASE(SADDSAT)
5869 OP_CASE(UADDSAT)
5870 OP_CASE(SSUBSAT)
5871 OP_CASE(USUBSAT)
5872 OP_CASE(AVGFLOORS)
5873 OP_CASE(AVGFLOORU)
5874 OP_CASE(AVGCEILS)
5875 OP_CASE(AVGCEILU)
5876 OP_CASE(FADD)
5877 OP_CASE(FSUB)
5878 OP_CASE(FMUL)
5879 OP_CASE(FDIV)
5880 OP_CASE(FNEG)
5881 OP_CASE(FABS)
5882 OP_CASE(FSQRT)
5883 OP_CASE(SMIN)
5884 OP_CASE(SMAX)
5885 OP_CASE(UMIN)
5886 OP_CASE(UMAX)
5887 OP_CASE(STRICT_FADD)
5888 OP_CASE(STRICT_FSUB)
5889 OP_CASE(STRICT_FMUL)
5890 OP_CASE(STRICT_FDIV)
5891 OP_CASE(STRICT_FSQRT)
5892 VP_CASE(ADD) // VP_ADD
5893 VP_CASE(SUB) // VP_SUB
5894 VP_CASE(MUL) // VP_MUL
5895 VP_CASE(SDIV) // VP_SDIV
5896 VP_CASE(SREM) // VP_SREM
5897 VP_CASE(UDIV) // VP_UDIV
5898 VP_CASE(UREM) // VP_UREM
5899 VP_CASE(SHL) // VP_SHL
5900 VP_CASE(FADD) // VP_FADD
5901 VP_CASE(FSUB) // VP_FSUB
5902 VP_CASE(FMUL) // VP_FMUL
5903 VP_CASE(FDIV) // VP_FDIV
5904 VP_CASE(FNEG) // VP_FNEG
5905 VP_CASE(FABS) // VP_FABS
5906 VP_CASE(SMIN) // VP_SMIN
5907 VP_CASE(SMAX) // VP_SMAX
5908 VP_CASE(UMIN) // VP_UMIN
5909 VP_CASE(UMAX) // VP_UMAX
5910 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5911 VP_CASE(SETCC) // VP_SETCC
5912 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5913 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5914 VP_CASE(BITREVERSE) // VP_BITREVERSE
5915 VP_CASE(SADDSAT) // VP_SADDSAT
5916 VP_CASE(UADDSAT) // VP_UADDSAT
5917 VP_CASE(SSUBSAT) // VP_SSUBSAT
5918 VP_CASE(USUBSAT) // VP_USUBSAT
5919 VP_CASE(BSWAP) // VP_BSWAP
5920 VP_CASE(CTLZ) // VP_CTLZ
5921 VP_CASE(CTTZ) // VP_CTTZ
5922 VP_CASE(CTPOP) // VP_CTPOP
5923 case ISD::CTLZ_ZERO_UNDEF:
5924 case ISD::VP_CTLZ_ZERO_UNDEF:
5925 return RISCVISD::CTLZ_VL;
5926 case ISD::CTTZ_ZERO_UNDEF:
5927 case ISD::VP_CTTZ_ZERO_UNDEF:
5928 return RISCVISD::CTTZ_VL;
5929 case ISD::FMA:
5930 case ISD::VP_FMA:
5931 return RISCVISD::VFMADD_VL;
5932 case ISD::STRICT_FMA:
5933 return RISCVISD::STRICT_VFMADD_VL;
5934 case ISD::AND:
5935 case ISD::VP_AND:
5936 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5937 return RISCVISD::VMAND_VL;
5938 return RISCVISD::AND_VL;
5939 case ISD::OR:
5940 case ISD::VP_OR:
5941 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5942 return RISCVISD::VMOR_VL;
5943 return RISCVISD::OR_VL;
5944 case ISD::XOR:
5945 case ISD::VP_XOR:
5946 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5947 return RISCVISD::VMXOR_VL;
5948 return RISCVISD::XOR_VL;
5949 case ISD::VP_SELECT:
5950 case ISD::VP_MERGE:
5951 return RISCVISD::VMERGE_VL;
5952 case ISD::VP_SRA:
5953 return RISCVISD::SRA_VL;
5954 case ISD::VP_SRL:
5955 return RISCVISD::SRL_VL;
5956 case ISD::VP_SQRT:
5957 return RISCVISD::FSQRT_VL;
5958 case ISD::VP_SIGN_EXTEND:
5959 return RISCVISD::VSEXT_VL;
5960 case ISD::VP_ZERO_EXTEND:
5961 return RISCVISD::VZEXT_VL;
5962 case ISD::VP_FP_TO_SINT:
5963 return RISCVISD::VFCVT_RTZ_X_F_VL;
5964 case ISD::VP_FP_TO_UINT:
5965 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5966 case ISD::FMINNUM:
5967 case ISD::VP_FMINNUM:
5968 return RISCVISD::VFMIN_VL;
5969 case ISD::FMAXNUM:
5970 case ISD::VP_FMAXNUM:
5971 return RISCVISD::VFMAX_VL;
5972 case ISD::LRINT:
5973 case ISD::VP_LRINT:
5974 case ISD::LLRINT:
5975 case ISD::VP_LLRINT:
5976 return RISCVISD::VFCVT_X_F_VL;
5977 }
5978 // clang-format on
5979#undef OP_CASE
5980#undef VP_CASE
5981}
5982
5983/// Return true if a RISC-V target specified op has a merge operand.
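/// The merge operand supplies the result values for lanes that are masked off
/// or beyond VL (the "passthru" in RVV terms); contrast with the mask operand
/// queried by hasMaskOp below.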
5984static bool hasMergeOp(unsigned Opcode) {
5985 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5986 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5987 "not a RISC-V target specific op");
5988 assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5989 130 &&
5990 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5991 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5992 21 &&
5993 "adding target specific op should update this function");
5994 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5995 return true;
5996 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5997 return true;
5998 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5999 return true;
6000 if (Opcode == RISCVISD::SETCC_VL)
6001 return true;
6002 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6003 return true;
6004 if (Opcode == RISCVISD::VMERGE_VL)
6005 return true;
6006 return false;
6007}
6008
6009/// Return true if a RISC-V target specified op has a mask operand.
6010static bool hasMaskOp(unsigned Opcode) {
6011 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6012 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
6013 "not a RISC-V target specific op");
6014 assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
6015 130 &&
6016 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
6017 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
6018 21 &&
6019 "adding target specific op should update this function");
6020 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6021 return true;
6022 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6023 return true;
6024 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6026 return true;
6027 return false;
6028}
6029
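// Split an operation on a vector type that is too wide to lower directly:
// apply the opcode to the Lo/Hi halves of every vector operand and
// concatenate the two results.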
6030static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6031 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6032 SDLoc DL(Op);
6033
6034 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6035 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6036
6037 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6038 if (!Op.getOperand(j).getValueType().isVector()) {
6039 LoOperands[j] = Op.getOperand(j);
6040 HiOperands[j] = Op.getOperand(j);
6041 continue;
6042 }
6043 std::tie(LoOperands[j], HiOperands[j]) =
6044 DAG.SplitVector(Op.getOperand(j), DL);
6045 }
6046
6047 SDValue LoRes =
6048 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6049 SDValue HiRes =
6050 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6051
6052 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6053}
6054
6055static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6056 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6057 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6058 SDLoc DL(Op);
6059
6060 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6061 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6062
6063 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6064 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6065 std::tie(LoOperands[j], HiOperands[j]) =
6066 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6067 continue;
6068 }
6069 if (!Op.getOperand(j).getValueType().isVector()) {
6070 LoOperands[j] = Op.getOperand(j);
6071 HiOperands[j] = Op.getOperand(j);
6072 continue;
6073 }
6074 std::tie(LoOperands[j], HiOperands[j]) =
6075 DAG.SplitVector(Op.getOperand(j), DL);
6076 }
6077
6078 SDValue LoRes =
6079 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6080 SDValue HiRes =
6081 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6082
6083 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6084}
6085
6086static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6087 SDLoc DL(Op);
6088
6089 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6090 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6091 auto [EVLLo, EVLHi] =
6092 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6093
6094 SDValue ResLo =
6095 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6096 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6097 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6098 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6099}
6100
6101static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6102
6103 assert(Op->isStrictFPOpcode());
6104
6105 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6106
6107 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6108 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6109
6110 SDLoc DL(Op);
6111
6112 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6113 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6114
6115 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6116 if (!Op.getOperand(j).getValueType().isVector()) {
6117 LoOperands[j] = Op.getOperand(j);
6118 HiOperands[j] = Op.getOperand(j);
6119 continue;
6120 }
6121 std::tie(LoOperands[j], HiOperands[j]) =
6122 DAG.SplitVector(Op.getOperand(j), DL);
6123 }
6124
6125 SDValue LoRes =
6126 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6127 HiOperands[0] = LoRes.getValue(1);
6128 SDValue HiRes =
6129 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6130
6131 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6132 LoRes.getValue(0), HiRes.getValue(0));
6133 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6134}
6135
6136SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6137 SelectionDAG &DAG) const {
6138 switch (Op.getOpcode()) {
6139 default:
6140 report_fatal_error("unimplemented operand");
6141 case ISD::ATOMIC_FENCE:
6142 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6143 case ISD::GlobalAddress:
6144 return lowerGlobalAddress(Op, DAG);
6145 case ISD::BlockAddress:
6146 return lowerBlockAddress(Op, DAG);
6147 case ISD::ConstantPool:
6148 return lowerConstantPool(Op, DAG);
6149 case ISD::JumpTable:
6150 return lowerJumpTable(Op, DAG);
6151 case ISD::GlobalTLSAddress:
6152 return lowerGlobalTLSAddress(Op, DAG);
6153 case ISD::Constant:
6154 return lowerConstant(Op, DAG, Subtarget);
6155 case ISD::SELECT:
6156 return lowerSELECT(Op, DAG);
6157 case ISD::BRCOND:
6158 return lowerBRCOND(Op, DAG);
6159 case ISD::VASTART:
6160 return lowerVASTART(Op, DAG);
6161 case ISD::FRAMEADDR:
6162 return lowerFRAMEADDR(Op, DAG);
6163 case ISD::RETURNADDR:
6164 return lowerRETURNADDR(Op, DAG);
6165 case ISD::SADDO:
6166 case ISD::SSUBO:
6167 return lowerSADDO_SSUBO(Op, DAG);
6168 case ISD::SMULO:
6169 return lowerSMULO(Op, DAG);
6170 case ISD::SHL_PARTS:
6171 return lowerShiftLeftParts(Op, DAG);
6172 case ISD::SRA_PARTS:
6173 return lowerShiftRightParts(Op, DAG, true);
6174 case ISD::SRL_PARTS:
6175 return lowerShiftRightParts(Op, DAG, false);
6176 case ISD::ROTL:
6177 case ISD::ROTR:
6178 if (Op.getValueType().isFixedLengthVector()) {
6179 assert(Subtarget.hasStdExtZvkb());
6180 return lowerToScalableOp(Op, DAG);
6181 }
6182 assert(Subtarget.hasVendorXTHeadBb() &&
6183 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6184 "Unexpected custom legalization");
6185 // XTHeadBb only supports rotate by constant.
6186 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6187 return SDValue();
6188 return Op;
6189 case ISD::BITCAST: {
6190 SDLoc DL(Op);
6191 EVT VT = Op.getValueType();
6192 SDValue Op0 = Op.getOperand(0);
6193 EVT Op0VT = Op0.getValueType();
6194 MVT XLenVT = Subtarget.getXLenVT();
6195 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6196 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6197 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6198 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6199 return FPConv;
6200 }
6201 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6202 Subtarget.hasStdExtZfbfmin()) {
6203 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6204 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6205 return FPConv;
6206 }
6207 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6208 Subtarget.hasStdExtFOrZfinx()) {
6209 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6210 SDValue FPConv =
6211 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6212 return FPConv;
6213 }
6214 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6215 SDValue Lo, Hi;
6216 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6217 SDValue RetReg =
6218 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6219 return RetReg;
6220 }
6221
6222 // Consider other scalar<->scalar casts as legal if the types are legal.
6223 // Otherwise expand them.
6224 if (!VT.isVector() && !Op0VT.isVector()) {
6225 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6226 return Op;
6227 return SDValue();
6228 }
6229
6230 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6231 "Unexpected types");
6232
6233 if (VT.isFixedLengthVector()) {
6234 // We can handle fixed length vector bitcasts with a simple replacement
6235 // in isel.
6236 if (Op0VT.isFixedLengthVector())
6237 return Op;
6238 // When bitcasting from scalar to fixed-length vector, insert the scalar
6239 // into a one-element vector of the result type, and perform a vector
6240 // bitcast.
6241 if (!Op0VT.isVector()) {
6242 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6243 if (!isTypeLegal(BVT))
6244 return SDValue();
6245 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6246 DAG.getUNDEF(BVT), Op0,
6247 DAG.getVectorIdxConstant(0, DL)));
6248 }
6249 return SDValue();
6250 }
6251 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6252 // thus: bitcast the vector to a one-element vector type whose element type
6253 // is the same as the result type, and extract the first element.
6254 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6255 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6256 if (!isTypeLegal(BVT))
6257 return SDValue();
6258 SDValue BVec = DAG.getBitcast(BVT, Op0);
6259 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6260 DAG.getVectorIdxConstant(0, DL));
6261 }
6262 return SDValue();
6263 }
6264 case ISD::INTRINSIC_WO_CHAIN:
6265 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6266 case ISD::INTRINSIC_W_CHAIN:
6267 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6268 case ISD::INTRINSIC_VOID:
6269 return LowerINTRINSIC_VOID(Op, DAG);
6270 case ISD::IS_FPCLASS:
6271 return LowerIS_FPCLASS(Op, DAG);
6272 case ISD::BITREVERSE: {
6273 MVT VT = Op.getSimpleValueType();
6274 if (VT.isFixedLengthVector()) {
6275 assert(Subtarget.hasStdExtZvbb());
6276 return lowerToScalableOp(Op, DAG);
6277 }
6278 SDLoc DL(Op);
6279 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6280 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6281 // Expand bitreverse to a bswap(rev8) followed by brev8.
6282 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6283 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6284 }
6285 case ISD::TRUNCATE:
6286 // Only custom-lower vector truncates
6287 if (!Op.getSimpleValueType().isVector())
6288 return Op;
6289 return lowerVectorTruncLike(Op, DAG);
6290 case ISD::ANY_EXTEND:
6291 case ISD::ZERO_EXTEND:
6292 if (Op.getOperand(0).getValueType().isVector() &&
6293 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6294 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6295 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6296 case ISD::SIGN_EXTEND:
6297 if (Op.getOperand(0).getValueType().isVector() &&
6298 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6299 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6300 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6301 case ISD::SPLAT_VECTOR_PARTS:
6302 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6303 case ISD::INSERT_VECTOR_ELT:
6304 return lowerINSERT_VECTOR_ELT(Op, DAG);
6305 case ISD::EXTRACT_VECTOR_ELT:
6306 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6307 case ISD::SCALAR_TO_VECTOR: {
6308 MVT VT = Op.getSimpleValueType();
6309 SDLoc DL(Op);
6310 SDValue Scalar = Op.getOperand(0);
6311 if (VT.getVectorElementType() == MVT::i1) {
6312 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6313 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6314 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6315 }
6316 MVT ContainerVT = VT;
6317 if (VT.isFixedLengthVector())
6318 ContainerVT = getContainerForFixedLengthVector(VT);
6319 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6320 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6321 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6322 DAG.getUNDEF(ContainerVT), Scalar, VL);
6323 if (VT.isFixedLengthVector())
6324 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6325 return V;
6326 }
6327 case ISD::VSCALE: {
6328 MVT XLenVT = Subtarget.getXLenVT();
6329 MVT VT = Op.getSimpleValueType();
6330 SDLoc DL(Op);
6331 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6332 // We define our scalable vector types for lmul=1 to use a 64 bit known
6333 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6334 // vscale as VLENB / 8.
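// For example, a request for vscale * 4 is lowered to VLENB >> 1, since
// vscale == VLENB / 8.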
6335 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6336 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6337 report_fatal_error("Support for VLEN==32 is incomplete.");
6338 // We assume VLENB is a multiple of 8. We manually choose the best shift
6339 // here because SimplifyDemandedBits isn't always able to simplify it.
6340 uint64_t Val = Op.getConstantOperandVal(0);
6341 if (isPowerOf2_64(Val)) {
6342 uint64_t Log2 = Log2_64(Val);
6343 if (Log2 < 3)
6344 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6345 DAG.getConstant(3 - Log2, DL, VT));
6346 else if (Log2 > 3)
6347 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6348 DAG.getConstant(Log2 - 3, DL, XLenVT));
6349 } else if ((Val % 8) == 0) {
6350 // If the multiplier is a multiple of 8, scale it down to avoid needing
6351 // to shift the VLENB value.
6352 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6353 DAG.getConstant(Val / 8, DL, XLenVT));
6354 } else {
6355 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6356 DAG.getConstant(3, DL, XLenVT));
6357 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6358 DAG.getConstant(Val, DL, XLenVT));
6359 }
6360 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6361 }
6362 case ISD::FPOWI: {
6363 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6364 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6365 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6366 Op.getOperand(1).getValueType() == MVT::i32) {
6367 SDLoc DL(Op);
6368 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6369 SDValue Powi =
6370 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6371 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6372 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6373 }
6374 return SDValue();
6375 }
6376 case ISD::FMAXIMUM:
6377 case ISD::FMINIMUM:
6378 if (Op.getValueType() == MVT::nxv32f16 &&
6379 (Subtarget.hasVInstructionsF16Minimal() &&
6380 !Subtarget.hasVInstructionsF16()))
6381 return SplitVectorOp(Op, DAG);
6382 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6383 case ISD::FP_EXTEND: {
6384 SDLoc DL(Op);
6385 EVT VT = Op.getValueType();
6386 SDValue Op0 = Op.getOperand(0);
6387 EVT Op0VT = Op0.getValueType();
6388 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6389 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6390 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6391 SDValue FloatVal =
6392 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6393 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6394 }
6395
6396 if (!Op.getValueType().isVector())
6397 return Op;
6398 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6399 }
6400 case ISD::FP_ROUND: {
6401 SDLoc DL(Op);
6402 EVT VT = Op.getValueType();
6403 SDValue Op0 = Op.getOperand(0);
6404 EVT Op0VT = Op0.getValueType();
6405 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6406 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6407 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6408 Subtarget.hasStdExtDOrZdinx()) {
6409 SDValue FloatVal =
6410 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6411 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6412 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6413 }
6414
6415 if (!Op.getValueType().isVector())
6416 return Op;
6417 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6418 }
6419 case ISD::STRICT_FP_ROUND:
6420 case ISD::STRICT_FP_EXTEND:
6421 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6422 case ISD::SINT_TO_FP:
6423 case ISD::UINT_TO_FP:
6424 if (Op.getValueType().isVector() &&
6425 Op.getValueType().getScalarType() == MVT::f16 &&
6426 (Subtarget.hasVInstructionsF16Minimal() &&
6427 !Subtarget.hasVInstructionsF16())) {
6428 if (Op.getValueType() == MVT::nxv32f16)
6429 return SplitVectorOp(Op, DAG);
6430 // int -> f32
6431 SDLoc DL(Op);
6432 MVT NVT =
6433 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6434 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6435 // f32 -> f16
6436 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6437 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6438 }
6439 [[fallthrough]];
6440 case ISD::FP_TO_SINT:
6441 case ISD::FP_TO_UINT:
6442 if (SDValue Op1 = Op.getOperand(0);
6443 Op1.getValueType().isVector() &&
6444 Op1.getValueType().getScalarType() == MVT::f16 &&
6445 (Subtarget.hasVInstructionsF16Minimal() &&
6446 !Subtarget.hasVInstructionsF16())) {
6447 if (Op1.getValueType() == MVT::nxv32f16)
6448 return SplitVectorOp(Op, DAG);
6449 // f16 -> f32
6450 SDLoc DL(Op);
6451 MVT NVT = MVT::getVectorVT(MVT::f32,
6452 Op1.getValueType().getVectorElementCount());
6453 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6454 // f32 -> int
6455 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6456 }
6457 [[fallthrough]];
6458 case ISD::STRICT_FP_TO_SINT:
6459 case ISD::STRICT_FP_TO_UINT:
6460 case ISD::STRICT_SINT_TO_FP:
6461 case ISD::STRICT_UINT_TO_FP: {
6462 // RVV can only do fp<->int conversions to types half/double the size as
6463 // the source. We custom-lower any conversions that do two hops into
6464 // sequences.
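// For example, <vscale x 4 x i8> -> <vscale x 4 x f32> is emitted as an
// integer extend to <vscale x 4 x i16> followed by a single widening convert,
// and an f16 -> i64 conversion similarly goes through an f32 fp_extend first.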
6465 MVT VT = Op.getSimpleValueType();
6466 if (!VT.isVector())
6467 return Op;
6468 SDLoc DL(Op);
6469 bool IsStrict = Op->isStrictFPOpcode();
6470 SDValue Src = Op.getOperand(0 + IsStrict);
6471 MVT EltVT = VT.getVectorElementType();
6472 MVT SrcVT = Src.getSimpleValueType();
6473 MVT SrcEltVT = SrcVT.getVectorElementType();
6474 unsigned EltSize = EltVT.getSizeInBits();
6475 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6476 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6477 "Unexpected vector element types");
6478
6479 bool IsInt2FP = SrcEltVT.isInteger();
6480 // Widening conversions
6481 if (EltSize > (2 * SrcEltSize)) {
6482 if (IsInt2FP) {
6483 // Do a regular integer sign/zero extension then convert to float.
6484 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6485 VT.getVectorElementCount());
6486 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6487 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6488 ? ISD::ZERO_EXTEND
6489 : ISD::SIGN_EXTEND;
6490 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6491 if (IsStrict)
6492 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6493 Op.getOperand(0), Ext);
6494 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6495 }
6496 // FP2Int
6497 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6498 // Do one doubling fp_extend then complete the operation by converting
6499 // to int.
6500 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6501 if (IsStrict) {
6502 auto [FExt, Chain] =
6503 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6504 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6505 }
6506 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6507 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6508 }
6509
6510 // Narrowing conversions
6511 if (SrcEltSize > (2 * EltSize)) {
6512 if (IsInt2FP) {
6513 // One narrowing int_to_fp, then an fp_round.
6514 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6515 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6516 if (IsStrict) {
6517 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6518 DAG.getVTList(InterimFVT, MVT::Other),
6519 Op.getOperand(0), Src);
6520 SDValue Chain = Int2FP.getValue(1);
6521 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6522 }
6523 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6524 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6525 }
6526 // FP2Int
6527 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6528 // representable by the integer, the result is poison.
6529 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6530 VT.getVectorElementCount());
6531 if (IsStrict) {
6532 SDValue FP2Int =
6533 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6534 Op.getOperand(0), Src);
6535 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6536 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6537 }
6538 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6539 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6540 }
6541
6542 // Scalable vectors can exit here. Patterns will handle equally-sized
6543 // conversions halving/doubling ones.
6544 if (!VT.isFixedLengthVector())
6545 return Op;
6546
6547 // For fixed-length vectors we lower to a custom "VL" node.
6548 unsigned RVVOpc = 0;
6549 switch (Op.getOpcode()) {
6550 default:
6551 llvm_unreachable("Impossible opcode");
6552 case ISD::FP_TO_SINT:
6553 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6554 break;
6555 case ISD::FP_TO_UINT:
6556 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6557 break;
6558 case ISD::SINT_TO_FP:
6559 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6560 break;
6561 case ISD::UINT_TO_FP:
6562 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6563 break;
6564 case ISD::STRICT_FP_TO_SINT:
6565 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6566 break;
6567 case ISD::STRICT_FP_TO_UINT:
6568 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6569 break;
6570 case ISD::STRICT_SINT_TO_FP:
6571 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6572 break;
6573 case ISD::STRICT_UINT_TO_FP:
6574 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6575 break;
6576 }
6577
6578 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6579 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6580 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6581 "Expected same element count");
6582
6583 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6584
6585 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6586 if (IsStrict) {
6587 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6588 Op.getOperand(0), Src, Mask, VL);
6589 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6590 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6591 }
6592 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6593 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6594 }
6595 case ISD::FP_TO_SINT_SAT:
6596 case ISD::FP_TO_UINT_SAT:
6597 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6598 case ISD::FP_TO_BF16: {
6599 // Custom lower to ensure the libcall return is passed in an FPR on hard
6600 // float ABIs.
6601 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6602 SDLoc DL(Op);
6603 MakeLibCallOptions CallOptions;
6604 RTLIB::Libcall LC =
6605 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6606 SDValue Res =
6607 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6608 if (Subtarget.is64Bit() && !RV64LegalI32)
6609 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6610 return DAG.getBitcast(MVT::i32, Res);
6611 }
6612 case ISD::BF16_TO_FP: {
6613 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6614 MVT VT = Op.getSimpleValueType();
6615 SDLoc DL(Op);
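// bf16 occupies the upper 16 bits of the f32 encoding, so shifting the
// integer payload left by 16 yields the f32 bit pattern for the same value;
// FMV_W_X_RV64 (or a plain bitcast on RV32) then moves it into an FPR.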
6616 Op = DAG.getNode(
6617 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6618 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6619 SDValue Res = Subtarget.is64Bit()
6620 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6621 : DAG.getBitcast(MVT::f32, Op);
6622 // fp_extend if the target VT is bigger than f32.
6623 if (VT != MVT::f32)
6624 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6625 return Res;
6626 }
6627 case ISD::FP_TO_FP16: {
6628 // Custom lower to ensure the libcall return is passed in an FPR on hard
6629 // float ABIs.
6630 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6631 SDLoc DL(Op);
6632 MakeLibCallOptions CallOptions;
6633 RTLIB::Libcall LC =
6634 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6635 SDValue Res =
6636 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6637 if (Subtarget.is64Bit() && !RV64LegalI32)
6638 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6639 return DAG.getBitcast(MVT::i32, Res);
6640 }
6641 case ISD::FP16_TO_FP: {
6642 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6643 // float ABIs.
6644 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6645 SDLoc DL(Op);
6646 MakeLibCallOptions CallOptions;
6647 SDValue Arg = Subtarget.is64Bit()
6648 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6649 Op.getOperand(0))
6650 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6651 SDValue Res =
6652 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6653 .first;
6654 return Res;
6655 }
6656 case ISD::FTRUNC:
6657 case ISD::FCEIL:
6658 case ISD::FFLOOR:
6659 case ISD::FNEARBYINT:
6660 case ISD::FRINT:
6661 case ISD::FROUND:
6662 case ISD::FROUNDEVEN:
6663 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6664 case ISD::LRINT:
6665 case ISD::LLRINT:
6666 return lowerVectorXRINT(Op, DAG, Subtarget);
6667 case ISD::VECREDUCE_ADD:
6668 case ISD::VECREDUCE_UMAX:
6669 case ISD::VECREDUCE_SMAX:
6670 case ISD::VECREDUCE_UMIN:
6671 case ISD::VECREDUCE_SMIN:
6672 return lowerVECREDUCE(Op, DAG);
6673 case ISD::VECREDUCE_AND:
6674 case ISD::VECREDUCE_OR:
6675 case ISD::VECREDUCE_XOR:
6676 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6677 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6678 return lowerVECREDUCE(Op, DAG);
6679 case ISD::VECREDUCE_FADD:
6680 case ISD::VECREDUCE_SEQ_FADD:
6681 case ISD::VECREDUCE_FMIN:
6682 case ISD::VECREDUCE_FMAX:
6683 case ISD::VECREDUCE_FMAXIMUM:
6684 case ISD::VECREDUCE_FMINIMUM:
6685 return lowerFPVECREDUCE(Op, DAG);
6686 case ISD::VP_REDUCE_ADD:
6687 case ISD::VP_REDUCE_UMAX:
6688 case ISD::VP_REDUCE_SMAX:
6689 case ISD::VP_REDUCE_UMIN:
6690 case ISD::VP_REDUCE_SMIN:
6691 case ISD::VP_REDUCE_FADD:
6692 case ISD::VP_REDUCE_SEQ_FADD:
6693 case ISD::VP_REDUCE_FMIN:
6694 case ISD::VP_REDUCE_FMAX:
6695 case ISD::VP_REDUCE_FMINIMUM:
6696 case ISD::VP_REDUCE_FMAXIMUM:
6697 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6698 (Subtarget.hasVInstructionsF16Minimal() &&
6699 !Subtarget.hasVInstructionsF16()))
6700 return SplitVectorReductionOp(Op, DAG);
6701 return lowerVPREDUCE(Op, DAG);
6702 case ISD::VP_REDUCE_AND:
6703 case ISD::VP_REDUCE_OR:
6704 case ISD::VP_REDUCE_XOR:
6705 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6706 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6707 return lowerVPREDUCE(Op, DAG);
6708 case ISD::VP_CTTZ_ELTS:
6709 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6710 return lowerVPCttzElements(Op, DAG);
6711 case ISD::UNDEF: {
6712 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6713 return convertFromScalableVector(Op.getSimpleValueType(),
6714 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6715 }
6716 case ISD::INSERT_SUBVECTOR:
6717 return lowerINSERT_SUBVECTOR(Op, DAG);
6718 case ISD::EXTRACT_SUBVECTOR:
6719 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6720 case ISD::VECTOR_DEINTERLEAVE:
6721 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6722 case ISD::VECTOR_INTERLEAVE:
6723 return lowerVECTOR_INTERLEAVE(Op, DAG);
6724 case ISD::STEP_VECTOR:
6725 return lowerSTEP_VECTOR(Op, DAG);
6726 case ISD::VECTOR_REVERSE:
6727 return lowerVECTOR_REVERSE(Op, DAG);
6728 case ISD::VECTOR_SPLICE:
6729 return lowerVECTOR_SPLICE(Op, DAG);
6730 case ISD::BUILD_VECTOR:
6731 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6732 case ISD::SPLAT_VECTOR:
6733 if ((Op.getValueType().getScalarType() == MVT::f16 &&
6734 (Subtarget.hasVInstructionsF16Minimal() &&
6735 Subtarget.hasStdExtZfhminOrZhinxmin() &&
6736 !Subtarget.hasVInstructionsF16())) ||
6737 (Op.getValueType().getScalarType() == MVT::bf16 &&
6738 (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) {
6739 if (Op.getValueType() == MVT::nxv32f16 ||
6740 Op.getValueType() == MVT::nxv32bf16)
6741 return SplitVectorOp(Op, DAG);
6742 SDLoc DL(Op);
6743 SDValue NewScalar =
6744 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6745 SDValue NewSplat = DAG.getNode(
6746 ISD::SPLAT_VECTOR, DL,
6747 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6748 NewScalar);
6749 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6750 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6751 }
6752 if (Op.getValueType().getVectorElementType() == MVT::i1)
6753 return lowerVectorMaskSplat(Op, DAG);
6754 return SDValue();
6755  case ISD::VECTOR_SHUFFLE:
6756    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6757 case ISD::CONCAT_VECTORS: {
6758 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6759 // better than going through the stack, as the default expansion does.
6760 SDLoc DL(Op);
6761 MVT VT = Op.getSimpleValueType();
6762 MVT ContainerVT = VT;
6763 if (VT.isFixedLengthVector())
6764 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6765
6766 // Recursively split concat_vectors with more than 2 operands:
6767 //
6768 // concat_vector op1, op2, op3, op4
6769 // ->
6770 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6771 //
6772 // This reduces the length of the chain of vslideups and allows us to
6773 // perform the vslideups at a smaller LMUL, limited to MF2.
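    // Illustrative example: concatenating four v8i32 operands into a v32i32
    // result first becomes
    //   concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
    // so the inner concats perform their vslideups on v16i32 halves and only
    // the final combine operates on the full-width type.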
6774 if (Op.getNumOperands() > 2 &&
6775 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6776 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6777      assert(isPowerOf2_32(Op.getNumOperands()));
6778      size_t HalfNumOps = Op.getNumOperands() / 2;
6779 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6780 Op->ops().take_front(HalfNumOps));
6781 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6782 Op->ops().drop_front(HalfNumOps));
6783 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6784 }
6785
6786 unsigned NumOpElts =
6787 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6788 SDValue Vec = DAG.getUNDEF(VT);
6789 for (const auto &OpIdx : enumerate(Op->ops())) {
6790 SDValue SubVec = OpIdx.value();
6791 // Don't insert undef subvectors.
6792 if (SubVec.isUndef())
6793 continue;
6794 Vec =
6795 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6796 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6797 }
6798 return Vec;
6799 }
6800 case ISD::LOAD:
6801 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6802 return V;
6803 if (Op.getValueType().isFixedLengthVector())
6804 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6805 return Op;
6806 case ISD::STORE:
6807 if (auto V = expandUnalignedRVVStore(Op, DAG))
6808 return V;
6809 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6810 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6811 return Op;
6812 case ISD::MLOAD:
6813 case ISD::VP_LOAD:
6814 return lowerMaskedLoad(Op, DAG);
6815 case ISD::MSTORE:
6816 case ISD::VP_STORE:
6817 return lowerMaskedStore(Op, DAG);
6818 case ISD::SELECT_CC: {
6819 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6820 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6821 // into separate SETCC+SELECT just like LegalizeDAG.
6822 SDValue Tmp1 = Op.getOperand(0);
6823 SDValue Tmp2 = Op.getOperand(1);
6824 SDValue True = Op.getOperand(2);
6825 SDValue False = Op.getOperand(3);
6826 EVT VT = Op.getValueType();
6827 SDValue CC = Op.getOperand(4);
6828 EVT CmpVT = Tmp1.getValueType();
6829 EVT CCVT =
6830 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6831 SDLoc DL(Op);
6832 SDValue Cond =
6833 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6834 return DAG.getSelect(DL, VT, Cond, True, False);
6835 }
6836 case ISD::SETCC: {
6837 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6838 if (OpVT.isScalarInteger()) {
6839 MVT VT = Op.getSimpleValueType();
6840 SDValue LHS = Op.getOperand(0);
6841 SDValue RHS = Op.getOperand(1);
6842 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6843 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6844 "Unexpected CondCode");
6845
6846 SDLoc DL(Op);
6847
6848 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6849 // convert this to the equivalent of (set(u)ge X, C+1) by using
6850 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6851 // in a register.
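      // Illustrative example (scalar integer): (setgt X, 5) is emitted as
      //   slti a0, a0, 6
      //   xori a0, a0, 1
      // instead of loading 5 into a register and comparing against it.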
6852 if (isa<ConstantSDNode>(RHS)) {
6853 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6854 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6855 // If this is an unsigned compare and the constant is -1, incrementing
6856 // the constant would change behavior. The result should be false.
6857 if (CCVal == ISD::SETUGT && Imm == -1)
6858 return DAG.getConstant(0, DL, VT);
6859 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6860 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6861 SDValue SetCC = DAG.getSetCC(
6862 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6863 return DAG.getLogicalNOT(DL, SetCC, VT);
6864 }
6865 }
6866
6867 // Not a constant we could handle, swap the operands and condition code to
6868 // SETLT/SETULT.
6869 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6870 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6871 }
6872
6873 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6874 (Subtarget.hasVInstructionsF16Minimal() &&
6875 !Subtarget.hasVInstructionsF16()))
6876 return SplitVectorOp(Op, DAG);
6877
6878 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6879 }
6880 case ISD::ADD:
6881 case ISD::SUB:
6882 case ISD::MUL:
6883 case ISD::MULHS:
6884 case ISD::MULHU:
6885 case ISD::AND:
6886 case ISD::OR:
6887 case ISD::XOR:
6888 case ISD::SDIV:
6889 case ISD::SREM:
6890 case ISD::UDIV:
6891 case ISD::UREM:
6892 case ISD::BSWAP:
6893 case ISD::CTPOP:
6894 return lowerToScalableOp(Op, DAG);
6895 case ISD::SHL:
6896 case ISD::SRA:
6897 case ISD::SRL:
6898 if (Op.getSimpleValueType().isFixedLengthVector())
6899 return lowerToScalableOp(Op, DAG);
6900 // This can be called for an i32 shift amount that needs to be promoted.
6901 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6902 "Unexpected custom legalisation");
6903 return SDValue();
6904 case ISD::FADD:
6905 case ISD::FSUB:
6906 case ISD::FMUL:
6907 case ISD::FDIV:
6908 case ISD::FNEG:
6909 case ISD::FABS:
6910 case ISD::FSQRT:
6911 case ISD::FMA:
6912 case ISD::FMINNUM:
6913 case ISD::FMAXNUM:
6914 if (Op.getValueType() == MVT::nxv32f16 &&
6915 (Subtarget.hasVInstructionsF16Minimal() &&
6916 !Subtarget.hasVInstructionsF16()))
6917 return SplitVectorOp(Op, DAG);
6918 [[fallthrough]];
6919 case ISD::AVGFLOORS:
6920 case ISD::AVGFLOORU:
6921 case ISD::AVGCEILS:
6922 case ISD::AVGCEILU:
6923 case ISD::SMIN:
6924 case ISD::SMAX:
6925 case ISD::UMIN:
6926 case ISD::UMAX:
6927 return lowerToScalableOp(Op, DAG);
6928 case ISD::UADDSAT:
6929 case ISD::USUBSAT:
6930 if (!Op.getValueType().isVector())
6931 return lowerUADDSAT_USUBSAT(Op, DAG);
6932 return lowerToScalableOp(Op, DAG);
6933 case ISD::SADDSAT:
6934 case ISD::SSUBSAT:
6935 if (!Op.getValueType().isVector())
6936 return lowerSADDSAT_SSUBSAT(Op, DAG);
6937 return lowerToScalableOp(Op, DAG);
6938 case ISD::ABDS:
6939 case ISD::ABDU: {
6940 SDLoc dl(Op);
6941 EVT VT = Op->getValueType(0);
6942 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6943 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6944 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6945
6946 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6947 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6948 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6949 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6950 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6951 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6952 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6953 }
6954 case ISD::ABS:
6955 case ISD::VP_ABS:
6956 return lowerABS(Op, DAG);
6957 case ISD::CTLZ:
6958  case ISD::CTLZ_ZERO_UNDEF:
6959  case ISD::CTTZ:
6960  case ISD::CTTZ_ZERO_UNDEF:
6961    if (Subtarget.hasStdExtZvbb())
6962 return lowerToScalableOp(Op, DAG);
6963 assert(Op.getOpcode() != ISD::CTTZ);
6964 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6965 case ISD::VSELECT:
6966 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6967 case ISD::FCOPYSIGN:
6968 if (Op.getValueType() == MVT::nxv32f16 &&
6969 (Subtarget.hasVInstructionsF16Minimal() &&
6970 !Subtarget.hasVInstructionsF16()))
6971 return SplitVectorOp(Op, DAG);
6972 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6973 case ISD::STRICT_FADD:
6974 case ISD::STRICT_FSUB:
6975 case ISD::STRICT_FMUL:
6976 case ISD::STRICT_FDIV:
6977 case ISD::STRICT_FSQRT:
6978 case ISD::STRICT_FMA:
6979 if (Op.getValueType() == MVT::nxv32f16 &&
6980 (Subtarget.hasVInstructionsF16Minimal() &&
6981 !Subtarget.hasVInstructionsF16()))
6982 return SplitStrictFPVectorOp(Op, DAG);
6983 return lowerToScalableOp(Op, DAG);
6984 case ISD::STRICT_FSETCC:
6985  case ISD::STRICT_FSETCCS:
6986    return lowerVectorStrictFSetcc(Op, DAG);
6987 case ISD::STRICT_FCEIL:
6988 case ISD::STRICT_FRINT:
6989 case ISD::STRICT_FFLOOR:
6990 case ISD::STRICT_FTRUNC:
6991  case ISD::STRICT_FNEARBYINT:
6992  case ISD::STRICT_FROUND:
6993  case ISD::STRICT_FROUNDEVEN:
6994    return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6995 case ISD::MGATHER:
6996 case ISD::VP_GATHER:
6997 return lowerMaskedGather(Op, DAG);
6998 case ISD::MSCATTER:
6999 case ISD::VP_SCATTER:
7000 return lowerMaskedScatter(Op, DAG);
7001 case ISD::GET_ROUNDING:
7002 return lowerGET_ROUNDING(Op, DAG);
7003 case ISD::SET_ROUNDING:
7004 return lowerSET_ROUNDING(Op, DAG);
7005 case ISD::EH_DWARF_CFA:
7006 return lowerEH_DWARF_CFA(Op, DAG);
7007 case ISD::VP_SELECT:
7008 case ISD::VP_MERGE:
7009 case ISD::VP_ADD:
7010 case ISD::VP_SUB:
7011 case ISD::VP_MUL:
7012 case ISD::VP_SDIV:
7013 case ISD::VP_UDIV:
7014 case ISD::VP_SREM:
7015 case ISD::VP_UREM:
7016 case ISD::VP_UADDSAT:
7017 case ISD::VP_USUBSAT:
7018 case ISD::VP_SADDSAT:
7019 case ISD::VP_SSUBSAT:
7020 case ISD::VP_LRINT:
7021 case ISD::VP_LLRINT:
7022 return lowerVPOp(Op, DAG);
7023 case ISD::VP_AND:
7024 case ISD::VP_OR:
7025 case ISD::VP_XOR:
7026 return lowerLogicVPOp(Op, DAG);
7027 case ISD::VP_FADD:
7028 case ISD::VP_FSUB:
7029 case ISD::VP_FMUL:
7030 case ISD::VP_FDIV:
7031 case ISD::VP_FNEG:
7032 case ISD::VP_FABS:
7033 case ISD::VP_SQRT:
7034 case ISD::VP_FMA:
7035 case ISD::VP_FMINNUM:
7036 case ISD::VP_FMAXNUM:
7037 case ISD::VP_FCOPYSIGN:
7038 if (Op.getValueType() == MVT::nxv32f16 &&
7039 (Subtarget.hasVInstructionsF16Minimal() &&
7040 !Subtarget.hasVInstructionsF16()))
7041 return SplitVPOp(Op, DAG);
7042 [[fallthrough]];
7043 case ISD::VP_SRA:
7044 case ISD::VP_SRL:
7045 case ISD::VP_SHL:
7046 return lowerVPOp(Op, DAG);
7047 case ISD::VP_IS_FPCLASS:
7048 return LowerIS_FPCLASS(Op, DAG);
7049 case ISD::VP_SIGN_EXTEND:
7050 case ISD::VP_ZERO_EXTEND:
7051 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7052 return lowerVPExtMaskOp(Op, DAG);
7053 return lowerVPOp(Op, DAG);
7054 case ISD::VP_TRUNCATE:
7055 return lowerVectorTruncLike(Op, DAG);
7056 case ISD::VP_FP_EXTEND:
7057 case ISD::VP_FP_ROUND:
7058 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7059 case ISD::VP_SINT_TO_FP:
7060 case ISD::VP_UINT_TO_FP:
7061 if (Op.getValueType().isVector() &&
7062 Op.getValueType().getScalarType() == MVT::f16 &&
7063 (Subtarget.hasVInstructionsF16Minimal() &&
7064 !Subtarget.hasVInstructionsF16())) {
7065 if (Op.getValueType() == MVT::nxv32f16)
7066 return SplitVPOp(Op, DAG);
7067 // int -> f32
7068 SDLoc DL(Op);
7069 MVT NVT =
7070 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7071 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7072 // f32 -> f16
7073 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7074 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7075 }
7076 [[fallthrough]];
7077 case ISD::VP_FP_TO_SINT:
7078 case ISD::VP_FP_TO_UINT:
7079 if (SDValue Op1 = Op.getOperand(0);
7080 Op1.getValueType().isVector() &&
7081 Op1.getValueType().getScalarType() == MVT::f16 &&
7082 (Subtarget.hasVInstructionsF16Minimal() &&
7083 !Subtarget.hasVInstructionsF16())) {
7084 if (Op1.getValueType() == MVT::nxv32f16)
7085 return SplitVPOp(Op, DAG);
7086 // f16 -> f32
7087 SDLoc DL(Op);
7088 MVT NVT = MVT::getVectorVT(MVT::f32,
7089 Op1.getValueType().getVectorElementCount());
7090 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7091 // f32 -> int
7092 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7093 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7094 }
7095 return lowerVPFPIntConvOp(Op, DAG);
7096 case ISD::VP_SETCC:
7097 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7098 (Subtarget.hasVInstructionsF16Minimal() &&
7099 !Subtarget.hasVInstructionsF16()))
7100 return SplitVPOp(Op, DAG);
7101 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7102 return lowerVPSetCCMaskOp(Op, DAG);
7103 [[fallthrough]];
7104 case ISD::VP_SMIN:
7105 case ISD::VP_SMAX:
7106 case ISD::VP_UMIN:
7107 case ISD::VP_UMAX:
7108 case ISD::VP_BITREVERSE:
7109 case ISD::VP_BSWAP:
7110 return lowerVPOp(Op, DAG);
7111 case ISD::VP_CTLZ:
7112 case ISD::VP_CTLZ_ZERO_UNDEF:
7113 if (Subtarget.hasStdExtZvbb())
7114 return lowerVPOp(Op, DAG);
7115 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7116 case ISD::VP_CTTZ:
7117 case ISD::VP_CTTZ_ZERO_UNDEF:
7118 if (Subtarget.hasStdExtZvbb())
7119 return lowerVPOp(Op, DAG);
7120 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7121 case ISD::VP_CTPOP:
7122 return lowerVPOp(Op, DAG);
7123 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7124 return lowerVPStridedLoad(Op, DAG);
7125 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7126 return lowerVPStridedStore(Op, DAG);
7127 case ISD::VP_FCEIL:
7128 case ISD::VP_FFLOOR:
7129 case ISD::VP_FRINT:
7130 case ISD::VP_FNEARBYINT:
7131 case ISD::VP_FROUND:
7132 case ISD::VP_FROUNDEVEN:
7133 case ISD::VP_FROUNDTOZERO:
7134 if (Op.getValueType() == MVT::nxv32f16 &&
7135 (Subtarget.hasVInstructionsF16Minimal() &&
7136 !Subtarget.hasVInstructionsF16()))
7137 return SplitVPOp(Op, DAG);
7138 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7139 case ISD::VP_FMAXIMUM:
7140 case ISD::VP_FMINIMUM:
7141 if (Op.getValueType() == MVT::nxv32f16 &&
7142 (Subtarget.hasVInstructionsF16Minimal() &&
7143 !Subtarget.hasVInstructionsF16()))
7144 return SplitVPOp(Op, DAG);
7145 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7146 case ISD::EXPERIMENTAL_VP_SPLICE:
7147 return lowerVPSpliceExperimental(Op, DAG);
7148 case ISD::EXPERIMENTAL_VP_REVERSE:
7149 return lowerVPReverseExperimental(Op, DAG);
7150 case ISD::CLEAR_CACHE: {
7151 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7152 "llvm.clear_cache only needs custom lower on Linux targets");
7153 SDLoc DL(Op);
7154 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7155 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7156 Op.getOperand(2), Flags, DL);
7157 }
7158 }
7159}
7160
7161SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7162 SDValue Start, SDValue End,
7163 SDValue Flags, SDLoc DL) const {
7164 MakeLibCallOptions CallOptions;
7165 std::pair<SDValue, SDValue> CallResult =
7166 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7167 {Start, End, Flags}, CallOptions, DL, InChain);
7168
7169 // This function returns void so only the out chain matters.
7170 return CallResult.second;
7171}
7172
7173static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7174                             SelectionDAG &DAG, unsigned Flags) {
7175 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7176}
7177
7178static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7179                             SelectionDAG &DAG, unsigned Flags) {
7180 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7181 Flags);
7182}
7183
7184static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7185                             SelectionDAG &DAG, unsigned Flags) {
7186 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7187 N->getOffset(), Flags);
7188}
7189
7190static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7191                             SelectionDAG &DAG, unsigned Flags) {
7192 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7193}
7194
7195template <class NodeTy>
7196SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7197 bool IsLocal, bool IsExternWeak) const {
7198 SDLoc DL(N);
7199 EVT Ty = getPointerTy(DAG.getDataLayout());
7200
7201 // When HWASAN is used and tagging of global variables is enabled
7202 // they should be accessed via the GOT, since the tagged address of a global
7203 // is incompatible with existing code models. This also applies to non-pic
7204 // mode.
7205 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7206 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7207 if (IsLocal && !Subtarget.allowTaggedGlobals())
7208 // Use PC-relative addressing to access the symbol. This generates the
7209 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7210 // %pcrel_lo(auipc)).
7211 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7212
7213 // Use PC-relative addressing to access the GOT for this symbol, then load
7214 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7215 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
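    // Illustrative assembly for the PseudoLGA expansion (label name is only
    // for illustration; lw is used instead of ld on RV32):
    //   .Lpcrel_hi0: auipc a0, %got_pcrel_hi(sym)
    //                ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)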
7216 SDValue Load =
7217 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7218    MachineFunction &MF = DAG.getMachineFunction();
7219    MachineMemOperand *MemOp = MF.getMachineMemOperand(
7220        MachinePointerInfo::getGOT(MF),
7221        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7222            MachineMemOperand::MODereferenceable,
7223        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7224 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7225 return Load;
7226 }
7227
7228 switch (getTargetMachine().getCodeModel()) {
7229 default:
7230 report_fatal_error("Unsupported code model for lowering");
7231 case CodeModel::Small: {
7232 // Generate a sequence for accessing addresses within the first 2 GiB of
7233 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
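    // Illustratively, for a symbol sym this lowers to:
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)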
7234 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7235 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7236 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7237 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7238 }
7239 case CodeModel::Medium: {
7240 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7241 if (IsExternWeak) {
7242 // An extern weak symbol may be undefined, i.e. have value 0, which may
7243 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7244 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7245 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7246 SDValue Load =
7247 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7248      MachineFunction &MF = DAG.getMachineFunction();
7249      MachineMemOperand *MemOp = MF.getMachineMemOperand(
7250          MachinePointerInfo::getGOT(MF),
7251          MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7252              MachineMemOperand::MODereferenceable,
7253          LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7254 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7255 return Load;
7256 }
7257
7258 // Generate a sequence for accessing addresses within any 2GiB range within
7259 // the address space. This generates the pattern (PseudoLLA sym), which
7260 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7261 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7262 }
7263 }
7264}
7265
7266SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7267 SelectionDAG &DAG) const {
7268 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7269 assert(N->getOffset() == 0 && "unexpected offset in global node");
7270 const GlobalValue *GV = N->getGlobal();
7271 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7272}
7273
7274SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7275 SelectionDAG &DAG) const {
7276 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7277
7278 return getAddr(N, DAG);
7279}
7280
7281SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7282 SelectionDAG &DAG) const {
7283 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7284
7285 return getAddr(N, DAG);
7286}
7287
7288SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7289 SelectionDAG &DAG) const {
7290 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7291
7292 return getAddr(N, DAG);
7293}
7294
7295SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7296 SelectionDAG &DAG,
7297 bool UseGOT) const {
7298 SDLoc DL(N);
7299 EVT Ty = getPointerTy(DAG.getDataLayout());
7300 const GlobalValue *GV = N->getGlobal();
7301 MVT XLenVT = Subtarget.getXLenVT();
7302
7303 if (UseGOT) {
7304 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7305 // load the address from the GOT and add the thread pointer. This generates
7306 // the pattern (PseudoLA_TLS_IE sym), which expands to
7307 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7308 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7309 SDValue Load =
7310 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7311    MachineFunction &MF = DAG.getMachineFunction();
7312    MachineMemOperand *MemOp = MF.getMachineMemOperand(
7313        MachinePointerInfo::getGOT(MF),
7314        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
7315            MachineMemOperand::MODereferenceable,
7316        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7317 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7318
7319 // Add the thread pointer.
7320 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7321 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7322 }
7323
7324 // Generate a sequence for accessing the address relative to the thread
7325 // pointer, with the appropriate adjustment for the thread pointer offset.
7326 // This generates the pattern
7327 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
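  // Illustratively, the emitted local-exec sequence is:
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)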
7328  SDValue AddrHi =
7329      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7330  SDValue AddrAdd =
7331      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7332  SDValue AddrLo =
7333      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7334
7335 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7336 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7337 SDValue MNAdd =
7338 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7339 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7340}
7341
7342SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7343 SelectionDAG &DAG) const {
7344 SDLoc DL(N);
7345 EVT Ty = getPointerTy(DAG.getDataLayout());
7346 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7347 const GlobalValue *GV = N->getGlobal();
7348
7349 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7350 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7351 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7352 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7353 SDValue Load =
7354 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7355
7356 // Prepare argument list to generate call.
7357  ArgListTy Args;
7358  ArgListEntry Entry;
7359 Entry.Node = Load;
7360 Entry.Ty = CallTy;
7361 Args.push_back(Entry);
7362
7363 // Setup call to __tls_get_addr.
7364  TargetLowering::CallLoweringInfo CLI(DAG);
7365  CLI.setDebugLoc(DL)
7366 .setChain(DAG.getEntryNode())
7367 .setLibCallee(CallingConv::C, CallTy,
7368 DAG.getExternalSymbol("__tls_get_addr", Ty),
7369 std::move(Args));
7370
7371 return LowerCallTo(CLI).first;
7372}
7373
7374SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7375 SelectionDAG &DAG) const {
7376 SDLoc DL(N);
7377 EVT Ty = getPointerTy(DAG.getDataLayout());
7378 const GlobalValue *GV = N->getGlobal();
7379
7380 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7381 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7382 //
7383 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7384 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7385 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7386 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7387 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7388 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7389}
7390
7391SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7392 SelectionDAG &DAG) const {
7393 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7394 assert(N->getOffset() == 0 && "unexpected offset in global node");
7395
7396 if (DAG.getTarget().useEmulatedTLS())
7397 return LowerToTLSEmulatedModel(N, DAG);
7398
7399  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7400
7401  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7402      CallingConv::GHC)
7403    report_fatal_error("In GHC calling convention TLS is not supported");
7404
7405 SDValue Addr;
7406 switch (Model) {
7407  case TLSModel::LocalExec:
7408    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7409 break;
7410  case TLSModel::InitialExec:
7411    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7412 break;
7413  case TLSModel::LocalDynamic:
7414  case TLSModel::GeneralDynamic:
7415    Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7416 : getDynamicTLSAddr(N, DAG);
7417 break;
7418 }
7419
7420 return Addr;
7421}
7422
7423// Return true if Val is equal to (setcc LHS, RHS, CC).
7424// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7425// Otherwise, return std::nullopt.
7426static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7427 ISD::CondCode CC, SDValue Val) {
7428 assert(Val->getOpcode() == ISD::SETCC);
7429 SDValue LHS2 = Val.getOperand(0);
7430 SDValue RHS2 = Val.getOperand(1);
7431 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7432
7433 if (LHS == LHS2 && RHS == RHS2) {
7434 if (CC == CC2)
7435 return true;
7436 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7437 return false;
7438 } else if (LHS == RHS2 && RHS == LHS2) {
7439    CC2 = ISD::getSetCCSwappedOperands(CC2);
7440    if (CC == CC2)
7441 return true;
7442 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7443 return false;
7444 }
7445
7446 return std::nullopt;
7447}
7448
7449static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7450                                    const RISCVSubtarget &Subtarget) {
7451 SDValue CondV = N->getOperand(0);
7452 SDValue TrueV = N->getOperand(1);
7453 SDValue FalseV = N->getOperand(2);
7454 MVT VT = N->getSimpleValueType(0);
7455 SDLoc DL(N);
7456
7457 if (!Subtarget.hasConditionalMoveFusion()) {
7458 // (select c, -1, y) -> -c | y
7459 if (isAllOnesConstant(TrueV)) {
7460 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7461 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7462 }
7463 // (select c, y, -1) -> (c-1) | y
7464 if (isAllOnesConstant(FalseV)) {
7465 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7466 DAG.getAllOnesConstant(DL, VT));
7467 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7468 }
7469
7470 // (select c, 0, y) -> (c-1) & y
7471 if (isNullConstant(TrueV)) {
7472 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7473 DAG.getAllOnesConstant(DL, VT));
7474 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7475 }
7476 // (select c, y, 0) -> -c & y
7477 if (isNullConstant(FalseV)) {
7478 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7479 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7480 }
7481 }
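    // Worked example of the folds above: with c known to be 0 or 1, -c is 0
    // when c == 0 and all-ones when c == 1, so (-c | y) yields y or -1 and
    // (-c & y) yields 0 or y, exactly matching the original selects.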
7482
7483 // select c, ~x, x --> xor -c, x
7484 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7485 const APInt &TrueVal = TrueV->getAsAPIntVal();
7486 const APInt &FalseVal = FalseV->getAsAPIntVal();
7487 if (~TrueVal == FalseVal) {
7488 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7489 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7490 }
7491 }
7492
7493 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7494 // when both truev and falsev are also setcc.
7495 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7496 FalseV.getOpcode() == ISD::SETCC) {
7497 SDValue LHS = CondV.getOperand(0);
7498 SDValue RHS = CondV.getOperand(1);
7499 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7500
7501 // (select x, x, y) -> x | y
7502 // (select !x, x, y) -> x & y
7503 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7504 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7505 DAG.getFreeze(FalseV));
7506 }
7507 // (select x, y, x) -> x & y
7508 // (select !x, y, x) -> x | y
7509 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7510 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7511 DAG.getFreeze(TrueV), FalseV);
7512 }
7513 }
7514
7515 return SDValue();
7516}
7517
7518// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7519// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7520// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7521// being `0` or `-1`. In such cases we can replace `select` with `and`.
7522// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7523// than `c0`?
7524static SDValue
7525foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7526                                const RISCVSubtarget &Subtarget) {
7527 if (Subtarget.hasShortForwardBranchOpt())
7528 return SDValue();
7529
7530 unsigned SelOpNo = 0;
7531 SDValue Sel = BO->getOperand(0);
7532 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7533 SelOpNo = 1;
7534 Sel = BO->getOperand(1);
7535 }
7536
7537 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7538 return SDValue();
7539
7540 unsigned ConstSelOpNo = 1;
7541 unsigned OtherSelOpNo = 2;
7542 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7543 ConstSelOpNo = 2;
7544 OtherSelOpNo = 1;
7545 }
7546 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7547 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7548 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7549 return SDValue();
7550
7551 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7552 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7553 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7554 return SDValue();
7555
7556 SDLoc DL(Sel);
7557 EVT VT = BO->getValueType(0);
7558
7559 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7560 if (SelOpNo == 1)
7561 std::swap(NewConstOps[0], NewConstOps[1]);
7562
7563 SDValue NewConstOp =
7564 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7565 if (!NewConstOp)
7566 return SDValue();
7567
7568 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7569 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7570 return SDValue();
7571
7572 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7573 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7574 if (SelOpNo == 1)
7575 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7576 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7577
7578 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7579 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7580 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7581}
7582
7583SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7584 SDValue CondV = Op.getOperand(0);
7585 SDValue TrueV = Op.getOperand(1);
7586 SDValue FalseV = Op.getOperand(2);
7587 SDLoc DL(Op);
7588 MVT VT = Op.getSimpleValueType();
7589 MVT XLenVT = Subtarget.getXLenVT();
7590
7591 // Lower vector SELECTs to VSELECTs by splatting the condition.
7592 if (VT.isVector()) {
7593 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7594 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7595 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7596 }
7597
7598 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7599 // nodes to implement the SELECT. Performing the lowering here allows for
7600 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7601 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7602 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7603 VT.isScalarInteger()) {
7604 // (select c, t, 0) -> (czero_eqz t, c)
7605 if (isNullConstant(FalseV))
7606 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7607 // (select c, 0, f) -> (czero_nez f, c)
7608 if (isNullConstant(TrueV))
7609 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7610
7611 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7612 if (TrueV.getOpcode() == ISD::AND &&
7613 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7614 return DAG.getNode(
7615 ISD::OR, DL, VT, TrueV,
7616 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7617 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7618 if (FalseV.getOpcode() == ISD::AND &&
7619 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7620 return DAG.getNode(
7621 ISD::OR, DL, VT, FalseV,
7622 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7623
7624 // Try some other optimizations before falling back to generic lowering.
7625 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7626 return V;
7627
7628 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7629 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
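    // Worked example: (select c, 3, 7) with TrueValCost <= FalseValCost
    // becomes (add (czero_nez (7 - 3), c), 3); czero_nez yields 0 when c is
    // nonzero (result 3) and 4 when c is zero (result 7).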
7630 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7631 const APInt &TrueVal = TrueV->getAsAPIntVal();
7632 const APInt &FalseVal = FalseV->getAsAPIntVal();
7633 const int TrueValCost = RISCVMatInt::getIntMatCost(
7634 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7635 const int FalseValCost = RISCVMatInt::getIntMatCost(
7636 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7637 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7638 SDValue LHSVal = DAG.getConstant(
7639 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7640 SDValue RHSVal =
7641 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7642 SDValue CMOV =
7643          DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7644                      DL, VT, LHSVal, CondV);
7645 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7646 }
7647
7648 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7649 // Unless we have the short forward branch optimization.
7650 if (!Subtarget.hasConditionalMoveFusion())
7651 return DAG.getNode(
7652 ISD::OR, DL, VT,
7653 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7654 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7655 }
7656
7657 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7658 return V;
7659
7660 if (Op.hasOneUse()) {
7661 unsigned UseOpc = Op->use_begin()->getOpcode();
7662 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7663 SDNode *BinOp = *Op->use_begin();
7664 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7665 DAG, Subtarget)) {
7666 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7667 return lowerSELECT(NewSel, DAG);
7668 }
7669 }
7670 }
7671
7672 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7673 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7674 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7675 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7676 if (FPTV && FPFV) {
7677 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7678 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7679 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7680 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7681 DAG.getConstant(1, DL, XLenVT));
7682 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7683 }
7684 }
7685
7686 // If the condition is not an integer SETCC which operates on XLenVT, we need
7687 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7688 // (select condv, truev, falsev)
7689 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7690 if (CondV.getOpcode() != ISD::SETCC ||
7691 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7692 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7693 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7694
7695 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7696
7697 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7698 }
7699
7700 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7701 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7702 // advantage of the integer compare+branch instructions. i.e.:
7703 // (select (setcc lhs, rhs, cc), truev, falsev)
7704 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7705 SDValue LHS = CondV.getOperand(0);
7706 SDValue RHS = CondV.getOperand(1);
7707 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7708
7709  // Special case for a select of 2 constants that have a difference of 1.
7710 // Normally this is done by DAGCombine, but if the select is introduced by
7711 // type legalization or op legalization, we miss it. Restricting to SETLT
7712 // case for now because that is what signed saturating add/sub need.
7713 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7714 // but we would probably want to swap the true/false values if the condition
7715 // is SETGE/SETLE to avoid an XORI.
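  // Worked example: (select (setlt a, b), 5, 4) becomes
  // (add (setlt a, b), 4), since the setcc is 1 when true and 0 when false.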
7716 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7717 CCVal == ISD::SETLT) {
7718 const APInt &TrueVal = TrueV->getAsAPIntVal();
7719 const APInt &FalseVal = FalseV->getAsAPIntVal();
7720 if (TrueVal - 1 == FalseVal)
7721 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7722 if (TrueVal + 1 == FalseVal)
7723 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7724 }
7725
7726 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7727 // 1 < x ? x : 1 -> 0 < x ? x : 1
7728 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7729 RHS == TrueV && LHS == FalseV) {
7730 LHS = DAG.getConstant(0, DL, VT);
7731 // 0 <u x is the same as x != 0.
7732 if (CCVal == ISD::SETULT) {
7733 std::swap(LHS, RHS);
7734 CCVal = ISD::SETNE;
7735 }
7736 }
7737
7738 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7739 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7740 RHS == FalseV) {
7741 RHS = DAG.getConstant(0, DL, VT);
7742 }
7743
7744 SDValue TargetCC = DAG.getCondCode(CCVal);
7745
7746 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7747 // (select (setcc lhs, rhs, CC), constant, falsev)
7748 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7749 std::swap(TrueV, FalseV);
7750 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7751 }
7752
7753 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7754 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7755}
7756
7757SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7758 SDValue CondV = Op.getOperand(1);
7759 SDLoc DL(Op);
7760 MVT XLenVT = Subtarget.getXLenVT();
7761
7762 if (CondV.getOpcode() == ISD::SETCC &&
7763 CondV.getOperand(0).getValueType() == XLenVT) {
7764 SDValue LHS = CondV.getOperand(0);
7765 SDValue RHS = CondV.getOperand(1);
7766 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7767
7768 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7769
7770 SDValue TargetCC = DAG.getCondCode(CCVal);
7771 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7772 LHS, RHS, TargetCC, Op.getOperand(2));
7773 }
7774
7775 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7776 CondV, DAG.getConstant(0, DL, XLenVT),
7777 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7778}
7779
7780SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7781  MachineFunction &MF = DAG.getMachineFunction();
7782  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7783
7784 SDLoc DL(Op);
7785 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7786                                 getPointerTy(MF.getDataLayout()));
7787
7788 // vastart just stores the address of the VarArgsFrameIndex slot into the
7789 // memory location argument.
7790 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7791 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7792 MachinePointerInfo(SV));
7793}
7794
7795SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7796 SelectionDAG &DAG) const {
7797 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7798  MachineFunction &MF = DAG.getMachineFunction();
7799  MachineFrameInfo &MFI = MF.getFrameInfo();
7800 MFI.setFrameAddressIsTaken(true);
7801 Register FrameReg = RI.getFrameRegister(MF);
7802 int XLenInBytes = Subtarget.getXLen() / 8;
7803
7804 EVT VT = Op.getValueType();
7805 SDLoc DL(Op);
7806 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7807 unsigned Depth = Op.getConstantOperandVal(0);
7808 while (Depth--) {
7809 int Offset = -(XLenInBytes * 2);
7810 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7811                              DAG.getIntPtrConstant(Offset, DL));
7812    FrameAddr =
7813 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7814 }
7815 return FrameAddr;
7816}
7817
7818SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7819 SelectionDAG &DAG) const {
7820 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7821  MachineFunction &MF = DAG.getMachineFunction();
7822  MachineFrameInfo &MFI = MF.getFrameInfo();
7823 MFI.setReturnAddressIsTaken(true);
7824 MVT XLenVT = Subtarget.getXLenVT();
7825 int XLenInBytes = Subtarget.getXLen() / 8;
7826
7827  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7828    return SDValue();
7829
7830 EVT VT = Op.getValueType();
7831 SDLoc DL(Op);
7832 unsigned Depth = Op.getConstantOperandVal(0);
7833 if (Depth) {
7834 int Off = -XLenInBytes;
7835 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7836 SDValue Offset = DAG.getConstant(Off, DL, VT);
7837 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7838 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7839                       MachinePointerInfo());
7840
7841
7842 // Return the value of the return address register, marking it an implicit
7843 // live-in.
7844 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7845 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7846}
7847
7848SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7849 SelectionDAG &DAG) const {
7850 SDLoc DL(Op);
7851 SDValue Lo = Op.getOperand(0);
7852 SDValue Hi = Op.getOperand(1);
7853 SDValue Shamt = Op.getOperand(2);
7854 EVT VT = Lo.getValueType();
7855
7856 // if Shamt-XLEN < 0: // Shamt < XLEN
7857 // Lo = Lo << Shamt
7858 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7859 // else:
7860 // Lo = 0
7861 // Hi = Lo << (Shamt-XLEN)
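  // Worked example (XLEN=32): for Shamt=40, Shamt-XLEN=8 is non-negative, so
  // Lo becomes 0 and Hi becomes the original Lo shifted left by 8. For
  // Shamt=4, Lo is shifted by 4 and Hi receives the top 4 bits of the
  // original Lo in its low bits.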
7862
7863 SDValue Zero = DAG.getConstant(0, DL, VT);
7864 SDValue One = DAG.getConstant(1, DL, VT);
7865 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7866 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7867 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7868 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7869
7870 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7871 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7872 SDValue ShiftRightLo =
7873 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7874 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7875 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7876 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7877
7878 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7879
7880 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7881 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7882
7883 SDValue Parts[2] = {Lo, Hi};
7884 return DAG.getMergeValues(Parts, DL);
7885}
7886
7887SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7888 bool IsSRA) const {
7889 SDLoc DL(Op);
7890 SDValue Lo = Op.getOperand(0);
7891 SDValue Hi = Op.getOperand(1);
7892 SDValue Shamt = Op.getOperand(2);
7893 EVT VT = Lo.getValueType();
7894
7895 // SRA expansion:
7896 // if Shamt-XLEN < 0: // Shamt < XLEN
7897 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7898 // Hi = Hi >>s Shamt
7899 // else:
7900 // Lo = Hi >>s (Shamt-XLEN);
7901 // Hi = Hi >>s (XLEN-1)
7902 //
7903 // SRL expansion:
7904 // if Shamt-XLEN < 0: // Shamt < XLEN
7905 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7906 // Hi = Hi >>u Shamt
7907 // else:
7908 // Lo = Hi >>u (Shamt-XLEN);
7909 // Hi = 0;
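  // Worked example (XLEN=32, SRL): for Shamt=40, Lo becomes Hi >>u 8 and Hi
  // becomes 0; for SRA, Lo becomes Hi >>s 8 and Hi is filled with copies of
  // the sign bit.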
7910
7911 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7912
7913 SDValue Zero = DAG.getConstant(0, DL, VT);
7914 SDValue One = DAG.getConstant(1, DL, VT);
7915 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7916 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7917 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7918 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7919
7920 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7921 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7922 SDValue ShiftLeftHi =
7923 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7924 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7925 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7926 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7927 SDValue HiFalse =
7928 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7929
7930 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7931
7932 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7933 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7934
7935 SDValue Parts[2] = {Lo, Hi};
7936 return DAG.getMergeValues(Parts, DL);
7937}
7938
7939// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7940// legal equivalently-sized i8 type, so we can use that as a go-between.
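// Illustrative DAG for a non-constant scalar c:
//   (splat_vector vXi1 c) -> (setcc (and (splat_vector vXi8 c), 1), 0, ne)
// so bit 0 of c decides whether every mask element is set.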
7941SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7942 SelectionDAG &DAG) const {
7943 SDLoc DL(Op);
7944 MVT VT = Op.getSimpleValueType();
7945 SDValue SplatVal = Op.getOperand(0);
7946 // All-zeros or all-ones splats are handled specially.
7947 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7948 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7949 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7950 }
7951 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7952 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7953 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7954 }
7955 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7956 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7957 DAG.getConstant(1, DL, SplatVal.getValueType()));
7958 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7959 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7960 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7961}
7962
7963// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7964// illegal (currently only vXi64 RV32).
7965// FIXME: We could also catch non-constant sign-extended i32 values and lower
7966// them to VMV_V_X_VL.
7967SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7968 SelectionDAG &DAG) const {
7969 SDLoc DL(Op);
7970 MVT VecVT = Op.getSimpleValueType();
7971 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7972 "Unexpected SPLAT_VECTOR_PARTS lowering");
7973
7974 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7975 SDValue Lo = Op.getOperand(0);
7976 SDValue Hi = Op.getOperand(1);
7977
7978 MVT ContainerVT = VecVT;
7979 if (VecVT.isFixedLengthVector())
7980 ContainerVT = getContainerForFixedLengthVector(VecVT);
7981
7982 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7983
7984 SDValue Res =
7985 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7986
7987 if (VecVT.isFixedLengthVector())
7988 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7989
7990 return Res;
7991}
7992
7993// Custom-lower extensions from mask vectors by using a vselect either with 1
7994// for zero/any-extension or -1 for sign-extension:
7995// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7996// Note that any-extension is lowered identically to zero-extension.
7997SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7998 int64_t ExtTrueVal) const {
7999 SDLoc DL(Op);
8000 MVT VecVT = Op.getSimpleValueType();
8001 SDValue Src = Op.getOperand(0);
8002 // Only custom-lower extensions from mask types
8003 assert(Src.getValueType().isVector() &&
8004 Src.getValueType().getVectorElementType() == MVT::i1);
8005
8006 if (VecVT.isScalableVector()) {
8007 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8008 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
8009 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8010 }
8011
8012 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8013 MVT I1ContainerVT =
8014 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8015
8016 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8017
8018 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8019
8020 MVT XLenVT = Subtarget.getXLenVT();
8021 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8022 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
8023
8024 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8025 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8026 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8027 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8028 SDValue Select =
8029 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8030 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8031
8032 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8033}
8034
8035SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8036 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8037 MVT ExtVT = Op.getSimpleValueType();
8038 // Only custom-lower extensions from fixed-length vector types.
8039 if (!ExtVT.isFixedLengthVector())
8040 return Op;
8041 MVT VT = Op.getOperand(0).getSimpleValueType();
8042 // Grab the canonical container type for the extended type. Infer the smaller
8043 // type from that to ensure the same number of vector elements, as we know
8044 // the LMUL will be sufficient to hold the smaller type.
8045 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8046 // Get the extended container type manually to ensure the same number of
8047 // vector elements between source and dest.
8048 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8049 ContainerExtVT.getVectorElementCount());
8050
8051 SDValue Op1 =
8052 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8053
8054 SDLoc DL(Op);
8055 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8056
8057 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8058
8059 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8060}
8061
8062// Custom-lower truncations from vectors to mask vectors by using a mask and a
8063// setcc operation:
8064// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8065SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8066 SelectionDAG &DAG) const {
8067 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8068 SDLoc DL(Op);
8069 EVT MaskVT = Op.getValueType();
8070 // Only expect to custom-lower truncations to mask types
8071 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8072 "Unexpected type for vector mask lowering");
8073 SDValue Src = Op.getOperand(0);
8074 MVT VecVT = Src.getSimpleValueType();
8075 SDValue Mask, VL;
8076 if (IsVPTrunc) {
8077 Mask = Op.getOperand(1);
8078 VL = Op.getOperand(2);
8079 }
8080 // If this is a fixed vector, we need to convert it to a scalable vector.
8081 MVT ContainerVT = VecVT;
8082
8083 if (VecVT.isFixedLengthVector()) {
8084 ContainerVT = getContainerForFixedLengthVector(VecVT);
8085 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8086 if (IsVPTrunc) {
8087 MVT MaskContainerVT =
8088 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8089 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8090 }
8091 }
8092
8093 if (!IsVPTrunc) {
8094 std::tie(Mask, VL) =
8095 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8096 }
8097
8098 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8099 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8100
8101 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8102 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8103 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8104 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8105
8106 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8107 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8108 DAG.getUNDEF(ContainerVT), Mask, VL);
8109 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8110 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8111 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8112 if (MaskVT.isFixedLengthVector())
8113 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8114 return Trunc;
8115}
8116
8117SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8118 SelectionDAG &DAG) const {
8119 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8120 SDLoc DL(Op);
8121
8122 MVT VT = Op.getSimpleValueType();
8123 // Only custom-lower vector truncates
8124 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8125
8126 // Truncates to mask types are handled differently
8127 if (VT.getVectorElementType() == MVT::i1)
8128 return lowerVectorMaskTruncLike(Op, DAG);
8129
8130 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8131 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8132 // truncate by one power of two at a time.
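  // Illustrative example: truncating nxv2i64 to nxv2i8 emits three
  // TRUNCATE_VECTOR_VL nodes, narrowing i64->i32->i16->i8 one step at a time.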
8133 MVT DstEltVT = VT.getVectorElementType();
8134
8135 SDValue Src = Op.getOperand(0);
8136 MVT SrcVT = Src.getSimpleValueType();
8137 MVT SrcEltVT = SrcVT.getVectorElementType();
8138
8139 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8140 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8141 "Unexpected vector truncate lowering");
8142
8143 MVT ContainerVT = SrcVT;
8144 SDValue Mask, VL;
8145 if (IsVPTrunc) {
8146 Mask = Op.getOperand(1);
8147 VL = Op.getOperand(2);
8148 }
8149 if (SrcVT.isFixedLengthVector()) {
8150 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8151 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8152 if (IsVPTrunc) {
8153 MVT MaskVT = getMaskTypeFor(ContainerVT);
8154 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8155 }
8156 }
8157
8158 SDValue Result = Src;
8159 if (!IsVPTrunc) {
8160 std::tie(Mask, VL) =
8161 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8162 }
8163
8164 LLVMContext &Context = *DAG.getContext();
8165 const ElementCount Count = ContainerVT.getVectorElementCount();
8166 do {
8167 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8168 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8169 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8170 Mask, VL);
8171 } while (SrcEltVT != DstEltVT);
8172
8173 if (SrcVT.isFixedLengthVector())
8174 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8175
8176 return Result;
8177}
8178
8179SDValue
8180RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8181 SelectionDAG &DAG) const {
8182 SDLoc DL(Op);
8183 SDValue Chain = Op.getOperand(0);
8184 SDValue Src = Op.getOperand(1);
8185 MVT VT = Op.getSimpleValueType();
8186 MVT SrcVT = Src.getSimpleValueType();
8187 MVT ContainerVT = VT;
8188 if (VT.isFixedLengthVector()) {
8189 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8190 ContainerVT =
8191 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8192 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8193 }
8194
8195 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8196
8197  // RVV can only widen/truncate fp to types double/half the size of the source.
8198 if ((VT.getVectorElementType() == MVT::f64 &&
8199 (SrcVT.getVectorElementType() == MVT::f16 ||
8200 SrcVT.getVectorElementType() == MVT::bf16)) ||
8201 ((VT.getVectorElementType() == MVT::f16 ||
8202 VT.getVectorElementType() == MVT::bf16) &&
8203 SrcVT.getVectorElementType() == MVT::f64)) {
8204 // For double rounding, the intermediate rounding should be round-to-odd.
8205 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8206                                ? RISCVISD::STRICT_FP_EXTEND_VL
8207                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
8208    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8209 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8210 Chain, Src, Mask, VL);
8211 Chain = Src.getValue(1);
8212 }
8213
8214 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8215                          ? RISCVISD::STRICT_FP_EXTEND_VL
8216                          : RISCVISD::STRICT_FP_ROUND_VL;
8217  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8218 Chain, Src, Mask, VL);
8219 if (VT.isFixedLengthVector()) {
8220    // StrictFP operations have two result values. Their lowered result should
8221    // have the same result count.
8222 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8223 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8224 }
8225 return Res;
8226}
8227
8228SDValue
8229RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8230 SelectionDAG &DAG) const {
8231 bool IsVP =
8232 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8233 bool IsExtend =
8234 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8235  // RVV can only truncate fp to types half the size of the source. We
8236 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8237 // conversion instruction.
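  // Illustrative example: an fp_round from nxv2f64 to nxv2f16 is emitted as a
  // round-to-odd f64->f32 narrowing convert (vfncvt.rod.f.f.w) followed by a
  // normal f32->f16 narrowing convert; rounding to odd in the first step
  // avoids double-rounding error in the final result.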
8238 SDLoc DL(Op);
8239 MVT VT = Op.getSimpleValueType();
8240
8241 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8242
8243 SDValue Src = Op.getOperand(0);
8244 MVT SrcVT = Src.getSimpleValueType();
8245
8246 bool IsDirectExtend =
8247 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8248 (SrcVT.getVectorElementType() != MVT::f16 &&
8249 SrcVT.getVectorElementType() != MVT::bf16));
8250 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8251 VT.getVectorElementType() != MVT::bf16) ||
8252 SrcVT.getVectorElementType() != MVT::f64);
8253
8254 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8255
8256 // Prepare any fixed-length vector operands.
8257 MVT ContainerVT = VT;
8258 SDValue Mask, VL;
8259 if (IsVP) {
8260 Mask = Op.getOperand(1);
8261 VL = Op.getOperand(2);
8262 }
8263 if (VT.isFixedLengthVector()) {
8264 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8265 ContainerVT =
8266 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8267 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8268 if (IsVP) {
8269 MVT MaskVT = getMaskTypeFor(ContainerVT);
8270 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8271 }
8272 }
8273
8274 if (!IsVP)
8275 std::tie(Mask, VL) =
8276 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8277
8278 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8279
8280 if (IsDirectConv) {
8281 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8282 if (VT.isFixedLengthVector())
8283 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8284 return Src;
8285 }
8286
8287  unsigned InterConvOpc =
8288      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8289
8290 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8291 SDValue IntermediateConv =
8292 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8293 SDValue Result =
8294 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8295 if (VT.isFixedLengthVector())
8296 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8297 return Result;
8298}
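// Illustrative note (editorial sketch, not from the upstream source): for the
// non-direct case an f64 -> f16 FP_ROUND is therefore emitted in two steps, a
// narrowing convert to f32 using round-to-odd (vfncvt.rod.f.f.w) followed by
// an ordinary narrowing convert to f16 (vfncvt.f.f.w); the round-to-odd
// intermediate avoids introducing double-rounding errors.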
8299
8300// Given a scalable vector type and an index into it, returns the type for the
8301// smallest subvector that the index fits in. This can be used to reduce LMUL
8302// for operations like vslidedown.
8303//
8304// E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
8305static std::optional<MVT>
8306getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8307 const RISCVSubtarget &Subtarget) {
8308 assert(VecVT.isScalableVector());
8309 const unsigned EltSize = VecVT.getScalarSizeInBits();
8310 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8311 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8312 MVT SmallerVT;
8313 if (MaxIdx < MinVLMAX)
8314 SmallerVT = getLMUL1VT(VecVT);
8315 else if (MaxIdx < MinVLMAX * 2)
8316 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8317 else if (MaxIdx < MinVLMAX * 4)
8318    SmallerVT = getLMUL1VT(VecVT)
8319                    .getDoubleNumVectorElementsVT()
8320                    .getDoubleNumVectorElementsVT();
8321 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8322 return std::nullopt;
8323 return SmallerVT;
8324}
8325
8326// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8327// first position of a vector, and that vector is slid up to the insert index.
8328// By limiting the active vector length to index+1 and merging with the
8329// original vector (with an undisturbed tail policy for elements >= VL), we
8330// achieve the desired result of leaving all elements untouched except the one
8331// at VL-1, which is replaced with the desired value.
8332SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8333 SelectionDAG &DAG) const {
8334 SDLoc DL(Op);
8335 MVT VecVT = Op.getSimpleValueType();
8336 SDValue Vec = Op.getOperand(0);
8337 SDValue Val = Op.getOperand(1);
8338 SDValue Idx = Op.getOperand(2);
8339
8340 if (VecVT.getVectorElementType() == MVT::i1) {
8341 // FIXME: For now we just promote to an i8 vector and insert into that,
8342 // but this is probably not optimal.
8343 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8344 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8345 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8346 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8347 }
8348
8349 MVT ContainerVT = VecVT;
8350 // If the operand is a fixed-length vector, convert to a scalable one.
8351 if (VecVT.isFixedLengthVector()) {
8352 ContainerVT = getContainerForFixedLengthVector(VecVT);
8353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8354 }
8355
8356 // If we know the index we're going to insert at, we can shrink Vec so that
8357 // we're performing the scalar inserts and slideup on a smaller LMUL.
8358 MVT OrigContainerVT = ContainerVT;
8359 SDValue OrigVec = Vec;
8360 SDValue AlignedIdx;
8361 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8362 const unsigned OrigIdx = IdxC->getZExtValue();
8363 // Do we know an upper bound on LMUL?
8364 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8365 DL, DAG, Subtarget)) {
8366 ContainerVT = *ShrunkVT;
8367 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8368 }
8369
8370 // If we're compiling for an exact VLEN value, we can always perform
8371 // the insert in m1 as we can determine the register corresponding to
8372 // the index in the register group.
8373 const MVT M1VT = getLMUL1VT(ContainerVT);
8374 if (auto VLEN = Subtarget.getRealVLen();
8375 VLEN && ContainerVT.bitsGT(M1VT)) {
8376 EVT ElemVT = VecVT.getVectorElementType();
8377 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8378 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8379 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8380 unsigned ExtractIdx =
8381 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8382 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8383 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8384 ContainerVT = M1VT;
8385 }
8386
8387 if (AlignedIdx)
8388 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8389 AlignedIdx);
8390 }
8391
8392 MVT XLenVT = Subtarget.getXLenVT();
8393
8394 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8395 // Even i64-element vectors on RV32 can be lowered without scalar
8396 // legalization if the most-significant 32 bits of the value are not affected
8397 // by the sign-extension of the lower 32 bits.
8398 // TODO: We could also catch sign extensions of a 32-bit value.
8399 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8400 const auto *CVal = cast<ConstantSDNode>(Val);
8401 if (isInt<32>(CVal->getSExtValue())) {
8402 IsLegalInsert = true;
8403 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8404 }
8405 }
8406
8407 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8408
8409 SDValue ValInVec;
8410
8411 if (IsLegalInsert) {
8412    unsigned Opc =
8413        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8414 if (isNullConstant(Idx)) {
8415 if (!VecVT.isFloatingPoint())
8416 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8417 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8418
8419 if (AlignedIdx)
8420 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8421 Vec, AlignedIdx);
8422 if (!VecVT.isFixedLengthVector())
8423 return Vec;
8424 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8425 }
8426 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8427 } else {
8428 // On RV32, i64-element vectors must be specially handled to place the
8429 // value at element 0, by using two vslide1down instructions in sequence on
8430 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8431 // this.
8432 SDValue ValLo, ValHi;
8433 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8434 MVT I32ContainerVT =
8435 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8436 SDValue I32Mask =
8437 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8438 // Limit the active VL to two.
8439 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8440 // If the Idx is 0 we can insert directly into the vector.
8441 if (isNullConstant(Idx)) {
8442      // First slide in the lo value, then the hi value above it. We use slide1down
8443 // to avoid the register group overlap constraint of vslide1up.
8444 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8445 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8446 // If the source vector is undef don't pass along the tail elements from
8447 // the previous slide1down.
8448 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8449 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8450 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8451 // Bitcast back to the right container type.
8452 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8453
8454 if (AlignedIdx)
8455 ValInVec =
8456 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8457 ValInVec, AlignedIdx);
8458 if (!VecVT.isFixedLengthVector())
8459 return ValInVec;
8460 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8461 }
8462
8463    // First slide in the lo value, then the hi value above it. We use slide1down
8464 // to avoid the register group overlap constraint of vslide1up.
8465 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8466 DAG.getUNDEF(I32ContainerVT),
8467 DAG.getUNDEF(I32ContainerVT), ValLo,
8468 I32Mask, InsertI64VL);
8469 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8470 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8471 I32Mask, InsertI64VL);
8472 // Bitcast back to the right container type.
8473 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8474 }
8475
8476 // Now that the value is in a vector, slide it into position.
8477 SDValue InsertVL =
8478 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8479
8480  // Use tail agnostic policy if Idx is the last index of Vec.
8481  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8482 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8483 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8484 Policy = RISCVII::TAIL_AGNOSTIC;
8485 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8486 Idx, Mask, InsertVL, Policy);
8487
8488 if (AlignedIdx)
8489 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8490 Slideup, AlignedIdx);
8491 if (!VecVT.isFixedLengthVector())
8492 return Slideup;
8493 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8494}
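// Illustrative note (editorial sketch, assumed codegen, not from the upstream
// source): inserting a GPR value at constant index 2 of a v4i32 typically
// becomes something like
//   vsetivli    zero, 3, e32, m1, tu, ma   ; VL = index + 1, tail undisturbed
//   vmv.s.x     v9, a0                     ; value into element 0 of a temp
//   vslideup.vi v8, v9, 2                  ; slide it up to the insert index
// matching the strategy described in the comment above the function.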
8495
8496// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8497// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8498// types this is done using VMV_X_S to allow us to glean information about the
8499// sign bits of the result.
8500SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8501 SelectionDAG &DAG) const {
8502 SDLoc DL(Op);
8503 SDValue Idx = Op.getOperand(1);
8504 SDValue Vec = Op.getOperand(0);
8505 EVT EltVT = Op.getValueType();
8506 MVT VecVT = Vec.getSimpleValueType();
8507 MVT XLenVT = Subtarget.getXLenVT();
8508
8509 if (VecVT.getVectorElementType() == MVT::i1) {
8510 // Use vfirst.m to extract the first bit.
8511 if (isNullConstant(Idx)) {
8512 MVT ContainerVT = VecVT;
8513 if (VecVT.isFixedLengthVector()) {
8514 ContainerVT = getContainerForFixedLengthVector(VecVT);
8515 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8516 }
8517 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8518 SDValue Vfirst =
8519 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8520 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8521 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8522 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8523 }
8524 if (VecVT.isFixedLengthVector()) {
8525 unsigned NumElts = VecVT.getVectorNumElements();
8526 if (NumElts >= 8) {
8527 MVT WideEltVT;
8528 unsigned WidenVecLen;
8529 SDValue ExtractElementIdx;
8530 SDValue ExtractBitIdx;
8531 unsigned MaxEEW = Subtarget.getELen();
8532 MVT LargestEltVT = MVT::getIntegerVT(
8533 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8534 if (NumElts <= LargestEltVT.getSizeInBits()) {
8535 assert(isPowerOf2_32(NumElts) &&
8536 "the number of elements should be power of 2");
8537 WideEltVT = MVT::getIntegerVT(NumElts);
8538 WidenVecLen = 1;
8539 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8540 ExtractBitIdx = Idx;
8541 } else {
8542 WideEltVT = LargestEltVT;
8543 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8544 // extract element index = index / element width
8545 ExtractElementIdx = DAG.getNode(
8546 ISD::SRL, DL, XLenVT, Idx,
8547 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8548 // mask bit index = index % element width
8549 ExtractBitIdx = DAG.getNode(
8550 ISD::AND, DL, XLenVT, Idx,
8551 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8552 }
8553 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8554 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8555 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8556 Vec, ExtractElementIdx);
8557 // Extract the bit from GPR.
8558 SDValue ShiftRight =
8559 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8560 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8561 DAG.getConstant(1, DL, XLenVT));
8562 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8563 }
8564 }
8565 // Otherwise, promote to an i8 vector and extract from that.
8566 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8567 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8568 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8569 }
8570
8571 // If this is a fixed vector, we need to convert it to a scalable vector.
8572 MVT ContainerVT = VecVT;
8573 if (VecVT.isFixedLengthVector()) {
8574 ContainerVT = getContainerForFixedLengthVector(VecVT);
8575 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8576 }
8577
8578 // If we're compiling for an exact VLEN value and we have a known
8579 // constant index, we can always perform the extract in m1 (or
8580 // smaller) as we can determine the register corresponding to
8581 // the index in the register group.
8582 const auto VLen = Subtarget.getRealVLen();
8583 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8584 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8585 MVT M1VT = getLMUL1VT(ContainerVT);
8586 unsigned OrigIdx = IdxC->getZExtValue();
8587 EVT ElemVT = VecVT.getVectorElementType();
8588 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8589 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8590 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8591 unsigned ExtractIdx =
8592 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8593 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8594 DAG.getVectorIdxConstant(ExtractIdx, DL));
8595 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8596 ContainerVT = M1VT;
8597 }
8598
8599 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8600 // contains our index.
8601 std::optional<uint64_t> MaxIdx;
8602 if (VecVT.isFixedLengthVector())
8603 MaxIdx = VecVT.getVectorNumElements() - 1;
8604 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8605 MaxIdx = IdxC->getZExtValue();
8606 if (MaxIdx) {
8607 if (auto SmallerVT =
8608 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8609 ContainerVT = *SmallerVT;
8610 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8611 DAG.getConstant(0, DL, XLenVT));
8612 }
8613 }
8614
8615 // If after narrowing, the required slide is still greater than LMUL2,
8616 // fallback to generic expansion and go through the stack. This is done
8617 // for a subtle reason: extracting *all* elements out of a vector is
8618 // widely expected to be linear in vector size, but because vslidedown
8619 // is linear in LMUL, performing N extracts using vslidedown becomes
8620 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8621 // seems to have the same problem (the store is linear in LMUL), but the
8622 // generic expansion *memoizes* the store, and thus for many extracts of
8623 // the same vector we end up with one store and a bunch of loads.
8624 // TODO: We don't have the same code for insert_vector_elt because we
8625 // have BUILD_VECTOR and handle the degenerate case there. Should we
8626 // consider adding an inverse BUILD_VECTOR node?
8627 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8628 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8629 return SDValue();
8630
8631 // If the index is 0, the vector is already in the right position.
8632 if (!isNullConstant(Idx)) {
8633 // Use a VL of 1 to avoid processing more elements than we need.
8634 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8635 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8636 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8637 }
8638
8639 if (!EltVT.isInteger()) {
8640 // Floating-point extracts are handled in TableGen.
8641 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8642 DAG.getVectorIdxConstant(0, DL));
8643 }
8644
8645 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8646 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8647}
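// Illustrative note (editorial sketch, assumed codegen, not from the upstream
// source): extracting element 2 of a v4i32 is lowered roughly as
//   vsetivli      zero, 1, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 2
//   vmv.x.s       a0, v8
// i.e. slide the wanted element down to position 0 and read it with vmv.x.s.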
8648
8649// Some RVV intrinsics may claim that they want an integer operand to be
8650// promoted or expanded.
8651static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8652                                           const RISCVSubtarget &Subtarget) {
8653 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8654 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8655 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8656 "Unexpected opcode");
8657
8658 if (!Subtarget.hasVInstructions())
8659 return SDValue();
8660
8661 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8662 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8663 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8664
8665 SDLoc DL(Op);
8666
8667  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8668 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8669 if (!II || !II->hasScalarOperand())
8670 return SDValue();
8671
8672 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8673 assert(SplatOp < Op.getNumOperands());
8674
8675 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8676 SDValue &ScalarOp = Operands[SplatOp];
8677 MVT OpVT = ScalarOp.getSimpleValueType();
8678 MVT XLenVT = Subtarget.getXLenVT();
8679
8680 // If this isn't a scalar, or its type is XLenVT we're done.
8681 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8682 return SDValue();
8683
8684 // Simplest case is that the operand needs to be promoted to XLenVT.
8685 if (OpVT.bitsLT(XLenVT)) {
8686 // If the operand is a constant, sign extend to increase our chances
8687 // of being able to use a .vi instruction. ANY_EXTEND would become a
8688    // zero extend and the simm5 check in isel would fail.
8689 // FIXME: Should we ignore the upper bits in isel instead?
8690 unsigned ExtOpc =
8691 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8692 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8693 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8694 }
8695
8696 // Use the previous operand to get the vXi64 VT. The result might be a mask
8697 // VT for compares. Using the previous operand assumes that the previous
8698 // operand will never have a smaller element size than a scalar operand and
8699 // that a widening operation never uses SEW=64.
8700 // NOTE: If this fails the below assert, we can probably just find the
8701 // element count from any operand or result and use it to construct the VT.
8702 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8703 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8704
8705 // The more complex case is when the scalar is larger than XLenVT.
8706 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8707 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8708
8709 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8710 // instruction to sign-extend since SEW>XLEN.
8711 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8712 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8713 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8714 }
8715
8716 switch (IntNo) {
8717 case Intrinsic::riscv_vslide1up:
8718 case Intrinsic::riscv_vslide1down:
8719 case Intrinsic::riscv_vslide1up_mask:
8720 case Intrinsic::riscv_vslide1down_mask: {
8721 // We need to special case these when the scalar is larger than XLen.
8722 unsigned NumOps = Op.getNumOperands();
8723 bool IsMasked = NumOps == 7;
8724
8725 // Convert the vector source to the equivalent nxvXi32 vector.
8726 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8727 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8728 SDValue ScalarLo, ScalarHi;
8729 std::tie(ScalarLo, ScalarHi) =
8730 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8731
8732 // Double the VL since we halved SEW.
8733 SDValue AVL = getVLOperand(Op);
8734 SDValue I32VL;
8735
8736 // Optimize for constant AVL
8737 if (isa<ConstantSDNode>(AVL)) {
8738      const auto [MinVLMAX, MaxVLMAX] =
8739          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8740
8741 uint64_t AVLInt = AVL->getAsZExtVal();
8742 if (AVLInt <= MinVLMAX) {
8743 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8744 } else if (AVLInt >= 2 * MaxVLMAX) {
8745        // Just set vl to VLMAX in this situation
8746        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8747 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8748 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8749 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8750 SDValue SETVLMAX = DAG.getTargetConstant(
8751 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8752 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8753 LMUL);
8754 } else {
8755 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8756 // is related to the hardware implementation.
8757        // So let the following code handle it.
8758 }
8759 }
8760    if (!I32VL) {
8761      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8762 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8763 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8764 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8765 SDValue SETVL =
8766 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8767      // Use the vsetvli instruction to get the actually-used length, which is
8768      // related to the hardware implementation.
8769 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8770 SEW, LMUL);
8771 I32VL =
8772 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8773 }
8774
8775 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8776
8777 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8778 // instructions.
8779 SDValue Passthru;
8780 if (IsMasked)
8781 Passthru = DAG.getUNDEF(I32VT);
8782 else
8783 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8784
8785 if (IntNo == Intrinsic::riscv_vslide1up ||
8786 IntNo == Intrinsic::riscv_vslide1up_mask) {
8787 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8788 ScalarHi, I32Mask, I32VL);
8789 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8790 ScalarLo, I32Mask, I32VL);
8791 } else {
8792 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8793 ScalarLo, I32Mask, I32VL);
8794 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8795 ScalarHi, I32Mask, I32VL);
8796 }
8797
8798 // Convert back to nxvXi64.
8799 Vec = DAG.getBitcast(VT, Vec);
8800
8801 if (!IsMasked)
8802 return Vec;
8803 // Apply mask after the operation.
8804 SDValue Mask = Operands[NumOps - 3];
8805 SDValue MaskedOff = Operands[1];
8806 // Assume Policy operand is the last operand.
8807 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8808 // We don't need to select maskedoff if it's undef.
8809 if (MaskedOff.isUndef())
8810 return Vec;
8811 // TAMU
8812 if (Policy == RISCVII::TAIL_AGNOSTIC)
8813 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8814 DAG.getUNDEF(VT), AVL);
8815    // TUMA or TUMU: Currently we always emit a tumu policy regardless of tuma.
8816    // This is fine because vmerge does not care about the mask policy.
8817 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8818 MaskedOff, AVL);
8819 }
8820 }
8821
8822 // We need to convert the scalar to a splat vector.
8823 SDValue VL = getVLOperand(Op);
8824 assert(VL.getValueType() == XLenVT);
8825 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8826 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8827}
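// Illustrative note (editorial, not from the upstream source): as a concrete
// case of the slide1up/slide1down handling above, riscv_vslide1down with an
// i64 scalar on RV32 is lowered by bitcasting the source to SEW=32, doubling
// VL, and emitting two vslide1down.vx instructions (low word first, then the
// high word) instead of requiring a single 64-bit GPR operand.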
8828
8829// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8830// scalable vector llvm.get.vector.length for now.
8831//
8832// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8833// (vscale * VF). The vscale and VF are independent of element width. We use
8834// SEW=8 for the vsetvli because it is the only element width that supports all
8835// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8836// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8837// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8838// SEW and LMUL are better for the surrounding vector instructions.
8839static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8840                                    const RISCVSubtarget &Subtarget) {
8841 MVT XLenVT = Subtarget.getXLenVT();
8842
8843 // The smallest LMUL is only valid for the smallest element width.
8844 const unsigned ElementWidth = 8;
8845
8846 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8847 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8848 // We don't support VF==1 with ELEN==32.
8849 [[maybe_unused]] unsigned MinVF =
8850 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8851
8852 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8853 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8854 "Unexpected VF");
8855
8856 bool Fractional = VF < LMul1VF;
8857 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8858 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8859 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8860
8861 SDLoc DL(N);
8862
8863 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8864 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8865
8866 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8867
8868 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8869 SDValue Res =
8870 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8871 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8872}
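// Illustrative note (editorial sketch, assumed codegen, not from the upstream
// source): with VF=4, llvm.experimental.get.vector.length(%avl, i32 4, i1 true)
// becomes a single vsetvli using SEW=8 and LMUL=1/2, e.g.
//   vsetvli a0, a1, e8, mf2, ta, ma
// since VLMAX = (VLEN/8) * 1/2 = VLEN/16 = vscale * 4, exactly the value the
// comment above asks for.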
8873
8874static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8875                             const RISCVSubtarget &Subtarget) {
8876 SDValue Op0 = N->getOperand(1);
8877 MVT OpVT = Op0.getSimpleValueType();
8878 MVT ContainerVT = OpVT;
8879 if (OpVT.isFixedLengthVector()) {
8880 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8881 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8882 }
8883 MVT XLenVT = Subtarget.getXLenVT();
8884 SDLoc DL(N);
8885 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8886 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8887 if (isOneConstant(N->getOperand(2)))
8888 return Res;
8889
8890 // Convert -1 to VL.
8891 SDValue Setcc =
8892 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8893 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8894 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8895}
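// Illustrative note (editorial, not from the upstream source): cttz.elts on a
// mask vector thus maps onto vfirst.m, which returns the index of the first
// set element or -1 if none is set; when the "zero is poison" argument is not
// 1, the setcc/select above replaces the -1 result with the element count.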
8896
8897static inline void promoteVCIXScalar(const SDValue &Op,
8898                                     SmallVectorImpl<SDValue> &Operands,
8899                                     SelectionDAG &DAG) {
8900  const RISCVSubtarget &Subtarget =
8901      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8902
8903 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8904 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8905 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8906 SDLoc DL(Op);
8907
8908  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8909      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8910 if (!II || !II->hasScalarOperand())
8911 return;
8912
8913 unsigned SplatOp = II->ScalarOperand + 1;
8914 assert(SplatOp < Op.getNumOperands());
8915
8916 SDValue &ScalarOp = Operands[SplatOp];
8917 MVT OpVT = ScalarOp.getSimpleValueType();
8918 MVT XLenVT = Subtarget.getXLenVT();
8919
8920 // The code below is partially copied from lowerVectorIntrinsicScalars.
8921 // If this isn't a scalar, or its type is XLenVT we're done.
8922 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8923 return;
8924
8925 // Manually emit promote operation for scalar operation.
8926 if (OpVT.bitsLT(XLenVT)) {
8927 unsigned ExtOpc =
8928 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8929 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8930 }
8931
8932 return;
8933}
8934
8935static void processVCIXOperands(SDValue &OrigOp,
8936                                SmallVectorImpl<SDValue> &Operands,
8937                                SelectionDAG &DAG) {
8938  promoteVCIXScalar(OrigOp, Operands, DAG);
8939  const RISCVSubtarget &Subtarget =
8940      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8941  for (SDValue &V : Operands) {
8942 EVT ValType = V.getValueType();
8943 if (ValType.isVector() && ValType.isFloatingPoint()) {
8944 MVT InterimIVT =
8945 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8946 ValType.getVectorElementCount());
8947 V = DAG.getBitcast(InterimIVT, V);
8948 }
8949 if (ValType.isFixedLengthVector()) {
8950 MVT OpContainerVT = getContainerForFixedLengthVector(
8951 DAG, V.getSimpleValueType(), Subtarget);
8952 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8953 }
8954 }
8955}
8956
8957// LMUL * VLEN should be greater than or equal to EGS * SEW
8958static inline bool isValidEGW(int EGS, EVT VT,
8959 const RISCVSubtarget &Subtarget) {
8960  return (Subtarget.getRealMinVLen() *
8961          VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8962         EGS * VT.getScalarSizeInBits();
8963}
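// Worked example (editorial, not from the upstream source): with VLEN >= 128
// an LMUL=1 type such as nxv2i32 gives LMUL * VLEN = 128 bits, which meets
// EGS=4 * SEW=32 = 128 bits, so a four-element group of 32-bit elements fits;
// with VLEN = 64 the same check fails and the caller reports an error.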
8964
8965SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8966 SelectionDAG &DAG) const {
8967 unsigned IntNo = Op.getConstantOperandVal(0);
8968 SDLoc DL(Op);
8969 MVT XLenVT = Subtarget.getXLenVT();
8970
8971 switch (IntNo) {
8972 default:
8973 break; // Don't custom lower most intrinsics.
8974 case Intrinsic::thread_pointer: {
8975 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8976 return DAG.getRegister(RISCV::X4, PtrVT);
8977 }
8978 case Intrinsic::riscv_orc_b:
8979 case Intrinsic::riscv_brev8:
8980 case Intrinsic::riscv_sha256sig0:
8981 case Intrinsic::riscv_sha256sig1:
8982 case Intrinsic::riscv_sha256sum0:
8983 case Intrinsic::riscv_sha256sum1:
8984 case Intrinsic::riscv_sm3p0:
8985 case Intrinsic::riscv_sm3p1: {
8986 unsigned Opc;
8987 switch (IntNo) {
8988 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8989 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8990 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8991 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8992 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8993 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8994 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8995 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8996 }
8997
8998 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8999 SDValue NewOp =
9000 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9001 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
9002 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9003 }
9004
9005 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9006 }
9007 case Intrinsic::riscv_sm4ks:
9008 case Intrinsic::riscv_sm4ed: {
9009 unsigned Opc =
9010 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9011
9012 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9013 SDValue NewOp0 =
9014 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9015 SDValue NewOp1 =
9016 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9017 SDValue Res =
9018 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
9019 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9020 }
9021
9022 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9023 Op.getOperand(3));
9024 }
9025 case Intrinsic::riscv_zip:
9026 case Intrinsic::riscv_unzip: {
9027 unsigned Opc =
9028 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9029 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9030 }
9031 case Intrinsic::riscv_mopr: {
9032 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9033 SDValue NewOp =
9034 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9035 SDValue Res = DAG.getNode(
9036 RISCVISD::MOPR, DL, MVT::i64, NewOp,
9037 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
9038 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9039 }
9040 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9041 Op.getOperand(2));
9042 }
9043
9044 case Intrinsic::riscv_moprr: {
9045 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9046 SDValue NewOp0 =
9047 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9048 SDValue NewOp1 =
9049 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9050 SDValue Res = DAG.getNode(
9051 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
9052 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
9053 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9054 }
9055 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9056 Op.getOperand(2), Op.getOperand(3));
9057 }
9058 case Intrinsic::riscv_clmul:
9059 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9060 SDValue NewOp0 =
9061 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9062 SDValue NewOp1 =
9063 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9064 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
9065 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9066 }
9067 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9068 Op.getOperand(2));
9069 case Intrinsic::riscv_clmulh:
9070 case Intrinsic::riscv_clmulr: {
9071 unsigned Opc =
9072 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9073 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9074 SDValue NewOp0 =
9075 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9076 SDValue NewOp1 =
9077 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9078 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
9079 DAG.getConstant(32, DL, MVT::i64));
9080 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
9081 DAG.getConstant(32, DL, MVT::i64));
9082 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
9083 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
9084 DAG.getConstant(32, DL, MVT::i64));
9085 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9086 }
9087
9088 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9089 }
9090 case Intrinsic::experimental_get_vector_length:
9091 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9092 case Intrinsic::experimental_cttz_elts:
9093 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9094 case Intrinsic::riscv_vmv_x_s: {
9095 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9096 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9097 }
9098 case Intrinsic::riscv_vfmv_f_s:
9099 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9100 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9101 case Intrinsic::riscv_vmv_v_x:
9102 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9103 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9104 Subtarget);
9105 case Intrinsic::riscv_vfmv_v_f:
9106 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9107 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9108 case Intrinsic::riscv_vmv_s_x: {
9109 SDValue Scalar = Op.getOperand(2);
9110
9111 if (Scalar.getValueType().bitsLE(XLenVT)) {
9112 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9113 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9114 Op.getOperand(1), Scalar, Op.getOperand(3));
9115 }
9116
9117 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9118
9119 // This is an i64 value that lives in two scalar registers. We have to
9120    // insert this in a convoluted way. First we build a vXi64 splat containing
9121 // the two values that we assemble using some bit math. Next we'll use
9122 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9123 // to merge element 0 from our splat into the source vector.
9124 // FIXME: This is probably not the best way to do this, but it is
9125 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9126 // point.
9127 // sw lo, (a0)
9128 // sw hi, 4(a0)
9129 // vlse vX, (a0)
9130 //
9131 // vid.v vVid
9132 // vmseq.vx mMask, vVid, 0
9133 // vmerge.vvm vDest, vSrc, vVal, mMask
9134 MVT VT = Op.getSimpleValueType();
9135 SDValue Vec = Op.getOperand(1);
9136 SDValue VL = getVLOperand(Op);
9137
9138 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9139 if (Op.getOperand(1).isUndef())
9140 return SplattedVal;
9141 SDValue SplattedIdx =
9142 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9143 DAG.getConstant(0, DL, MVT::i32), VL);
9144
9145 MVT MaskVT = getMaskTypeFor(VT);
9146 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9147 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9148 SDValue SelectCond =
9149 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9150 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9151 DAG.getUNDEF(MaskVT), Mask, VL});
9152 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9153 Vec, DAG.getUNDEF(VT), VL);
9154 }
9155 case Intrinsic::riscv_vfmv_s_f:
9156 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9157 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9158 // EGS * EEW >= 128 bits
9159 case Intrinsic::riscv_vaesdf_vv:
9160 case Intrinsic::riscv_vaesdf_vs:
9161 case Intrinsic::riscv_vaesdm_vv:
9162 case Intrinsic::riscv_vaesdm_vs:
9163 case Intrinsic::riscv_vaesef_vv:
9164 case Intrinsic::riscv_vaesef_vs:
9165 case Intrinsic::riscv_vaesem_vv:
9166 case Intrinsic::riscv_vaesem_vs:
9167 case Intrinsic::riscv_vaeskf1:
9168 case Intrinsic::riscv_vaeskf2:
9169 case Intrinsic::riscv_vaesz_vs:
9170 case Intrinsic::riscv_vsm4k:
9171 case Intrinsic::riscv_vsm4r_vv:
9172 case Intrinsic::riscv_vsm4r_vs: {
9173 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9174 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9175 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9176 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9177 return Op;
9178 }
9179 // EGS * EEW >= 256 bits
9180 case Intrinsic::riscv_vsm3c:
9181 case Intrinsic::riscv_vsm3me: {
9182 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9183 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9184 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9185 return Op;
9186 }
9187 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9188 case Intrinsic::riscv_vsha2ch:
9189 case Intrinsic::riscv_vsha2cl:
9190 case Intrinsic::riscv_vsha2ms: {
9191 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9192 !Subtarget.hasStdExtZvknhb())
9193 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9194 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9195 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9196 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9197 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9198 return Op;
9199 }
9200 case Intrinsic::riscv_sf_vc_v_x:
9201 case Intrinsic::riscv_sf_vc_v_i:
9202 case Intrinsic::riscv_sf_vc_v_xv:
9203 case Intrinsic::riscv_sf_vc_v_iv:
9204 case Intrinsic::riscv_sf_vc_v_vv:
9205 case Intrinsic::riscv_sf_vc_v_fv:
9206 case Intrinsic::riscv_sf_vc_v_xvv:
9207 case Intrinsic::riscv_sf_vc_v_ivv:
9208 case Intrinsic::riscv_sf_vc_v_vvv:
9209 case Intrinsic::riscv_sf_vc_v_fvv:
9210 case Intrinsic::riscv_sf_vc_v_xvw:
9211 case Intrinsic::riscv_sf_vc_v_ivw:
9212 case Intrinsic::riscv_sf_vc_v_vvw:
9213 case Intrinsic::riscv_sf_vc_v_fvw: {
9214 MVT VT = Op.getSimpleValueType();
9215
9216    SmallVector<SDValue> Operands{Op->op_values()};
9217    processVCIXOperands(Op, Operands, DAG);
9218
9219    MVT RetVT = VT;
9220    if (VT.isFixedLengthVector())
9221      RetVT = getContainerForFixedLengthVector(RetVT);
9222    else if (VT.isFloatingPoint())
9223      RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
9224                               RetVT.getVectorElementCount());
9225
9226 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9227
9228 if (VT.isFixedLengthVector())
9229 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9230 else if (VT.isFloatingPoint())
9231 NewNode = DAG.getBitcast(VT, NewNode);
9232
9233 if (Op == NewNode)
9234 break;
9235
9236 return NewNode;
9237 }
9238 }
9239
9240 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9241}
9242
9243static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9244                                    unsigned Type) {
9245 SDLoc DL(Op);
9246 SmallVector<SDValue> Operands{Op->op_values()};
9247 Operands.erase(Operands.begin() + 1);
9248
9249  const RISCVSubtarget &Subtarget =
9250      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9251 MVT VT = Op.getSimpleValueType();
9252 MVT RetVT = VT;
9253 MVT FloatVT = VT;
9254
9255 if (VT.isFloatingPoint()) {
9256    RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9257                             VT.getVectorElementCount());
9258    FloatVT = RetVT;
9259 }
9260  if (VT.isFixedLengthVector())
9261    RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9262                                             Subtarget);
9263
9264  processVCIXOperands(Op, Operands, DAG);
9265
9266 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9267 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9268 SDValue Chain = NewNode.getValue(1);
9269
9270 if (VT.isFixedLengthVector())
9271 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9272 if (VT.isFloatingPoint())
9273 NewNode = DAG.getBitcast(VT, NewNode);
9274
9275 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9276
9277 return NewNode;
9278}
9279
9280static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9281                                  unsigned Type) {
9282 SmallVector<SDValue> Operands{Op->op_values()};
9283  Operands.erase(Operands.begin() + 1);
9284  processVCIXOperands(Op, Operands, DAG);
9285
9286 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9287}
9288
9289SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9290 SelectionDAG &DAG) const {
9291 unsigned IntNo = Op.getConstantOperandVal(1);
9292 switch (IntNo) {
9293 default:
9294 break;
9295 case Intrinsic::riscv_masked_strided_load: {
9296 SDLoc DL(Op);
9297 MVT XLenVT = Subtarget.getXLenVT();
9298
9299 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9300 // the selection of the masked intrinsics doesn't do this for us.
9301 SDValue Mask = Op.getOperand(5);
9302 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9303
9304 MVT VT = Op->getSimpleValueType(0);
9305 MVT ContainerVT = VT;
9306 if (VT.isFixedLengthVector())
9307 ContainerVT = getContainerForFixedLengthVector(VT);
9308
9309 SDValue PassThru = Op.getOperand(2);
9310 if (!IsUnmasked) {
9311 MVT MaskVT = getMaskTypeFor(ContainerVT);
9312 if (VT.isFixedLengthVector()) {
9313 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9314 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9315 }
9316 }
9317
9318 auto *Load = cast<MemIntrinsicSDNode>(Op);
9319 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9320 SDValue Ptr = Op.getOperand(3);
9321 SDValue Stride = Op.getOperand(4);
9322 SDValue Result, Chain;
9323
9324    // TODO: We currently restrict this to unmasked loads in consideration of
9325    // the complexity of handling all-false masks.
9326 MVT ScalarVT = ContainerVT.getVectorElementType();
9327 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9328 SDValue ScalarLoad =
9329 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9330 ScalarVT, Load->getMemOperand());
9331 Chain = ScalarLoad.getValue(1);
9332 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9333 Subtarget);
9334 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9335 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9336 Load->getMemOperand());
9337 Chain = ScalarLoad.getValue(1);
9338 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9339 } else {
9340 SDValue IntID = DAG.getTargetConstant(
9341 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9342 XLenVT);
9343
9344 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9345 if (IsUnmasked)
9346 Ops.push_back(DAG.getUNDEF(ContainerVT));
9347 else
9348 Ops.push_back(PassThru);
9349 Ops.push_back(Ptr);
9350 Ops.push_back(Stride);
9351 if (!IsUnmasked)
9352 Ops.push_back(Mask);
9353 Ops.push_back(VL);
9354 if (!IsUnmasked) {
9355 SDValue Policy =
9356            DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9357        Ops.push_back(Policy);
9358 }
9359
9360 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9361      Result =
9362          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9363                                  Load->getMemoryVT(), Load->getMemOperand());
9364 Chain = Result.getValue(1);
9365 }
9366 if (VT.isFixedLengthVector())
9367 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9368 return DAG.getMergeValues({Result, Chain}, DL);
9369 }
9370 case Intrinsic::riscv_seg2_load:
9371 case Intrinsic::riscv_seg3_load:
9372 case Intrinsic::riscv_seg4_load:
9373 case Intrinsic::riscv_seg5_load:
9374 case Intrinsic::riscv_seg6_load:
9375 case Intrinsic::riscv_seg7_load:
9376 case Intrinsic::riscv_seg8_load: {
9377 SDLoc DL(Op);
9378 static const Intrinsic::ID VlsegInts[7] = {
9379 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9380 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9381 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9382 Intrinsic::riscv_vlseg8};
9383 unsigned NF = Op->getNumValues() - 1;
9384 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9385 MVT XLenVT = Subtarget.getXLenVT();
9386 MVT VT = Op->getSimpleValueType(0);
9387 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9388
9389 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9390 Subtarget);
9391 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9392 auto *Load = cast<MemIntrinsicSDNode>(Op);
9393 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9394 ContainerVTs.push_back(MVT::Other);
9395 SDVTList VTs = DAG.getVTList(ContainerVTs);
9396 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9397 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9398 Ops.push_back(Op.getOperand(2));
9399 Ops.push_back(VL);
9400    SDValue Result =
9401        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9402                                Load->getMemoryVT(), Load->getMemOperand());
9403    SmallVector<SDValue, 9> Results;
9404    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9405 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9406 DAG, Subtarget));
9407 Results.push_back(Result.getValue(NF));
9408 return DAG.getMergeValues(Results, DL);
9409 }
9410  case Intrinsic::riscv_sf_vc_v_x_se:
9411    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9412  case Intrinsic::riscv_sf_vc_v_i_se:
9413    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9414  case Intrinsic::riscv_sf_vc_v_xv_se:
9415    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9416  case Intrinsic::riscv_sf_vc_v_iv_se:
9417    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9418  case Intrinsic::riscv_sf_vc_v_vv_se:
9419    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9420  case Intrinsic::riscv_sf_vc_v_fv_se:
9421    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9422  case Intrinsic::riscv_sf_vc_v_xvv_se:
9423    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9424  case Intrinsic::riscv_sf_vc_v_ivv_se:
9425    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9426  case Intrinsic::riscv_sf_vc_v_vvv_se:
9427    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9428  case Intrinsic::riscv_sf_vc_v_fvv_se:
9429    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9430  case Intrinsic::riscv_sf_vc_v_xvw_se:
9431    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9432  case Intrinsic::riscv_sf_vc_v_ivw_se:
9433    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9434  case Intrinsic::riscv_sf_vc_v_vvw_se:
9435    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9436  case Intrinsic::riscv_sf_vc_v_fvw_se:
9437    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9438 }
9439
9440 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9441}
9442
9443SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9444 SelectionDAG &DAG) const {
9445 unsigned IntNo = Op.getConstantOperandVal(1);
9446 switch (IntNo) {
9447 default:
9448 break;
9449 case Intrinsic::riscv_masked_strided_store: {
9450 SDLoc DL(Op);
9451 MVT XLenVT = Subtarget.getXLenVT();
9452
9453 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9454 // the selection of the masked intrinsics doesn't do this for us.
9455 SDValue Mask = Op.getOperand(5);
9456 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9457
9458 SDValue Val = Op.getOperand(2);
9459 MVT VT = Val.getSimpleValueType();
9460 MVT ContainerVT = VT;
9461 if (VT.isFixedLengthVector()) {
9462 ContainerVT = getContainerForFixedLengthVector(VT);
9463 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9464 }
9465 if (!IsUnmasked) {
9466 MVT MaskVT = getMaskTypeFor(ContainerVT);
9467 if (VT.isFixedLengthVector())
9468 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9469 }
9470
9471 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9472
9473 SDValue IntID = DAG.getTargetConstant(
9474 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9475 XLenVT);
9476
9477 auto *Store = cast<MemIntrinsicSDNode>(Op);
9478 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9479 Ops.push_back(Val);
9480 Ops.push_back(Op.getOperand(3)); // Ptr
9481 Ops.push_back(Op.getOperand(4)); // Stride
9482 if (!IsUnmasked)
9483 Ops.push_back(Mask);
9484 Ops.push_back(VL);
9485
9486 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9487 Ops, Store->getMemoryVT(),
9488 Store->getMemOperand());
9489 }
9490 case Intrinsic::riscv_seg2_store:
9491 case Intrinsic::riscv_seg3_store:
9492 case Intrinsic::riscv_seg4_store:
9493 case Intrinsic::riscv_seg5_store:
9494 case Intrinsic::riscv_seg6_store:
9495 case Intrinsic::riscv_seg7_store:
9496 case Intrinsic::riscv_seg8_store: {
9497 SDLoc DL(Op);
9498 static const Intrinsic::ID VssegInts[] = {
9499 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9500 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9501 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9502 Intrinsic::riscv_vsseg8};
9503 // Operands are (chain, int_id, vec*, ptr, vl)
9504 unsigned NF = Op->getNumOperands() - 4;
9505 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9506 MVT XLenVT = Subtarget.getXLenVT();
9507 MVT VT = Op->getOperand(2).getSimpleValueType();
9508 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9509
9510 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9511 Subtarget);
9512 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9513 SDValue Ptr = Op->getOperand(NF + 2);
9514
9515 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9516 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9517 for (unsigned i = 0; i < NF; i++)
9518      Ops.push_back(convertToScalableVector(
9519          ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9520 Ops.append({Ptr, VL});
9521
9522 return DAG.getMemIntrinsicNode(
9523 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9524 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9525 }
9526  case Intrinsic::riscv_sf_vc_xv_se:
9527    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9528  case Intrinsic::riscv_sf_vc_iv_se:
9529    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9530  case Intrinsic::riscv_sf_vc_vv_se:
9531    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9532  case Intrinsic::riscv_sf_vc_fv_se:
9533    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9534  case Intrinsic::riscv_sf_vc_xvv_se:
9535    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9536  case Intrinsic::riscv_sf_vc_ivv_se:
9537    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9538  case Intrinsic::riscv_sf_vc_vvv_se:
9539    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9540  case Intrinsic::riscv_sf_vc_fvv_se:
9541    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9542  case Intrinsic::riscv_sf_vc_xvw_se:
9543    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9544  case Intrinsic::riscv_sf_vc_ivw_se:
9545    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9546  case Intrinsic::riscv_sf_vc_vvw_se:
9547    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9548  case Intrinsic::riscv_sf_vc_fvw_se:
9549    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9550 }
9551
9552 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9553}
9554
9555static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9556 switch (ISDOpcode) {
9557 default:
9558 llvm_unreachable("Unhandled reduction");
9559  case ISD::VP_REDUCE_ADD:
9560  case ISD::VECREDUCE_ADD:
9561    return RISCVISD::VECREDUCE_ADD_VL;
9562  case ISD::VP_REDUCE_UMAX:
9563  case ISD::VECREDUCE_UMAX:
9564    return RISCVISD::VECREDUCE_UMAX_VL;
9565  case ISD::VP_REDUCE_SMAX:
9566  case ISD::VECREDUCE_SMAX:
9567    return RISCVISD::VECREDUCE_SMAX_VL;
9568  case ISD::VP_REDUCE_UMIN:
9569  case ISD::VECREDUCE_UMIN:
9570    return RISCVISD::VECREDUCE_UMIN_VL;
9571  case ISD::VP_REDUCE_SMIN:
9572  case ISD::VECREDUCE_SMIN:
9573    return RISCVISD::VECREDUCE_SMIN_VL;
9574  case ISD::VP_REDUCE_AND:
9575  case ISD::VECREDUCE_AND:
9576    return RISCVISD::VECREDUCE_AND_VL;
9577  case ISD::VP_REDUCE_OR:
9578  case ISD::VECREDUCE_OR:
9579    return RISCVISD::VECREDUCE_OR_VL;
9580  case ISD::VP_REDUCE_XOR:
9581  case ISD::VECREDUCE_XOR:
9582    return RISCVISD::VECREDUCE_XOR_VL;
9583  case ISD::VP_REDUCE_FADD:
9584    return RISCVISD::VECREDUCE_FADD_VL;
9585  case ISD::VP_REDUCE_SEQ_FADD:
9586    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9587  case ISD::VP_REDUCE_FMAX:
9588  case ISD::VP_REDUCE_FMAXIMUM:
9589    return RISCVISD::VECREDUCE_FMAX_VL;
9590  case ISD::VP_REDUCE_FMIN:
9591  case ISD::VP_REDUCE_FMINIMUM:
9592    return RISCVISD::VECREDUCE_FMIN_VL;
9593  }
9594
9595}
9596
9597SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9598 SelectionDAG &DAG,
9599 bool IsVP) const {
9600 SDLoc DL(Op);
9601 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9602 MVT VecVT = Vec.getSimpleValueType();
9603 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9604 Op.getOpcode() == ISD::VECREDUCE_OR ||
9605 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9606 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9607 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9608 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9609 "Unexpected reduction lowering");
9610
9611 MVT XLenVT = Subtarget.getXLenVT();
9612
9613 MVT ContainerVT = VecVT;
9614 if (VecVT.isFixedLengthVector()) {
9615 ContainerVT = getContainerForFixedLengthVector(VecVT);
9616 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9617 }
9618
9619 SDValue Mask, VL;
9620 if (IsVP) {
9621 Mask = Op.getOperand(2);
9622 VL = Op.getOperand(3);
9623 } else {
9624 std::tie(Mask, VL) =
9625 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9626 }
9627
9628  unsigned BaseOpc;
9629  ISD::CondCode CC;
9630 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9631
9632 switch (Op.getOpcode()) {
9633 default:
9634 llvm_unreachable("Unhandled reduction");
9635 case ISD::VECREDUCE_AND:
9636 case ISD::VP_REDUCE_AND: {
9637 // vcpop ~x == 0
9638 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9639 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9640 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9641 CC = ISD::SETEQ;
9642 BaseOpc = ISD::AND;
9643 break;
9644 }
9645 case ISD::VECREDUCE_OR:
9646 case ISD::VP_REDUCE_OR:
9647 // vcpop x != 0
9648 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9649 CC = ISD::SETNE;
9650 BaseOpc = ISD::OR;
9651 break;
9652 case ISD::VECREDUCE_XOR:
9653 case ISD::VP_REDUCE_XOR: {
9654 // ((vcpop x) & 1) != 0
9655 SDValue One = DAG.getConstant(1, DL, XLenVT);
9656 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9657 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9658 CC = ISD::SETNE;
9659 BaseOpc = ISD::XOR;
9660 break;
9661 }
9662 }
9663
9664 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9665 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9666
9667 if (!IsVP)
9668 return SetCC;
9669
9670 // Now include the start value in the operation.
9671 // Note that we must return the start value when no elements are operated
9672 // upon. The vcpop instructions we've emitted in each case above will return
9673 // 0 for an inactive vector, and so we've already received the neutral value:
9674 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9675 // can simply include the start value.
9676 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9677}
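// Illustrative note (editorial sketch, assumed codegen, not from the upstream
// source): a vecreduce.or over a mask vector therefore lowers to roughly
//   vcpop.m a0, v0
//   snez    a0, a0
// while vecreduce.and first complements the mask and checks that the
// resulting population count is zero, matching the cases handled above.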
9678
9679static bool isNonZeroAVL(SDValue AVL) {
9680 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9681 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9682 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9683 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9684}
9685
9686/// Helper to lower a reduction sequence of the form:
9687/// scalar = reduce_op vec, scalar_start
9688static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9689 SDValue StartValue, SDValue Vec, SDValue Mask,
9690 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9691 const RISCVSubtarget &Subtarget) {
9692 const MVT VecVT = Vec.getSimpleValueType();
9693 const MVT M1VT = getLMUL1VT(VecVT);
9694 const MVT XLenVT = Subtarget.getXLenVT();
9695 const bool NonZeroAVL = isNonZeroAVL(VL);
9696
9697 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9698 // or the original VT if fractional.
9699 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9700 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9701 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9702 // be the result of the reduction operation.
9703 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9704 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9705 DAG, Subtarget);
9706 if (M1VT != InnerVT)
9707 InitialValue =
9708 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9709 InitialValue, DAG.getVectorIdxConstant(0, DL));
9710 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9711 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9712 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9713 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9714 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9715 DAG.getVectorIdxConstant(0, DL));
9716}
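// Illustrative note (editorial sketch, assumed codegen, not from the upstream
// source): for an integer add reduction the sequence built here selects to
// roughly
//   vmv.s.x    v9, a0        ; start value into element 0 of an LMUL1 temp
//   vredsum.vs v9, v8, v9    ; v9[0] = v9[0] + sum(v8[*])
//   vmv.x.s    a0, v9        ; read the scalar result back out
// with the scalar insert done at LMUL1 (or the original type if fractional).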
9717
9718SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9719 SelectionDAG &DAG) const {
9720 SDLoc DL(Op);
9721 SDValue Vec = Op.getOperand(0);
9722 EVT VecEVT = Vec.getValueType();
9723
9724 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9725
9726 // Due to ordering in legalize types we may have a vector type that needs to
9727 // be split. Do that manually so we can get down to a legal type.
9728  while (getTypeAction(*DAG.getContext(), VecEVT) ==
9729         TargetLowering::TypeSplitVector) {
9730 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9731 VecEVT = Lo.getValueType();
9732 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9733 }
9734
9735 // TODO: The type may need to be widened rather than split. Or widened before
9736 // it can be split.
9737 if (!isTypeLegal(VecEVT))
9738 return SDValue();
9739
9740 MVT VecVT = VecEVT.getSimpleVT();
9741 MVT VecEltVT = VecVT.getVectorElementType();
9742 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9743
9744 MVT ContainerVT = VecVT;
9745 if (VecVT.isFixedLengthVector()) {
9746 ContainerVT = getContainerForFixedLengthVector(VecVT);
9747 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9748 }
9749
9750 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9751
9752 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9753 switch (BaseOpc) {
9754 case ISD::AND:
9755 case ISD::OR:
9756 case ISD::UMAX:
9757 case ISD::UMIN:
9758 case ISD::SMAX:
9759 case ISD::SMIN:
9760 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9761 DAG.getVectorIdxConstant(0, DL));
9762 }
9763 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9764 Mask, VL, DL, DAG, Subtarget);
9765}
9766
9767// Given a reduction op, this function returns the matching reduction opcode,
9768// the vector SDValue and the scalar SDValue required to lower this to a
9769// RISCVISD node.
9770static std::tuple<unsigned, SDValue, SDValue>
9772 const RISCVSubtarget &Subtarget) {
9773 SDLoc DL(Op);
9774 auto Flags = Op->getFlags();
9775 unsigned Opcode = Op.getOpcode();
9776 switch (Opcode) {
9777 default:
9778 llvm_unreachable("Unhandled reduction");
9779 case ISD::VECREDUCE_FADD: {
9780 // Use positive zero if we can. It is cheaper to materialize.
9781 SDValue Zero =
9782 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9783 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9784 }
9785 case ISD::VECREDUCE_SEQ_FADD:
9786 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9787 Op.getOperand(0));
9788 case ISD::VECREDUCE_FMINIMUM:
9789 case ISD::VECREDUCE_FMAXIMUM:
9790 case ISD::VECREDUCE_FMIN:
9791 case ISD::VECREDUCE_FMAX: {
9792 SDValue Front =
9793 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9794 DAG.getVectorIdxConstant(0, DL));
9795 unsigned RVVOpc =
9796 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9797 ? RISCVISD::VECREDUCE_FMIN_VL
9798 : RISCVISD::VECREDUCE_FMAX_VL;
9799 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9800 }
9801 }
9802}
9803
9804SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9805 SelectionDAG &DAG) const {
9806 SDLoc DL(Op);
9807 MVT VecEltVT = Op.getSimpleValueType();
9808
9809 unsigned RVVOpcode;
9810 SDValue VectorVal, ScalarVal;
9811 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9812 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9813 MVT VecVT = VectorVal.getSimpleValueType();
9814
9815 MVT ContainerVT = VecVT;
9816 if (VecVT.isFixedLengthVector()) {
9817 ContainerVT = getContainerForFixedLengthVector(VecVT);
9818 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9819 }
9820
9821 MVT ResVT = Op.getSimpleValueType();
9822 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9823 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9824 VL, DL, DAG, Subtarget);
9825 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9826 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9827 return Res;
9828
9829 if (Op->getFlags().hasNoNaNs())
9830 return Res;
9831
9832 // Force output to NaN if any element is NaN.
9833 SDValue IsNan =
9834 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9835 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9836 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9837 MVT XLenVT = Subtarget.getXLenVT();
9838 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9839 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9840 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9841 return DAG.getSelect(
9842 DL, ResVT, NoNaNs, Res,
9843 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9844 ResVT));
9845}
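// For illustration only (registers are placeholders): the NaN propagation
// above for VECREDUCE_FMAXIMUM/FMINIMUM checks the input against itself and
// selects a NaN constant when any lane is unordered, roughly:
//   vmfne.vv v0, v8, v8         ; lanes where the input is NaN
//   vcpop.m  a0, v0             ; any NaN lanes?
//   ; select NaN instead of the reduction result when a0 != 0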
9846
9847SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9848 SelectionDAG &DAG) const {
9849 SDLoc DL(Op);
9850 unsigned Opc = Op.getOpcode();
9851 SDValue Start = Op.getOperand(0);
9852 SDValue Vec = Op.getOperand(1);
9853 EVT VecEVT = Vec.getValueType();
9854 MVT XLenVT = Subtarget.getXLenVT();
9855
9856 // TODO: The type may need to be widened rather than split. Or widened before
9857 // it can be split.
9858 if (!isTypeLegal(VecEVT))
9859 return SDValue();
9860
9861 MVT VecVT = VecEVT.getSimpleVT();
9862 unsigned RVVOpcode = getRVVReductionOp(Opc);
9863
9864 if (VecVT.isFixedLengthVector()) {
9865 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9866 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9867 }
9868
9869 SDValue VL = Op.getOperand(3);
9870 SDValue Mask = Op.getOperand(2);
9871 SDValue Res =
9872 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9873 Vec, Mask, VL, DL, DAG, Subtarget);
9874 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9875 Op->getFlags().hasNoNaNs())
9876 return Res;
9877
9878 // Propagate NaNs.
9879 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9880 // Check if any of the elements in Vec is NaN.
9881 SDValue IsNaN = DAG.getNode(
9882 RISCVISD::SETCC_VL, DL, PredVT,
9883 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9884 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9885 // Check if the start value is NaN.
9886 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9887 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9888 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9889 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9890 MVT ResVT = Res.getSimpleValueType();
9891 return DAG.getSelect(
9892 DL, ResVT, NoNaNs, Res,
9893 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9894 ResVT));
9895}
9896
9897SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9898 SelectionDAG &DAG) const {
9899 SDValue Vec = Op.getOperand(0);
9900 SDValue SubVec = Op.getOperand(1);
9901 MVT VecVT = Vec.getSimpleValueType();
9902 MVT SubVecVT = SubVec.getSimpleValueType();
9903
9904 SDLoc DL(Op);
9905 MVT XLenVT = Subtarget.getXLenVT();
9906 unsigned OrigIdx = Op.getConstantOperandVal(2);
9907 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9908
9909 // We don't have the ability to slide mask vectors up indexed by their i1
9910 // elements; the smallest we can do is i8. Often we are able to bitcast to
9911 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9912 // into a scalable one, we might not necessarily have enough scalable
9913 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9914 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9915 (OrigIdx != 0 || !Vec.isUndef())) {
9916 if (VecVT.getVectorMinNumElements() >= 8 &&
9917 SubVecVT.getVectorMinNumElements() >= 8) {
9918 assert(OrigIdx % 8 == 0 && "Invalid index");
9919 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9920 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9921 "Unexpected mask vector lowering");
9922 OrigIdx /= 8;
9923 SubVecVT =
9924 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9925 SubVecVT.isScalableVector());
9926 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9927 VecVT.isScalableVector());
9928 Vec = DAG.getBitcast(VecVT, Vec);
9929 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9930 } else {
9931 // We can't slide this mask vector up indexed by its i1 elements.
9932 // This poses a problem when we wish to insert a scalable vector which
9933 // can't be re-expressed as a larger type. Just choose the slow path and
9934 // extend to a larger type, then truncate back down.
9935 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9936 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9937 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9938 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9939 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9940 Op.getOperand(2));
9941 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9942 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9943 }
9944 }
9945
9946 // If the subvector is a fixed-length type and we don't know VLEN
9947 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9948 // don't know which register of an LMUL group contains the specific subvector,
9949 // as we only know the minimum register size. Therefore we must slide the
9950 // vector group up the full amount.
9951 const auto VLen = Subtarget.getRealVLen();
9952 if (SubVecVT.isFixedLengthVector() && !VLen) {
9953 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9954 return Op;
9955 MVT ContainerVT = VecVT;
9956 if (VecVT.isFixedLengthVector()) {
9957 ContainerVT = getContainerForFixedLengthVector(VecVT);
9958 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9959 }
9960
9961 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9962 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9963 DAG.getUNDEF(ContainerVT), SubVec,
9964 DAG.getVectorIdxConstant(0, DL));
9965 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9966 return DAG.getBitcast(Op.getValueType(), SubVec);
9967 }
9968
9969 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9970 DAG.getUNDEF(ContainerVT), SubVec,
9971 DAG.getVectorIdxConstant(0, DL));
9972 SDValue Mask =
9973 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9974 // Set the vector length to only the number of elements we care about. Note
9975 // that for slideup this includes the offset.
9976 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9977 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9978
9979 // Use tail agnostic policy if we're inserting over Vec's tail.
9980 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9981 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9982 Policy = RISCVII::TAIL_AGNOSTIC;
9983
9984 // If we're inserting into the lowest elements, use a tail undisturbed
9985 // vmv.v.v.
9986 if (OrigIdx == 0) {
9987 SubVec =
9988 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9989 } else {
9990 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9991 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9992 SlideupAmt, Mask, VL, Policy);
9993 }
9994
9995 if (VecVT.isFixedLengthVector())
9996 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9997 return DAG.getBitcast(Op.getValueType(), SubVec);
9998 }
9999
10000 MVT ContainerVecVT = VecVT;
10001 if (VecVT.isFixedLengthVector()) {
10002 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10003 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10004 }
10005
10006 MVT ContainerSubVecVT = SubVecVT;
10007 if (SubVecVT.isFixedLengthVector()) {
10008 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10009 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10010 }
10011
10012 unsigned SubRegIdx;
10013 ElementCount RemIdx;
10014 // insert_subvector scales the index by vscale if the subvector is scalable,
10015 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10016 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10017 if (SubVecVT.isFixedLengthVector()) {
10018 assert(VLen);
10019 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10020 auto Decompose =
10021 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10022 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10023 SubRegIdx = Decompose.first;
10024 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10025 (OrigIdx % Vscale));
10026 } else {
10027 auto Decompose =
10028 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10029 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10030 SubRegIdx = Decompose.first;
10031 RemIdx = ElementCount::getScalable(Decompose.second);
10032 }
10033
10034 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10035 assert(isPowerOf2_64(
10036 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10037 bool ExactlyVecRegSized =
10038 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10039 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10040
10041 // 1. If the Idx has been completely eliminated and this subvector's size is
10042 // a vector register or a multiple thereof, or the surrounding elements are
10043 // undef, then this is a subvector insert which naturally aligns to a vector
10044 // register. These can easily be handled using subregister manipulation.
10045 // 2. If the subvector isn't an exact multiple of a valid register group size,
10046 // then the insertion must preserve the undisturbed elements of the register.
10047 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10048 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10049 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10050 // of that LMUL=1 type back into the larger vector (resolving to another
10051 // subregister operation). See below for how our VSLIDEUP works. We go via a
10052 // LMUL=1 type to avoid allocating a large register group to hold our
10053 // subvector.
10054 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10055 if (SubVecVT.isFixedLengthVector()) {
10056 // We may get NoSubRegister if inserting at index 0 and the subvec
10057 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10058 if (SubRegIdx == RISCV::NoSubRegister) {
10059 assert(OrigIdx == 0);
10060 return Op;
10061 }
10062
10063 SDValue Insert =
10064 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
10065 if (VecVT.isFixedLengthVector())
10066 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10067 return Insert;
10068 }
10069 return Op;
10070 }
10071
10072 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10073 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10074 // (in our case undisturbed). This means we can set up a subvector insertion
10075 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10076 // size of the subvector.
10077 MVT InterSubVT = ContainerVecVT;
10078 SDValue AlignedExtract = Vec;
10079 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10080 if (SubVecVT.isFixedLengthVector())
10081 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10082 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10083 InterSubVT = getLMUL1VT(ContainerVecVT);
10084 // Extract a subvector equal to the nearest full vector register type. This
10085 // should resolve to a EXTRACT_SUBREG instruction.
10086 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10087 DAG.getVectorIdxConstant(AlignedIdx, DL));
10088 }
10089
10090 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10091 DAG.getUNDEF(InterSubVT), SubVec,
10092 DAG.getVectorIdxConstant(0, DL));
10093
10094 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10095
10096 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10097 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10098
10099 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10100 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10101 if (Subtarget.expandVScale(EndIndex) ==
10102 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10103 Policy = RISCVII::TAIL_AGNOSTIC;
10104
10105 // If we're inserting into the lowest elements, use a tail undisturbed
10106 // vmv.v.v.
10107 if (RemIdx.isZero()) {
10108 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10109 SubVec, VL);
10110 } else {
10111 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10112
10113 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10114 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10115
10116 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10117 SlideupAmt, Mask, VL, Policy);
10118 }
10119
10120 // If required, insert this subvector back into the correct vector register.
10121 // This should resolve to an INSERT_SUBREG instruction.
10122 if (ContainerVecVT.bitsGT(InterSubVT))
10123 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10124 DAG.getVectorIdxConstant(AlignedIdx, DL));
10125
10126 if (VecVT.isFixedLengthVector())
10127 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10128
10129 // We might have bitcast from a mask type: cast back to the original type if
10130 // required.
10131 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10132}
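// For illustration only (registers and the vsetvli configuration are
// placeholders): inserting a fixed-length v2i32 subvector into a v8i32 vector
// at index 2 with unknown VLEN takes the vslideup path above, roughly:
//   vsetivli    zero, 4, e32, ...   ; vl = offset + subvector length,
//                                   ; tail/mask undisturbed
//   vslideup.vi v8, v10, 2          ; v10 holds the subvector in its low lanes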
10133
10134SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10135 SelectionDAG &DAG) const {
10136 SDValue Vec = Op.getOperand(0);
10137 MVT SubVecVT = Op.getSimpleValueType();
10138 MVT VecVT = Vec.getSimpleValueType();
10139
10140 SDLoc DL(Op);
10141 MVT XLenVT = Subtarget.getXLenVT();
10142 unsigned OrigIdx = Op.getConstantOperandVal(1);
10143 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10144
10145 // We don't have the ability to slide mask vectors down indexed by their i1
10146 // elements; the smallest we can do is i8. Often we are able to bitcast to
10147 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10148 // from a scalable one, we might not necessarily have enough scalable
10149 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10150 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
10151 if (VecVT.getVectorMinNumElements() >= 8 &&
10152 SubVecVT.getVectorMinNumElements() >= 8) {
10153 assert(OrigIdx % 8 == 0 && "Invalid index");
10154 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10155 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10156 "Unexpected mask vector lowering");
10157 OrigIdx /= 8;
10158 SubVecVT =
10159 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10160 SubVecVT.isScalableVector());
10161 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10162 VecVT.isScalableVector());
10163 Vec = DAG.getBitcast(VecVT, Vec);
10164 } else {
10165 // We can't slide this mask vector down, indexed by its i1 elements.
10166 // This poses a problem when we wish to extract a scalable vector which
10167 // can't be re-expressed as a larger type. Just choose the slow path and
10168 // extend to a larger type, then truncate back down.
10169 // TODO: We could probably improve this when extracting a fixed-length vector
10170 // from a fixed-length vector, where we can extract as i8 and shift the
10171 // correct element right to reach the desired subvector.
10172 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10173 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10174 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10175 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10176 Op.getOperand(1));
10177 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10178 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10179 }
10180 }
10181
10182 // With an index of 0 this is a cast-like subvector, which can be performed
10183 // with subregister operations.
10184 if (OrigIdx == 0)
10185 return Op;
10186
10187 const auto VLen = Subtarget.getRealVLen();
10188
10189 // If the subvector is a fixed-length type and we don't know VLEN
10190 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10191 // don't know which register of an LMUL group contains the specific subvector,
10192 // as we only know the minimum register size. Therefore we must slide the
10193 // vector group down the full amount.
10194 if (SubVecVT.isFixedLengthVector() && !VLen) {
10195 MVT ContainerVT = VecVT;
10196 if (VecVT.isFixedLengthVector()) {
10197 ContainerVT = getContainerForFixedLengthVector(VecVT);
10198 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10199 }
10200
10201 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10202 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10203 if (auto ShrunkVT =
10204 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10205 ContainerVT = *ShrunkVT;
10206 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10207 DAG.getVectorIdxConstant(0, DL));
10208 }
10209
10210 SDValue Mask =
10211 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10212 // Set the vector length to only the number of elements we care about. This
10213 // avoids sliding down elements we're going to discard straight away.
10214 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
10215 Subtarget);
10216 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10217 SDValue Slidedown =
10218 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10219 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10220 // Now we can use a cast-like subvector extract to get the result.
10221 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10222 DAG.getVectorIdxConstant(0, DL));
10223 return DAG.getBitcast(Op.getValueType(), Slidedown);
10224 }
10225
10226 if (VecVT.isFixedLengthVector()) {
10227 VecVT = getContainerForFixedLengthVector(VecVT);
10228 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10229 }
10230
10231 MVT ContainerSubVecVT = SubVecVT;
10232 if (SubVecVT.isFixedLengthVector())
10233 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10234
10235 unsigned SubRegIdx;
10236 ElementCount RemIdx;
10237 // extract_subvector scales the index by vscale if the subvector is scalable,
10238 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10239 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10240 if (SubVecVT.isFixedLengthVector()) {
10241 assert(VLen);
10242 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10243 auto Decompose =
10244 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10245 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10246 SubRegIdx = Decompose.first;
10247 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10248 (OrigIdx % Vscale));
10249 } else {
10250 auto Decompose =
10251 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10252 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10253 SubRegIdx = Decompose.first;
10254 RemIdx = ElementCount::getScalable(Decompose.second);
10255 }
10256
10257 // If the Idx has been completely eliminated then this is a subvector extract
10258 // which naturally aligns to a vector register. These can easily be handled
10259 // using subregister manipulation.
10260 if (RemIdx.isZero()) {
10261 if (SubVecVT.isFixedLengthVector()) {
10262 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10263 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10264 }
10265 return Op;
10266 }
10267
10268 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10269 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10270 // divide exactly.
10271 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10272 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10273
10274 // If the vector type is an LMUL-group type, extract a subvector equal to the
10275 // nearest full vector register type.
10276 MVT InterSubVT = VecVT;
10277 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10278 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10279 // we should have successfully decomposed the extract into a subregister.
10280 assert(SubRegIdx != RISCV::NoSubRegister);
10281 InterSubVT = getLMUL1VT(VecVT);
10282 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10283 }
10284
10285 // Slide this vector register down by the desired number of elements in order
10286 // to place the desired subvector starting at element 0.
10287 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10288 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10289 if (SubVecVT.isFixedLengthVector())
10290 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10291 Subtarget);
10292 SDValue Slidedown =
10293 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10294 Vec, SlidedownAmt, Mask, VL);
10295
10296 // Now the vector is in the right position, extract our final subvector. This
10297 // should resolve to a COPY.
10298 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10299 DAG.getVectorIdxConstant(0, DL));
10300
10301 // We might have bitcast from a mask type: cast back to the original type if
10302 // required.
10303 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10304}
10305
10306// Widen a vector's operands to i8, then truncate its results back to the
10307// original type, typically i1. All operand and result types must be the same.
10308 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10309 SelectionDAG &DAG) {
10310 MVT VT = N.getSimpleValueType();
10311 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10312 SmallVector<SDValue, 4> WideOps;
10313 for (SDValue Op : N->ops()) {
10314 assert(Op.getSimpleValueType() == VT &&
10315 "Operands and result must be same type");
10316 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10317 }
10318
10319 unsigned NumVals = N->getNumValues();
10320
10321 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10322 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10323 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10324 SmallVector<SDValue, 4> TruncVals;
10325 for (unsigned I = 0; I < NumVals; I++) {
10326 TruncVals.push_back(
10327 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10328 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10329 }
10330
10331 if (TruncVals.size() > 1)
10332 return DAG.getMergeValues(TruncVals, DL);
10333 return TruncVals.front();
10334}
10335
10336SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10337 SelectionDAG &DAG) const {
10338 SDLoc DL(Op);
10339 MVT VecVT = Op.getSimpleValueType();
10340
10341 assert(VecVT.isScalableVector() &&
10342 "vector_interleave on non-scalable vector!");
10343
10344 // 1 bit element vectors need to be widened to e8
10345 if (VecVT.getVectorElementType() == MVT::i1)
10346 return widenVectorOpsToi8(Op, DL, DAG);
10347
10348 // If the VT is LMUL=8, we need to split and reassemble.
10349 if (VecVT.getSizeInBits().getKnownMinValue() ==
10350 (8 * RISCV::RVVBitsPerBlock)) {
10351 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10352 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10353 EVT SplitVT = Op0Lo.getValueType();
10354
10355 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10356 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10357 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10358 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10359
10360 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10361 ResLo.getValue(0), ResHi.getValue(0));
10362 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10363 ResHi.getValue(1));
10364 return DAG.getMergeValues({Even, Odd}, DL);
10365 }
10366
10367 // Concatenate the two vectors as one vector to deinterleave
10368 MVT ConcatVT =
10369 MVT::getVectorVT(VecVT.getVectorElementType(),
10370 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10371 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10372 Op.getOperand(0), Op.getOperand(1));
10373
10374 // We want to operate on all lanes, so get the mask and VL and mask for it
10375 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10376 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10377
10378 // We can deinterleave through vnsrl.wi if the element type is smaller than
10379 // ELEN
10380 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10381 SDValue Even =
10382 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10383 SDValue Odd =
10384 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10385 return DAG.getMergeValues({Even, Odd}, DL);
10386 }
10387
10388 // For the indices, use the same SEW to avoid an extra vsetvli
10389 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10390 // Create a vector of even indices {0, 2, 4, ...}
10391 SDValue EvenIdx =
10392 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10393 // Create a vector of odd indices {1, 3, 5, ... }
10394 SDValue OddIdx =
10395 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10396
10397 // Gather the even and odd elements into two separate vectors
10398 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10399 Concat, EvenIdx, Passthru, Mask, VL);
10400 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10401 Concat, OddIdx, Passthru, Mask, VL);
10402
10403 // Extract the result half of the gather for even and odd
10404 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10405 DAG.getVectorIdxConstant(0, DL));
10406 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10407 DAG.getVectorIdxConstant(0, DL));
10408
10409 return DAG.getMergeValues({Even, Odd}, DL);
10410}
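// For illustration only (registers are placeholders): when SEW < ELEN the
// vnsrl path above views each pair of elements of the concatenation as one
// double-width element, e.g. for SEW=16:
//   vnsrl.wi v8, v16, 0             ; even elements
//   vnsrl.wi v10, v16, 16           ; odd elements (shift by SEW)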
10411
10412SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10413 SelectionDAG &DAG) const {
10414 SDLoc DL(Op);
10415 MVT VecVT = Op.getSimpleValueType();
10416
10417 assert(VecVT.isScalableVector() &&
10418 "vector_interleave on non-scalable vector!");
10419
10420 // i1 vectors need to be widened to i8
10421 if (VecVT.getVectorElementType() == MVT::i1)
10422 return widenVectorOpsToi8(Op, DL, DAG);
10423
10424 MVT XLenVT = Subtarget.getXLenVT();
10425 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10426
10427 // If the VT is LMUL=8, we need to split and reassemble.
10428 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10429 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10430 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10431 EVT SplitVT = Op0Lo.getValueType();
10432
10433 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10434 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10435 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10436 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10437
10438 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10439 ResLo.getValue(0), ResLo.getValue(1));
10440 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10441 ResHi.getValue(0), ResHi.getValue(1));
10442 return DAG.getMergeValues({Lo, Hi}, DL);
10443 }
10444
10445 SDValue Interleaved;
10446
10447 // If the element type is smaller than ELEN, then we can interleave with
10448 // vwaddu.vv and vwmaccu.vx
10449 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10450 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10451 DAG, Subtarget);
10452 } else {
10453 // Otherwise, fallback to using vrgathere16.vv
10454 MVT ConcatVT =
10455 MVT::getVectorVT(VecVT.getVectorElementType(),
10456 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10457 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10458 Op.getOperand(0), Op.getOperand(1));
10459
10460 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10461
10462 // 0 1 2 3 4 5 6 7 ...
10463 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10464
10465 // 1 1 1 1 1 1 1 1 ...
10466 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10467
10468 // 1 0 1 0 1 0 1 0 ...
10469 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10470 OddMask = DAG.getSetCC(
10471 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10472 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10473 ISD::CondCode::SETNE);
10474
10475 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10476
10477 // Build up the index vector for interleaving the concatenated vector
10478 // 0 0 1 1 2 2 3 3 ...
10479 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10480 // 0 n 1 n+1 2 n+2 3 n+3 ...
10481 Idx =
10482 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10483
10484 // Then perform the interleave
10485 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10486 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10487 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10488 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10489 }
10490
10491 // Extract the two halves from the interleaved result
10492 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10493 DAG.getVectorIdxConstant(0, DL));
10494 SDValue Hi = DAG.getNode(
10495 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10496 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10497
10498 return DAG.getMergeValues({Lo, Hi}, DL);
10499}
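// For illustration only (registers are placeholders): the widening interleave
// used above for SEW < ELEN computes zext(a) + 2^SEW * zext(b), which places
// the pairs next to each other in a double-width vector, roughly:
//   vwaddu.vv  v12, v8, v10         ; v12 = zext(a) + zext(b)
//   li         a0, -1
//   vwmaccu.vx v12, a0, v10         ; v12 += (2^SEW - 1) * zext(b)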
10500
10501// Lower step_vector to the vid instruction. Any non-identity step value must
10502 // be accounted for by manual expansion.
10503SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10504 SelectionDAG &DAG) const {
10505 SDLoc DL(Op);
10506 MVT VT = Op.getSimpleValueType();
10507 assert(VT.isScalableVector() && "Expected scalable vector");
10508 MVT XLenVT = Subtarget.getXLenVT();
10509 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10510 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10511 uint64_t StepValImm = Op.getConstantOperandVal(0);
10512 if (StepValImm != 1) {
10513 if (isPowerOf2_64(StepValImm)) {
10514 SDValue StepVal =
10515 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10516 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10517 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10518 } else {
10519 SDValue StepVal = lowerScalarSplat(
10520 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10521 VL, VT, DL, DAG, Subtarget);
10522 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10523 }
10524 }
10525 return StepVec;
10526}
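// For illustration only (registers are placeholders): step_vector with a
// power-of-two step such as 4 becomes a vid followed by a shift:
//   vid.v   v8
//   vsll.vi v8, v8, 2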
10527
10528// Implement vector_reverse using vrgather.vv with indices determined by
10529// subtracting the id of each element from (VLMAX-1). This will convert
10530// the indices like so:
10531// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10532// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10533SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10534 SelectionDAG &DAG) const {
10535 SDLoc DL(Op);
10536 MVT VecVT = Op.getSimpleValueType();
10537 if (VecVT.getVectorElementType() == MVT::i1) {
10538 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10539 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10540 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10541 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10542 }
10543 unsigned EltSize = VecVT.getScalarSizeInBits();
10544 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10545 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10546 unsigned MaxVLMAX =
10547 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10548
10549 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10550 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10551
10552 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10553 // to use vrgatherei16.vv.
10554 // TODO: It's also possible to use vrgatherei16.vv for other types to
10555 // decrease register width for the index calculation.
10556 if (MaxVLMAX > 256 && EltSize == 8) {
10557 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10558 // Reverse each half, then reassemble them in reverse order.
10559 // NOTE: It's also possible that after splitting that VLMAX no longer
10560 // requires vrgatherei16.vv.
10561 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10562 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10563 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10564 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10565 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10566 // Reassemble the low and high pieces reversed.
10567 // FIXME: This is a CONCAT_VECTORS.
10568 SDValue Res =
10569 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10570 DAG.getVectorIdxConstant(0, DL));
10571 return DAG.getNode(
10572 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10573 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10574 }
10575
10576 // Just promote the int type to i16 which will double the LMUL.
10577 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10578 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10579 }
10580
10581 MVT XLenVT = Subtarget.getXLenVT();
10582 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10583
10584 // Calculate VLMAX-1 for the desired SEW.
10585 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10586 computeVLMax(VecVT, DL, DAG),
10587 DAG.getConstant(1, DL, XLenVT));
10588
10589 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10590 bool IsRV32E64 =
10591 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10592 SDValue SplatVL;
10593 if (!IsRV32E64)
10594 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10595 else
10596 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10597 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10598
10599 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10600 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10601 DAG.getUNDEF(IntVT), Mask, VL);
10602
10603 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10604 DAG.getUNDEF(VecVT), Mask, VL);
10605}
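// For illustration only (registers are placeholders): in the common case
// (no i1 widening and no vrgatherei16 promotion) the reverse above comes out
// roughly as, with a0 holding VLMAX-1:
//   vid.v       v12
//   vrsub.vx    v12, v12, a0        ; indices = (VLMAX-1) - i
//   vrgather.vv v8, v16, v12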
10606
10607SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10608 SelectionDAG &DAG) const {
10609 SDLoc DL(Op);
10610 SDValue V1 = Op.getOperand(0);
10611 SDValue V2 = Op.getOperand(1);
10612 MVT XLenVT = Subtarget.getXLenVT();
10613 MVT VecVT = Op.getSimpleValueType();
10614
10615 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10616
10617 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10618 SDValue DownOffset, UpOffset;
10619 if (ImmValue >= 0) {
10620 // The operand is a TargetConstant; we need to rebuild it as a regular
10621 // constant.
10622 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10623 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10624 } else {
10625 // The operand is a TargetConstant; we need to rebuild it as a regular
10626 // constant rather than negating the original operand.
10627 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10628 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10629 }
10630
10631 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10632
10633 SDValue SlideDown =
10634 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10635 DownOffset, TrueMask, UpOffset);
10636 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10637 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10638 RISCVII::TAIL_AGNOSTIC);
10639}
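// For illustration only (registers are placeholders): a splice with a positive
// immediate such as 2 slides V1 down and then slides V2 up, roughly, with a0
// holding VLMAX-2:
//   vslidedown.vi v8, v9, 2          ; vl = VLMAX-2
//   vslideup.vx   v8, v10, a0        ; vl = VLMAX, tail agnostic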
10640
10641SDValue
10642RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10643 SelectionDAG &DAG) const {
10644 SDLoc DL(Op);
10645 auto *Load = cast<LoadSDNode>(Op);
10646
10647 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10648 Load->getMemoryVT(),
10649 *Load->getMemOperand()) &&
10650 "Expecting a correctly-aligned load");
10651
10652 MVT VT = Op.getSimpleValueType();
10653 MVT XLenVT = Subtarget.getXLenVT();
10654 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10655
10656 // If we know the exact VLEN and our fixed length vector completely fills
10657 // the container, use a whole register load instead.
10658 const auto [MinVLMAX, MaxVLMAX] =
10659 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10660 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10661 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10662 MachineMemOperand *MMO = Load->getMemOperand();
10663 SDValue NewLoad =
10664 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10665 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10666 MMO->getAAInfo(), MMO->getRanges());
10667 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10668 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10669 }
10670
10671 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10672
10673 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10674 SDValue IntID = DAG.getTargetConstant(
10675 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10676 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10677 if (!IsMaskOp)
10678 Ops.push_back(DAG.getUNDEF(ContainerVT));
10679 Ops.push_back(Load->getBasePtr());
10680 Ops.push_back(VL);
10681 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10682 SDValue NewLoad =
10683 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10684 Load->getMemoryVT(), Load->getMemOperand());
10685
10686 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10687 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10688}
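// For illustration only (registers and the vsetvli configuration are
// placeholders): a fixed-length v4i32 load with unknown VLEN takes the
// riscv_vle path above, roughly:
//   vsetivli zero, 4, e32, ...
//   vle32.v  v8, (a0)
// whereas with an exactly-known VLEN that the container fills completely, a
// whole register load is emitted instead.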
10689
10690SDValue
10691RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10692 SelectionDAG &DAG) const {
10693 SDLoc DL(Op);
10694 auto *Store = cast<StoreSDNode>(Op);
10695
10697 Store->getMemoryVT(),
10698 *Store->getMemOperand()) &&
10699 "Expecting a correctly-aligned store");
10700
10701 SDValue StoreVal = Store->getValue();
10702 MVT VT = StoreVal.getSimpleValueType();
10703 MVT XLenVT = Subtarget.getXLenVT();
10704
10705 // If the size is less than a byte, we need to pad with zeros to make a byte.
10706 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10707 VT = MVT::v8i1;
10708 StoreVal =
10709 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10710 StoreVal, DAG.getVectorIdxConstant(0, DL));
10711 }
10712
10713 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10714
10715 SDValue NewValue =
10716 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10717
10718
10719 // If we know the exact VLEN and our fixed length vector completely fills
10720 // the container, use a whole register store instead.
10721 const auto [MinVLMAX, MaxVLMAX] =
10722 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10723 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10724 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10725 MachineMemOperand *MMO = Store->getMemOperand();
10726 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10727 MMO->getPointerInfo(), MMO->getBaseAlign(),
10728 MMO->getFlags(), MMO->getAAInfo());
10729 }
10730
10731 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10732 Subtarget);
10733
10734 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10735 SDValue IntID = DAG.getTargetConstant(
10736 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10737 return DAG.getMemIntrinsicNode(
10738 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10739 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10740 Store->getMemoryVT(), Store->getMemOperand());
10741}
10742
10743SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10744 SelectionDAG &DAG) const {
10745 SDLoc DL(Op);
10746 MVT VT = Op.getSimpleValueType();
10747
10748 const auto *MemSD = cast<MemSDNode>(Op);
10749 EVT MemVT = MemSD->getMemoryVT();
10750 MachineMemOperand *MMO = MemSD->getMemOperand();
10751 SDValue Chain = MemSD->getChain();
10752 SDValue BasePtr = MemSD->getBasePtr();
10753
10754 SDValue Mask, PassThru, VL;
10755 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10756 Mask = VPLoad->getMask();
10757 PassThru = DAG.getUNDEF(VT);
10758 VL = VPLoad->getVectorLength();
10759 } else {
10760 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10761 Mask = MLoad->getMask();
10762 PassThru = MLoad->getPassThru();
10763 }
10764
10765 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10766
10767 MVT XLenVT = Subtarget.getXLenVT();
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10773 if (!IsUnmasked) {
10774 MVT MaskVT = getMaskTypeFor(ContainerVT);
10775 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10776 }
10777 }
10778
10779 if (!VL)
10780 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10781
10782 unsigned IntID =
10783 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10784 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10785 if (IsUnmasked)
10786 Ops.push_back(DAG.getUNDEF(ContainerVT));
10787 else
10788 Ops.push_back(PassThru);
10789 Ops.push_back(BasePtr);
10790 if (!IsUnmasked)
10791 Ops.push_back(Mask);
10792 Ops.push_back(VL);
10793 if (!IsUnmasked)
10794 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10795
10796 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10797
10798 SDValue Result =
10799 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10800 Chain = Result.getValue(1);
10801
10802 if (VT.isFixedLengthVector())
10803 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10804
10805 return DAG.getMergeValues({Result, Chain}, DL);
10806}
10807
10808SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10809 SelectionDAG &DAG) const {
10810 SDLoc DL(Op);
10811
10812 const auto *MemSD = cast<MemSDNode>(Op);
10813 EVT MemVT = MemSD->getMemoryVT();
10814 MachineMemOperand *MMO = MemSD->getMemOperand();
10815 SDValue Chain = MemSD->getChain();
10816 SDValue BasePtr = MemSD->getBasePtr();
10817 SDValue Val, Mask, VL;
10818
10819 bool IsCompressingStore = false;
10820 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10821 Val = VPStore->getValue();
10822 Mask = VPStore->getMask();
10823 VL = VPStore->getVectorLength();
10824 } else {
10825 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10826 Val = MStore->getValue();
10827 Mask = MStore->getMask();
10828 IsCompressingStore = MStore->isCompressingStore();
10829 }
10830
10831 bool IsUnmasked =
10832 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10833
10834 MVT VT = Val.getSimpleValueType();
10835 MVT XLenVT = Subtarget.getXLenVT();
10836
10837 MVT ContainerVT = VT;
10838 if (VT.isFixedLengthVector()) {
10839 ContainerVT = getContainerForFixedLengthVector(VT);
10840
10841 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10842 if (!IsUnmasked || IsCompressingStore) {
10843 MVT MaskVT = getMaskTypeFor(ContainerVT);
10844 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10845 }
10846 }
10847
10848 if (!VL)
10849 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10850
10851 if (IsCompressingStore) {
10852 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10853 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10854 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10855 VL =
10856 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10857 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10858 }
10859
10860 unsigned IntID =
10861 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10862 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10863 Ops.push_back(Val);
10864 Ops.push_back(BasePtr);
10865 if (!IsUnmasked)
10866 Ops.push_back(Mask);
10867 Ops.push_back(VL);
10868
10869 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10870 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10871}
10872
10873SDValue
10874RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10875 SelectionDAG &DAG) const {
10876 MVT InVT = Op.getOperand(0).getSimpleValueType();
10877 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10878
10879 MVT VT = Op.getSimpleValueType();
10880
10881 SDValue Op1 =
10882 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10883 SDValue Op2 =
10884 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10885
10886 SDLoc DL(Op);
10887 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10888 DAG, Subtarget);
10889 MVT MaskVT = getMaskTypeFor(ContainerVT);
10890
10891 SDValue Cmp =
10892 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10893 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10894
10895 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10896}
10897
10898SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10899 SelectionDAG &DAG) const {
10900 unsigned Opc = Op.getOpcode();
10901 SDLoc DL(Op);
10902 SDValue Chain = Op.getOperand(0);
10903 SDValue Op1 = Op.getOperand(1);
10904 SDValue Op2 = Op.getOperand(2);
10905 SDValue CC = Op.getOperand(3);
10906 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10907 MVT VT = Op.getSimpleValueType();
10908 MVT InVT = Op1.getSimpleValueType();
10909
10910 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
10911 // condition codes.
10912 if (Opc == ISD::STRICT_FSETCCS) {
10913 // Expand strict_fsetccs(x, oeq) to
10914 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10915 SDVTList VTList = Op->getVTList();
10916 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10917 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10918 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10919 Op2, OLECCVal);
10920 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10921 Op1, OLECCVal);
10922 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10923 Tmp1.getValue(1), Tmp2.getValue(1));
10924 // Tmp1 and Tmp2 might be the same node.
10925 if (Tmp1 != Tmp2)
10926 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10927 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10928 }
10929
10930 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10931 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10932 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10933 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10934 Op2, OEQCCVal);
10935 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10936 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10937 }
10938 }
10939
10940 MVT ContainerInVT = InVT;
10941 if (InVT.isFixedLengthVector()) {
10942 ContainerInVT = getContainerForFixedLengthVector(InVT);
10943 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10944 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10945 }
10946 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10947
10948 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10949
10950 SDValue Res;
10951 if (Opc == ISD::STRICT_FSETCC &&
10952 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10953 CCVal == ISD::SETOLE)) {
10954 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10955 // is only active when both input elements are ordered.
10956 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10957 SDValue OrderMask1 = DAG.getNode(
10958 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10959 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10960 True, VL});
10961 SDValue OrderMask2 = DAG.getNode(
10962 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10963 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10964 True, VL});
10965 Mask =
10966 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10967 // Use Mask as the merge operand to let the result be 0 if either of the
10968 // inputs is unordered.
10969 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10970 DAG.getVTList(MaskVT, MVT::Other),
10971 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10972 } else {
10973 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10974 : RISCVISD::STRICT_FSETCCS_VL;
10975 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10976 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10977 }
10978
10979 if (VT.isFixedLengthVector()) {
10980 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10981 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10982 }
10983 return Res;
10984}
10985
10986// Lower vector ABS to smax(X, sub(0, X)).
10987SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10988 SDLoc DL(Op);
10989 MVT VT = Op.getSimpleValueType();
10990 SDValue X = Op.getOperand(0);
10991
10992 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10993 "Unexpected type for ISD::ABS");
10994
10995 MVT ContainerVT = VT;
10996 if (VT.isFixedLengthVector()) {
10997 ContainerVT = getContainerForFixedLengthVector(VT);
10998 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10999 }
11000
11001 SDValue Mask, VL;
11002 if (Op->getOpcode() == ISD::VP_ABS) {
11003 Mask = Op->getOperand(1);
11004 if (VT.isFixedLengthVector())
11005 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11006 Subtarget);
11007 VL = Op->getOperand(2);
11008 } else
11009 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11010
11011 SDValue SplatZero = DAG.getNode(
11012 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11013 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11014 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11015 DAG.getUNDEF(ContainerVT), Mask, VL);
11016 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11017 DAG.getUNDEF(ContainerVT), Mask, VL);
11018
11019 if (VT.isFixedLengthVector())
11020 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11021 return Max;
11022}
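// For illustration only (registers are placeholders): the smax(X, 0-X)
// expansion above typically materializes as:
//   vrsub.vi v9, v8, 0              ; 0 - x
//   vmax.vv  v8, v8, v9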
11023
11024SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11025 SDValue Op, SelectionDAG &DAG) const {
11026 SDLoc DL(Op);
11027 MVT VT = Op.getSimpleValueType();
11028 SDValue Mag = Op.getOperand(0);
11029 SDValue Sign = Op.getOperand(1);
11030 assert(Mag.getValueType() == Sign.getValueType() &&
11031 "Can only handle COPYSIGN with matching types.");
11032
11033 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11034 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11035 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11036
11037 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11038
11039 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11040 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11041
11042 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11043}
11044
11045SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11046 SDValue Op, SelectionDAG &DAG) const {
11047 MVT VT = Op.getSimpleValueType();
11048 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11049
11050 MVT I1ContainerVT =
11051 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11052
11053 SDValue CC =
11054 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11055 SDValue Op1 =
11056 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11057 SDValue Op2 =
11058 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11059
11060 SDLoc DL(Op);
11061 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11062
11063 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11064 Op2, DAG.getUNDEF(ContainerVT), VL);
11065
11066 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11067}
11068
11069SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11070 SelectionDAG &DAG) const {
11071 unsigned NewOpc = getRISCVVLOp(Op);
11072 bool HasMergeOp = hasMergeOp(NewOpc);
11073 bool HasMask = hasMaskOp(NewOpc);
11074
11075 MVT VT = Op.getSimpleValueType();
11076 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11077
11078 // Create list of operands by converting existing ones to scalable types.
11079 SmallVector<SDValue, 6> Ops;
11080 for (const SDValue &V : Op->op_values()) {
11081 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11082
11083 // Pass through non-vector operands.
11084 if (!V.getValueType().isVector()) {
11085 Ops.push_back(V);
11086 continue;
11087 }
11088
11089 // "cast" fixed length vector to a scalable vector.
11090 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11091 "Only fixed length vectors are supported!");
11092 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11093 }
11094
11095 SDLoc DL(Op);
11096 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11097 if (HasMergeOp)
11098 Ops.push_back(DAG.getUNDEF(ContainerVT));
11099 if (HasMask)
11100 Ops.push_back(Mask);
11101 Ops.push_back(VL);
11102
11103 // StrictFP operations have two result values. Their lowered result should
11104 // have the same result count.
11105 if (Op->isStrictFPOpcode()) {
11106 SDValue ScalableRes =
11107 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11108 Op->getFlags());
11109 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11110 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11111 }
11112
11113 SDValue ScalableRes =
11114 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11115 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11116}
11117
11118// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11119// * Operands of each node are assumed to be in the same order.
11120// * The EVL operand is promoted from i32 to i64 on RV64.
11121// * Fixed-length vectors are converted to their scalable-vector container
11122// types.
11123SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11124 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11125 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
11126
11127 SDLoc DL(Op);
11128 MVT VT = Op.getSimpleValueType();
11129 SmallVector<SDValue, 16> Ops;
11130
11131 MVT ContainerVT = VT;
11132 if (VT.isFixedLengthVector())
11133 ContainerVT = getContainerForFixedLengthVector(VT);
11134
11135 for (const auto &OpIdx : enumerate(Op->ops())) {
11136 SDValue V = OpIdx.value();
11137 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11138 // Add dummy merge value before the mask. Or if there isn't a mask, before
11139 // EVL.
11140 if (HasMergeOp) {
11141 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11142 if (MaskIdx) {
11143 if (*MaskIdx == OpIdx.index())
11144 Ops.push_back(DAG.getUNDEF(ContainerVT));
11145 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11146 OpIdx.index()) {
11147 if (Op.getOpcode() == ISD::VP_MERGE) {
11148 // For VP_MERGE, copy the false operand instead of an undef value.
11149 Ops.push_back(Ops.back());
11150 } else {
11151 assert(Op.getOpcode() == ISD::VP_SELECT);
11152 // For VP_SELECT, add an undef value.
11153 Ops.push_back(DAG.getUNDEF(ContainerVT));
11154 }
11155 }
11156 }
11157 // Pass through operands which aren't fixed-length vectors.
11158 if (!V.getValueType().isFixedLengthVector()) {
11159 Ops.push_back(V);
11160 continue;
11161 }
11162 // "cast" fixed length vector to a scalable vector.
11163 MVT OpVT = V.getSimpleValueType();
11164 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11165 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11166 "Only fixed length vectors are supported!");
11167 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11168 }
11169
11170 if (!VT.isFixedLengthVector())
11171 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11172
11173 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11174
11175 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11176}
11177
11178SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11179 SelectionDAG &DAG) const {
11180 SDLoc DL(Op);
11181 MVT VT = Op.getSimpleValueType();
11182
11183 SDValue Src = Op.getOperand(0);
11184 // NOTE: Mask is dropped.
11185 SDValue VL = Op.getOperand(2);
11186
11187 MVT ContainerVT = VT;
11188 if (VT.isFixedLengthVector()) {
11189 ContainerVT = getContainerForFixedLengthVector(VT);
11190 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11191 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11192 }
11193
11194 MVT XLenVT = Subtarget.getXLenVT();
11195 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11196 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11197 DAG.getUNDEF(ContainerVT), Zero, VL);
11198
11199 SDValue SplatValue = DAG.getConstant(
11200 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11201 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11202 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11203
11204 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11205 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11206 if (!VT.isFixedLengthVector())
11207 return Result;
11208 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11209}
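// Worked example (editor's note, not from the original source): for vp.zext
// of a mask, the merge above selects the splat of 1 where the source bit is
// set and the splat of 0 elsewhere, so a mask of <1,0,1> with EVL = 3 becomes
// <1,0,1> in the wider integer type; vp.sext splats -1 instead, producing
// <-1,0,-1>.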
11210
11211SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11212 SelectionDAG &DAG) const {
11213 SDLoc DL(Op);
11214 MVT VT = Op.getSimpleValueType();
11215
11216 SDValue Op1 = Op.getOperand(0);
11217 SDValue Op2 = Op.getOperand(1);
11218 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11219 // NOTE: Mask is dropped.
11220 SDValue VL = Op.getOperand(4);
11221
11222 MVT ContainerVT = VT;
11223 if (VT.isFixedLengthVector()) {
11224 ContainerVT = getContainerForFixedLengthVector(VT);
11225 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11226 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11227 }
11228
11229 SDValue Result;
11230 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11231
11232 switch (Condition) {
11233 default:
11234 break;
11235 // X != Y --> (X^Y)
11236 case ISD::SETNE:
11237 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11238 break;
11239 // X == Y --> ~(X^Y)
11240 case ISD::SETEQ: {
11241 SDValue Temp =
11242 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11243 Result =
11244 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11245 break;
11246 }
11247 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11248 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11249 case ISD::SETGT:
11250 case ISD::SETULT: {
11251 SDValue Temp =
11252 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11253 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11254 break;
11255 }
11256 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11257 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11258 case ISD::SETLT:
11259 case ISD::SETUGT: {
11260 SDValue Temp =
11261 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11262 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11263 break;
11264 }
11265 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11266 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11267 case ISD::SETGE:
11268 case ISD::SETULE: {
11269 SDValue Temp =
11270 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11271 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
11272 break;
11273 }
11274 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11275 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11276 case ISD::SETLE:
11277 case ISD::SETUGE: {
11278 SDValue Temp =
11279 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11280 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
11281 break;
11282 }
11283 }
11284
11285 if (!VT.isFixedLengthVector())
11286 return Result;
11287 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11288}
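// Illustration (editor's sketch, not from the original source; the helper
// name below is ours): the setcc-on-mask lowering above relies on boolean
// identities over i1, where a set bit has signed value -1 and unsigned value
// 1. A self-contained exhaustive check of the identities used for
// SETGT/SETULT and SETGE/SETULE (the remaining cases are symmetric):
namespace {
constexpr bool maskSetCCIdentitiesHold() {
  for (int X = 0; X <= 1; ++X)
    for (int Y = 0; Y <= 1; ++Y) {
      bool SGT = -X > -Y, ULT = X < Y;   // both must equal ~X & Y
      bool SGE = -X >= -Y, ULE = X <= Y; // both must equal ~X | Y
      if (SGT != (!X && Y != 0) || ULT != (!X && Y != 0))
        return false;
      if (SGE != (!X || Y != 0) || ULE != (!X || Y != 0))
        return false;
    }
  return true;
}
static_assert(maskSetCCIdentitiesHold(),
              "mask setcc lowering boolean identities");
} // namespace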
11289
11290// Lower Floating-Point/Integer Type-Convert VP SDNodes
11291SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11292 SelectionDAG &DAG) const {
11293 SDLoc DL(Op);
11294
11295 SDValue Src = Op.getOperand(0);
11296 SDValue Mask = Op.getOperand(1);
11297 SDValue VL = Op.getOperand(2);
11298 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11299
11300 MVT DstVT = Op.getSimpleValueType();
11301 MVT SrcVT = Src.getSimpleValueType();
11302 if (DstVT.isFixedLengthVector()) {
11303 DstVT = getContainerForFixedLengthVector(DstVT);
11304 SrcVT = getContainerForFixedLengthVector(SrcVT);
11305 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11306 MVT MaskVT = getMaskTypeFor(DstVT);
11307 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11308 }
11309
11310 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11311 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11312
11313 SDValue Result;
11314 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11315 if (SrcVT.isInteger()) {
11316 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11317
11318 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11319 ? RISCVISD::VSEXT_VL
11320 : RISCVISD::VZEXT_VL;
11321
11322 // Do we need to do any pre-widening before converting?
11323 if (SrcEltSize == 1) {
11324 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11325 MVT XLenVT = Subtarget.getXLenVT();
11326 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11327 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11328 DAG.getUNDEF(IntVT), Zero, VL);
11329 SDValue One = DAG.getConstant(
11330 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11331 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11332 DAG.getUNDEF(IntVT), One, VL);
11333 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11334 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11335 } else if (DstEltSize > (2 * SrcEltSize)) {
11336 // Widen before converting.
11337 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11338 DstVT.getVectorElementCount());
11339 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11340 }
11341
11342 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11343 } else {
11344 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11345 "Wrong input/output vector types");
11346
11347 // Convert f16 to f32 then convert f32 to i64.
11348 if (DstEltSize > (2 * SrcEltSize)) {
11349 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11350 MVT InterimFVT =
11351 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11352 Src =
11353 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11354 }
11355
11356 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11357 }
11358 } else { // Narrowing + Conversion
11359 if (SrcVT.isInteger()) {
11360 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11361 // First do a narrowing convert to an FP type half the size, then round
11362 // the FP type to a small FP type if needed.
11363
11364 MVT InterimFVT = DstVT;
11365 if (SrcEltSize > (2 * DstEltSize)) {
11366 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11367 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11368 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11369 }
11370
11371 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11372
11373 if (InterimFVT != DstVT) {
11374 Src = Result;
11375 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11376 }
11377 } else {
11378 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11379 "Wrong input/output vector types");
11380 // First do a narrowing conversion to an integer half the size, then
11381 // truncate if needed.
11382
11383 if (DstEltSize == 1) {
11384 // First convert to the same size integer, then convert to mask using
11385 // setcc.
11386 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11387 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11388 DstVT.getVectorElementCount());
11389 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11390
11391 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11392 // otherwise the conversion was undefined.
11393 MVT XLenVT = Subtarget.getXLenVT();
11394 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11395 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11396 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11397 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11398 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11399 DAG.getUNDEF(DstVT), Mask, VL});
11400 } else {
11401 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11402 DstVT.getVectorElementCount());
11403
11404 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11405
11406 while (InterimIVT != DstVT) {
11407 SrcEltSize /= 2;
11408 Src = Result;
11409 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11410 DstVT.getVectorElementCount());
11411 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11412 Src, Mask, VL);
11413 }
11414 }
11415 }
11416 }
11417
11418 MVT VT = Op.getSimpleValueType();
11419 if (!VT.isFixedLengthVector())
11420 return Result;
11421 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11422}
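// Worked example (editor's note, not from the original source): converting
// f64 to i8 takes the narrowing path above: the first narrowing conversion
// produces an i32 result (SrcEltSize / 2), and the truncate loop then narrows
// i32 -> i16 -> i8 with TRUNCATE_VECTOR_VL until InterimIVT matches the
// destination type.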
11423
11424SDValue
11425RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11426 SelectionDAG &DAG) const {
11427 SDLoc DL(Op);
11428
11429 SDValue Op1 = Op.getOperand(0);
11430 SDValue Op2 = Op.getOperand(1);
11431 SDValue Offset = Op.getOperand(2);
11432 SDValue Mask = Op.getOperand(3);
11433 SDValue EVL1 = Op.getOperand(4);
11434 SDValue EVL2 = Op.getOperand(5);
11435
11436 const MVT XLenVT = Subtarget.getXLenVT();
11437 MVT VT = Op.getSimpleValueType();
11438 MVT ContainerVT = VT;
11439 if (VT.isFixedLengthVector()) {
11440 ContainerVT = getContainerForFixedLengthVector(VT);
11441 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11442 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11443 MVT MaskVT = getMaskTypeFor(ContainerVT);
11444 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11445 }
11446
11447 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11448 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11449
11450 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11451 if (IsMaskVector) {
11452 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11453
11454 // Expand input operands
11455 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11456 DAG.getUNDEF(ContainerVT),
11457 DAG.getConstant(1, DL, XLenVT), EVL1);
11458 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11459 DAG.getUNDEF(ContainerVT),
11460 DAG.getConstant(0, DL, XLenVT), EVL1);
11461 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11462 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11463
11464 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11465 DAG.getUNDEF(ContainerVT),
11466 DAG.getConstant(1, DL, XLenVT), EVL2);
11467 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11468 DAG.getUNDEF(ContainerVT),
11469 DAG.getConstant(0, DL, XLenVT), EVL2);
11470 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11471 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11472 }
11473
11474 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11475 SDValue DownOffset, UpOffset;
11476 if (ImmValue >= 0) {
11477 // The operand is a TargetConstant, we need to rebuild it as a regular
11478 // constant.
11479 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11480 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11481 } else {
11482 // The operand is a TargetConstant, we need to rebuild it as a regular
11483 // constant rather than negating the original operand.
11484 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11485 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11486 }
11487
11488 SDValue SlideDown =
11489 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11490 Op1, DownOffset, Mask, UpOffset);
11491 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11492 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11493
11494 if (IsMaskVector) {
11495 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11496 Result = DAG.getNode(
11497 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11498 {Result, DAG.getConstant(0, DL, ContainerVT),
11499 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11500 Mask, EVL2});
11501 }
11502
11503 if (!VT.isFixedLengthVector())
11504 return Result;
11505 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11506}
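// Worked example (editor's note, not from the original source): for
// vp.splice with EVL1 = 8 and Offset = 3, DownOffset = 3 and UpOffset = 5;
// the vslidedown moves Op1 elements 3..7 into positions 0..4, and the
// vslideup then writes Op2 starting at position 5, yielding the spliced
// vector {Op1[3..7], Op2[0..]} under the given mask and EVL2.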
11507
11508SDValue
11509RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11510 SelectionDAG &DAG) const {
11511 SDLoc DL(Op);
11512 MVT VT = Op.getSimpleValueType();
11513 MVT XLenVT = Subtarget.getXLenVT();
11514
11515 SDValue Op1 = Op.getOperand(0);
11516 SDValue Mask = Op.getOperand(1);
11517 SDValue EVL = Op.getOperand(2);
11518
11519 MVT ContainerVT = VT;
11520 if (VT.isFixedLengthVector()) {
11521 ContainerVT = getContainerForFixedLengthVector(VT);
11522 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11523 MVT MaskVT = getMaskTypeFor(ContainerVT);
11524 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11525 }
11526
11527 MVT GatherVT = ContainerVT;
11528 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11529 // Check if we are working with mask vectors
11530 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11531 if (IsMaskVector) {
11532 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11533
11534 // Expand input operand
11535 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11536 DAG.getUNDEF(IndicesVT),
11537 DAG.getConstant(1, DL, XLenVT), EVL);
11538 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11539 DAG.getUNDEF(IndicesVT),
11540 DAG.getConstant(0, DL, XLenVT), EVL);
11541 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11542 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11543 }
11544
11545 unsigned EltSize = GatherVT.getScalarSizeInBits();
11546 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11547 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11548 unsigned MaxVLMAX =
11549 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11550
11551 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11552 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11553 // to use vrgatherei16.vv.
11554 // TODO: It's also possible to use vrgatherei16.vv for other types to
11555 // decrease register width for the index calculation.
11556 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11557 if (MaxVLMAX > 256 && EltSize == 8) {
11558 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11559 // Split the vector in half and reverse each half using a full register
11560 // reverse.
11561 // Swap the halves and concatenate them.
11562 // Slide the concatenated result by (VLMax - VL).
11563 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11564 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11565 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11566
11567 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11568 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11569
11570 // Reassemble the low and high pieces reversed.
11571 // NOTE: this Result is unmasked (because we do not need masks for
11572 // shuffles). If in the future this has to change, we can use a SELECT_VL
11573 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11574 SDValue Result =
11575 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11576
11577 // Slide off any elements from past EVL that were reversed into the low
11578 // elements.
11579 unsigned MinElts = GatherVT.getVectorMinNumElements();
11580 SDValue VLMax =
11581 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11582 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11583
11584 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11585 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11586
11587 if (IsMaskVector) {
11588 // Truncate Result back to a mask vector
11589 Result =
11590 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11591 {Result, DAG.getConstant(0, DL, GatherVT),
11592 DAG.getCondCode(ISD::SETNE),
11593 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11594 }
11595
11596 if (!VT.isFixedLengthVector())
11597 return Result;
11598 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11599 }
11600
11601 // Just promote the int type to i16 which will double the LMUL.
11602 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11603 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11604 }
11605
11606 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11607 SDValue VecLen =
11608 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11609 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11610 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11611 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11612 DAG.getUNDEF(IndicesVT), Mask, EVL);
11613 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11614 DAG.getUNDEF(GatherVT), Mask, EVL);
11615
11616 if (IsMaskVector) {
11617 // Truncate Result back to a mask vector
11618 Result = DAG.getNode(
11619 RISCVISD::SETCC_VL, DL, ContainerVT,
11620 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11621 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11622 }
11623
11624 if (!VT.isFixedLengthVector())
11625 return Result;
11626 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11627}
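// Illustration (editor's sketch, not from the original source; the helper
// name below is ours): the general path above reverses the first EVL elements
// by gathering with the index vector (EVL - 1) - VID, so lane I of the result
// reads lane (EVL - 1 - I) of the source. A scalar model of that index
// computation:
namespace {
constexpr unsigned vpReverseGatherIndex(unsigned I, unsigned EVL) {
  // VID yields 0, 1, 2, ...; subtracting it from the splat of EVL - 1 flips
  // the order of the first EVL lanes.
  return (EVL - 1) - I;
}
static_assert(vpReverseGatherIndex(0, 8) == 7 &&
                  vpReverseGatherIndex(7, 8) == 0,
              "gather indices run from EVL - 1 down to 0");
} // namespace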
11628
11629SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11630 SelectionDAG &DAG) const {
11631 MVT VT = Op.getSimpleValueType();
11632 if (VT.getVectorElementType() != MVT::i1)
11633 return lowerVPOp(Op, DAG);
11634
11635 // It is safe to drop mask parameter as masked-off elements are undef.
11636 SDValue Op1 = Op->getOperand(0);
11637 SDValue Op2 = Op->getOperand(1);
11638 SDValue VL = Op->getOperand(3);
11639
11640 MVT ContainerVT = VT;
11641 const bool IsFixed = VT.isFixedLengthVector();
11642 if (IsFixed) {
11643 ContainerVT = getContainerForFixedLengthVector(VT);
11644 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11645 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11646 }
11647
11648 SDLoc DL(Op);
11649 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11650 if (!IsFixed)
11651 return Val;
11652 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11653}
11654
11655SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11656 SelectionDAG &DAG) const {
11657 SDLoc DL(Op);
11658 MVT XLenVT = Subtarget.getXLenVT();
11659 MVT VT = Op.getSimpleValueType();
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector())
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663
11664 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11665
11666 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11667 // Check if the mask is known to be all ones
11668 SDValue Mask = VPNode->getMask();
11669 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11670
11671 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11672 : Intrinsic::riscv_vlse_mask,
11673 DL, XLenVT);
11674 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11675 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11676 VPNode->getStride()};
11677 if (!IsUnmasked) {
11678 if (VT.isFixedLengthVector()) {
11679 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11680 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11681 }
11682 Ops.push_back(Mask);
11683 }
11684 Ops.push_back(VPNode->getVectorLength());
11685 if (!IsUnmasked) {
11686 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11687 Ops.push_back(Policy);
11688 }
11689
11690 SDValue Result =
11691 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11692 VPNode->getMemoryVT(), VPNode->getMemOperand());
11693 SDValue Chain = Result.getValue(1);
11694
11695 if (VT.isFixedLengthVector())
11696 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11697
11698 return DAG.getMergeValues({Result, Chain}, DL);
11699}
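// Illustration (editor's sketch, not from the original source; the helper
// name below is ours): the vlse intrinsic selected above loads lane I from
// Base + I * Stride, where the stride is a signed byte distance between
// consecutive lanes (negative strides walk backwards through memory). A
// scalar model of the per-lane address:
namespace {
constexpr int64_t vlseLaneAddress(int64_t Base, int64_t Stride, unsigned Lane) {
  return Base + static_cast<int64_t>(Lane) * Stride;
}
static_assert(vlseLaneAddress(0x1000, -8, 2) == 0x1000 - 16,
              "the stride is a signed byte offset between consecutive lanes");
} // namespace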
11700
11701SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11702 SelectionDAG &DAG) const {
11703 SDLoc DL(Op);
11704 MVT XLenVT = Subtarget.getXLenVT();
11705
11706 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11707 SDValue StoreVal = VPNode->getValue();
11708 MVT VT = StoreVal.getSimpleValueType();
11709 MVT ContainerVT = VT;
11710 if (VT.isFixedLengthVector()) {
11711 ContainerVT = getContainerForFixedLengthVector(VT);
11712 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11713 }
11714
11715 // Check if the mask is known to be all ones
11716 SDValue Mask = VPNode->getMask();
11717 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11718
11719 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11720 : Intrinsic::riscv_vsse_mask,
11721 DL, XLenVT);
11722 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11723 VPNode->getBasePtr(), VPNode->getStride()};
11724 if (!IsUnmasked) {
11725 if (VT.isFixedLengthVector()) {
11726 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11727 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11728 }
11729 Ops.push_back(Mask);
11730 }
11731 Ops.push_back(VPNode->getVectorLength());
11732
11733 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11734 Ops, VPNode->getMemoryVT(),
11735 VPNode->getMemOperand());
11736}
11737
11738// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11739// matched to a RVV indexed load. The RVV indexed load instructions only
11740// support the "unsigned unscaled" addressing mode; indices are implicitly
11741// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11742// signed or scaled indexing is extended to the XLEN value type and scaled
11743// accordingly.
11744SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11745 SelectionDAG &DAG) const {
11746 SDLoc DL(Op);
11747 MVT VT = Op.getSimpleValueType();
11748
11749 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11750 EVT MemVT = MemSD->getMemoryVT();
11751 MachineMemOperand *MMO = MemSD->getMemOperand();
11752 SDValue Chain = MemSD->getChain();
11753 SDValue BasePtr = MemSD->getBasePtr();
11754
11755 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11756 SDValue Index, Mask, PassThru, VL;
11757
11758 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11759 Index = VPGN->getIndex();
11760 Mask = VPGN->getMask();
11761 PassThru = DAG.getUNDEF(VT);
11762 VL = VPGN->getVectorLength();
11763 // VP doesn't support extending loads.
11764 LoadExtType = ISD::NON_EXTLOAD;
11765 } else {
11766 // Else it must be a MGATHER.
11767 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11768 Index = MGN->getIndex();
11769 Mask = MGN->getMask();
11770 PassThru = MGN->getPassThru();
11771 LoadExtType = MGN->getExtensionType();
11772 }
11773
11774 MVT IndexVT = Index.getSimpleValueType();
11775 MVT XLenVT = Subtarget.getXLenVT();
11776
11778 "Unexpected VTs!");
11779 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11780 // Targets have to explicitly opt-in for extending vector loads.
11781 assert(LoadExtType == ISD::NON_EXTLOAD &&
11782 "Unexpected extending MGATHER/VP_GATHER");
11783
11784 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11785 // the selection of the masked intrinsics doesn't do this for us.
11786 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11787
11788 MVT ContainerVT = VT;
11789 if (VT.isFixedLengthVector()) {
11790 ContainerVT = getContainerForFixedLengthVector(VT);
11791 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11792 ContainerVT.getVectorElementCount());
11793
11794 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11795
11796 if (!IsUnmasked) {
11797 MVT MaskVT = getMaskTypeFor(ContainerVT);
11798 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11799 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11800 }
11801 }
11802
11803 if (!VL)
11804 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11805
11806 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11807 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11808 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11809 }
11810
11811 unsigned IntID =
11812 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11813 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11814 if (IsUnmasked)
11815 Ops.push_back(DAG.getUNDEF(ContainerVT));
11816 else
11817 Ops.push_back(PassThru);
11818 Ops.push_back(BasePtr);
11819 Ops.push_back(Index);
11820 if (!IsUnmasked)
11821 Ops.push_back(Mask);
11822 Ops.push_back(VL);
11823 if (!IsUnmasked)
11824 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11825
11826 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11827 SDValue Result =
11828 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11829 Chain = Result.getValue(1);
11830
11831 if (VT.isFixedLengthVector())
11832 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11833
11834 return DAG.getMergeValues({Result, Chain}, DL);
11835}
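// Illustration (editor's sketch, not from the original source; the helper
// name below is ours): under the "unsigned unscaled" addressing mode used
// above, lane I of vluxei accesses Base + zext(Index[I]) with the index taken
// as a raw byte offset, so any element-size scaling must already be folded
// into the index vector before it reaches this lowering. A scalar model of
// the per-lane address:
namespace {
constexpr uint64_t vluxeiLaneAddress(uint64_t Base, uint32_t ByteOffset) {
  // The index is zero-extended to XLEN and added to the base as a byte count.
  return Base + static_cast<uint64_t>(ByteOffset);
}
static_assert(vluxeiLaneAddress(0x1000, 12) == 0x100c,
              "indices are unsigned byte offsets from the base pointer");
} // namespace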
11836
11837// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11838// matched to a RVV indexed store. The RVV indexed store instructions only
11839// support the "unsigned unscaled" addressing mode; indices are implicitly
11840// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11841// signed or scaled indexing is extended to the XLEN value type and scaled
11842// accordingly.
11843SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11844 SelectionDAG &DAG) const {
11845 SDLoc DL(Op);
11846 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11847 EVT MemVT = MemSD->getMemoryVT();
11848 MachineMemOperand *MMO = MemSD->getMemOperand();
11849 SDValue Chain = MemSD->getChain();
11850 SDValue BasePtr = MemSD->getBasePtr();
11851
11852 [[maybe_unused]] bool IsTruncatingStore = false;
11853 SDValue Index, Mask, Val, VL;
11854
11855 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11856 Index = VPSN->getIndex();
11857 Mask = VPSN->getMask();
11858 Val = VPSN->getValue();
11859 VL = VPSN->getVectorLength();
11860 // VP doesn't support truncating stores.
11861 IsTruncatingStore = false;
11862 } else {
11863 // Else it must be a MSCATTER.
11864 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11865 Index = MSN->getIndex();
11866 Mask = MSN->getMask();
11867 Val = MSN->getValue();
11868 IsTruncatingStore = MSN->isTruncatingStore();
11869 }
11870
11871 MVT VT = Val.getSimpleValueType();
11872 MVT IndexVT = Index.getSimpleValueType();
11873 MVT XLenVT = Subtarget.getXLenVT();
11874
11876 "Unexpected VTs!");
11877 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11878 // Targets have to explicitly opt-in for extending vector loads and
11879 // truncating vector stores.
11880 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11881
11882 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11883 // the selection of the masked intrinsics doesn't do this for us.
11884 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11885
11886 MVT ContainerVT = VT;
11887 if (VT.isFixedLengthVector()) {
11888 ContainerVT = getContainerForFixedLengthVector(VT);
11889 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11890 ContainerVT.getVectorElementCount());
11891
11892 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11893 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11894
11895 if (!IsUnmasked) {
11896 MVT MaskVT = getMaskTypeFor(ContainerVT);
11897 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11898 }
11899 }
11900
11901 if (!VL)
11902 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11903
11904 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11905 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11906 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11907 }
11908
11909 unsigned IntID =
11910 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11911 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11912 Ops.push_back(Val);
11913 Ops.push_back(BasePtr);
11914 Ops.push_back(Index);
11915 if (!IsUnmasked)
11916 Ops.push_back(Mask);
11917 Ops.push_back(VL);
11918
11919 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11920 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11921}
11922
11923SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11924 SelectionDAG &DAG) const {
11925 const MVT XLenVT = Subtarget.getXLenVT();
11926 SDLoc DL(Op);
11927 SDValue Chain = Op->getOperand(0);
11928 SDValue SysRegNo = DAG.getTargetConstant(
11929 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11930 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11931 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11932
11933 // Encoding used for rounding mode in RISC-V differs from that used in
11934 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11935 // table, which consists of a sequence of 4-bit fields, each representing
11936 // corresponding FLT_ROUNDS mode.
11937 static const int Table =
11938 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11939 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11940 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11941 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11942 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11943
11944 SDValue Shift =
11945 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11946 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11947 DAG.getConstant(Table, DL, XLenVT), Shift);
11948 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11949 DAG.getConstant(7, DL, XLenVT));
11950
11951 return DAG.getMergeValues({Masked, Chain}, DL);
11952}
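// Illustration (editor's sketch, not from the original source; the names
// below are ours): the conversion above packs one 4-bit FLT_ROUNDS value per
// RISC-V FRM encoding into an integer and extracts the field selected by FRM.
// Assuming the FRM encodings RNE=0, RTZ=1, RDN=2, RUP=3, RMM=4 and the
// FLT_ROUNDS-style values 0=toward zero, 1=nearest-even, 2=upward,
// 3=downward, 4=nearest-away:
namespace {
constexpr int FRMToFLTRoundsTable = (1 << (4 * 0)) | // RNE -> nearest-even
                                    (0 << (4 * 1)) | // RTZ -> toward zero
                                    (3 << (4 * 2)) | // RDN -> downward
                                    (2 << (4 * 3)) | // RUP -> upward
                                    (4 << (4 * 4));  // RMM -> nearest-away
constexpr int frmToFLTRounds(unsigned FRM) {
  // Same shape as the lowering above: shift by 4 * FRM, keep the low field.
  return (FRMToFLTRoundsTable >> (4 * FRM)) & 7;
}
static_assert(frmToFLTRounds(0) == 1 && frmToFLTRounds(1) == 0 &&
                  frmToFLTRounds(2) == 3 && frmToFLTRounds(3) == 2 &&
                  frmToFLTRounds(4) == 4,
              "FRM -> FLT_ROUNDS table lookup");
} // namespace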
11953
11954SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11955 SelectionDAG &DAG) const {
11956 const MVT XLenVT = Subtarget.getXLenVT();
11957 SDLoc DL(Op);
11958 SDValue Chain = Op->getOperand(0);
11959 SDValue RMValue = Op->getOperand(1);
11960 SDValue SysRegNo = DAG.getTargetConstant(
11961 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11962
11963 // Encoding used for rounding mode in RISC-V differs from that used in
11964 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11965 // a table, which consists of a sequence of 4-bit fields, each representing
11966 // corresponding RISC-V mode.
11967 static const unsigned Table =
11968 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11969 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11970 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11971 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11972 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11973
11974 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11975
11976 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11977 DAG.getConstant(2, DL, XLenVT));
11978 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11979 DAG.getConstant(Table, DL, XLenVT), Shift);
11980 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11981 DAG.getConstant(0x7, DL, XLenVT));
11982 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11983 RMValue);
11984}
11985
11986SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11987 SelectionDAG &DAG) const {
11988 MachineFunction &MF = DAG.getMachineFunction();
11989
11990 bool isRISCV64 = Subtarget.is64Bit();
11991 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11992
11993 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11994 return DAG.getFrameIndex(FI, PtrVT);
11995}
11996
11997// Returns the opcode of the target-specific SDNode that implements the 32-bit
11998// form of the given Opcode.
11999static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12000 switch (Opcode) {
12001 default:
12002 llvm_unreachable("Unexpected opcode");
12003 case ISD::SHL:
12004 return RISCVISD::SLLW;
12005 case ISD::SRA:
12006 return RISCVISD::SRAW;
12007 case ISD::SRL:
12008 return RISCVISD::SRLW;
12009 case ISD::SDIV:
12010 return RISCVISD::DIVW;
12011 case ISD::UDIV:
12012 return RISCVISD::DIVUW;
12013 case ISD::UREM:
12014 return RISCVISD::REMUW;
12015 case ISD::ROTL:
12016 return RISCVISD::ROLW;
12017 case ISD::ROTR:
12018 return RISCVISD::RORW;
12019 }
12020}
12021
12022// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12023// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12024// otherwise be promoted to i64, making it difficult to select the
12025 // SLLW/DIVUW/.../*W form later, because the fact that the operation was
12026 // originally of type i8/i16/i32 is lost.
12027 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12028 unsigned ExtOpc = ISD::ANY_EXTEND) {
12029 SDLoc DL(N);
12030 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12031 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12032 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12033 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12034 // ReplaceNodeResults requires we maintain the same type for the return value.
12035 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12036}
12037
12038// Converts the given 32-bit operation to a i64 operation with signed extension
12039// semantic to reduce the signed extension instructions.
12040 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12041 SDLoc DL(N);
12042 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12043 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12044 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12045 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12046 DAG.getValueType(MVT::i32));
12047 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12048}
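// Illustration (editor's sketch, not from the original source; the helper
// name below is ours): the two helpers above model RV64's *W instructions,
// which compute on the low 32 bits of the 64-bit registers and sign-extend
// the 32-bit result back to 64 bits. The same semantics in plain C++, using
// ADDW as the example:
namespace {
constexpr int64_t emulateADDW(int64_t A, int64_t B) {
  // Keep only the low 32 bits of the sum, then sign-extend bit 31 to 64 bits.
  int64_t Low = static_cast<int64_t>(
      (static_cast<uint64_t>(A) + static_cast<uint64_t>(B)) & 0xffffffffu);
  return Low >= 0x80000000LL ? Low - 0x100000000LL : Low;
}
static_assert(emulateADDW(0x7fffffff, 1) == -(int64_t(1) << 31),
              "the 32-bit result is sign-extended into the 64-bit register");
} // namespace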
12049
12052 SelectionDAG &DAG) const {
12053 SDLoc DL(N);
12054 switch (N->getOpcode()) {
12055 default:
12056 llvm_unreachable("Don't know how to custom type legalize this operation!");
12057 case ISD::STRICT_FP_TO_SINT:
12058 case ISD::STRICT_FP_TO_UINT:
12059 case ISD::FP_TO_SINT:
12060 case ISD::FP_TO_UINT: {
12061 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12062 "Unexpected custom legalisation");
12063 bool IsStrict = N->isStrictFPOpcode();
12064 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12065 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12066 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12067 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12068 TargetLowering::TypeSoftenFloat) {
12069 if (!isTypeLegal(Op0.getValueType()))
12070 return;
12071 if (IsStrict) {
12072 SDValue Chain = N->getOperand(0);
12073 // In absence of Zfh, promote f16 to f32, then convert.
12074 if (Op0.getValueType() == MVT::f16 &&
12075 !Subtarget.hasStdExtZfhOrZhinx()) {
12076 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12077 {Chain, Op0});
12078 Chain = Op0.getValue(1);
12079 }
12080 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12081 : RISCVISD::STRICT_FCVT_WU_RV64;
12082 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12083 SDValue Res = DAG.getNode(
12084 Opc, DL, VTs, Chain, Op0,
12085 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12086 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12087 Results.push_back(Res.getValue(1));
12088 return;
12089 }
12090 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
12091 // convert.
12092 if ((Op0.getValueType() == MVT::f16 &&
12093 !Subtarget.hasStdExtZfhOrZhinx()) ||
12094 Op0.getValueType() == MVT::bf16)
12095 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12096
12097 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12098 SDValue Res =
12099 DAG.getNode(Opc, DL, MVT::i64, Op0,
12100 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12101 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12102 return;
12103 }
12104 // If the FP type needs to be softened, emit a library call using the 'si'
12105 // version. If we left it to default legalization we'd end up with 'di'. If
12106 // the FP type doesn't need to be softened just let generic type
12107 // legalization promote the result type.
12108 RTLIB::Libcall LC;
12109 if (IsSigned)
12110 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12111 else
12112 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12113 MakeLibCallOptions CallOptions;
12114 EVT OpVT = Op0.getValueType();
12115 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12116 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12117 SDValue Result;
12118 std::tie(Result, Chain) =
12119 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12120 Results.push_back(Result);
12121 if (IsStrict)
12122 Results.push_back(Chain);
12123 break;
12124 }
12125 case ISD::LROUND: {
12126 SDValue Op0 = N->getOperand(0);
12127 EVT Op0VT = Op0.getValueType();
12128 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12129 TargetLowering::TypeSoftenFloat) {
12130 if (!isTypeLegal(Op0VT))
12131 return;
12132
12133 // In absence of Zfh, promote f16 to f32, then convert.
12134 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12135 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12136
12137 SDValue Res =
12138 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12139 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12140 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12141 return;
12142 }
12143 // If the FP type needs to be softened, emit a library call to lround. We'll
12144 // need to truncate the result. We assume any value that doesn't fit in i32
12145 // is allowed to return an unspecified value.
12146 RTLIB::Libcall LC =
12147 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12148 MakeLibCallOptions CallOptions;
12149 EVT OpVT = Op0.getValueType();
12150 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12151 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12152 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12153 Results.push_back(Result);
12154 break;
12155 }
12156 case ISD::READCYCLECOUNTER:
12157 case ISD::READSTEADYCOUNTER: {
12158 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12159 "has custom type legalization on riscv32");
12160
12161 SDValue LoCounter, HiCounter;
12162 MVT XLenVT = Subtarget.getXLenVT();
12163 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12164 LoCounter = DAG.getTargetConstant(
12165 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12166 HiCounter = DAG.getTargetConstant(
12167 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12168 } else {
12169 LoCounter = DAG.getTargetConstant(
12170 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12171 HiCounter = DAG.getTargetConstant(
12172 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12173 }
12174 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12175 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12176 N->getOperand(0), LoCounter, HiCounter);
12177
12178 Results.push_back(
12179 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12180 Results.push_back(RCW.getValue(2));
12181 break;
12182 }
12183 case ISD::LOAD: {
12184 if (!ISD::isNON_EXTLoad(N))
12185 return;
12186
12187 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12188 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12189 LoadSDNode *Ld = cast<LoadSDNode>(N);
12190
12191 SDLoc dl(N);
12192 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12193 Ld->getBasePtr(), Ld->getMemoryVT(),
12194 Ld->getMemOperand());
12195 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12196 Results.push_back(Res.getValue(1));
12197 return;
12198 }
12199 case ISD::MUL: {
12200 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12201 unsigned XLen = Subtarget.getXLen();
12202 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12203 if (Size > XLen) {
12204 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12205 SDValue LHS = N->getOperand(0);
12206 SDValue RHS = N->getOperand(1);
12207 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12208
12209 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12210 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12211 // We need exactly one side to be unsigned.
12212 if (LHSIsU == RHSIsU)
12213 return;
12214
12215 auto MakeMULPair = [&](SDValue S, SDValue U) {
12216 MVT XLenVT = Subtarget.getXLenVT();
12217 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12218 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12219 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12220 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12221 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12222 };
12223
12224 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12225 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12226
12227 // The other operand should be signed, but still prefer MULH when
12228 // possible.
12229 if (RHSIsU && LHSIsS && !RHSIsS)
12230 Results.push_back(MakeMULPair(LHS, RHS));
12231 else if (LHSIsU && RHSIsS && !LHSIsS)
12232 Results.push_back(MakeMULPair(RHS, LHS));
12233
12234 return;
12235 }
12236 [[fallthrough]];
12237 }
12238 case ISD::ADD:
12239 case ISD::SUB:
12240 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12241 "Unexpected custom legalisation");
12242 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12243 break;
12244 case ISD::SHL:
12245 case ISD::SRA:
12246 case ISD::SRL:
12247 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12248 "Unexpected custom legalisation");
12249 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12250 // If we can use a BSET instruction, allow default promotion to apply.
12251 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12252 isOneConstant(N->getOperand(0)))
12253 break;
12254 Results.push_back(customLegalizeToWOp(N, DAG));
12255 break;
12256 }
12257
12258 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12259 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12260 // shift amount.
12261 if (N->getOpcode() == ISD::SHL) {
12262 SDLoc DL(N);
12263 SDValue NewOp0 =
12264 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12265 SDValue NewOp1 =
12266 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12267 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12268 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12269 DAG.getValueType(MVT::i32));
12270 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12271 }
12272
12273 break;
12274 case ISD::ROTL:
12275 case ISD::ROTR:
12276 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12277 "Unexpected custom legalisation");
12278 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12279 Subtarget.hasVendorXTHeadBb()) &&
12280 "Unexpected custom legalization");
12281 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12282 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12283 return;
12284 Results.push_back(customLegalizeToWOp(N, DAG));
12285 break;
12286 case ISD::CTTZ:
12287 case ISD::CTTZ_ZERO_UNDEF:
12288 case ISD::CTLZ:
12289 case ISD::CTLZ_ZERO_UNDEF: {
12290 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12291 "Unexpected custom legalisation");
12292
12293 SDValue NewOp0 =
12294 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12295 bool IsCTZ =
12296 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12297 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12298 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12299 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12300 return;
12301 }
12302 case ISD::SDIV:
12303 case ISD::UDIV:
12304 case ISD::UREM: {
12305 MVT VT = N->getSimpleValueType(0);
12306 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12307 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12308 "Unexpected custom legalisation");
12309 // Don't promote division/remainder by constant since we should expand those
12310 // to multiply by magic constant.
12311 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12312 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12313 !isIntDivCheap(N->getValueType(0), Attr))
12314 return;
12315
12316 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12317 // the upper 32 bits. For other types we need to sign or zero extend
12318 // based on the opcode.
12319 unsigned ExtOpc = ISD::ANY_EXTEND;
12320 if (VT != MVT::i32)
12321 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12322 : ISD::ZERO_EXTEND;
12323
12324 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12325 break;
12326 }
12327 case ISD::SADDO: {
12328 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12329 "Unexpected custom legalisation");
12330
12331 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12332 // use the default legalization.
12333 if (!isa<ConstantSDNode>(N->getOperand(1)))
12334 return;
12335
12336 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12337 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12338 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12339 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12340 DAG.getValueType(MVT::i32));
12341
12342 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12343
12344 // For an addition, the result should be less than one of the operands (LHS)
12345 // if and only if the other operand (RHS) is negative, otherwise there will
12346 // be overflow.
12347 // For a subtraction, the result should be less than one of the operands
12348 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12349 // otherwise there will be overflow.
12350 EVT OType = N->getValueType(1);
12351 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12352 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12353
12354 SDValue Overflow =
12355 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12356 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12357 Results.push_back(Overflow);
12358 return;
12359 }
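// Worked example (editor's note, not from the original source): with
// LHS = INT32_MIN and RHS = -1 the 32-bit result wraps to INT32_MAX, so
// ResultLowerThanLHS is false while ConditionRHS (RHS < 0) is true and the
// XOR correctly reports overflow; with LHS = 1 and RHS = -1 the result is 0,
// both conditions are true, and the XOR reports no overflow.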
12360 case ISD::UADDO:
12361 case ISD::USUBO: {
12362 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12363 "Unexpected custom legalisation");
12364 bool IsAdd = N->getOpcode() == ISD::UADDO;
12365 // Create an ADDW or SUBW.
12366 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12367 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12368 SDValue Res =
12369 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12370 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12371 DAG.getValueType(MVT::i32));
12372
12373 SDValue Overflow;
12374 if (IsAdd && isOneConstant(RHS)) {
12375 // Special case uaddo X, 1 overflowed if the addition result is 0.
12376 // The general case (X + C) < C is not necessarily beneficial. Although we
12377 // reduce the live range of X, we may introduce the materialization of
12378 // constant C, especially when the setcc result is used by a branch, since
12379 // RISC-V has no compare-with-immediate branch instructions.
12380 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12381 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12382 } else if (IsAdd && isAllOnesConstant(RHS)) {
12383 // Special case uaddo X, -1 overflowed if X != 0.
12384 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12385 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12386 } else {
12387 // Sign extend the LHS and perform an unsigned compare with the ADDW
12388 // result. Since the inputs are sign extended from i32, this is equivalent
12389 // to comparing the lower 32 bits.
12390 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12391 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12392 IsAdd ? ISD::SETULT : ISD::SETUGT);
12393 }
12394
12395 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12396 Results.push_back(Overflow);
12397 return;
12398 }
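// Worked example (editor's note, not from the original source): for
// uaddo(X, 1) the 32-bit sum overflows exactly when it wraps to 0
// (X = 0xffffffff), so comparing the result against 0 suffices; for
// uaddo(X, -1), i.e. X + 0xffffffff, every nonzero X wraps past 2^32, so the
// overflow bit is simply X != 0. Both forms avoid materializing a constant
// just to feed a compare-and-branch.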
12399 case ISD::UADDSAT:
12400 case ISD::USUBSAT: {
12401 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12402 "Unexpected custom legalisation");
12403 if (Subtarget.hasStdExtZbb()) {
12404 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12405 // sign extend allows overflow of the lower 32 bits to be detected on
12406 // the promoted size.
12407 SDValue LHS =
12408 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12409 SDValue RHS =
12410 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12411 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12412 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12413 return;
12414 }
12415
12416 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12417 // promotion for UADDO/USUBO.
12418 Results.push_back(expandAddSubSat(N, DAG));
12419 return;
12420 }
12421 case ISD::SADDSAT:
12422 case ISD::SSUBSAT: {
12423 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12424 "Unexpected custom legalisation");
12425 Results.push_back(expandAddSubSat(N, DAG));
12426 return;
12427 }
12428 case ISD::ABS: {
12429 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12430 "Unexpected custom legalisation");
12431
12432 if (Subtarget.hasStdExtZbb()) {
12433 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12434 // This allows us to remember that the result is sign extended. Expanding
12435 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12436 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12437 N->getOperand(0));
12438 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12439 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12440 return;
12441 }
12442
12443 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12444 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12445
12446 // Freeze the source so we can increase its use count.
12447 Src = DAG.getFreeze(Src);
12448
12449 // Copy sign bit to all bits using the sraiw pattern.
12450 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12451 DAG.getValueType(MVT::i32));
12452 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12453 DAG.getConstant(31, DL, MVT::i64));
12454
12455 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12456 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12457
12458 // NOTE: The result is only required to be anyextended, but sext is
12459 // consistent with type legalization of sub.
12460 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12461 DAG.getValueType(MVT::i32));
12462 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12463 return;
12464 }
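// Worked example (editor's note, not from the original source): for X = -5,
// SignFill = X >> 31 = -1 (all ones), so (X ^ SignFill) - SignFill
// = 4 - (-1) = 5; for X = 5, SignFill = 0 and the expression is just X.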
12465 case ISD::BITCAST: {
12466 EVT VT = N->getValueType(0);
12467 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12468 SDValue Op0 = N->getOperand(0);
12469 EVT Op0VT = Op0.getValueType();
12470 MVT XLenVT = Subtarget.getXLenVT();
12471 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12472 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12473 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12474 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12475 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12476 Subtarget.hasStdExtZfbfmin()) {
12477 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12478 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12479 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12480 Subtarget.hasStdExtFOrZfinx()) {
12481 SDValue FPConv =
12482 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12483 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12484 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12485 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12486 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12487 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12488 NewReg.getValue(0), NewReg.getValue(1));
12489 Results.push_back(RetReg);
12490 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12491 isTypeLegal(Op0VT)) {
12492 // Custom-legalize bitcasts from fixed-length vector types to illegal
12493 // scalar types in order to improve codegen. Bitcast the vector to a
12494 // one-element vector type whose element type is the same as the result
12495 // type, and extract the first element.
12496 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12497 if (isTypeLegal(BVT)) {
12498 SDValue BVec = DAG.getBitcast(BVT, Op0);
12499 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12500 DAG.getVectorIdxConstant(0, DL)));
12501 }
12502 }
12503 break;
12504 }
12505 case RISCVISD::BREV8:
12506 case RISCVISD::ORC_B: {
12507 MVT VT = N->getSimpleValueType(0);
12508 MVT XLenVT = Subtarget.getXLenVT();
12509 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12510 "Unexpected custom legalisation");
12511 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12512 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12513 "Unexpected extension");
12514 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12515 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12516 // ReplaceNodeResults requires we maintain the same type for the return
12517 // value.
12518 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12519 break;
12520 }
12521 case ISD::EXTRACT_VECTOR_ELT: {
12522 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12523 // type is illegal (currently only vXi64 RV32).
12524 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12525 // transferred to the destination register. We issue two of these from the
12526 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12527 // first element.
12528 SDValue Vec = N->getOperand(0);
12529 SDValue Idx = N->getOperand(1);
12530
12531 // The vector type hasn't been legalized yet so we can't issue target
12532 // specific nodes if it needs legalization.
12533 // FIXME: We would manually legalize if it's important.
12534 if (!isTypeLegal(Vec.getValueType()))
12535 return;
12536
12537 MVT VecVT = Vec.getSimpleValueType();
12538
12539 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12540 VecVT.getVectorElementType() == MVT::i64 &&
12541 "Unexpected EXTRACT_VECTOR_ELT legalization");
12542
12543 // If this is a fixed vector, we need to convert it to a scalable vector.
12544 MVT ContainerVT = VecVT;
12545 if (VecVT.isFixedLengthVector()) {
12546 ContainerVT = getContainerForFixedLengthVector(VecVT);
12547 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12548 }
12549
12550 MVT XLenVT = Subtarget.getXLenVT();
12551
12552 // Use a VL of 1 to avoid processing more elements than we need.
12553 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12554
12555 // Unless the index is known to be 0, we must slide the vector down to get
12556 // the desired element into index 0.
12557 if (!isNullConstant(Idx)) {
12558 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12559 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12560 }
12561
12562 // Extract the lower XLEN bits of the correct vector element.
12563 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12564
12565 // To extract the upper XLEN bits of the vector element, shift the first
12566 // element right by 32 bits and re-extract the lower XLEN bits.
12567 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12568 DAG.getUNDEF(ContainerVT),
12569 DAG.getConstant(32, DL, XLenVT), VL);
12570 SDValue LShr32 =
12571 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12572 DAG.getUNDEF(ContainerVT), Mask, VL);
12573
12574 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12575
12576 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12577 break;
12578 }
12579 case ISD::INTRINSIC_WO_CHAIN: {
12580 unsigned IntNo = N->getConstantOperandVal(0);
12581 switch (IntNo) {
12582 default:
12583 llvm_unreachable(
12584 "Don't know how to custom type legalize this intrinsic!");
12585 case Intrinsic::experimental_get_vector_length: {
12586 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12587 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12588 return;
12589 }
12590 case Intrinsic::experimental_cttz_elts: {
12591 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12592 Results.push_back(
12593 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12594 return;
12595 }
12596 case Intrinsic::riscv_orc_b:
12597 case Intrinsic::riscv_brev8:
12598 case Intrinsic::riscv_sha256sig0:
12599 case Intrinsic::riscv_sha256sig1:
12600 case Intrinsic::riscv_sha256sum0:
12601 case Intrinsic::riscv_sha256sum1:
12602 case Intrinsic::riscv_sm3p0:
12603 case Intrinsic::riscv_sm3p1: {
12604 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12605 return;
12606 unsigned Opc;
12607 switch (IntNo) {
12608 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12609 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12610 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12611 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12612 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12613 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12614 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12615 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12616 }
12617
12618 SDValue NewOp =
12619 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12620 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12621 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12622 return;
12623 }
12624 case Intrinsic::riscv_sm4ks:
12625 case Intrinsic::riscv_sm4ed: {
12626 unsigned Opc =
12627 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12628 SDValue NewOp0 =
12629 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12630 SDValue NewOp1 =
12631 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12632 SDValue Res =
12633 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12634 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12635 return;
12636 }
12637 case Intrinsic::riscv_mopr: {
12638 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12639 return;
12640 SDValue NewOp =
12641 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12642 SDValue Res = DAG.getNode(
12643 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12644 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12645 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12646 return;
12647 }
12648 case Intrinsic::riscv_moprr: {
12649 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12650 return;
12651 SDValue NewOp0 =
12652 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12653 SDValue NewOp1 =
12654 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12655 SDValue Res = DAG.getNode(
12656 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12657 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12658 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12659 return;
12660 }
12661 case Intrinsic::riscv_clmul: {
12662 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12663 return;
12664
12665 SDValue NewOp0 =
12666 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12667 SDValue NewOp1 =
12668 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12669 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12670 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12671 return;
12672 }
12673 case Intrinsic::riscv_clmulh:
12674 case Intrinsic::riscv_clmulr: {
12675 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12676 return;
12677
12678 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12679 // to the full 128-bit clmul result of multiplying two xlen values.
12680 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12681 // upper 32 bits.
12682 //
12683 // The alternative is to mask the inputs to 32 bits and use clmul, but
12684 // that requires two shifts to mask each input without zext.w.
12685 // FIXME: If the inputs are known zero extended or could be freely
12686 // zero extended, the mask form would be better.
12687 SDValue NewOp0 =
12688 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12689 SDValue NewOp1 =
12690 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12691 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12692 DAG.getConstant(32, DL, MVT::i64));
12693 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12694 DAG.getConstant(32, DL, MVT::i64));
12695 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12696 : RISCVISD::CLMULR;
12697 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12698 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12699 DAG.getConstant(32, DL, MVT::i64));
12700 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12701 return;
12702 }
12703 case Intrinsic::riscv_vmv_x_s: {
12704 EVT VT = N->getValueType(0);
12705 MVT XLenVT = Subtarget.getXLenVT();
12706 if (VT.bitsLT(XLenVT)) {
12707 // Simple case just extract using vmv.x.s and truncate.
12708 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12709 Subtarget.getXLenVT(), N->getOperand(1));
12710 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12711 return;
12712 }
12713
12714 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12715 "Unexpected custom legalization");
12716
12717 // We need to do the move in two steps.
12718 SDValue Vec = N->getOperand(1);
12719 MVT VecVT = Vec.getSimpleValueType();
12720
12721 // First extract the lower XLEN bits of the element.
12722 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12723
12724 // To extract the upper XLEN bits of the vector element, shift the first
12725 // element right by 32 bits and re-extract the lower XLEN bits.
12726 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12727
12728 SDValue ThirtyTwoV =
12729 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12730 DAG.getConstant(32, DL, XLenVT), VL);
12731 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12732 DAG.getUNDEF(VecVT), Mask, VL);
12733 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12734
12735 Results.push_back(
12736 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12737 break;
12738 }
12739 }
12740 break;
12741 }
12742 case ISD::VECREDUCE_ADD:
12743 case ISD::VECREDUCE_AND:
12744 case ISD::VECREDUCE_OR:
12745 case ISD::VECREDUCE_XOR:
12746 case ISD::VECREDUCE_SMAX:
12747 case ISD::VECREDUCE_UMAX:
12748 case ISD::VECREDUCE_SMIN:
12749 case ISD::VECREDUCE_UMIN:
12750 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12751 Results.push_back(V);
12752 break;
12753 case ISD::VP_REDUCE_ADD:
12754 case ISD::VP_REDUCE_AND:
12755 case ISD::VP_REDUCE_OR:
12756 case ISD::VP_REDUCE_XOR:
12757 case ISD::VP_REDUCE_SMAX:
12758 case ISD::VP_REDUCE_UMAX:
12759 case ISD::VP_REDUCE_SMIN:
12760 case ISD::VP_REDUCE_UMIN:
12761 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12762 Results.push_back(V);
12763 break;
12764 case ISD::GET_ROUNDING: {
12765 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12766 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12767 Results.push_back(Res.getValue(0));
12768 Results.push_back(Res.getValue(1));
12769 break;
12770 }
12771 }
12772}
12773
12774/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12775/// which corresponds to it.
12776static unsigned getVecReduceOpcode(unsigned Opc) {
12777 switch (Opc) {
12778 default:
12779 llvm_unreachable("Unhandled binary to transform reduction");
12780 case ISD::ADD:
12781 return ISD::VECREDUCE_ADD;
12782 case ISD::UMAX:
12783 return ISD::VECREDUCE_UMAX;
12784 case ISD::SMAX:
12785 return ISD::VECREDUCE_SMAX;
12786 case ISD::UMIN:
12787 return ISD::VECREDUCE_UMIN;
12788 case ISD::SMIN:
12789 return ISD::VECREDUCE_SMIN;
12790 case ISD::AND:
12791 return ISD::VECREDUCE_AND;
12792 case ISD::OR:
12793 return ISD::VECREDUCE_OR;
12794 case ISD::XOR:
12795 return ISD::VECREDUCE_XOR;
12796 case ISD::FADD:
12797 // Note: This is the associative form of the generic reduction opcode.
12798 return ISD::VECREDUCE_FADD;
12799 }
12800}
12801
12802/// Perform two related transforms whose purpose is to incrementally recognize
12803/// an explode_vector followed by scalar reduction as a vector reduction node.
12804/// This exists to recover from a deficiency in SLP which can't handle
12805/// forests with multiple roots sharing common nodes. In some cases, one
12806/// of the trees will be vectorized, and the other will remain (unprofitably)
12807/// scalarized.
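/// A minimal sketch of the pattern (hypothetical IR, for illustration):
///   %e0 = extractelement <4 x i32> %v, i32 0
///   %e1 = extractelement <4 x i32> %v, i32 1
///   %r  = add i32 %e0, %e1
/// is rewritten as a vecreduce_add over the first two lanes of %v, and each
/// further (add (vecreduce ...), (extractelement %v, K)) then folds one more
/// lane into that reduction.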
12808static SDValue
12809 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12810 const RISCVSubtarget &Subtarget) {
12811
12812 // This transform needs to run before all integer types have been legalized
12813 // to i64 (so that the vector element type matches the add type), and while
12814 // it's safe to introduce odd sized vector types.
12815 if (DAG.NewNodesMustHaveLegalTypes)
12816 return SDValue();
12817
12818 // Without V, this transform isn't useful. We could form the (illegal)
12819 // operations and let them be scalarized again, but there's really no point.
12820 if (!Subtarget.hasVInstructions())
12821 return SDValue();
12822
12823 const SDLoc DL(N);
12824 const EVT VT = N->getValueType(0);
12825 const unsigned Opc = N->getOpcode();
12826
12827 // For FADD, we only handle the case with reassociation allowed. We
12828 // could handle strict reduction order, but at the moment, there's no
12829 // known reason to, and the complexity isn't worth it.
12830 // TODO: Handle fminnum and fmaxnum here
12831 if (!VT.isInteger() &&
12832 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12833 return SDValue();
12834
12835 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12836 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12837 "Inconsistent mappings");
12838 SDValue LHS = N->getOperand(0);
12839 SDValue RHS = N->getOperand(1);
12840
12841 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12842 return SDValue();
12843
12844 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12845 std::swap(LHS, RHS);
12846
12847 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12848 !isa<ConstantSDNode>(RHS.getOperand(1)))
12849 return SDValue();
12850
12851 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12852 SDValue SrcVec = RHS.getOperand(0);
12853 EVT SrcVecVT = SrcVec.getValueType();
12854 assert(SrcVecVT.getVectorElementType() == VT);
12855 if (SrcVecVT.isScalableVector())
12856 return SDValue();
12857
12858 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12859 return SDValue();
12860
12861 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12862 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12863 // root of our reduction tree. TODO: We could extend this to any two
12864 // adjacent aligned constant indices if desired.
12865 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12866 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12867 uint64_t LHSIdx =
12868 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12869 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12870 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12871 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12872 DAG.getVectorIdxConstant(0, DL));
12873 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12874 }
12875 }
12876
12877 // Match (binop (reduce (extract_subvector V, 0),
12878 // (extract_vector_elt V, sizeof(SubVec))))
12879 // into a reduction of one more element from the original vector V.
12880 if (LHS.getOpcode() != ReduceOpc)
12881 return SDValue();
12882
12883 SDValue ReduceVec = LHS.getOperand(0);
12884 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12885 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12886 isNullConstant(ReduceVec.getOperand(1)) &&
12887 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12888 // For illegal types (e.g. 3xi32), most will be combined again into a
12889 // wider (hopefully legal) type. If this is a terminal state, we are
12890 // relying on type legalization here to produce something reasonable
12891 // and this lowering quality could probably be improved. (TODO)
12892 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12893 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12894 DAG.getVectorIdxConstant(0, DL));
12895 auto Flags = ReduceVec->getFlags();
12896 Flags.intersectWith(N->getFlags());
12897 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12898 }
12899
12900 return SDValue();
12901}
12902
12903
12904// Try to fold (<bop> x, (reduction.<bop> vec, start))
12905 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12906 const RISCVSubtarget &Subtarget) {
12907 auto BinOpToRVVReduce = [](unsigned Opc) {
12908 switch (Opc) {
12909 default:
12910 llvm_unreachable("Unhandled binary to transform reduction");
12911 case ISD::ADD:
12912 return RISCVISD::VECREDUCE_ADD_VL;
12913 case ISD::UMAX:
12914 return RISCVISD::VECREDUCE_UMAX_VL;
12915 case ISD::SMAX:
12916 return RISCVISD::VECREDUCE_SMAX_VL;
12917 case ISD::UMIN:
12918 return RISCVISD::VECREDUCE_UMIN_VL;
12919 case ISD::SMIN:
12920 return RISCVISD::VECREDUCE_SMIN_VL;
12921 case ISD::AND:
12922 return RISCVISD::VECREDUCE_AND_VL;
12923 case ISD::OR:
12924 return RISCVISD::VECREDUCE_OR_VL;
12925 case ISD::XOR:
12926 return RISCVISD::VECREDUCE_XOR_VL;
12927 case ISD::FADD:
12928 return RISCVISD::VECREDUCE_FADD_VL;
12929 case ISD::FMAXNUM:
12930 return RISCVISD::VECREDUCE_FMAX_VL;
12931 case ISD::FMINNUM:
12932 return RISCVISD::VECREDUCE_FMIN_VL;
12933 }
12934 };
12935
12936 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12937 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12938 isNullConstant(V.getOperand(1)) &&
12939 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12940 };
12941
12942 unsigned Opc = N->getOpcode();
12943 unsigned ReduceIdx;
12944 if (IsReduction(N->getOperand(0), Opc))
12945 ReduceIdx = 0;
12946 else if (IsReduction(N->getOperand(1), Opc))
12947 ReduceIdx = 1;
12948 else
12949 return SDValue();
12950
12951 // Skip if FADD disallows reassociation but the combine needs it.
12952 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12953 return SDValue();
12954
12955 SDValue Extract = N->getOperand(ReduceIdx);
12956 SDValue Reduce = Extract.getOperand(0);
12957 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12958 return SDValue();
12959
12960 SDValue ScalarV = Reduce.getOperand(2);
12961 EVT ScalarVT = ScalarV.getValueType();
12962 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12963 ScalarV.getOperand(0)->isUndef() &&
12964 isNullConstant(ScalarV.getOperand(2)))
12965 ScalarV = ScalarV.getOperand(1);
12966
12967 // Make sure that ScalarV is a splat with VL=1.
12968 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12969 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12970 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12971 return SDValue();
12972
12973 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12974 return SDValue();
12975
12976 // Check the scalar of ScalarV is neutral element
12977 // TODO: Deal with value other than neutral element.
12978 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12979 0))
12980 return SDValue();
12981
12982 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12983 // FIXME: We might be able to improve this if operand 0 is undef.
12984 if (!isNonZeroAVL(Reduce.getOperand(5)))
12985 return SDValue();
12986
12987 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12988
12989 SDLoc DL(N);
12990 SDValue NewScalarV =
12991 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12992 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12993
12994 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12995 if (ScalarVT != ScalarV.getValueType())
12996 NewScalarV =
12997 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12998 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12999
13000 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13001 NewScalarV, Reduce.getOperand(3),
13002 Reduce.getOperand(4), Reduce.getOperand(5)};
13003 SDValue NewReduce =
13004 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13005 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13006 Extract.getOperand(1));
13007}
13008
13009// Optimize (add (shl x, c0), (shl y, c1)) ->
13010// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
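// Worked example (illustrative): for (add (shl x, 2), (shl y, 5)) we have
// c0 = 2 and c1 = 5, so Diff = 3 and Bits = 2, giving
// (SLLI (SH3ADD y, x), 2), i.e. ((y << 3) + x) << 2 == (y << 5) + (x << 2).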
13011 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13012 const RISCVSubtarget &Subtarget) {
13013 // Perform this optimization only in the zba extension.
13014 if (!Subtarget.hasStdExtZba())
13015 return SDValue();
13016
13017 // Skip for vector types and larger types.
13018 EVT VT = N->getValueType(0);
13019 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13020 return SDValue();
13021
13022 // The two operand nodes must be SHL and have no other use.
13023 SDValue N0 = N->getOperand(0);
13024 SDValue N1 = N->getOperand(1);
13025 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13026 !N0->hasOneUse() || !N1->hasOneUse())
13027 return SDValue();
13028
13029 // Check c0 and c1.
13030 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13031 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13032 if (!N0C || !N1C)
13033 return SDValue();
13034 int64_t C0 = N0C->getSExtValue();
13035 int64_t C1 = N1C->getSExtValue();
13036 if (C0 <= 0 || C1 <= 0)
13037 return SDValue();
13038
13039 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13040 int64_t Bits = std::min(C0, C1);
13041 int64_t Diff = std::abs(C0 - C1);
13042 if (Diff != 1 && Diff != 2 && Diff != 3)
13043 return SDValue();
13044
13045 // Build nodes.
13046 SDLoc DL(N);
13047 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13048 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13049 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13050 DAG.getConstant(Diff, DL, VT), NS);
13051 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13052}
13053
13054// Combine a constant select operand into its use:
13055//
13056// (and (select cond, -1, c), x)
13057// -> (select cond, x, (and x, c)) [AllOnes=1]
13058// (or (select cond, 0, c), x)
13059// -> (select cond, x, (or x, c)) [AllOnes=0]
13060// (xor (select cond, 0, c), x)
13061// -> (select cond, x, (xor x, c)) [AllOnes=0]
13062// (add (select cond, 0, c), x)
13063// -> (select cond, x, (add x, c)) [AllOnes=0]
13064// (sub x, (select cond, 0, c))
13065// -> (select cond, x, (sub x, c)) [AllOnes=0]
13066 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13067 SelectionDAG &DAG, bool AllOnes,
13068 const RISCVSubtarget &Subtarget) {
13069 EVT VT = N->getValueType(0);
13070
13071 // Skip vectors.
13072 if (VT.isVector())
13073 return SDValue();
13074
13075 if (!Subtarget.hasConditionalMoveFusion()) {
13076 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13077 if ((!Subtarget.hasStdExtZicond() &&
13078 !Subtarget.hasVendorXVentanaCondOps()) ||
13079 N->getOpcode() != ISD::AND)
13080 return SDValue();
13081
13082 // Maybe harmful when the condition code has multiple uses.
13083 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13084 return SDValue();
13085
13086 // Maybe harmful when VT is wider than XLen.
13087 if (VT.getSizeInBits() > Subtarget.getXLen())
13088 return SDValue();
13089 }
13090
13091 if ((Slct.getOpcode() != ISD::SELECT &&
13092 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13093 !Slct.hasOneUse())
13094 return SDValue();
13095
13096 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13097 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13098 };
13099
13100 bool SwapSelectOps;
13101 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13102 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13103 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13104 SDValue NonConstantVal;
13105 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13106 SwapSelectOps = false;
13107 NonConstantVal = FalseVal;
13108 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13109 SwapSelectOps = true;
13110 NonConstantVal = TrueVal;
13111 } else
13112 return SDValue();
13113
13114 // Slct is now known to be the desired identity constant when CC is true.
13115 TrueVal = OtherOp;
13116 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13117 // Unless SwapSelectOps says the condition should be false.
13118 if (SwapSelectOps)
13119 std::swap(TrueVal, FalseVal);
13120
13121 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13122 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13123 {Slct.getOperand(0), Slct.getOperand(1),
13124 Slct.getOperand(2), TrueVal, FalseVal});
13125
13126 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13127 {Slct.getOperand(0), TrueVal, FalseVal});
13128}
13129
13130// Attempt combineSelectAndUse on each operand of a commutative operator N.
13131 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13132 bool AllOnes,
13133 const RISCVSubtarget &Subtarget) {
13134 SDValue N0 = N->getOperand(0);
13135 SDValue N1 = N->getOperand(1);
13136 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13137 return Result;
13138 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13139 return Result;
13140 return SDValue();
13141}
13142
13143// Transform (add (mul x, c0), c1) ->
13144// (add (mul (add x, c1/c0), c0), c1%c0).
13145// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13146// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13147// to an infinite loop in DAGCombine if transformed.
13148// Or transform (add (mul x, c0), c1) ->
13149// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13150// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13151// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13152// lead to an infinite loop in DAGCombine if transformed.
13153// Or transform (add (mul x, c0), c1) ->
13154// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13155// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13156// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13157// lead to an infinite loop in DAGCombine if transformed.
13158// Or transform (add (mul x, c0), c1) ->
13159// (mul (add x, c1/c0), c0).
13160// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
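// Worked example (illustrative): (add (mul x, 100), 4099) has c0 = 100 and
// c1 = 4099, which is not simm12. c1/c0 = 40 and c1%c0 = 99 are both simm12,
// and c0*(c1/c0) = 4000 is not, so this becomes
// (add (mul (add x, 40), 100), 99).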
13161 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13162 const RISCVSubtarget &Subtarget) {
13163 // Skip for vector types and larger types.
13164 EVT VT = N->getValueType(0);
13165 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13166 return SDValue();
13167 // The first operand node must be a MUL and has no other use.
13168 SDValue N0 = N->getOperand(0);
13169 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13170 return SDValue();
13171 // Check if c0 and c1 match above conditions.
13172 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13173 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13174 if (!N0C || !N1C)
13175 return SDValue();
13176 // If N0C has multiple uses it's possible one of the cases in
13177 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13178 // in an infinite loop.
13179 if (!N0C->hasOneUse())
13180 return SDValue();
13181 int64_t C0 = N0C->getSExtValue();
13182 int64_t C1 = N1C->getSExtValue();
13183 int64_t CA, CB;
13184 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13185 return SDValue();
13186 // Search for proper CA (non-zero) and CB that both are simm12.
13187 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13188 !isInt<12>(C0 * (C1 / C0))) {
13189 CA = C1 / C0;
13190 CB = C1 % C0;
13191 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13192 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13193 CA = C1 / C0 + 1;
13194 CB = C1 % C0 - C0;
13195 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13196 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13197 CA = C1 / C0 - 1;
13198 CB = C1 % C0 + C0;
13199 } else
13200 return SDValue();
13201 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13202 SDLoc DL(N);
13203 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13204 DAG.getConstant(CA, DL, VT));
13205 SDValue New1 =
13206 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
13207 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
13208}
13209
13210// add (zext, zext) -> zext (add (zext, zext))
13211// sub (zext, zext) -> sext (sub (zext, zext))
13212// mul (zext, zext) -> zext (mul (zext, zext))
13213// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13214// udiv (zext, zext) -> zext (udiv (zext, zext))
13215// srem (zext, zext) -> zext (srem (zext, zext))
13216// urem (zext, zext) -> zext (urem (zext, zext))
13217//
13218 // where the sum of the extend widths matches, and the range of the bin op
13219// fits inside the width of the narrower bin op. (For profitability on rvv, we
13220// use a power of two for both inner and outer extend.)
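// For example (illustrative types): add (zext v8i8 X to v8i32),
// (zext v8i8 Y to v8i32) becomes
// zext (add (zext v8i8 X to v8i16), (zext v8i8 Y to v8i16)) to v8i32,
// since an i16 add of two zero-extended i8 values cannot wrap.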
13221 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13222
13223 EVT VT = N->getValueType(0);
13224 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13225 return SDValue();
13226
13227 SDValue N0 = N->getOperand(0);
13228 SDValue N1 = N->getOperand(1);
13229 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13230 return SDValue();
13231 if (!N0.hasOneUse() || !N1.hasOneUse())
13232 return SDValue();
13233
13234 SDValue Src0 = N0.getOperand(0);
13235 SDValue Src1 = N1.getOperand(0);
13236 EVT SrcVT = Src0.getValueType();
13237 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13238 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13239 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13240 return SDValue();
13241
13242 LLVMContext &C = *DAG.getContext();
13243 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13244 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13245
13246 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13247 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13248
13249 // Src0 and Src1 are zero extended, so they're always positive if signed.
13250 //
13251 // sub can produce a negative from two positive operands, so it needs sign
13252 // extended. Other nodes produce a positive from two positive operands, so
13253 // zero extend instead.
13254 unsigned OuterExtend =
13255 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13256
13257 return DAG.getNode(
13258 OuterExtend, SDLoc(N), VT,
13259 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13260}
13261
13262 // Try to turn (add (xor bool, 1), -1) into (neg bool).
13263 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13264 SDValue N0 = N->getOperand(0);
13265 SDValue N1 = N->getOperand(1);
13266 EVT VT = N->getValueType(0);
13267 SDLoc DL(N);
13268
13269 // RHS should be -1.
13270 if (!isAllOnesConstant(N1))
13271 return SDValue();
13272
13273 // Look for (xor X, 1).
13274 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13275 return SDValue();
13276
13277 // First xor input should be 0 or 1.
13278 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13279 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13280 return SDValue();
13281
13282 // Emit a negate of the setcc.
13283 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13284 N0.getOperand(0));
13285}
13286
13287 static SDValue performADDCombine(SDNode *N,
13288 TargetLowering::DAGCombinerInfo &DCI,
13289 const RISCVSubtarget &Subtarget) {
13290 SelectionDAG &DAG = DCI.DAG;
13291 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13292 return V;
13293 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13294 return V;
13295 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13296 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13297 return V;
13298 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13299 return V;
13300 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13301 return V;
13302 if (SDValue V = combineBinOpOfZExt(N, DAG))
13303 return V;
13304
13305 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13306 // (select lhs, rhs, cc, x, (add x, y))
13307 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13308}
13309
13310 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
13311 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13312 SDValue N0 = N->getOperand(0);
13313 SDValue N1 = N->getOperand(1);
13314 EVT VT = N->getValueType(0);
13315 SDLoc DL(N);
13316
13317 // Require a constant LHS.
13318 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13319 if (!N0C)
13320 return SDValue();
13321
13322 // All our optimizations involve subtracting 1 from the immediate and forming
13323 // an ADDI. Make sure the new immediate is valid for an ADDI.
13324 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13325 if (!ImmValMinus1.isSignedIntN(12))
13326 return SDValue();
13327
13328 SDValue NewLHS;
13329 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13330 // (sub constant, (setcc x, y, eq/neq)) ->
13331 // (add (setcc x, y, neq/eq), constant - 1)
13332 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13333 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13334 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13335 return SDValue();
13336 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13337 NewLHS =
13338 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13339 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13340 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13341 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13342 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13343 NewLHS = N1.getOperand(0);
13344 } else
13345 return SDValue();
13346
13347 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13348 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13349}
13350
13351// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13352// non-zero. Replace with orc.b.
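// For example (illustrative, i32): with X = 0x00010100, (shl X, 8) - X equals
// 0x01010000 - 0x00010100 = 0x00ffff00, which is exactly orc.b(X) because
// every byte of X is either 0 or 1.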
13353 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13354 const RISCVSubtarget &Subtarget) {
13355 if (!Subtarget.hasStdExtZbb())
13356 return SDValue();
13357
13358 EVT VT = N->getValueType(0);
13359
13360 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13361 return SDValue();
13362
13363 SDValue N0 = N->getOperand(0);
13364 SDValue N1 = N->getOperand(1);
13365
13366 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13367 return SDValue();
13368
13369 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13370 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13371 return SDValue();
13372
13373 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13374 if (!DAG.MaskedValueIsZero(N1, Mask))
13375 return SDValue();
13376
13377 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13378}
13379
13380 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13381 const RISCVSubtarget &Subtarget) {
13382 if (SDValue V = combineSubOfBoolean(N, DAG))
13383 return V;
13384
13385 EVT VT = N->getValueType(0);
13386 SDValue N0 = N->getOperand(0);
13387 SDValue N1 = N->getOperand(1);
13388 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13389 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13390 isNullConstant(N1.getOperand(1))) {
13391 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13392 if (CCVal == ISD::SETLT) {
13393 SDLoc DL(N);
13394 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13395 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13396 DAG.getConstant(ShAmt, DL, VT));
13397 }
13398 }
13399
13400 if (SDValue V = combineBinOpOfZExt(N, DAG))
13401 return V;
13402 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13403 return V;
13404
13405 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13406 // (select lhs, rhs, cc, x, (sub x, y))
13407 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13408}
13409
13410// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13411// Legalizing setcc can introduce xors like this. Doing this transform reduces
13412// the number of xors and may allow the xor to fold into a branch condition.
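// For example (illustrative): (and (xor X, 1), (xor Y, 1)) with X and Y known
// to be 0 or 1 becomes (xor (or X, Y), 1), removing one xor and leaving a
// single xor-with-1 that can fold into a branch.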
13413 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13414 SDValue N0 = N->getOperand(0);
13415 SDValue N1 = N->getOperand(1);
13416 bool IsAnd = N->getOpcode() == ISD::AND;
13417
13418 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13419 return SDValue();
13420
13421 if (!N0.hasOneUse() || !N1.hasOneUse())
13422 return SDValue();
13423
13424 SDValue N01 = N0.getOperand(1);
13425 SDValue N11 = N1.getOperand(1);
13426
13427 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13428 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13429 // operation is And, allow one of the Xors to use -1.
13430 if (isOneConstant(N01)) {
13431 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13432 return SDValue();
13433 } else if (isOneConstant(N11)) {
13434 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13435 if (!(IsAnd && isAllOnesConstant(N01)))
13436 return SDValue();
13437 } else
13438 return SDValue();
13439
13440 EVT VT = N->getValueType(0);
13441
13442 SDValue N00 = N0.getOperand(0);
13443 SDValue N10 = N1.getOperand(0);
13444
13445 // The LHS of the xors needs to be 0/1.
13446 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13447 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13448 return SDValue();
13449
13450 // Invert the opcode and insert a new xor.
13451 SDLoc DL(N);
13452 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13453 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13454 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13455}
13456
13457// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13458// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13459 // value to an unsigned value. This will be lowered to vmax and a series of
13460 // vnclipu instructions later. This can be extended to truncated types other
13461 // than i8 by replacing 256 and 255 with the equivalent constants for the
13462// type.
13463 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
13464 EVT VT = N->getValueType(0);
13465 SDValue N0 = N->getOperand(0);
13466 EVT SrcVT = N0.getValueType();
13467
13468 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13469 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13470 return SDValue();
13471
13472 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13473 return SDValue();
13474
13475 SDValue Cond = N0.getOperand(0);
13476 SDValue True = N0.getOperand(1);
13477 SDValue False = N0.getOperand(2);
13478
13479 if (Cond.getOpcode() != ISD::SETCC)
13480 return SDValue();
13481
13482 // FIXME: Support the version of this pattern with the select operands
13483 // swapped.
13484 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13485 if (CCVal != ISD::SETULT)
13486 return SDValue();
13487
13488 SDValue CondLHS = Cond.getOperand(0);
13489 SDValue CondRHS = Cond.getOperand(1);
13490
13491 if (CondLHS != True)
13492 return SDValue();
13493
13494 unsigned ScalarBits = VT.getScalarSizeInBits();
13495
13496 // FIXME: Support other constants.
13497 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13498 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13499 return SDValue();
13500
13501 if (False.getOpcode() != ISD::SIGN_EXTEND)
13502 return SDValue();
13503
13504 False = False.getOperand(0);
13505
13506 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13507 return SDValue();
13508
13509 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13510 if (!FalseRHSC || !FalseRHSC->isZero())
13511 return SDValue();
13512
13513 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13514 if (CCVal2 != ISD::SETGT)
13515 return SDValue();
13516
13517 // Emit the signed to unsigned saturation pattern.
13518 SDLoc DL(N);
13519 SDValue Max =
13520 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13521 SDValue Min =
13522 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13523 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13524 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13525}
13526
13527 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13528 const RISCVSubtarget &Subtarget) {
13529 SDValue N0 = N->getOperand(0);
13530 EVT VT = N->getValueType(0);
13531
13532 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13533 // extending X. This is safe since we only need the LSB after the shift and
13534 // shift amounts larger than 31 would produce poison. If we wait until
13535 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13536 // to use a BEXT instruction.
13537 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13538 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13539 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13540 SDLoc DL(N0);
13541 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13542 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13543 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13544 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13545 }
13546
13547 return combineTruncSelectToSMaxUSat(N, DAG);
13548}
13549
13550 // Combines two comparison operations and a logic operation into one selection
13551 // operation (min, max) and a logic operation. Returns the newly constructed
13552 // node if the conditions for the optimization are satisfied.
13553 static SDValue performANDCombine(SDNode *N,
13554 TargetLowering::DAGCombinerInfo &DCI,
13555 const RISCVSubtarget &Subtarget) {
13556 SelectionDAG &DAG = DCI.DAG;
13557
13558 SDValue N0 = N->getOperand(0);
13559 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13560 // extending X. This is safe since we only need the LSB after the shift and
13561 // shift amounts larger than 31 would produce poison. If we wait until
13562 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13563 // to use a BEXT instruction.
13564 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13565 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13566 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13567 N0.hasOneUse()) {
13568 SDLoc DL(N);
13569 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13570 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13571 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13572 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13573 DAG.getConstant(1, DL, MVT::i64));
13574 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13575 }
13576
13577 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13578 return V;
13579 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13580 return V;
13581
13582 if (DCI.isAfterLegalizeDAG())
13583 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13584 return V;
13585
13586 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13587 // (select lhs, rhs, cc, x, (and x, y))
13588 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13589}
13590
13591// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13592// FIXME: Generalize to other binary operators with same operand.
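// For example (illustrative), with a shared condition C:
// (or (czero_eqz (xor A, 1), C), (czero_nez (xor B, 1), C))
// becomes (xor (or (czero_eqz A, C), (czero_nez B, C)), 1), hoisting the
// common xor-with-1 out of the select idiom.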
13593 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13594 SelectionDAG &DAG) {
13595 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13596
13597 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13598 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13599 !N0.hasOneUse() || !N1.hasOneUse())
13600 return SDValue();
13601
13602 // Should have the same condition.
13603 SDValue Cond = N0.getOperand(1);
13604 if (Cond != N1.getOperand(1))
13605 return SDValue();
13606
13607 SDValue TrueV = N0.getOperand(0);
13608 SDValue FalseV = N1.getOperand(0);
13609
13610 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13611 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13612 !isOneConstant(TrueV.getOperand(1)) ||
13613 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13614 return SDValue();
13615
13616 EVT VT = N->getValueType(0);
13617 SDLoc DL(N);
13618
13619 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13620 Cond);
13621 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13622 Cond);
13623 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13624 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13625}
13626
13627 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13628 const RISCVSubtarget &Subtarget) {
13629 SelectionDAG &DAG = DCI.DAG;
13630
13631 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13632 return V;
13633 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13634 return V;
13635
13636 if (DCI.isAfterLegalizeDAG())
13637 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13638 return V;
13639
13640 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13641 // We may be able to pull a common operation out of the true and false value.
13642 SDValue N0 = N->getOperand(0);
13643 SDValue N1 = N->getOperand(1);
13644 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13645 return V;
13646 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13647 return V;
13648
13649 // fold (or (select cond, 0, y), x) ->
13650 // (select cond, x, (or x, y))
13651 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13652}
13653
13654 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13655 const RISCVSubtarget &Subtarget) {
13656 SDValue N0 = N->getOperand(0);
13657 SDValue N1 = N->getOperand(1);
13658
13659 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13660 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13661 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13662 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13663 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13664 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13665 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13666 SDLoc DL(N);
13667 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13668 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13669 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13670 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13671 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13672 }
13673
13674 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13675 // NOTE: Assumes ROL being legal means ROLW is legal.
13676 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13677 if (N0.getOpcode() == RISCVISD::SLLW &&
13678 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13679 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13680 SDLoc DL(N);
13681 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13682 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13683 }
13684
13685 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13686 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13687 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13688 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13689 if (ConstN00 && CC == ISD::SETLT) {
13690 EVT VT = N0.getValueType();
13691 SDLoc DL(N0);
13692 const APInt &Imm = ConstN00->getAPIntValue();
13693 if ((Imm + 1).isSignedIntN(12))
13694 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13695 DAG.getConstant(Imm + 1, DL, VT), CC);
13696 }
13697 }
13698
13699 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13700 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13701 // would have been promoted to i32, but the setcc would have i64 result.
13702 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13703 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13704 SDValue N00 = N0.getOperand(0);
13705 SDLoc DL(N);
13706 SDValue LHS = N00.getOperand(0);
13707 SDValue RHS = N00.getOperand(1);
13708 SDValue CC = N00.getOperand(2);
13709 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13710 LHS.getValueType());
13711 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13712 LHS, RHS, NotCC);
13713 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13714 }
13715
13716 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13717 return V;
13718 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13719 return V;
13720
13721 // fold (xor (select cond, 0, y), x) ->
13722 // (select cond, x, (xor x, y))
13723 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13724}
13725
13726// Try to expand a scalar multiply to a faster sequence.
13727 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13728 TargetLowering::DAGCombinerInfo &DCI,
13729 const RISCVSubtarget &Subtarget) {
13730
13731 EVT VT = N->getValueType(0);
13732
13733 // LI + MUL is usually smaller than the alternative sequence.
13734 if (DAG.getMachineFunction().getFunction().hasMinSize())
13735 return SDValue();
13736
13737 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13738 return SDValue();
13739
13740 if (VT != Subtarget.getXLenVT())
13741 return SDValue();
13742
13743 const bool HasShlAdd =
13744 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
13745
13746 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13747 if (!CNode)
13748 return SDValue();
13749 uint64_t MulAmt = CNode->getZExtValue();
13750
13751 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13752 // We're adding additional uses of X here, and in principle, we should be freezing
13753 // X before doing so. However, adding freeze here causes real regressions, and no
13754 // other target properly freezes X in these cases either.
13755 SDValue X = N->getOperand(0);
13756
13757 if (HasShlAdd) {
13758 for (uint64_t Divisor : {3, 5, 9}) {
13759 if (MulAmt % Divisor != 0)
13760 continue;
13761 uint64_t MulAmt2 = MulAmt / Divisor;
13762 // 3/5/9 * 2^N -> shl (shXadd X, X), N
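// For example (illustrative): MulAmt == 40 gives Divisor == 5 and
// MulAmt2 == 8, so 40*X is emitted as (shl (sh2add X, X), 3).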
13763 if (isPowerOf2_64(MulAmt2)) {
13764 SDLoc DL(N);
13765 SDValue X = N->getOperand(0);
13766 // Put the shift first if we can fold a zext into the
13767 // shift forming a slli.uw.
13768 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13769 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13770 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13771 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13772 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13773 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
13774 Shl);
13775 }
13776 // Otherwise, put the shl second so that it can fold with the following
13777 // instructions (e.g. sext or add).
13778 SDValue Mul359 =
13779 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13780 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13781 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13782 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13783 }
13784
13785 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13786 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13787 SDLoc DL(N);
13788 SDValue Mul359 =
13789 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13790 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13791 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13792 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13793 Mul359);
13794 }
13795 }
13796
13797 // If this is a power of 2 plus 2/4/8, we can use a shift followed by a single
13798 // shXadd. First check if this is a sum of two powers of 2 because that's
13799 // easy. Then count the trailing zeros to find the low power of 2 (the 2/4/8 part).
13800 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13801 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13802 if (ScaleShift >= 1 && ScaleShift < 4) {
13803 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13804 SDLoc DL(N);
13805 SDValue Shift1 =
13806 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13807 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13808 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13809 }
13810 }
13811
13812 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13813 // This is the two instruction form, there are also three instruction
13814 // variants we could implement. e.g.
13815 // (2^(1,2,3) * 3,5,9 + 1) << C2
13816 // 2^(C1>3) * 3,5,9 +/- 1
13817 for (uint64_t Divisor : {3, 5, 9}) {
13818 uint64_t C = MulAmt - 1;
13819 if (C <= Divisor)
13820 continue;
13821 unsigned TZ = llvm::countr_zero(C);
13822 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13823 SDLoc DL(N);
13824 SDValue Mul359 =
13825 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13826 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13827 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13828 DAG.getConstant(TZ, DL, VT), X);
13829 }
13830 }
13831
13832 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13833 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13834 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13835 if (ScaleShift >= 1 && ScaleShift < 4) {
13836 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13837 SDLoc DL(N);
13838 SDValue Shift1 =
13839 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13840 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13841 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13842 DAG.getConstant(ScaleShift, DL, VT), X));
13843 }
13844 }
13845
13846 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13847 for (uint64_t Offset : {3, 5, 9}) {
13848 if (isPowerOf2_64(MulAmt + Offset)) {
13849 SDLoc DL(N);
13850 SDValue Shift1 =
13851 DAG.getNode(ISD::SHL, DL, VT, X,
13852 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13853 SDValue Mul359 =
13854 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13855 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
13856 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13857 }
13858 }
13859 }
13860
13861 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
13862 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
13863 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
13864 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
13865 SDLoc DL(N);
13866 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13867 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
13868 SDValue Shift2 =
13869 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13870 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
13871 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
13872 }
13873
13874 return SDValue();
13875}
13876
13877// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
13878// (bitcast (sra (v2Xi16 (bitcast X)), 15))
13879// Same for other equivalent types with other equivalent constants.
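// Rationale sketch (illustrative, one i32 lane): (and (lshr X, 15), 0x10001)
// isolates bit 15 (the sign of the low i16 half) and bit 31 (the sign of the
// high half); multiplying by 0xffff expands each of those bits into a 0xffff
// mask within its own half, which is the same as an arithmetic shift right by
// 15 of each i16 half of X.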
13880 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
13881 EVT VT = N->getValueType(0);
13882 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13883
13884 // Do this for legal vectors unless they are i1 or i8 vectors.
13885 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
13886 return SDValue();
13887
13888 if (N->getOperand(0).getOpcode() != ISD::AND ||
13889 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
13890 return SDValue();
13891
13892 SDValue And = N->getOperand(0);
13893 SDValue Srl = And.getOperand(0);
13894
13895 APInt V1, V2, V3;
13896 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
13897 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
13898 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
13899 return SDValue();
13900
13901 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
13902 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
13903 V3 != (HalfSize - 1))
13904 return SDValue();
13905
13906 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
13907 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
13908 VT.getVectorElementCount() * 2);
13909 SDLoc DL(N);
13910 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
13911 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
13912 DAG.getConstant(HalfSize - 1, DL, HalfVT));
13913 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
13914}
13915
13916 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13917 TargetLowering::DAGCombinerInfo &DCI,
13918 const RISCVSubtarget &Subtarget) {
13919 EVT VT = N->getValueType(0);
13920 if (!VT.isVector())
13921 return expandMul(N, DAG, DCI, Subtarget);
13922
13923 SDLoc DL(N);
13924 SDValue N0 = N->getOperand(0);
13925 SDValue N1 = N->getOperand(1);
13926 SDValue MulOper;
13927 unsigned AddSubOpc;
13928
13929 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13930 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13931 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13932 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13933 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13934 AddSubOpc = V->getOpcode();
13935 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13936 SDValue Opnd = V->getOperand(1);
13937 MulOper = V->getOperand(0);
13938 if (AddSubOpc == ISD::SUB)
13939 std::swap(Opnd, MulOper);
13940 if (isOneOrOneSplat(Opnd))
13941 return true;
13942 }
13943 return false;
13944 };
13945
13946 if (IsAddSubWith1(N0)) {
13947 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13948 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13949 }
13950
13951 if (IsAddSubWith1(N1)) {
13952 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13953 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13954 }
13955
13956 if (SDValue V = combineBinOpOfZExt(N, DAG))
13957 return V;
13958
13959 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
13960 return V;
13961
13962 return SDValue();
13963}
13964
13965/// According to the property that indexed load/store instructions zero-extend
13966 /// their indices, try to narrow the type of the index operand.
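/// For example (illustrative): a constant index vector such as
/// <i64 0, i64 16, i64 32> needs at most 8 active bits, so it can be rebuilt
/// as <i8 0, i8 16, i8 32>; the indexed memory operation zero-extends the
/// indices back to XLEN, so the narrowing is lossless.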
13967static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13968 if (isIndexTypeSigned(IndexType))
13969 return false;
13970
13971 if (!N->hasOneUse())
13972 return false;
13973
13974 EVT VT = N.getValueType();
13975 SDLoc DL(N);
13976
13977 // In general, what we're doing here is seeing if we can sink a truncate to
13978 // a smaller element type into the expression tree building our index.
13979 // TODO: We can generalize this and handle a bunch more cases if useful.
13980
13981 // Narrow a buildvector to the narrowest element type. This requires less
13982 // work and less register pressure at high LMUL, and creates smaller constants
13983 // which may be cheaper to materialize.
13984 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13985 KnownBits Known = DAG.computeKnownBits(N);
13986 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13987 LLVMContext &C = *DAG.getContext();
13988 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13989 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13990 N = DAG.getNode(ISD::TRUNCATE, DL,
13991 VT.changeVectorElementType(ResultVT), N);
13992 return true;
13993 }
13994 }
13995
13996 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13997 if (N.getOpcode() != ISD::SHL)
13998 return false;
13999
14000 SDValue N0 = N.getOperand(0);
14001 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14002 N0.getOpcode() != RISCVISD::VZEXT_VL)
14003 return false;
14004 if (!N0->hasOneUse())
14005 return false;
14006
14007 APInt ShAmt;
14008 SDValue N1 = N.getOperand(1);
14009 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14010 return false;
14011
14012 SDValue Src = N0.getOperand(0);
14013 EVT SrcVT = Src.getValueType();
14014 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14015 unsigned ShAmtV = ShAmt.getZExtValue();
14016 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14017 NewElen = std::max(NewElen, 8U);
14018
14019 // Skip if NewElen is not narrower than the original extended type.
14020 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14021 return false;
14022
14023 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14024 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14025
14026 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14027 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14028 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14029 return true;
14030}
14031
14032// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14033// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14034// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14035// can become a sext.w instead of a shift pair.
14036static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14037 const RISCVSubtarget &Subtarget) {
14038 SDValue N0 = N->getOperand(0);
14039 SDValue N1 = N->getOperand(1);
14040 EVT VT = N->getValueType(0);
14041 EVT OpVT = N0.getValueType();
14042
14043 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14044 return SDValue();
14045
14046 // RHS needs to be a constant.
14047 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14048 if (!N1C)
14049 return SDValue();
14050
14051 // LHS needs to be (and X, 0xffffffff).
14052 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14053 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14054 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14055 return SDValue();
14056
14057 // Looking for an equality compare.
14058 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14059 if (!isIntEqualitySetCC(Cond))
14060 return SDValue();
14061
14062 // Don't do this if the sign bit is provably zero, it will be turned back into
14063 // an AND.
14064 APInt SignMask = APInt::getOneBitSet(64, 31);
14065 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14066 return SDValue();
14067
14068 const APInt &C1 = N1C->getAPIntValue();
14069
14070 SDLoc dl(N);
14071 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14072 // to be equal.
14073 if (C1.getActiveBits() > 32)
14074 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14075
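 // Truncate C1 to 32 bits and sign extend it again so the constant matches the
 // value produced by sign extending the LHS from bit 31.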
14076 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14077 N0.getOperand(0), DAG.getValueType(MVT::i32));
14078 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14079 dl, OpVT), Cond);
14080}
14081
14082 static SDValue
14083 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14084 const RISCVSubtarget &Subtarget) {
14085 SDValue Src = N->getOperand(0);
14086 EVT VT = N->getValueType(0);
14087
14088 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14089 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14090 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
14091 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14092 Src.getOperand(0));
14093
14094 return SDValue();
14095}
14096
14097namespace {
14098// Forward declaration of the structure holding the necessary information to
14099// apply a combine.
14100struct CombineResult;
14101
14102enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14103/// Helper class for folding sign/zero extensions.
14104/// In particular, this class is used for the following combines:
14105/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14106/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14107/// mul | mul_vl -> vwmul(u) | vwmul_su
14108/// shl | shl_vl -> vwsll
14109/// fadd -> vfwadd | vfwadd_w
14110/// fsub -> vfwsub | vfwsub_w
14111/// fmul -> vfwmul
14112/// An object of this class represents an operand of the operation we want to
14113/// combine.
14114/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14115/// NodeExtensionHelper for `a` and one for `b`.
14116///
14117/// This class abstracts away how the extension is materialized and
14118/// how its number of users affects the combines.
14119///
14120/// In particular:
14121/// - VWADD_W is conceptually == add(op0, sext(op1))
14122/// - VWADDU_W == add(op0, zext(op1))
14123/// - VWSUB_W == sub(op0, sext(op1))
14124/// - VWSUBU_W == sub(op0, zext(op1))
14125/// - VFWADD_W == fadd(op0, fpext(op1))
14126/// - VFWSUB_W == fsub(op0, fpext(op1))
14127/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14128/// zext|sext(smaller_value).
14129struct NodeExtensionHelper {
14130 /// Records if this operand is like being zero extended.
14131 bool SupportsZExt;
14132 /// Records if this operand is like being sign extended.
14133 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14134 /// instance, a splat constant (e.g., 3), would support being both sign and
14135 /// zero extended.
14136 bool SupportsSExt;
14137 /// Records if this operand is like being floating-Point extended.
14138 bool SupportsFPExt;
14139 /// This boolean captures whether we care if this operand would still be
14140 /// around after the folding happens.
14141 bool EnforceOneUse;
14142 /// Original value that this NodeExtensionHelper represents.
14143 SDValue OrigOperand;
14144
14145 /// Get the value feeding the extension or the value itself.
14146 /// E.g., for zext(a), this would return a.
14147 SDValue getSource() const {
14148 switch (OrigOperand.getOpcode()) {
14149 case ISD::ZERO_EXTEND:
14150 case ISD::SIGN_EXTEND:
14151 case RISCVISD::VSEXT_VL:
14152 case RISCVISD::VZEXT_VL:
14153 case RISCVISD::FP_EXTEND_VL:
14154 return OrigOperand.getOperand(0);
14155 default:
14156 return OrigOperand;
14157 }
14158 }
14159
14160 /// Check if this instance represents a splat.
14161 bool isSplat() const {
14162 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14163 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14164 }
14165
14166 /// Get the extended opcode.
14167 unsigned getExtOpc(ExtKind SupportsExt) const {
14168 switch (SupportsExt) {
14169 case ExtKind::SExt:
14170 return RISCVISD::VSEXT_VL;
14171 case ExtKind::ZExt:
14172 return RISCVISD::VZEXT_VL;
14173 case ExtKind::FPExt:
14174 return RISCVISD::FP_EXTEND_VL;
14175 }
14176 llvm_unreachable("Unknown ExtKind enum");
14177 }
14178
14179 /// Get or create a value that can feed \p Root with the given extension \p
14180 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
14181 /// operand. \see ::getSource().
14182 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14183 const RISCVSubtarget &Subtarget,
14184 std::optional<ExtKind> SupportsExt) const {
14185 if (!SupportsExt.has_value())
14186 return OrigOperand;
14187
14188 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14189
14190 SDValue Source = getSource();
14191 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
14192 if (Source.getValueType() == NarrowVT)
14193 return Source;
14194
14195 unsigned ExtOpc = getExtOpc(*SupportsExt);
14196
14197 // If we need an extension, we should be changing the type.
14198 SDLoc DL(OrigOperand);
14199 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
14200 switch (OrigOperand.getOpcode()) {
14201 case ISD::ZERO_EXTEND:
14202 case ISD::SIGN_EXTEND:
14203 case RISCVISD::VSEXT_VL:
14204 case RISCVISD::VZEXT_VL:
14205 case RISCVISD::FP_EXTEND_VL:
14206 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
14207 case ISD::SPLAT_VECTOR:
14208 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14209 case RISCVISD::VMV_V_X_VL:
14210 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14211 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14212 default:
14213 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
14214 // and that operand should already have the right NarrowVT so no
14215 // extension should be required at this point.
14216 llvm_unreachable("Unsupported opcode");
14217 }
14218 }
14219
14220 /// Helper function to get the narrow type for \p Root.
14221 /// The narrow type is the type of \p Root where we divided the size of each
14222 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14223 /// \pre Both the narrow type and the original type should be legal.
14224 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14225 MVT VT = Root->getSimpleValueType(0);
14226
14227 // Determine the narrow size.
14228 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14229
14230 MVT EltVT = SupportsExt == ExtKind::FPExt
14231 ? MVT::getFloatingPointVT(NarrowSize)
14232 : MVT::getIntegerVT(NarrowSize);
14233
14234 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14235 "Trying to extend something we can't represent");
14236 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14237 return NarrowVT;
14238 }
14239
14240 /// Get the opcode to materialize:
14241 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14242 static unsigned getSExtOpcode(unsigned Opcode) {
14243 switch (Opcode) {
14244 case ISD::ADD:
14245 case RISCVISD::ADD_VL:
14246 case RISCVISD::VWADD_W_VL:
14247 case RISCVISD::VWADDU_W_VL:
14248 case ISD::OR:
14249 return RISCVISD::VWADD_VL;
14250 case ISD::SUB:
14251 case RISCVISD::SUB_VL:
14252 case RISCVISD::VWSUB_W_VL:
14253 case RISCVISD::VWSUBU_W_VL:
14254 return RISCVISD::VWSUB_VL;
14255 case ISD::MUL:
14256 case RISCVISD::MUL_VL:
14257 return RISCVISD::VWMUL_VL;
14258 default:
14259 llvm_unreachable("Unexpected opcode");
14260 }
14261 }
14262
14263 /// Get the opcode to materialize:
14264 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14265 static unsigned getZExtOpcode(unsigned Opcode) {
14266 switch (Opcode) {
14267 case ISD::ADD:
14268 case RISCVISD::ADD_VL:
14269 case RISCVISD::VWADD_W_VL:
14270 case RISCVISD::VWADDU_W_VL:
14271 case ISD::OR:
14272 return RISCVISD::VWADDU_VL;
14273 case ISD::SUB:
14274 case RISCVISD::SUB_VL:
14275 case RISCVISD::VWSUB_W_VL:
14276 case RISCVISD::VWSUBU_W_VL:
14277 return RISCVISD::VWSUBU_VL;
14278 case ISD::MUL:
14279 case RISCVISD::MUL_VL:
14280 return RISCVISD::VWMULU_VL;
14281 case ISD::SHL:
14282 case RISCVISD::SHL_VL:
14283 return RISCVISD::VWSLL_VL;
14284 default:
14285 llvm_unreachable("Unexpected opcode");
14286 }
14287 }
14288
14289 /// Get the opcode to materialize:
14290 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14291 static unsigned getFPExtOpcode(unsigned Opcode) {
14292 switch (Opcode) {
14293 case RISCVISD::FADD_VL:
14294 case RISCVISD::VFWADD_W_VL:
14295 return RISCVISD::VFWADD_VL;
14296 case RISCVISD::FSUB_VL:
14297 case RISCVISD::VFWSUB_W_VL:
14298 return RISCVISD::VFWSUB_VL;
14299 case RISCVISD::FMUL_VL:
14300 return RISCVISD::VFWMUL_VL;
14301 default:
14302 llvm_unreachable("Unexpected opcode");
14303 }
14304 }
14305
14306 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14307 /// newOpcode(a, b).
14308 static unsigned getSUOpcode(unsigned Opcode) {
14309 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14310 "SU is only supported for MUL");
14311 return RISCVISD::VWMULSU_VL;
14312 }
14313
14314 /// Get the opcode to materialize
14315 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14316 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14317 switch (Opcode) {
14318 case ISD::ADD:
14319 case RISCVISD::ADD_VL:
14320 case ISD::OR:
14321 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14322 : RISCVISD::VWADDU_W_VL;
14323 case ISD::SUB:
14324 case RISCVISD::SUB_VL:
14325 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14326 : RISCVISD::VWSUBU_W_VL;
14327 case RISCVISD::FADD_VL:
14328 return RISCVISD::VFWADD_W_VL;
14329 case RISCVISD::FSUB_VL:
14330 return RISCVISD::VFWSUB_W_VL;
14331 default:
14332 llvm_unreachable("Unexpected opcode");
14333 }
14334 }
14335
14336 using CombineToTry = std::function<std::optional<CombineResult>(
14337 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14338 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14339 const RISCVSubtarget &)>;
14340
14341 /// Check if this node needs to be fully folded or extended for all users.
14342 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14343
14344 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14345 const RISCVSubtarget &Subtarget) {
14346 unsigned Opc = OrigOperand.getOpcode();
14347 MVT VT = OrigOperand.getSimpleValueType();
14348
14349 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14350 "Unexpected Opcode");
14351
14352 // The passthru must be undef for tail agnostic.
14353 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14354 return;
14355
14356 // Get the scalar value.
14357 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14358 : OrigOperand.getOperand(1);
14359
14360 // See if we have enough sign bits or zero bits in the scalar to use a
14361 // widening opcode by splatting to smaller element size.
14362 unsigned EltBits = VT.getScalarSizeInBits();
14363 unsigned ScalarBits = Op.getValueSizeInBits();
14364 // Make sure we're getting all element bits from the scalar register.
14365 // FIXME: Support implicit sign extension of vmv.v.x?
14366 if (ScalarBits < EltBits)
14367 return;
14368
14369 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14370 // If the narrow type cannot be expressed with a legal VMV,
14371 // this is not a valid candidate.
14372 if (NarrowSize < 8)
14373 return;
14374
14375 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14376 SupportsSExt = true;
14377
14378 if (DAG.MaskedValueIsZero(Op,
14379 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14380 SupportsZExt = true;
14381
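 // A narrower splat can be materialized without removing the original one, so
 // this fold does not require the splat to have a single use.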
14382 EnforceOneUse = false;
14383 }
14384
14385 /// Helper method to set the various fields of this struct based on the
14386 /// type of \p Root.
14387 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14388 const RISCVSubtarget &Subtarget) {
14389 SupportsZExt = false;
14390 SupportsSExt = false;
14391 SupportsFPExt = false;
14392 EnforceOneUse = true;
14393 unsigned Opc = OrigOperand.getOpcode();
14394 // For the nodes we handle below, we end up using their inputs directly: see
14395 // getSource(). However since they either don't have a passthru or we check
14396 // that their passthru is undef, we can safely ignore their mask and VL.
14397 switch (Opc) {
14398 case ISD::ZERO_EXTEND:
14399 case ISD::SIGN_EXTEND: {
14400 MVT VT = OrigOperand.getSimpleValueType();
14401 if (!VT.isVector())
14402 break;
14403
14404 SDValue NarrowElt = OrigOperand.getOperand(0);
14405 MVT NarrowVT = NarrowElt.getSimpleValueType();
14406 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14407 if (NarrowVT.getVectorElementType() == MVT::i1)
14408 break;
14409
14410 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14411 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14412 break;
14413 }
14414 case RISCVISD::VZEXT_VL:
14415 SupportsZExt = true;
14416 break;
14417 case RISCVISD::VSEXT_VL:
14418 SupportsSExt = true;
14419 break;
14420 case RISCVISD::FP_EXTEND_VL:
14421 SupportsFPExt = true;
14422 break;
14423 case ISD::SPLAT_VECTOR:
14424 case RISCVISD::VMV_V_X_VL:
14425 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14426 break;
14427 default:
14428 break;
14429 }
14430 }
14431
14432 /// Check if \p Root supports any extension folding combines.
14433 static bool isSupportedRoot(const SDNode *Root,
14434 const RISCVSubtarget &Subtarget) {
14435 switch (Root->getOpcode()) {
14436 case ISD::ADD:
14437 case ISD::SUB:
14438 case ISD::MUL: {
14439 return Root->getValueType(0).isScalableVector();
14440 }
14441 case ISD::OR: {
14442 return Root->getValueType(0).isScalableVector() &&
14443 Root->getFlags().hasDisjoint();
14444 }
14445 // Vector Widening Integer Add/Sub/Mul Instructions
14446 case RISCVISD::ADD_VL:
14447 case RISCVISD::MUL_VL:
14448 case RISCVISD::VWADD_W_VL:
14449 case RISCVISD::VWADDU_W_VL:
14450 case RISCVISD::SUB_VL:
14451 case RISCVISD::VWSUB_W_VL:
14452 case RISCVISD::VWSUBU_W_VL:
14453 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14454 case RISCVISD::FADD_VL:
14455 case RISCVISD::FSUB_VL:
14456 case RISCVISD::FMUL_VL:
14457 case RISCVISD::VFWADD_W_VL:
14458 case RISCVISD::VFWSUB_W_VL:
14459 return true;
14460 case ISD::SHL:
14461 return Root->getValueType(0).isScalableVector() &&
14462 Subtarget.hasStdExtZvbb();
14463 case RISCVISD::SHL_VL:
14464 return Subtarget.hasStdExtZvbb();
14465 default:
14466 return false;
14467 }
14468 }
14469
14470 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14471 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14472 const RISCVSubtarget &Subtarget) {
14473 assert(isSupportedRoot(Root, Subtarget) &&
14474 "Trying to build a helper with an "
14475 "unsupported root");
14476 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14478 OrigOperand = Root->getOperand(OperandIdx);
14479
14480 unsigned Opc = Root->getOpcode();
14481 switch (Opc) {
14482 // We consider
14483 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14484 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14485 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14486 case RISCVISD::VWADD_W_VL:
14487 case RISCVISD::VWADDU_W_VL:
14488 case RISCVISD::VWSUB_W_VL:
14489 case RISCVISD::VWSUBU_W_VL:
14490 case RISCVISD::VFWADD_W_VL:
14491 case RISCVISD::VFWSUB_W_VL:
14492 if (OperandIdx == 1) {
14493 SupportsZExt =
14494 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14495 SupportsSExt =
14496 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14497 SupportsFPExt =
14498 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14499 // There's no existing extension here, so we don't have to worry about
14500 // making sure it gets removed.
14501 EnforceOneUse = false;
14502 break;
14503 }
14504 [[fallthrough]];
14505 default:
14506 fillUpExtensionSupport(Root, DAG, Subtarget);
14507 break;
14508 }
14509 }
14510
14511 /// Helper function to get the Mask and VL from \p Root.
14512 static std::pair<SDValue, SDValue>
14513 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14514 const RISCVSubtarget &Subtarget) {
14515 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14516 switch (Root->getOpcode()) {
14517 case ISD::ADD:
14518 case ISD::SUB:
14519 case ISD::MUL:
14520 case ISD::OR:
14521 case ISD::SHL: {
14522 SDLoc DL(Root);
14523 MVT VT = Root->getSimpleValueType(0);
14524 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14525 }
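 // The remaining supported roots are RVV *_VL nodes, which carry their mask
 // and VL as operands 3 and 4.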
14526 default:
14527 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14528 }
14529 }
14530
14531 /// Helper function to check if \p N is commutative with respect to the
14532 /// foldings that are supported by this class.
14533 static bool isCommutative(const SDNode *N) {
14534 switch (N->getOpcode()) {
14535 case ISD::ADD:
14536 case ISD::MUL:
14537 case ISD::OR:
14538 case RISCVISD::ADD_VL:
14539 case RISCVISD::MUL_VL:
14540 case RISCVISD::VWADD_W_VL:
14541 case RISCVISD::VWADDU_W_VL:
14542 case RISCVISD::FADD_VL:
14543 case RISCVISD::FMUL_VL:
14544 case RISCVISD::VFWADD_W_VL:
14545 return true;
14546 case ISD::SUB:
14547 case RISCVISD::SUB_VL:
14548 case RISCVISD::VWSUB_W_VL:
14549 case RISCVISD::VWSUBU_W_VL:
14550 case RISCVISD::FSUB_VL:
14551 case RISCVISD::VFWSUB_W_VL:
14552 case ISD::SHL:
14553 case RISCVISD::SHL_VL:
14554 return false;
14555 default:
14556 llvm_unreachable("Unexpected opcode");
14557 }
14558 }
14559
14560 /// Get a list of combine to try for folding extensions in \p Root.
14561 /// Note that each returned CombineToTry function doesn't actually modify
14562 /// anything. Instead they produce an optional CombineResult that if not None,
14563 /// need to be materialized for the combine to be applied.
14564 /// \see CombineResult::materialize.
14565 /// If the related CombineToTry function returns std::nullopt, that means the
14566 /// combine didn't match.
14567 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14568};
14569
14570/// Helper structure that holds all the necessary information to materialize a
14571/// combine that does some extension folding.
14572struct CombineResult {
14573 /// Opcode to be generated when materializing the combine.
14574 unsigned TargetOpcode;
14575 // No value means no extension is needed.
14576 std::optional<ExtKind> LHSExt;
14577 std::optional<ExtKind> RHSExt;
14578 /// Root of the combine.
14579 SDNode *Root;
14580 /// LHS of the TargetOpcode.
14581 NodeExtensionHelper LHS;
14582 /// RHS of the TargetOpcode.
14583 NodeExtensionHelper RHS;
14584
14585 CombineResult(unsigned TargetOpcode, SDNode *Root,
14586 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14587 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14588 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14589 LHS(LHS), RHS(RHS) {}
14590
14591 /// Return a value that uses TargetOpcode and that can be used to replace
14592 /// Root.
14593 /// The actual replacement is *not* done in that method.
14594 SDValue materialize(SelectionDAG &DAG,
14595 const RISCVSubtarget &Subtarget) const {
14596 SDValue Mask, VL, Merge;
14597 std::tie(Mask, VL) =
14598 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
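 // Generic ISD roots have no merge/passthru operand, so use undef; the *_VL
 // roots already carry one as operand 2.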
14599 switch (Root->getOpcode()) {
14600 default:
14601 Merge = Root->getOperand(2);
14602 break;
14603 case ISD::ADD:
14604 case ISD::SUB:
14605 case ISD::MUL:
14606 case ISD::OR:
14607 case ISD::SHL:
14608 Merge = DAG.getUNDEF(Root->getValueType(0));
14609 break;
14610 }
14611 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14612 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14613 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14614 Merge, Mask, VL);
14615 }
14616};
14617
14618/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14619/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14620/// are zext) and LHS and RHS can be folded into Root.
14621/// AllowExtMask define which form `ext` can take in this pattern.
14622///
14623/// \note If the pattern can match with both zext and sext, the returned
14624/// CombineResult will feature the zext result.
14625///
14626/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14627/// can be used to apply the pattern.
14628static std::optional<CombineResult>
14629canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14630 const NodeExtensionHelper &RHS,
14631 uint8_t AllowExtMask, SelectionDAG &DAG,
14632 const RISCVSubtarget &Subtarget) {
14633 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14634 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14635 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14636 /*RHSExt=*/{ExtKind::ZExt});
14637 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14638 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14639 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14640 /*RHSExt=*/{ExtKind::SExt});
14641 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14642 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14643 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14644 /*RHSExt=*/{ExtKind::FPExt});
14645 return std::nullopt;
14646}
14647
14648/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14649/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14650/// are zext) and LHS and RHS can be folded into Root.
14651///
14652/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14653/// can be used to apply the pattern.
14654static std::optional<CombineResult>
14655canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14656 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14657 const RISCVSubtarget &Subtarget) {
14658 return canFoldToVWWithSameExtensionImpl(
14659 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14660 Subtarget);
14661}
14662
14663/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14664///
14665/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14666/// can be used to apply the pattern.
14667static std::optional<CombineResult>
14668canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14669 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14670 const RISCVSubtarget &Subtarget) {
14671 if (RHS.SupportsFPExt)
14672 return CombineResult(
14673 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14674 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14675
14676 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14677 // sext/zext?
14678 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14679 // purposes.
14680 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14681 return CombineResult(
14682 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14683 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14684 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14685 return CombineResult(
14686 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14687 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14688 return std::nullopt;
14689}
14690
14691/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14692///
14693/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14694/// can be used to apply the pattern.
14695static std::optional<CombineResult>
14696canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14697 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14698 const RISCVSubtarget &Subtarget) {
14699 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14700 Subtarget);
14701}
14702
14703/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14704///
14705/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14706/// can be used to apply the pattern.
14707static std::optional<CombineResult>
14708canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14709 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14710 const RISCVSubtarget &Subtarget) {
14711 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14712 Subtarget);
14713}
14714
14715/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14716///
14717/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14718/// can be used to apply the pattern.
14719static std::optional<CombineResult>
14720canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14721 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14722 const RISCVSubtarget &Subtarget) {
14723 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14724 Subtarget);
14725}
14726
14727/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14728///
14729/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14730/// can be used to apply the pattern.
14731static std::optional<CombineResult>
14732canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14733 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14734 const RISCVSubtarget &Subtarget) {
14735
14736 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14737 return std::nullopt;
14738 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14739 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14740 /*RHSExt=*/{ExtKind::ZExt});
14741}
14742
14743 SmallVector<NodeExtensionHelper::CombineToTry>
14744 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14745 SmallVector<CombineToTry> Strategies;
14746 switch (Root->getOpcode()) {
14747 case ISD::ADD:
14748 case ISD::SUB:
14749 case ISD::OR:
14750 case RISCVISD::ADD_VL:
14751 case RISCVISD::SUB_VL:
14752 case RISCVISD::FADD_VL:
14753 case RISCVISD::FSUB_VL:
14754 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14755 Strategies.push_back(canFoldToVWWithSameExtension);
14756 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14757 Strategies.push_back(canFoldToVW_W);
14758 break;
14759 case RISCVISD::FMUL_VL:
14760 Strategies.push_back(canFoldToVWWithSameExtension);
14761 break;
14762 case ISD::MUL:
14763 case RISCVISD::MUL_VL:
14764 // mul -> vwmul(u)
14765 Strategies.push_back(canFoldToVWWithSameExtension);
14766 // mul -> vwmulsu
14767 Strategies.push_back(canFoldToVW_SU);
14768 break;
14769 case ISD::SHL:
14770 case RISCVISD::SHL_VL:
14771 // shl -> vwsll
14772 Strategies.push_back(canFoldToVWWithZEXT);
14773 break;
14774 case RISCVISD::VWADD_W_VL:
14775 case RISCVISD::VWSUB_W_VL:
14776 // vwadd_w|vwsub_w -> vwadd|vwsub
14777 Strategies.push_back(canFoldToVWWithSEXT);
14778 break;
14779 case RISCVISD::VWADDU_W_VL:
14780 case RISCVISD::VWSUBU_W_VL:
14781 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14782 Strategies.push_back(canFoldToVWWithZEXT);
14783 break;
14784 case RISCVISD::VFWADD_W_VL:
14785 case RISCVISD::VFWSUB_W_VL:
14786 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14787 Strategies.push_back(canFoldToVWWithFPEXT);
14788 break;
14789 default:
14790 llvm_unreachable("Unexpected opcode");
14791 }
14792 return Strategies;
14793}
14794} // End anonymous namespace.
14795
14796/// Combine a binary operation to its equivalent VW or VW_W form.
14797/// The supported combines are:
14798/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14799/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14800/// mul | mul_vl -> vwmul(u) | vwmul_su
14801/// shl | shl_vl -> vwsll
14802/// fadd_vl -> vfwadd | vfwadd_w
14803/// fsub_vl -> vfwsub | vfwsub_w
14804/// fmul_vl -> vfwmul
14805/// vwadd_w(u) -> vwadd(u)
14806/// vwsub_w(u) -> vwsub(u)
14807/// vfwadd_w -> vfwadd
14808/// vfwsub_w -> vfwsub
14809 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14810 TargetLowering::DAGCombinerInfo &DCI,
14811 const RISCVSubtarget &Subtarget) {
14812 SelectionDAG &DAG = DCI.DAG;
14813 if (DCI.isBeforeLegalize())
14814 return SDValue();
14815
14816 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14817 return SDValue();
14818
14819 SmallVector<SDNode *> Worklist;
14820 SmallSet<SDNode *, 8> Inserted;
14821 Worklist.push_back(N);
14822 Inserted.insert(N);
14823 SmallVector<CombineResult> CombinesToApply;
14824
14825 while (!Worklist.empty()) {
14826 SDNode *Root = Worklist.pop_back_val();
14827 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14828 return SDValue();
14829
14830 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
14831 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
14832 auto AppendUsersIfNeeded = [&Worklist,
14833 &Inserted](const NodeExtensionHelper &Op) {
14834 if (Op.needToPromoteOtherUsers()) {
14835 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14836 if (Inserted.insert(TheUse).second)
14837 Worklist.push_back(TheUse);
14838 }
14839 }
14840 };
14841
14842 // Control the compile time by limiting the number of nodes we look at in
14843 // total.
14844 if (Inserted.size() > ExtensionMaxWebSize)
14845 return SDValue();
14846
14847 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14848 NodeExtensionHelper::getSupportedFoldings(Root);
14849
14850 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14851 bool Matched = false;
14852 for (int Attempt = 0;
14853 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
14854 ++Attempt) {
14855
14856 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14857 FoldingStrategies) {
14858 std::optional<CombineResult> Res =
14859 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
14860 if (Res) {
14861 Matched = true;
14862 CombinesToApply.push_back(*Res);
14863 // All the inputs that are extended need to be folded, otherwise
14864 // we would be leaving the old input (since it may still be used),
14865 // and the new one.
14866 if (Res->LHSExt.has_value())
14867 AppendUsersIfNeeded(LHS);
14868 if (Res->RHSExt.has_value())
14869 AppendUsersIfNeeded(RHS);
14870 break;
14871 }
14872 }
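 // For commutative roots the outer loop runs a second time with the operands
 // swapped, so both orders are tried.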
14873 std::swap(LHS, RHS);
14874 }
14875 // Right now we do an all or nothing approach.
14876 if (!Matched)
14877 return SDValue();
14878 }
14879 // Store the value for the replacement of the input node separately.
14880 SDValue InputRootReplacement;
14881 // We do the RAUW after we materialize all the combines, because some replaced
14882 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14883 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14884 // yet-to-be-visited CombinesToApply roots.
14885 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14886 ValuesToReplace.reserve(CombinesToApply.size());
14887 for (CombineResult Res : CombinesToApply) {
14888 SDValue NewValue = Res.materialize(DAG, Subtarget);
14889 if (!InputRootReplacement) {
14890 assert(Res.Root == N &&
14891 "First element is expected to be the current node");
14892 InputRootReplacement = NewValue;
14893 } else {
14894 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14895 }
14896 }
14897 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14898 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14899 DCI.AddToWorklist(OldNewValues.second.getNode());
14900 }
14901 return InputRootReplacement;
14902}
14903
14904// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14905// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14906// y will be the Passthru and cond will be the Mask.
14907 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14908 unsigned Opc = N->getOpcode();
14909 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14910 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14911
14912 SDValue Y = N->getOperand(0);
14913 SDValue MergeOp = N->getOperand(1);
14914 unsigned MergeOpc = MergeOp.getOpcode();
14915
14916 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14917 return SDValue();
14918
14919 SDValue X = MergeOp->getOperand(1);
14920
14921 if (!MergeOp.hasOneUse())
14922 return SDValue();
14923
14924 // Passthru should be undef
14925 SDValue Passthru = N->getOperand(2);
14926 if (!Passthru.isUndef())
14927 return SDValue();
14928
14929 // Mask should be all ones
14930 SDValue Mask = N->getOperand(3);
14931 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14932 return SDValue();
14933
14934 // False value of MergeOp should be all zeros
14935 SDValue Z = MergeOp->getOperand(2);
14936
14937 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14938 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14939 Z = Z.getOperand(1);
14940
14941 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14942 return SDValue();
14943
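 // Use Y as the passthru: lanes where the mask (cond) is false keep Y, which is
 // exactly what Y plus the merged-in zero would have produced.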
14944 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14945 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14946 N->getFlags());
14947}
14948
14949 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14950 TargetLowering::DAGCombinerInfo &DCI,
14951 const RISCVSubtarget &Subtarget) {
14952 [[maybe_unused]] unsigned Opc = N->getOpcode();
14953 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14954 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14955
14956 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14957 return V;
14958
14959 return combineVWADDSUBWSelect(N, DCI.DAG);
14960}
14961
14962// Helper function for performMemPairCombine.
14963// Try to combine the memory loads/stores LSNode1 and LSNode2
14964// into a single memory pair operation.
14965 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14966 LSBaseSDNode *LSNode2, SDValue BasePtr,
14967 uint64_t Imm) {
14968 SmallPtrSet<const SDNode *, 32> Visited;
14969 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14970
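 // Bail out if either node is a predecessor of the other; merging them into a
 // single paired operation would create a cycle in the DAG.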
14971 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14972 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14973 return SDValue();
14974
14975 MachineFunction &MF = DAG.getMachineFunction();
14976 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14977
14978 // The new operation has twice the width.
14979 MVT XLenVT = Subtarget.getXLenVT();
14980 EVT MemVT = LSNode1->getMemoryVT();
14981 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14982 MachineMemOperand *MMO = LSNode1->getMemOperand();
14983 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14984 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14985
14986 if (LSNode1->getOpcode() == ISD::LOAD) {
14987 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14988 unsigned Opcode;
14989 if (MemVT == MVT::i32)
14990 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14991 else
14992 Opcode = RISCVISD::TH_LDD;
14993
14994 SDValue Res = DAG.getMemIntrinsicNode(
14995 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14996 {LSNode1->getChain(), BasePtr,
14997 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14998 NewMemVT, NewMMO);
14999
15000 SDValue Node1 =
15001 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15002 SDValue Node2 =
15003 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15004
15005 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15006 return Node1;
15007 } else {
15008 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15009
15010 SDValue Res = DAG.getMemIntrinsicNode(
15011 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15012 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15013 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15014 NewMemVT, NewMMO);
15015
15016 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15017 return Res;
15018 }
15019}
15020
15021// Try to combine two adjacent loads/stores to a single pair instruction from
15022// the XTHeadMemPair vendor extension.
15023 static SDValue performMemPairCombine(SDNode *N,
15024 TargetLowering::DAGCombinerInfo &DCI) {
15025 SelectionDAG &DAG = DCI.DAG;
15026 MachineFunction &MF = DAG.getMachineFunction();
15027 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15028
15029 // Target does not support load/store pair.
15030 if (!Subtarget.hasVendorXTHeadMemPair())
15031 return SDValue();
15032
15033 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15034 EVT MemVT = LSNode1->getMemoryVT();
15035 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15036
15037 // No volatile, indexed or atomic loads/stores.
15038 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15039 return SDValue();
15040
15041 // Function to get a base + constant representation from a memory value.
15042 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15043 if (Ptr->getOpcode() == ISD::ADD)
15044 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15045 return {Ptr->getOperand(0), C1->getZExtValue()};
15046 return {Ptr, 0};
15047 };
15048
15049 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15050
15051 SDValue Chain = N->getOperand(0);
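 // Walk the other users of the same chain looking for a second load/store that
 // can be paired with this one.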
15052 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
15053 UI != UE; ++UI) {
15054 SDUse &Use = UI.getUse();
15055 if (Use.getUser() != N && Use.getResNo() == 0 &&
15056 Use.getUser()->getOpcode() == N->getOpcode()) {
15057 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
15058
15059 // No volatile, indexed or atomic loads/stores.
15060 if (!LSNode2->isSimple() || LSNode2->isIndexed())
15061 continue;
15062
15063 // Check if LSNode1 and LSNode2 have the same type and extension.
15064 if (LSNode1->getOpcode() == ISD::LOAD)
15065 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
15066 cast<LoadSDNode>(LSNode1)->getExtensionType())
15067 continue;
15068
15069 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
15070 continue;
15071
15072 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
15073
15074 // Check if the base pointer is the same for both instructions.
15075 if (Base1 != Base2)
15076 continue;
15077
15078 // Check if the offsets match the XTHeadMemPair encoding constraints.
15079 bool Valid = false;
15080 if (MemVT == MVT::i32) {
15081 // Check for adjacent i32 values and a 2-bit index.
15082 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
15083 Valid = true;
15084 } else if (MemVT == MVT::i64) {
15085 // Check for adjacent i64 values and a 2-bit index.
15086 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
15087 Valid = true;
15088 }
15089
15090 if (!Valid)
15091 continue;
15092
15093 // Try to combine.
15094 if (SDValue Res =
15095 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
15096 return Res;
15097 }
15098 }
15099
15100 return SDValue();
15101}
15102
15103// Fold
15104// (fp_to_int (froundeven X)) -> fcvt X, rne
15105// (fp_to_int (ftrunc X)) -> fcvt X, rtz
15106// (fp_to_int (ffloor X)) -> fcvt X, rdn
15107// (fp_to_int (fceil X)) -> fcvt X, rup
15108// (fp_to_int (fround X)) -> fcvt X, rmm
15109// (fp_to_int (frint X)) -> fcvt X
15110 static SDValue performFP_TO_INTCombine(SDNode *N,
15111 TargetLowering::DAGCombinerInfo &DCI,
15112 const RISCVSubtarget &Subtarget) {
15113 SelectionDAG &DAG = DCI.DAG;
15114 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15115 MVT XLenVT = Subtarget.getXLenVT();
15116
15117 SDValue Src = N->getOperand(0);
15118
15119 // Don't do this for strict-fp Src.
15120 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15121 return SDValue();
15122
15123 // Ensure the FP type is legal.
15124 if (!TLI.isTypeLegal(Src.getValueType()))
15125 return SDValue();
15126
15127 // Don't do this for f16 with Zfhmin and not Zfh.
15128 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15129 return SDValue();
15130
15131 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15132 // If the result is invalid, we didn't find a foldable instruction.
15133 if (FRM == RISCVFPRndMode::Invalid)
15134 return SDValue();
15135
15136 SDLoc DL(N);
15137 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
15138 EVT VT = N->getValueType(0);
15139
15140 if (VT.isVector() && TLI.isTypeLegal(VT)) {
15141 MVT SrcVT = Src.getSimpleValueType();
15142 MVT SrcContainerVT = SrcVT;
15143 MVT ContainerVT = VT.getSimpleVT();
15144 SDValue XVal = Src.getOperand(0);
15145
15146 // For widening and narrowing conversions we just combine it into a
15147 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
15148 // end up getting lowered to their appropriate pseudo instructions based on
15149 // their operand types
15150 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
15151 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
15152 return SDValue();
15153
15154 // Make fixed-length vectors scalable first
15155 if (SrcVT.isFixedLengthVector()) {
15156 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
15157 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
15158 ContainerVT =
15159 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
15160 }
15161
15162 auto [Mask, VL] =
15163 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
15164
15165 SDValue FpToInt;
15166 if (FRM == RISCVFPRndMode::RTZ) {
15167 // Use the dedicated trunc static rounding mode if we're truncating so we
15168 // don't need to generate calls to fsrmi/fsrm
15169 unsigned Opc =
15170 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
15171 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15172 } else if (FRM == RISCVFPRndMode::DYN) {
15173 unsigned Opc =
15174 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
15175 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15176 } else {
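 // Otherwise use the variant that takes an explicit rounding-mode operand and
 // pass the matched static rounding mode.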
15177 unsigned Opc =
15178 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
15179 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
15180 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
15181 }
15182
15183 // If converted from fixed-length to scalable, convert back
15184 if (VT.isFixedLengthVector())
15185 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
15186
15187 return FpToInt;
15188 }
15189
15190 // Only handle XLen or i32 types. Other types narrower than XLen will
15191 // eventually be legalized to XLenVT.
15192 if (VT != MVT::i32 && VT != XLenVT)
15193 return SDValue();
15194
15195 unsigned Opc;
15196 if (VT == XLenVT)
15197 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15198 else
15199 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15200
15201 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
15202 DAG.getTargetConstant(FRM, DL, XLenVT));
15203 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
15204}
15205
15206// Fold
15207// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
15208// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
15209// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
15210// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
15211// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
15212// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
15213 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
15214 TargetLowering::DAGCombinerInfo &DCI,
15215 const RISCVSubtarget &Subtarget) {
15216 SelectionDAG &DAG = DCI.DAG;
15217 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15218 MVT XLenVT = Subtarget.getXLenVT();
15219
15220 // Only handle XLen types. Other types narrower than XLen will eventually be
15221 // legalized to XLenVT.
15222 EVT DstVT = N->getValueType(0);
15223 if (DstVT != XLenVT)
15224 return SDValue();
15225
15226 SDValue Src = N->getOperand(0);
15227
15228 // Don't do this for strict-fp Src.
15229 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15230 return SDValue();
15231
15232 // Ensure the FP type is also legal.
15233 if (!TLI.isTypeLegal(Src.getValueType()))
15234 return SDValue();
15235
15236 // Don't do this for f16 with Zfhmin and not Zfh.
15237 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15238 return SDValue();
15239
15240 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15241
15242 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15243 if (FRM == RISCVFPRndMode::Invalid)
15244 return SDValue();
15245
15246 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15247
15248 unsigned Opc;
15249 if (SatVT == DstVT)
15250 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15251 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15252 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15253 else
15254 return SDValue();
15255 // FIXME: Support other SatVTs by clamping before or after the conversion.
15256
15257 Src = Src.getOperand(0);
15258
15259 SDLoc DL(N);
15260 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15261 DAG.getTargetConstant(FRM, DL, XLenVT));
15262
15263 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15264 // extend.
15265 if (Opc == RISCVISD::FCVT_WU_RV64)
15266 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15267
15268 // RISC-V FP-to-int conversions saturate to the destination register size, but
15269 // don't produce 0 for nan.
15270 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15271 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15272}
15273
15274// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15275// smaller than XLenVT.
15276 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15277 const RISCVSubtarget &Subtarget) {
15278 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15279
15280 SDValue Src = N->getOperand(0);
15281 if (Src.getOpcode() != ISD::BSWAP)
15282 return SDValue();
15283
15284 EVT VT = N->getValueType(0);
15285 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15286 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15287 return SDValue();
15288
15289 SDLoc DL(N);
15290 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15291}
15292
15293// Convert from one FMA opcode to another based on whether we are negating the
15294// multiply result and/or the accumulator.
15295// NOTE: Only supports RVV operations with VL.
15296static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15297 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15298 if (NegMul) {
15299 // clang-format off
15300 switch (Opcode) {
15301 default: llvm_unreachable("Unexpected opcode");
15302 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15303 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15304 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15305 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15306 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15307 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15308 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15309 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15310 }
15311 // clang-format on
15312 }
15313
15314 // Negating the accumulator changes ADD<->SUB.
15315 if (NegAcc) {
15316 // clang-format off
15317 switch (Opcode) {
15318 default: llvm_unreachable("Unexpected opcode");
15319 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15320 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15321 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15322 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15323 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15324 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15325 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15326 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15327 }
15328 // clang-format on
15329 }
15330
15331 return Opcode;
15332}
15333
15334 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15335 // Fold FNEG_VL into FMA opcodes.
15336 // The first operand of strict-fp is chain.
15337 unsigned Offset = N->isTargetStrictFPOpcode();
15338 SDValue A = N->getOperand(0 + Offset);
15339 SDValue B = N->getOperand(1 + Offset);
15340 SDValue C = N->getOperand(2 + Offset);
15341 SDValue Mask = N->getOperand(3 + Offset);
15342 SDValue VL = N->getOperand(4 + Offset);
15343
15344 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15345 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15346 V.getOperand(2) == VL) {
15347 // Return the negated input.
15348 V = V.getOperand(0);
15349 return true;
15350 }
15351
15352 return false;
15353 };
15354
15355 bool NegA = invertIfNegative(A);
15356 bool NegB = invertIfNegative(B);
15357 bool NegC = invertIfNegative(C);
15358
15359 // If no operands are negated, we're done.
15360 if (!NegA && !NegB && !NegC)
15361 return SDValue();
15362
15363 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15364 if (N->isTargetStrictFPOpcode())
15365 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15366 {N->getOperand(0), A, B, C, Mask, VL});
15367 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15368 VL);
15369}
15370
15371 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
15372 const RISCVSubtarget &Subtarget) {
15373 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15374 return V;
15375
15376 if (N->getValueType(0).isScalableVector() &&
15377 N->getValueType(0).getVectorElementType() == MVT::f32 &&
15378 (Subtarget.hasVInstructionsF16Minimal() &&
15379 !Subtarget.hasVInstructionsF16())) {
15380 return SDValue();
15381 }
15382
15383 // FIXME: Ignore strict opcodes for now.
15384 if (N->isTargetStrictFPOpcode())
15385 return SDValue();
15386
15387 // Try to form widening FMA.
15388 SDValue Op0 = N->getOperand(0);
15389 SDValue Op1 = N->getOperand(1);
15390 SDValue Mask = N->getOperand(3);
15391 SDValue VL = N->getOperand(4);
15392
15393 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
15394 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
15395 return SDValue();
15396
15397 // TODO: Refactor to handle more complex cases similar to
15398 // combineBinOp_VLToVWBinOp_VL.
15399 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
15400 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
15401 return SDValue();
15402
15403 // Check the mask and VL are the same.
15404 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15405 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15406 return SDValue();
15407
15408 unsigned NewOpc;
15409 switch (N->getOpcode()) {
15410 default:
15411 llvm_unreachable("Unexpected opcode");
15412 case RISCVISD::VFMADD_VL:
15413 NewOpc = RISCVISD::VFWMADD_VL;
15414 break;
15415 case RISCVISD::VFNMSUB_VL:
15416 NewOpc = RISCVISD::VFWNMSUB_VL;
15417 break;
15418 case RISCVISD::VFNMADD_VL:
15419 NewOpc = RISCVISD::VFWNMADD_VL;
15420 break;
15421 case RISCVISD::VFMSUB_VL:
15422 NewOpc = RISCVISD::VFWMSUB_VL;
15423 break;
15424 }
15425
15426 Op0 = Op0.getOperand(0);
15427 Op1 = Op1.getOperand(0);
15428
15429 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15430 N->getOperand(2), Mask, VL);
15431}
15432
15433 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15434 const RISCVSubtarget &Subtarget) {
15435 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15436
15437 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15438 return SDValue();
15439
15440 if (!isa<ConstantSDNode>(N->getOperand(1)))
15441 return SDValue();
15442 uint64_t ShAmt = N->getConstantOperandVal(1);
15443 if (ShAmt > 32)
15444 return SDValue();
15445
15446 SDValue N0 = N->getOperand(0);
15447
15448 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15449 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15450 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15451 if (ShAmt < 32 &&
15452 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15453 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15454 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15455 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15456 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15457 if (LShAmt < 32) {
15458 SDLoc ShlDL(N0.getOperand(0));
15459 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15460 N0.getOperand(0).getOperand(0),
15461 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15462 SDLoc DL(N);
15463 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15464 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15465 }
15466 }
15467
15468 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15469 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15470 //
15471 // Also try these folds where an add or sub is in the middle.
15472 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
15473 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
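 // For example, with C == 5 this turns (sra (shl X, 32), 27) into
 // (shl (sext_inreg X, i32), 5).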
15474 SDValue Shl;
15475 ConstantSDNode *AddC = nullptr;
15476
15477 // We might have an ADD or SUB between the SRA and SHL.
15478 bool IsAdd = N0.getOpcode() == ISD::ADD;
15479 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15480 // Other operand needs to be a constant we can modify.
15481 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15482 if (!AddC)
15483 return SDValue();
15484
15485 // AddC needs to have at least 32 trailing zeros.
15486 if (AddC->getAPIntValue().countr_zero() < 32)
15487 return SDValue();
15488
15489 // All users should be a shift by constant less than or equal to 32. This
15490 // ensures we'll do this optimization for each of them to produce an
15491 // add/sub+sext_inreg they can all share.
15492 for (SDNode *U : N0->uses()) {
15493 if (U->getOpcode() != ISD::SRA ||
15494 !isa<ConstantSDNode>(U->getOperand(1)) ||
15495 U->getConstantOperandVal(1) > 32)
15496 return SDValue();
15497 }
15498
15499 Shl = N0.getOperand(IsAdd ? 0 : 1);
15500 } else {
15501 // Not an ADD or SUB.
15502 Shl = N0;
15503 }
15504
15505 // Look for a shift left by 32.
15506 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15507 Shl.getConstantOperandVal(1) != 32)
15508 return SDValue();
15509
15510 // If we didn't look through an add/sub, then the shl should have one use.
15511 // If we did look through an add/sub, the sext_inreg we create is free so
15512 // we're only creating 2 new instructions. It's enough to only remove the
15513 // original sra+add/sub.
15514 if (!AddC && !Shl.hasOneUse())
15515 return SDValue();
15516
15517 SDLoc DL(N);
15518 SDValue In = Shl.getOperand(0);
15519
15520 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15521 // constant.
15522 if (AddC) {
15523 SDValue ShiftedAddC =
15524 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15525 if (IsAdd)
15526 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15527 else
15528 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15529 }
15530
15531 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15532 DAG.getValueType(MVT::i32));
15533 if (ShAmt == 32)
15534 return SExt;
15535
15536 return DAG.getNode(
15537 ISD::SHL, DL, MVT::i64, SExt,
15538 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15539}
15540
15541// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15542// the result is used as the condition of a br_cc or select_cc we can invert,
15543// inverting the setcc is free, and Z is 0/1. Caller will invert the
15544// br_cc/select_cc.
15546 bool IsAnd = Cond.getOpcode() == ISD::AND;
15547 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15548 return SDValue();
15549
15550 if (!Cond.hasOneUse())
15551 return SDValue();
15552
15553 SDValue Setcc = Cond.getOperand(0);
15554 SDValue Xor = Cond.getOperand(1);
15555 // Canonicalize setcc to LHS.
15556 if (Setcc.getOpcode() != ISD::SETCC)
15557 std::swap(Setcc, Xor);
15558 // LHS should be a setcc and RHS should be an xor.
15559 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15560 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15561 return SDValue();
15562
15563 // If the condition is an And, SimplifyDemandedBits may have changed
15564 // (xor Z, 1) to (not Z).
15565 SDValue Xor1 = Xor.getOperand(1);
15566 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15567 return SDValue();
15568
15569 EVT VT = Cond.getValueType();
15570 SDValue Xor0 = Xor.getOperand(0);
15571
15572 // The LHS of the xor needs to be 0/1.
15573 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15574 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15575 return SDValue();
15576
15577 // We can only invert integer setccs.
15578 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15579 if (!SetCCOpVT.isScalarInteger())
15580 return SDValue();
15581
15582 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15583 if (ISD::isIntEqualitySetCC(CCVal)) {
15584 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15585 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15586 Setcc.getOperand(1), CCVal);
15587 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15588 // Invert (setlt 0, X) by converting to (setlt X, 1).
15589 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15590 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15591 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15592 // Invert (setlt X, 1) by converting to (setlt 0, X).
15593 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15594 DAG.getConstant(0, SDLoc(Setcc), VT),
15595 Setcc.getOperand(0), CCVal);
15596 } else
15597 return SDValue();
15598
15599 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15600 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15601}
15602
15603// Perform common combines for BR_CC and SELECT_CC conditions.
15604static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15605 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15606 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15607
15608 // Since an arithmetic right shift always preserves the sign bit,
15609 // the shift can be omitted.
15610 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15611 // setge (sra X, N), 0 -> setge X, 0
15612 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15613 LHS.getOpcode() == ISD::SRA) {
15614 LHS = LHS.getOperand(0);
15615 return true;
15616 }
15617
15618 if (!ISD::isIntEqualitySetCC(CCVal))
15619 return false;
15620
15621 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15622 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15623 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15624 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15625 // If we're looking for eq 0 instead of ne 0, we need to invert the
15626 // condition.
15627 bool Invert = CCVal == ISD::SETEQ;
15628 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15629 if (Invert)
15630 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15631
15632 RHS = LHS.getOperand(1);
15633 LHS = LHS.getOperand(0);
15634 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15635
15636 CC = DAG.getCondCode(CCVal);
15637 return true;
15638 }
15639
15640 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15641 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15642 RHS = LHS.getOperand(1);
15643 LHS = LHS.getOperand(0);
15644 return true;
15645 }
15646
15647 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
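// For example, on RV64 with C=3, "(srl (and X, 8), 3) != 0" tests bit 3 of X;
// shifting that bit into the sign position instead gives "(shl X, 60) < 0".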
15648 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15649 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15650 SDValue LHS0 = LHS.getOperand(0);
15651 if (LHS0.getOpcode() == ISD::AND &&
15652 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15653 uint64_t Mask = LHS0.getConstantOperandVal(1);
15654 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15655 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15656 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15657 CC = DAG.getCondCode(CCVal);
15658
15659 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15660 LHS = LHS0.getOperand(0);
15661 if (ShAmt != 0)
15662 LHS =
15663 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15664 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15665 return true;
15666 }
15667 }
15668 }
15669
15670 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15671 // This can occur when legalizing some floating point comparisons.
15672 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15673 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15674 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15675 CC = DAG.getCondCode(CCVal);
15676 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15677 return true;
15678 }
15679
15680 if (isNullConstant(RHS)) {
15681 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15682 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15683 CC = DAG.getCondCode(CCVal);
15684 LHS = NewCond;
15685 return true;
15686 }
15687 }
15688
15689 return false;
15690}
15691
15692// Fold
15693// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15694// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15695// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15696// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
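// For example, (select C, (add Y, X), Y) and (add Y, (select C, X, 0)) agree
// in both arms: when C is true both compute (add Y, X), and when C is false
// the folded form adds the identity 0, leaving Y unchanged.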
15697 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15698 SDValue TrueVal, SDValue FalseVal,
15699 bool Swapped) {
15700 bool Commutative = true;
15701 unsigned Opc = TrueVal.getOpcode();
15702 switch (Opc) {
15703 default:
15704 return SDValue();
15705 case ISD::SHL:
15706 case ISD::SRA:
15707 case ISD::SRL:
15708 case ISD::SUB:
15709 Commutative = false;
15710 break;
15711 case ISD::ADD:
15712 case ISD::OR:
15713 case ISD::XOR:
15714 break;
15715 }
15716
15717 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15718 return SDValue();
15719
15720 unsigned OpToFold;
15721 if (FalseVal == TrueVal.getOperand(0))
15722 OpToFold = 0;
15723 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15724 OpToFold = 1;
15725 else
15726 return SDValue();
15727
15728 EVT VT = N->getValueType(0);
15729 SDLoc DL(N);
15730 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15731 EVT OtherOpVT = OtherOp.getValueType();
15732 SDValue IdentityOperand =
15733 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15734 if (!Commutative)
15735 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15736 assert(IdentityOperand && "No identity operand!");
15737
15738 if (Swapped)
15739 std::swap(OtherOp, IdentityOperand);
15740 SDValue NewSel =
15741 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15742 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15743}
15744
15745// This tries to get rid of `select` and `icmp` that are being used to handle
15746// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
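// For example, "x == 0 ? 0 : cttz(x)" on i32 becomes "cttz(x) & 31": cttz
// returns the bit width 32 for x == 0, and 32 & 31 == 0, so the compare and
// the select are no longer needed.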
15747 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15748 SDValue Cond = N->getOperand(0);
15749
15750 // This represents either CTTZ or CTLZ instruction.
15751 SDValue CountZeroes;
15752
15753 SDValue ValOnZero;
15754
15755 if (Cond.getOpcode() != ISD::SETCC)
15756 return SDValue();
15757
15758 if (!isNullConstant(Cond->getOperand(1)))
15759 return SDValue();
15760
15761 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15762 if (CCVal == ISD::CondCode::SETEQ) {
15763 CountZeroes = N->getOperand(2);
15764 ValOnZero = N->getOperand(1);
15765 } else if (CCVal == ISD::CondCode::SETNE) {
15766 CountZeroes = N->getOperand(1);
15767 ValOnZero = N->getOperand(2);
15768 } else {
15769 return SDValue();
15770 }
15771
15772 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15773 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15774 CountZeroes = CountZeroes.getOperand(0);
15775
15776 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15777 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15778 CountZeroes.getOpcode() != ISD::CTLZ &&
15779 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15780 return SDValue();
15781
15782 if (!isNullConstant(ValOnZero))
15783 return SDValue();
15784
15785 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15786 if (Cond->getOperand(0) != CountZeroesArgument)
15787 return SDValue();
15788
15789 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15790 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15791 CountZeroes.getValueType(), CountZeroesArgument);
15792 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15793 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15794 CountZeroes.getValueType(), CountZeroesArgument);
15795 }
15796
15797 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15798 SDValue BitWidthMinusOne =
15799 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15800
15801 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15802 CountZeroes, BitWidthMinusOne);
15803 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15804}
15805
15806 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15807 const RISCVSubtarget &Subtarget) {
15808 SDValue Cond = N->getOperand(0);
15809 SDValue True = N->getOperand(1);
15810 SDValue False = N->getOperand(2);
15811 SDLoc DL(N);
15812 EVT VT = N->getValueType(0);
15813 EVT CondVT = Cond.getValueType();
15814
15815 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15816 return SDValue();
15817
15818 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
15819 // BEXTI, where C is a power of 2.
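// For example, with C = 1 << 20 (too large for a 12-bit immediate), rewriting
// the eq compare as ne with the select arms swapped lets the AND be matched as
// a single BEXTI feeding the conditional-select sequence.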
15820 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15821 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15822 SDValue LHS = Cond.getOperand(0);
15823 SDValue RHS = Cond.getOperand(1);
15824 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15825 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15826 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15827 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15828 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15829 return DAG.getSelect(DL, VT,
15830 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15831 False, True);
15832 }
15833 }
15834 return SDValue();
15835}
15836
15837 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15838 const RISCVSubtarget &Subtarget) {
15839 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15840 return Folded;
15841
15842 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15843 return V;
15844
15845 if (Subtarget.hasConditionalMoveFusion())
15846 return SDValue();
15847
15848 SDValue TrueVal = N->getOperand(1);
15849 SDValue FalseVal = N->getOperand(2);
15850 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15851 return V;
15852 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15853}
15854
15855/// If we have a build_vector where each lane is binop X, C, where C
15856/// is a constant (but not necessarily the same constant on all lanes),
15857/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15858/// We assume that materializing a constant build vector will be no more
15859 /// expensive than performing O(n) binops.
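/// For example, (build_vector (add X0, 1), (add X1, 2)) becomes
/// (add (build_vector X0, X1), (build_vector 1, 2)), trading per-lane scalar
/// adds for one vector add plus a constant build_vector.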
15860 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15861 const RISCVSubtarget &Subtarget,
15862 const RISCVTargetLowering &TLI) {
15863 SDLoc DL(N);
15864 EVT VT = N->getValueType(0);
15865
15866 assert(!VT.isScalableVector() && "unexpected build vector");
15867
15868 if (VT.getVectorNumElements() == 1)
15869 return SDValue();
15870
15871 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15872 if (!TLI.isBinOp(Opcode))
15873 return SDValue();
15874
15875 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15876 return SDValue();
15877
15878 // This BUILD_VECTOR involves an implicit truncation, and sinking
15879 // truncates through binops is non-trivial.
15880 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15881 return SDValue();
15882
15883 SmallVector<SDValue> LHSOps;
15884 SmallVector<SDValue> RHSOps;
15885 for (SDValue Op : N->ops()) {
15886 if (Op.isUndef()) {
15887 // We can't form a divide or remainder from undef.
15888 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15889 return SDValue();
15890
15891 LHSOps.push_back(Op);
15892 RHSOps.push_back(Op);
15893 continue;
15894 }
15895
15896 // TODO: We can handle operations which have a neutral RHS value
15897 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15898 // of profit in a more explicit manner.
15899 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15900 return SDValue();
15901
15902 LHSOps.push_back(Op.getOperand(0));
15903 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15904 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15905 return SDValue();
15906 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15907 // have different LHS and RHS types.
15908 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15909 return SDValue();
15910
15911 RHSOps.push_back(Op.getOperand(1));
15912 }
15913
15914 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15915 DAG.getBuildVector(VT, DL, RHSOps));
15916}
15917
15918 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15919 const RISCVSubtarget &Subtarget,
15920 const RISCVTargetLowering &TLI) {
15921 SDValue InVec = N->getOperand(0);
15922 SDValue InVal = N->getOperand(1);
15923 SDValue EltNo = N->getOperand(2);
15924 SDLoc DL(N);
15925
15926 EVT VT = InVec.getValueType();
15927 if (VT.isScalableVector())
15928 return SDValue();
15929
15930 if (!InVec.hasOneUse())
15931 return SDValue();
15932
15933 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15934 // move the insert_vector_elts into the arms of the binop. Note that
15935 // the new RHS must be a constant.
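// For example, (insert_vector_elt (add A, VecC), (add B, C2), Elt) becomes
// (add (insert_vector_elt A, B, Elt), (insert_vector_elt VecC, C2, Elt)), so
// the RHS remains a vector of constants.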
15936 const unsigned InVecOpcode = InVec->getOpcode();
15937 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15938 InVal.hasOneUse()) {
15939 SDValue InVecLHS = InVec->getOperand(0);
15940 SDValue InVecRHS = InVec->getOperand(1);
15941 SDValue InValLHS = InVal->getOperand(0);
15942 SDValue InValRHS = InVal->getOperand(1);
15943
15944 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15945 return SDValue();
15946 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15947 return SDValue();
15948 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15949 // have different LHS and RHS types.
15950 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15951 return SDValue();
15952 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15953 InVecLHS, InValLHS, EltNo);
15954 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15955 InVecRHS, InValRHS, EltNo);
15956 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15957 }
15958
15959 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15960 // move the insert_vector_elt to the source operand of the concat_vector.
15961 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15962 return SDValue();
15963
15964 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15965 if (!IndexC)
15966 return SDValue();
15967 unsigned Elt = IndexC->getZExtValue();
15968
15969 EVT ConcatVT = InVec.getOperand(0).getValueType();
15970 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15971 return SDValue();
15972 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15973 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15974
15975 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15976 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15977 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15978 ConcatOp, InVal, NewIdx);
15979
15980 SmallVector<SDValue> ConcatOps;
15981 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15982 ConcatOps[ConcatOpIdx] = ConcatOp;
15983 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15984}
15985
15986// If we're concatenating a series of vector loads like
15987// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15988// Then we can turn this into a strided load by widening the vector elements
15989// vlse32 p, stride=n
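// For example, four v4i8 loads at p, p+16, p+32 and p+48 become a single
// vlse32 with stride 16 producing a v4i32, which is bitcast back to v16i8.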
15990 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15991 const RISCVSubtarget &Subtarget,
15992 const RISCVTargetLowering &TLI) {
15993 SDLoc DL(N);
15994 EVT VT = N->getValueType(0);
15995
15996 // Only perform this combine on legal MVTs.
15997 if (!TLI.isTypeLegal(VT))
15998 return SDValue();
15999
16000 // TODO: Potentially extend this to scalable vectors
16001 if (VT.isScalableVector())
16002 return SDValue();
16003
16004 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16005 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16006 !SDValue(BaseLd, 0).hasOneUse())
16007 return SDValue();
16008
16009 EVT BaseLdVT = BaseLd->getValueType(0);
16010
16011 // Go through the loads and check that they're strided
16012 SmallVector<LoadSDNode *> Lds;
16013 Lds.push_back(BaseLd);
16014 Align Align = BaseLd->getAlign();
16015 for (SDValue Op : N->ops().drop_front()) {
16016 auto *Ld = dyn_cast<LoadSDNode>(Op);
16017 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16018 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16019 Ld->getValueType(0) != BaseLdVT)
16020 return SDValue();
16021
16022 Lds.push_back(Ld);
16023
16024 // The common alignment is the most restrictive (smallest) of all the loads
16025 Align = std::min(Align, Ld->getAlign());
16026 }
16027
16028 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16029 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16030 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16031 // If the load ptrs can be decomposed into a common (Base + Index) with a
16032 // common constant stride, then return the constant stride.
16033 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16034 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16035 if (BIO1.equalBaseIndex(BIO2, DAG))
16036 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16037
16038 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16039 SDValue P1 = Ld1->getBasePtr();
16040 SDValue P2 = Ld2->getBasePtr();
16041 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16042 return {{P2.getOperand(1), false}};
16043 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16044 return {{P1.getOperand(1), true}};
16045
16046 return std::nullopt;
16047 };
16048
16049 // Get the distance between the first and second loads
16050 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16051 if (!BaseDiff)
16052 return SDValue();
16053
16054 // Check all the loads are the same distance apart
16055 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16056 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16057 return SDValue();
16058
16059 // TODO: At this point, we've successfully matched a generalized gather
16060 // load. Maybe we should emit that, and then move the specialized
16061 // matchers above and below into a DAG combine?
16062
16063 // Get the widened scalar type, e.g. v4i8 -> i64
16064 unsigned WideScalarBitWidth =
16065 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16066 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16067
16068 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
16069 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16070 if (!TLI.isTypeLegal(WideVecVT))
16071 return SDValue();
16072
16073 // Check that the operation is legal
16074 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16075 return SDValue();
16076
16077 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16078 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
16079 ? std::get<SDValue>(StrideVariant)
16080 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
16081 Lds[0]->getOffset().getValueType());
16082 if (MustNegateStride)
16083 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16084
16085 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
16086 SDValue IntID =
16087 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16088 Subtarget.getXLenVT());
16089
16090 SDValue AllOneMask =
16091 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16092 DAG.getConstant(1, DL, MVT::i1));
16093
16094 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
16095 BaseLd->getBasePtr(), Stride, AllOneMask};
16096
16097 uint64_t MemSize;
16098 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16099 ConstStride && ConstStride->getSExtValue() >= 0)
16100 // total size = (elsize * n) + (stride - elsize) * (n-1)
16101 // = elsize + stride * (n-1)
16102 MemSize = WideScalarVT.getSizeInBits() +
16103 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16104 else
16105 // If Stride isn't constant, then we can't know how much it will load
16106 MemSize = MemoryLocation::UnknownSize;
16107
16108 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
16109 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
16110 Align);
16111
16112 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16113 Ops, WideVecVT, MMO);
16114 for (SDValue Ld : N->ops())
16115 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
16116
16117 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
16118}
16119
16120 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
16121 const RISCVSubtarget &Subtarget) {
16122
16123 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16124
16125 if (N->getValueType(0).isFixedLengthVector())
16126 return SDValue();
16127
16128 SDValue Addend = N->getOperand(0);
16129 SDValue MulOp = N->getOperand(1);
16130
16131 if (N->getOpcode() == RISCVISD::ADD_VL) {
16132 SDValue AddMergeOp = N->getOperand(2);
16133 if (!AddMergeOp.isUndef())
16134 return SDValue();
16135 }
16136
16137 auto IsVWMulOpc = [](unsigned Opc) {
16138 switch (Opc) {
16139 case RISCVISD::VWMUL_VL:
16140 case RISCVISD::VWMULU_VL:
16141 case RISCVISD::VWMULSU_VL:
16142 return true;
16143 default:
16144 return false;
16145 }
16146 };
16147
16148 if (!IsVWMulOpc(MulOp.getOpcode()))
16149 std::swap(Addend, MulOp);
16150
16151 if (!IsVWMulOpc(MulOp.getOpcode()))
16152 return SDValue();
16153
16154 SDValue MulMergeOp = MulOp.getOperand(2);
16155
16156 if (!MulMergeOp.isUndef())
16157 return SDValue();
16158
16159 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
16160 const RISCVSubtarget &Subtarget) {
16161 if (N->getOpcode() == ISD::ADD) {
16162 SDLoc DL(N);
16163 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
16164 Subtarget);
16165 }
16166 return std::make_pair(N->getOperand(3), N->getOperand(4));
16167 }(N, DAG, Subtarget);
16168
16169 SDValue MulMask = MulOp.getOperand(3);
16170 SDValue MulVL = MulOp.getOperand(4);
16171
16172 if (AddMask != MulMask || AddVL != MulVL)
16173 return SDValue();
16174
16175 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
16176 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
16177 "Unexpected opcode after VWMACC_VL");
16178 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
16179 "Unexpected opcode after VWMACC_VL!");
16180 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
16181 "Unexpected opcode after VWMUL_VL!");
16182 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
16183 "Unexpected opcode after VWMUL_VL!");
16184
16185 SDLoc DL(N);
16186 EVT VT = N->getValueType(0);
16187 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
16188 AddVL};
16189 return DAG.getNode(Opc, DL, VT, Ops);
16190}
16191
16192 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
16193 ISD::MemIndexType &IndexType,
16194 RISCVTargetLowering::DAGCombinerInfo &DCI) {
16195 if (!DCI.isBeforeLegalize())
16196 return false;
16197
16198 SelectionDAG &DAG = DCI.DAG;
16199 const MVT XLenVT =
16200 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
16201
16202 const EVT IndexVT = Index.getValueType();
16203
16204 // RISC-V indexed loads only support the "unsigned unscaled" addressing
16205 // mode, so anything else must be manually legalized.
16206 if (!isIndexTypeSigned(IndexType))
16207 return false;
16208
16209 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
16210 // Any index legalization should first promote to XLenVT, so we don't lose
16211 // bits when scaling. This may create an illegal index type so we let
16212 // LLVM's legalization take care of the splitting.
16213 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
16214 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
16215 IndexVT.changeVectorElementType(XLenVT), Index);
16216 }
16217 IndexType = ISD::UNSIGNED_SCALED;
16218 return true;
16219}
16220
16221/// Match the index vector of a scatter or gather node as the shuffle mask
16222/// which performs the rearrangement if possible. Will only match if
16223/// all lanes are touched, and thus replacing the scatter or gather with
16224/// a unit strided access and shuffle is legal.
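/// For example, an i32 gather with constant byte offsets [4, 0, 12, 8] touches
/// every lane of a 4 x i32 block, so it can instead use a unit-strided load
/// followed by a shuffle with mask [1, 0, 3, 2].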
16225 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
16226 SmallVector<int> &ShuffleMask) {
16227 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16228 return false;
16229 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16230 return false;
16231
16232 const unsigned ElementSize = VT.getScalarStoreSize();
16233 const unsigned NumElems = VT.getVectorNumElements();
16234
16235 // Create the shuffle mask and check all bits active
16236 assert(ShuffleMask.empty());
16237 BitVector ActiveLanes(NumElems);
16238 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16239 // TODO: We've found an active bit of UB, and could be
16240 // more aggressive here if desired.
16241 if (Index->getOperand(i)->isUndef())
16242 return false;
16243 uint64_t C = Index->getConstantOperandVal(i);
16244 if (C % ElementSize != 0)
16245 return false;
16246 C = C / ElementSize;
16247 if (C >= NumElems)
16248 return false;
16249 ShuffleMask.push_back(C);
16250 ActiveLanes.set(C);
16251 }
16252 return ActiveLanes.all();
16253}
16254
16255/// Match the index of a gather or scatter operation as an operation
16256/// with twice the element width and half the number of elements. This is
16257/// generally profitable (if legal) because these operations are linear
16258 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
16259/// come out ahead.
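/// For example, an i32 gather with constant byte offsets [0, 4, 16, 20] reads
/// two adjacent pairs, so (alignment permitting) it can be rewritten as an i64
/// gather with offsets [0, 16] and half the number of elements.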
16260 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16261 Align BaseAlign, const RISCVSubtarget &ST) {
16262 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16263 return false;
16264 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16265 return false;
16266
16267 // Attempt a doubling. If we can use an element type 4x or 8x in
16268 // size, this will happen via multiple iterations of the transform.
16269 const unsigned NumElems = VT.getVectorNumElements();
16270 if (NumElems % 2 != 0)
16271 return false;
16272
16273 const unsigned ElementSize = VT.getScalarStoreSize();
16274 const unsigned WiderElementSize = ElementSize * 2;
16275 if (WiderElementSize > ST.getELen()/8)
16276 return false;
16277
16278 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16279 return false;
16280
16281 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16282 // TODO: We've found an active bit of UB, and could be
16283 // more aggressive here if desired.
16284 if (Index->getOperand(i)->isUndef())
16285 return false;
16286 // TODO: This offset check is too strict if we support fully
16287 // misaligned memory operations.
16288 uint64_t C = Index->getConstantOperandVal(i);
16289 if (i % 2 == 0) {
16290 if (C % WiderElementSize != 0)
16291 return false;
16292 continue;
16293 }
16294 uint64_t Last = Index->getConstantOperandVal(i-1);
16295 if (C != Last + ElementSize)
16296 return false;
16297 }
16298 return true;
16299}
16300
16301// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16302 // This benefits the cases where X and Y are both the same low-precision
16303 // vector value type. Since the truncate would be lowered into n levels of
16304 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
16305 // a pattern would be expanded into a series of "vsetvli" and "vnsrl"
16306 // instructions later to reach this point.
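// For example, with v4i8 X and Y, the truncating wide shift and
// "sra (X, smin (Y, 7))" agree: once the shift amount reaches 7, the narrow
// result is just a splat of X's sign bit, matching the truncated wide result.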
16307 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
16308 SDValue Mask = N->getOperand(1);
16309 SDValue VL = N->getOperand(2);
16310
16311 bool IsVLMAX = isAllOnesConstant(VL) ||
16312 (isa<RegisterSDNode>(VL) &&
16313 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16314 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16315 Mask.getOperand(0) != VL)
16316 return SDValue();
16317
16318 auto IsTruncNode = [&](SDValue V) {
16319 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16320 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16321 };
16322
16323 SDValue Op = N->getOperand(0);
16324
16325 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16326 // to distinguish such pattern.
16327 while (IsTruncNode(Op)) {
16328 if (!Op.hasOneUse())
16329 return SDValue();
16330 Op = Op.getOperand(0);
16331 }
16332
16333 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16334 return SDValue();
16335
16336 SDValue N0 = Op.getOperand(0);
16337 SDValue N1 = Op.getOperand(1);
16338 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16339 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16340 return SDValue();
16341
16342 SDValue N00 = N0.getOperand(0);
16343 SDValue N10 = N1.getOperand(0);
16344 if (!N00.getValueType().isVector() ||
16345 N00.getValueType() != N10.getValueType() ||
16346 N->getValueType(0) != N10.getValueType())
16347 return SDValue();
16348
16349 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16350 SDValue SMin =
16351 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16352 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16353 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16354}
16355
16356// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16357// maximum value for the truncated type.
16358// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16359// is the signed maximum value for the truncated type and C2 is the signed
16360// minimum value.
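// For example, for an i16 -> i8 truncate, (umin X, 255) matches the unsigned
// pattern (C = 255) and (smin (smax X, -128), 127) matches the signed one, so
// the min/max and the truncate collapse into a single vnclipu/vnclip.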
16361 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
16362 const RISCVSubtarget &Subtarget) {
16363 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16364
16365 MVT VT = N->getSimpleValueType(0);
16366
16367 SDValue Mask = N->getOperand(1);
16368 SDValue VL = N->getOperand(2);
16369
16370 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16371 APInt &SplatVal) {
16372 if (V.getOpcode() != Opc &&
16373 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16374 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16375 return SDValue();
16376
16377 SDValue Op = V.getOperand(1);
16378
16379 // Peek through conversion between fixed and scalable vectors.
16380 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16381 isNullConstant(Op.getOperand(2)) &&
16382 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16383 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16384 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16385 isNullConstant(Op.getOperand(1).getOperand(1)))
16386 Op = Op.getOperand(1).getOperand(0);
16387
16388 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16389 return V.getOperand(0);
16390
16391 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16392 Op.getOperand(2) == VL) {
16393 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16394 SplatVal =
16395 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16396 return V.getOperand(0);
16397 }
16398 }
16399
16400 return SDValue();
16401 };
16402
16403 SDLoc DL(N);
16404
16405 auto DetectUSatPattern = [&](SDValue V) {
16406 APInt LoC, HiC;
16407
16408 // Simple case, V is a UMIN.
16409 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16410 if (HiC.isMask(VT.getScalarSizeInBits()))
16411 return UMinOp;
16412
16413 // If we have an SMAX that removes negative numbers first, then we can match
16414 // SMIN instead of UMIN.
16415 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16416 if (SDValue SMaxOp =
16417 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16418 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16419 return SMinOp;
16420
16421 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16422 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16423 // first.
16424 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16425 if (SDValue SMinOp =
16426 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16427 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16428 HiC.uge(LoC))
16429 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16430 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16431 Mask, VL);
16432
16433 return SDValue();
16434 };
16435
16436 auto DetectSSatPattern = [&](SDValue V) {
16437 unsigned NumDstBits = VT.getScalarSizeInBits();
16438 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16439 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16440 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16441
16442 APInt HiC, LoC;
16443 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16444 if (SDValue SMaxOp =
16445 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16446 if (HiC == SignedMax && LoC == SignedMin)
16447 return SMaxOp;
16448
16449 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16450 if (SDValue SMinOp =
16451 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16452 if (HiC == SignedMax && LoC == SignedMin)
16453 return SMinOp;
16454
16455 return SDValue();
16456 };
16457
16458 SDValue Src = N->getOperand(0);
16459
16460 // Look through multiple layers of truncates.
16461 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16462 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16463 Src.hasOneUse())
16464 Src = Src.getOperand(0);
16465
16466 SDValue Val;
16467 unsigned ClipOpc;
16468 if ((Val = DetectUSatPattern(Src)))
16469 ClipOpc = RISCVISD::VNCLIPU_VL;
16470 else if ((Val = DetectSSatPattern(Src)))
16471 ClipOpc = RISCVISD::VNCLIP_VL;
16472 else
16473 return SDValue();
16474
16475 MVT ValVT = Val.getSimpleValueType();
16476
16477 do {
16478 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16479 ValVT = ValVT.changeVectorElementType(ValEltVT);
16480 // Rounding mode here is arbitrary since we aren't shifting out any bits.
16481 Val = DAG.getNode(
16482 ClipOpc, DL, ValVT,
16483 {Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask,
16484 DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
16485 VL});
16486 } while (ValVT != VT);
16487
16488 return Val;
16489}
16490
16492 DAGCombinerInfo &DCI) const {
16493 SelectionDAG &DAG = DCI.DAG;
16494 const MVT XLenVT = Subtarget.getXLenVT();
16495 SDLoc DL(N);
16496
16497 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16498 // bits are demanded. N will be added to the Worklist if it was not deleted.
16499 // Caller should return SDValue(N, 0) if this returns true.
16500 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16501 SDValue Op = N->getOperand(OpNo);
16502 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16503 if (!SimplifyDemandedBits(Op, Mask, DCI))
16504 return false;
16505
16506 if (N->getOpcode() != ISD::DELETED_NODE)
16507 DCI.AddToWorklist(N);
16508 return true;
16509 };
16510
16511 switch (N->getOpcode()) {
16512 default:
16513 break;
16514 case RISCVISD::SplitF64: {
16515 SDValue Op0 = N->getOperand(0);
16516 // If the input to SplitF64 is just BuildPairF64 then the operation is
16517 // redundant. Instead, use BuildPairF64's operands directly.
16518 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16519 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16520
16521 if (Op0->isUndef()) {
16522 SDValue Lo = DAG.getUNDEF(MVT::i32);
16523 SDValue Hi = DAG.getUNDEF(MVT::i32);
16524 return DCI.CombineTo(N, Lo, Hi);
16525 }
16526
16527 // It's cheaper to materialise two 32-bit integers than to load a double
16528 // from the constant pool and transfer it to integer registers through the
16529 // stack.
16530 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16531 APInt V = C->getValueAPF().bitcastToAPInt();
16532 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16533 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16534 return DCI.CombineTo(N, Lo, Hi);
16535 }
16536
16537 // This is a target-specific version of a DAGCombine performed in
16538 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16539 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16540 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16541 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16542 !Op0.getNode()->hasOneUse())
16543 break;
16544 SDValue NewSplitF64 =
16545 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16546 Op0.getOperand(0));
16547 SDValue Lo = NewSplitF64.getValue(0);
16548 SDValue Hi = NewSplitF64.getValue(1);
16549 APInt SignBit = APInt::getSignMask(32);
16550 if (Op0.getOpcode() == ISD::FNEG) {
16551 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16552 DAG.getConstant(SignBit, DL, MVT::i32));
16553 return DCI.CombineTo(N, Lo, NewHi);
16554 }
16555 assert(Op0.getOpcode() == ISD::FABS);
16556 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16557 DAG.getConstant(~SignBit, DL, MVT::i32));
16558 return DCI.CombineTo(N, Lo, NewHi);
16559 }
16560 case RISCVISD::SLLW:
16561 case RISCVISD::SRAW:
16562 case RISCVISD::SRLW:
16563 case RISCVISD::RORW:
16564 case RISCVISD::ROLW: {
16565 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16566 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16567 SimplifyDemandedLowBitsHelper(1, 5))
16568 return SDValue(N, 0);
16569
16570 break;
16571 }
16572 case RISCVISD::CLZW:
16573 case RISCVISD::CTZW: {
16574 // Only the lower 32 bits of the first operand are read
16575 if (SimplifyDemandedLowBitsHelper(0, 32))
16576 return SDValue(N, 0);
16577 break;
16578 }
16579 case RISCVISD::FMV_W_X_RV64: {
16580 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16581 // conversion is unnecessary and can be replaced with the
16582 // FMV_X_ANYEXTW_RV64 operand.
16583 SDValue Op0 = N->getOperand(0);
16584 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16585 return Op0.getOperand(0);
16586 break;
16587 }
16588 case RISCVISD::FMV_X_ANYEXTH:
16589 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16590 SDLoc DL(N);
16591 SDValue Op0 = N->getOperand(0);
16592 MVT VT = N->getSimpleValueType(0);
16593 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16594 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16595 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16596 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16597 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16598 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16599 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16600 assert(Op0.getOperand(0).getValueType() == VT &&
16601 "Unexpected value type!");
16602 return Op0.getOperand(0);
16603 }
16604
16605 // This is a target-specific version of a DAGCombine performed in
16606 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16607 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16608 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16609 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16610 !Op0.getNode()->hasOneUse())
16611 break;
16612 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16613 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16614 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16615 if (Op0.getOpcode() == ISD::FNEG)
16616 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16617 DAG.getConstant(SignBit, DL, VT));
16618
16619 assert(Op0.getOpcode() == ISD::FABS);
16620 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16621 DAG.getConstant(~SignBit, DL, VT));
16622 }
16623 case ISD::ABS: {
16624 EVT VT = N->getValueType(0);
16625 SDValue N0 = N->getOperand(0);
16626 // abs (sext) -> zext (abs)
16627 // abs (zext) -> zext (handled elsewhere)
16628 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16629 SDValue Src = N0.getOperand(0);
16630 SDLoc DL(N);
16631 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16632 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16633 }
16634 break;
16635 }
16636 case ISD::ADD: {
16637 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16638 return V;
16639 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16640 return V;
16641 return performADDCombine(N, DCI, Subtarget);
16642 }
16643 case ISD::SUB: {
16644 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16645 return V;
16646 return performSUBCombine(N, DAG, Subtarget);
16647 }
16648 case ISD::AND:
16649 return performANDCombine(N, DCI, Subtarget);
16650 case ISD::OR: {
16651 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16652 return V;
16653 return performORCombine(N, DCI, Subtarget);
16654 }
16655 case ISD::XOR:
16656 return performXORCombine(N, DAG, Subtarget);
16657 case ISD::MUL:
16658 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16659 return V;
16660 return performMULCombine(N, DAG, DCI, Subtarget);
16661 case ISD::SDIV:
16662 case ISD::UDIV:
16663 case ISD::SREM:
16664 case ISD::UREM:
16665 if (SDValue V = combineBinOpOfZExt(N, DAG))
16666 return V;
16667 break;
16668 case ISD::FADD:
16669 case ISD::UMAX:
16670 case ISD::UMIN:
16671 case ISD::SMAX:
16672 case ISD::SMIN:
16673 case ISD::FMAXNUM:
16674 case ISD::FMINNUM: {
16675 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16676 return V;
16677 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16678 return V;
16679 return SDValue();
16680 }
16681 case ISD::SETCC:
16682 return performSETCCCombine(N, DAG, Subtarget);
16683 case ISD::SIGN_EXTEND_INREG:
16684 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16685 case ISD::ZERO_EXTEND:
16686 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16687 // type legalization. This is safe because fp_to_uint produces poison if
16688 // it overflows.
16689 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16690 SDValue Src = N->getOperand(0);
16691 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16692 isTypeLegal(Src.getOperand(0).getValueType()))
16693 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16694 Src.getOperand(0));
16695 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16696 isTypeLegal(Src.getOperand(1).getValueType())) {
16697 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16698 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16699 Src.getOperand(0), Src.getOperand(1));
16700 DCI.CombineTo(N, Res);
16701 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16702 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16703 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16704 }
16705 }
16706 return SDValue();
16707 case RISCVISD::TRUNCATE_VECTOR_VL:
16708 if (SDValue V = combineTruncOfSraSext(N, DAG))
16709 return V;
16710 return combineTruncToVnclip(N, DAG, Subtarget);
16711 case ISD::TRUNCATE:
16712 return performTRUNCATECombine(N, DAG, Subtarget);
16713 case ISD::SELECT:
16714 return performSELECTCombine(N, DAG, Subtarget);
16715 case RISCVISD::CZERO_EQZ:
16716 case RISCVISD::CZERO_NEZ: {
16717 SDValue Val = N->getOperand(0);
16718 SDValue Cond = N->getOperand(1);
16719
16720 unsigned Opc = N->getOpcode();
16721
16722 // czero_eqz x, x -> x
16723 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16724 return Val;
16725
16726 unsigned InvOpc =
16727 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16728
16729 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16730 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16731 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16732 SDValue NewCond = Cond.getOperand(0);
16733 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16734 if (DAG.MaskedValueIsZero(NewCond, Mask))
16735 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16736 }
16737 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16738 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16739 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16740 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16741 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16742 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16743 if (ISD::isIntEqualitySetCC(CCVal))
16744 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16745 N->getValueType(0), Val, Cond.getOperand(0));
16746 }
16747 return SDValue();
16748 }
16749 case RISCVISD::SELECT_CC: {
16750 // Transform
16751 SDValue LHS = N->getOperand(0);
16752 SDValue RHS = N->getOperand(1);
16753 SDValue CC = N->getOperand(2);
16754 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16755 SDValue TrueV = N->getOperand(3);
16756 SDValue FalseV = N->getOperand(4);
16757 SDLoc DL(N);
16758 EVT VT = N->getValueType(0);
16759
16760 // If the True and False values are the same, we don't need a select_cc.
16761 if (TrueV == FalseV)
16762 return TrueV;
16763
16764 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16765 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
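// For example, with y = 5 and z = 3: if x < 0 then x >> (XLEN - 1) is all
// ones, the AND with (5 - 3) gives 2, and adding 3 yields 5; otherwise the
// shift gives 0 and the result is 3.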
16766 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16767 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16768 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16769 if (CCVal == ISD::CondCode::SETGE)
16770 std::swap(TrueV, FalseV);
16771
16772 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16773 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16774 // Only handle simm12, if it is not in this range, it can be considered as
16775 // register.
16776 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16777 isInt<12>(TrueSImm - FalseSImm)) {
16778 SDValue SRA =
16779 DAG.getNode(ISD::SRA, DL, VT, LHS,
16780 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16781 SDValue AND =
16782 DAG.getNode(ISD::AND, DL, VT, SRA,
16783 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16784 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16785 }
16786
16787 if (CCVal == ISD::CondCode::SETGE)
16788 std::swap(TrueV, FalseV);
16789 }
16790
16791 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16792 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16793 {LHS, RHS, CC, TrueV, FalseV});
16794
16795 if (!Subtarget.hasConditionalMoveFusion()) {
16796 // (select c, -1, y) -> -c | y
16797 if (isAllOnesConstant(TrueV)) {
16798 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16799 SDValue Neg = DAG.getNegative(C, DL, VT);
16800 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16801 }
16802 // (select c, y, -1) -> -!c | y
16803 if (isAllOnesConstant(FalseV)) {
16804 SDValue C =
16805 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16806 SDValue Neg = DAG.getNegative(C, DL, VT);
16807 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16808 }
16809
16810 // (select c, 0, y) -> -!c & y
16811 if (isNullConstant(TrueV)) {
16812 SDValue C =
16813 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16814 SDValue Neg = DAG.getNegative(C, DL, VT);
16815 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16816 }
16817 // (select c, y, 0) -> -c & y
16818 if (isNullConstant(FalseV)) {
16819 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16820 SDValue Neg = DAG.getNegative(C, DL, VT);
16821 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16822 }
16823 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16824 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16825 if (((isOneConstant(FalseV) && LHS == TrueV &&
16826 CCVal == ISD::CondCode::SETNE) ||
16827 (isOneConstant(TrueV) && LHS == FalseV &&
16828 CCVal == ISD::CondCode::SETEQ)) &&
16829 isNullConstant(RHS)) {
16830 // freeze it to be safe.
16831 LHS = DAG.getFreeze(LHS);
16832 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16833 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16834 }
16835 }
16836
16837 // If both true/false are an xor with 1, pull through the select.
16838 // This can occur after op legalization if both operands are setccs that
16839 // require an xor to invert.
16840 // FIXME: Generalize to other binary ops with identical operand?
16841 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16842 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16843 isOneConstant(TrueV.getOperand(1)) &&
16844 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16845 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16846 TrueV.getOperand(0), FalseV.getOperand(0));
16847 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16848 }
16849
16850 return SDValue();
16851 }
16852 case RISCVISD::BR_CC: {
16853 SDValue LHS = N->getOperand(1);
16854 SDValue RHS = N->getOperand(2);
16855 SDValue CC = N->getOperand(3);
16856 SDLoc DL(N);
16857
16858 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16859 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16860 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16861
16862 return SDValue();
16863 }
16864 case ISD::BITREVERSE:
16865 return performBITREVERSECombine(N, DAG, Subtarget);
16866 case ISD::FP_TO_SINT:
16867 case ISD::FP_TO_UINT:
16868 return performFP_TO_INTCombine(N, DCI, Subtarget);
16869 case ISD::FP_TO_SINT_SAT:
16870 case ISD::FP_TO_UINT_SAT:
16871 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16872 case ISD::FCOPYSIGN: {
16873 EVT VT = N->getValueType(0);
16874 if (!VT.isVector())
16875 break;
16876 // There is a form of VFSGNJ which injects the negated sign of its second
16877 // operand. Try and bubble any FNEG up after the extend/round to produce
16878 // this optimized pattern. Avoid modifying cases where the input is an
16879 // FP_ROUND with TRUNC=1.
16880 SDValue In2 = N->getOperand(1);
16881 // Avoid cases where the extend/round has multiple uses, as duplicating
16882 // those is typically more expensive than removing a fneg.
16883 if (!In2.hasOneUse())
16884 break;
16885 if (In2.getOpcode() != ISD::FP_EXTEND &&
16886 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16887 break;
16888 In2 = In2.getOperand(0);
16889 if (In2.getOpcode() != ISD::FNEG)
16890 break;
16891 SDLoc DL(N);
16892 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16893 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16894 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16895 }
16896 case ISD::MGATHER: {
16897 const auto *MGN = cast<MaskedGatherSDNode>(N);
16898 const EVT VT = N->getValueType(0);
16899 SDValue Index = MGN->getIndex();
16900 SDValue ScaleOp = MGN->getScale();
16901 ISD::MemIndexType IndexType = MGN->getIndexType();
16902 assert(!MGN->isIndexScaled() &&
16903 "Scaled gather/scatter should not be formed");
16904
16905 SDLoc DL(N);
16906 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16907 return DAG.getMaskedGather(
16908 N->getVTList(), MGN->getMemoryVT(), DL,
16909 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16910 MGN->getBasePtr(), Index, ScaleOp},
16911 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16912
16913 if (narrowIndex(Index, IndexType, DAG))
16914 return DAG.getMaskedGather(
16915 N->getVTList(), MGN->getMemoryVT(), DL,
16916 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16917 MGN->getBasePtr(), Index, ScaleOp},
16918 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16919
16920 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16921 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16922 // The sequence will be XLenVT, not the type of Index. Tell
16923 // isSimpleVIDSequence this so we avoid overflow.
16924 if (std::optional<VIDSequence> SimpleVID =
16925 isSimpleVIDSequence(Index, Subtarget.getXLen());
16926 SimpleVID && SimpleVID->StepDenominator == 1) {
16927 const int64_t StepNumerator = SimpleVID->StepNumerator;
16928 const int64_t Addend = SimpleVID->Addend;
16929
16930 // Note: We don't need to check alignment here since (by assumption
16931 // from the existence of the gather), our offsets must be sufficiently
16932 // aligned.
16933
16934 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16935 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16936 assert(IndexType == ISD::UNSIGNED_SCALED);
16937 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16938 DAG.getConstant(Addend, DL, PtrVT));
16939
16940 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16941 SDValue IntID =
16942 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16943 XLenVT);
16944 SDValue Ops[] =
16945 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16946 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16947 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16948 Ops, VT, MGN->getMemOperand());
16949 }
16950 }
16951
16952 SmallVector<int> ShuffleMask;
16953 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16954 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16955 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16956 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16957 MGN->getMask(), DAG.getUNDEF(VT),
16958 MGN->getMemoryVT(), MGN->getMemOperand(),
16959 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16960 SDValue Shuffle =
16961 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16962 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16963 }
16964
16965 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16966 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16967 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16968 SmallVector<SDValue> NewIndices;
16969 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16970 NewIndices.push_back(Index.getOperand(i));
16971 EVT IndexVT = Index.getValueType()
16972 .getHalfNumVectorElementsVT(*DAG.getContext());
16973 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16974
16975 unsigned ElementSize = VT.getScalarStoreSize();
16976 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16977 auto EltCnt = VT.getVectorElementCount();
16978 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16979 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16980 EltCnt.divideCoefficientBy(2));
16981 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16982 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16983 EltCnt.divideCoefficientBy(2));
16984 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16985
16986 SDValue Gather =
16987 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16988 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16989 Index, ScaleOp},
16990 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16991 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16992 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16993 }
16994 break;
16995 }
16996 case ISD::MSCATTER:{
16997 const auto *MSN = cast<MaskedScatterSDNode>(N);
16998 SDValue Index = MSN->getIndex();
16999 SDValue ScaleOp = MSN->getScale();
17000 ISD::MemIndexType IndexType = MSN->getIndexType();
17001 assert(!MSN->isIndexScaled() &&
17002 "Scaled gather/scatter should not be formed");
17003
17004 SDLoc DL(N);
17005 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17006 return DAG.getMaskedScatter(
17007 N->getVTList(), MSN->getMemoryVT(), DL,
17008 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17009 Index, ScaleOp},
17010 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17011
17012 if (narrowIndex(Index, IndexType, DAG))
17013 return DAG.getMaskedScatter(
17014 N->getVTList(), MSN->getMemoryVT(), DL,
17015 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17016 Index, ScaleOp},
17017 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17018
17019 EVT VT = MSN->getValue()->getValueType(0);
17020 SmallVector<int> ShuffleMask;
17021 if (!MSN->isTruncatingStore() &&
17022 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17023 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17024 DAG.getUNDEF(VT), ShuffleMask);
17025 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17026 DAG.getUNDEF(XLenVT), MSN->getMask(),
17027 MSN->getMemoryVT(), MSN->getMemOperand(),
17028 ISD::UNINDEXED, false);
17029 }
17030 break;
17031 }
17032 case ISD::VP_GATHER: {
17033 const auto *VPGN = cast<VPGatherSDNode>(N);
17034 SDValue Index = VPGN->getIndex();
17035 SDValue ScaleOp = VPGN->getScale();
17036 ISD::MemIndexType IndexType = VPGN->getIndexType();
17037 assert(!VPGN->isIndexScaled() &&
17038 "Scaled gather/scatter should not be formed");
17039
17040 SDLoc DL(N);
17041 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17042 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17043 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17044 ScaleOp, VPGN->getMask(),
17045 VPGN->getVectorLength()},
17046 VPGN->getMemOperand(), IndexType);
17047
17048 if (narrowIndex(Index, IndexType, DAG))
17049 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17050 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17051 ScaleOp, VPGN->getMask(),
17052 VPGN->getVectorLength()},
17053 VPGN->getMemOperand(), IndexType);
17054
17055 break;
17056 }
17057 case ISD::VP_SCATTER: {
17058 const auto *VPSN = cast<VPScatterSDNode>(N);
17059 SDValue Index = VPSN->getIndex();
17060 SDValue ScaleOp = VPSN->getScale();
17061 ISD::MemIndexType IndexType = VPSN->getIndexType();
17062 assert(!VPSN->isIndexScaled() &&
17063 "Scaled gather/scatter should not be formed");
17064
17065 SDLoc DL(N);
17066 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17067 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17068 {VPSN->getChain(), VPSN->getValue(),
17069 VPSN->getBasePtr(), Index, ScaleOp,
17070 VPSN->getMask(), VPSN->getVectorLength()},
17071 VPSN->getMemOperand(), IndexType);
17072
17073 if (narrowIndex(Index, IndexType, DAG))
17074 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17075 {VPSN->getChain(), VPSN->getValue(),
17076 VPSN->getBasePtr(), Index, ScaleOp,
17077 VPSN->getMask(), VPSN->getVectorLength()},
17078 VPSN->getMemOperand(), IndexType);
17079 break;
17080 }
17081 case RISCVISD::SHL_VL:
17082 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17083 return V;
17084 [[fallthrough]];
17085 case RISCVISD::SRA_VL:
17086 case RISCVISD::SRL_VL: {
17087 SDValue ShAmt = N->getOperand(1);
17088 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
17089 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17090 SDLoc DL(N);
17091 SDValue VL = N->getOperand(4);
17092 EVT VT = N->getValueType(0);
17093 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17094 ShAmt.getOperand(1), VL);
17095 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
17096 N->getOperand(2), N->getOperand(3), N->getOperand(4));
17097 }
17098 break;
17099 }
17100 case ISD::SRA:
17101 if (SDValue V = performSRACombine(N, DAG, Subtarget))
17102 return V;
17103 [[fallthrough]];
17104 case ISD::SRL:
17105 case ISD::SHL: {
17106 if (N->getOpcode() == ISD::SHL) {
17107 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17108 return V;
17109 }
17110 SDValue ShAmt = N->getOperand(1);
17111 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
17112 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17113 SDLoc DL(N);
17114 EVT VT = N->getValueType(0);
17115 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17116 ShAmt.getOperand(1),
17117 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
17118 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
17119 }
17120 break;
17121 }
17122 case RISCVISD::ADD_VL:
17123 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
17124 return V;
17125 return combineToVWMACC(N, DAG, Subtarget);
17126 case RISCVISD::VWADD_W_VL:
17127 case RISCVISD::VWADDU_W_VL:
17128 case RISCVISD::VWSUB_W_VL:
17129 case RISCVISD::VWSUBU_W_VL:
17130 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
17131 case RISCVISD::SUB_VL:
17132 case RISCVISD::MUL_VL:
17133 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
17134 case RISCVISD::VFMADD_VL:
17135 case RISCVISD::VFNMADD_VL:
17136 case RISCVISD::VFMSUB_VL:
17137 case RISCVISD::VFNMSUB_VL:
17138 case RISCVISD::STRICT_VFMADD_VL:
17139 case RISCVISD::STRICT_VFNMADD_VL:
17140 case RISCVISD::STRICT_VFMSUB_VL:
17141 case RISCVISD::STRICT_VFNMSUB_VL:
17142 return performVFMADD_VLCombine(N, DAG, Subtarget);
17143 case RISCVISD::FADD_VL:
17144 case RISCVISD::FSUB_VL:
17145 case RISCVISD::FMUL_VL:
17146 case RISCVISD::VFWADD_W_VL:
17147 case RISCVISD::VFWSUB_W_VL: {
17148 if (N->getValueType(0).isScalableVector() &&
17149 N->getValueType(0).getVectorElementType() == MVT::f32 &&
17150 (Subtarget.hasVInstructionsF16Minimal() &&
17151 !Subtarget.hasVInstructionsF16()))
17152 return SDValue();
17153 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
17154 }
17155 case ISD::LOAD:
17156 case ISD::STORE: {
17157 if (DCI.isAfterLegalizeDAG())
17158 if (SDValue V = performMemPairCombine(N, DCI))
17159 return V;
17160
17161 if (N->getOpcode() != ISD::STORE)
17162 break;
17163
17164 auto *Store = cast<StoreSDNode>(N);
17165 SDValue Chain = Store->getChain();
17166 EVT MemVT = Store->getMemoryVT();
17167 SDValue Val = Store->getValue();
17168 SDLoc DL(N);
17169
17170 bool IsScalarizable =
17171 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
17172 Store->isSimple() &&
17173 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
17174 isPowerOf2_64(MemVT.getSizeInBits()) &&
17175 MemVT.getSizeInBits() <= Subtarget.getXLen();
17176
17177 // If sufficiently aligned we can scalarize stores of constant vectors of
17178 // any power-of-two size up to XLen bits, provided that they aren't too
17179 // expensive to materialize.
17180 // vsetivli zero, 2, e8, m1, ta, ma
17181 // vmv.v.i v8, 4
17182 // vse64.v v8, (a0)
17183 // ->
17184 // li a1, 1028
17185 // sh a1, 0(a0)
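// (In this example the two e8 elements of value 4 pack into NewC = 0x0404,
// i.e. the constant 1028 materialized by the li above.)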
17186 if (DCI.isBeforeLegalize() && IsScalarizable &&
17187 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
17188 // Get the constant vector bits
17189 APInt NewC(Val.getValueSizeInBits(), 0);
17190 uint64_t EltSize = Val.getScalarValueSizeInBits();
17191 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
17192 if (Val.getOperand(i).isUndef())
17193 continue;
17194 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
17195 i * EltSize);
17196 }
17197 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17198
17199 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
17200 true) <= 2 &&
17201 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17202 NewVT, *Store->getMemOperand())) {
17203 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
17204 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
17205 Store->getPointerInfo(), Store->getOriginalAlign(),
17206 Store->getMemOperand()->getFlags());
17207 }
17208 }
17209
17210 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
17211 // vsetivli zero, 2, e16, m1, ta, ma
17212 // vle16.v v8, (a0)
17213 // vse16.v v8, (a1)
17214 if (auto *L = dyn_cast<LoadSDNode>(Val);
17215 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
17216 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
17217 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
17218 L->getMemoryVT() == MemVT) {
17219 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17220 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17221 NewVT, *Store->getMemOperand()) &&
17222 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17223 NewVT, *L->getMemOperand())) {
17224 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17225 L->getPointerInfo(), L->getOriginalAlign(),
17226 L->getMemOperand()->getFlags());
17227 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17228 Store->getPointerInfo(), Store->getOriginalAlign(),
17229 Store->getMemOperand()->getFlags());
17230 }
17231 }
17232
17233 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17234 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17235 // any illegal types.
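// For example, "vmv.x.s a1, v8; sd a1, 0(a0)" can instead be emitted as a
// store of the first element with VL=1: "vsetivli zero, 1, e64, m1, ta, ma;
// vse64.v v8, (a0)", avoiding the move through a scalar register.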
17236 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17237 (DCI.isAfterLegalizeDAG() &&
17238 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17239 isNullConstant(Val.getOperand(1)))) {
17240 SDValue Src = Val.getOperand(0);
17241 MVT VecVT = Src.getSimpleValueType();
17242 // VecVT should be scalable and memory VT should match the element type.
17243 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17244 MemVT == VecVT.getVectorElementType()) {
17245 SDLoc DL(N);
17246 MVT MaskVT = getMaskTypeFor(VecVT);
17247 return DAG.getStoreVP(
17248 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17249 DAG.getConstant(1, DL, MaskVT),
17250 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17251 Store->getMemOperand(), Store->getAddressingMode(),
17252 Store->isTruncatingStore(), /*IsCompress*/ false);
17253 }
17254 }
17255
17256 break;
17257 }
17258 case ISD::SPLAT_VECTOR: {
17259 EVT VT = N->getValueType(0);
17260 // Only perform this combine on legal MVT types.
17261 if (!isTypeLegal(VT))
17262 break;
17263 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17264 DAG, Subtarget))
17265 return Gather;
17266 break;
17267 }
17268 case ISD::BUILD_VECTOR:
17269 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17270 return V;
17271 break;
17272 case ISD::CONCAT_VECTORS:
17273 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17274 return V;
17275 break;
17276 case ISD::INSERT_VECTOR_ELT:
17277 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17278 return V;
17279 break;
17280 case RISCVISD::VFMV_V_F_VL: {
17281 const MVT VT = N->getSimpleValueType(0);
17282 SDValue Passthru = N->getOperand(0);
17283 SDValue Scalar = N->getOperand(1);
17284 SDValue VL = N->getOperand(2);
17285
17286 // If VL is 1, we can use vfmv.s.f.
17287 if (isOneConstant(VL))
17288 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17289 break;
17290 }
17291 case RISCVISD::VMV_V_X_VL: {
17292 const MVT VT = N->getSimpleValueType(0);
17293 SDValue Passthru = N->getOperand(0);
17294 SDValue Scalar = N->getOperand(1);
17295 SDValue VL = N->getOperand(2);
17296
17297 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17298 // scalar input.
17299 unsigned ScalarSize = Scalar.getValueSizeInBits();
17300 unsigned EltWidth = VT.getScalarSizeInBits();
17301 if (ScalarSize > EltWidth && Passthru.isUndef())
17302 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17303 return SDValue(N, 0);
17304
17305 // If VL is 1 and the scalar value won't benefit from immediate, we can
17306 // use vmv.s.x.
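// For example, with VL == 1 a scalar of 1024 (not a 5-bit immediate) becomes
// vmv.s.x, while a scalar of 3 is kept as vmv.v.x so it can later become
// vmv.v.i.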
17307 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17308 if (isOneConstant(VL) &&
17309 (!Const || Const->isZero() ||
17310 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17311 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17312
17313 break;
17314 }
17315 case RISCVISD::VFMV_S_F_VL: {
17316 SDValue Src = N->getOperand(1);
17317 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17318 // into an undef vector.
17319 // TODO: Could use a vslide or vmv.v.v for non-undef.
17320 if (N->getOperand(0).isUndef() &&
17321 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17322 isNullConstant(Src.getOperand(1)) &&
17323 Src.getOperand(0).getValueType().isScalableVector()) {
17324 EVT VT = N->getValueType(0);
17325 EVT SrcVT = Src.getOperand(0).getValueType();
17326 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
17327 // Widths match, just return the original vector.
17328 if (SrcVT == VT)
17329 return Src.getOperand(0);
17330 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17331 }
17332 [[fallthrough]];
17333 }
17334 case RISCVISD::VMV_S_X_VL: {
17335 const MVT VT = N->getSimpleValueType(0);
17336 SDValue Passthru = N->getOperand(0);
17337 SDValue Scalar = N->getOperand(1);
17338 SDValue VL = N->getOperand(2);
17339
17340 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17341 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17342 return Scalar.getOperand(0);
17343
17344 // Use M1 or smaller to avoid over-constraining register allocation.
17345 const MVT M1VT = getLMUL1VT(VT);
17346 if (M1VT.bitsLT(VT)) {
17347 SDValue M1Passthru =
17348 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17349 DAG.getVectorIdxConstant(0, DL));
17350 SDValue Result =
17351 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17352 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17353 DAG.getVectorIdxConstant(0, DL));
17354 return Result;
17355 }
17356
17357 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
17358 // higher would involve overly constraining the register allocator for
17359 // no purpose.
17360 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17361 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17362 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17363 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17364
17365 break;
17366 }
17367 case RISCVISD::VMV_X_S: {
17368 SDValue Vec = N->getOperand(0);
17369 MVT VecVT = N->getOperand(0).getSimpleValueType();
17370 const MVT M1VT = getLMUL1VT(VecVT);
17371 if (M1VT.bitsLT(VecVT)) {
17372 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17373 DAG.getVectorIdxConstant(0, DL));
17374 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17375 }
17376 break;
17377 }
17378 case ISD::INTRINSIC_VOID:
17379 case ISD::INTRINSIC_W_CHAIN:
17380 case ISD::INTRINSIC_WO_CHAIN: {
17381 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17382 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17383 switch (IntNo) {
17384 // By default we do not combine any intrinsic.
17385 default:
17386 return SDValue();
17387 case Intrinsic::riscv_masked_strided_load: {
17388 MVT VT = N->getSimpleValueType(0);
17389 auto *Load = cast<MemIntrinsicSDNode>(N);
17390 SDValue PassThru = N->getOperand(2);
17391 SDValue Base = N->getOperand(3);
17392 SDValue Stride = N->getOperand(4);
17393 SDValue Mask = N->getOperand(5);
17394
17395 // If the stride is equal to the element size in bytes, we can use
17396 // a masked.load.
17397 const unsigned ElementSize = VT.getScalarStoreSize();
17398 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17399 StrideC && StrideC->getZExtValue() == ElementSize)
17400 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
17401 DAG.getUNDEF(XLenVT), Mask, PassThru,
17402 Load->getMemoryVT(), Load->getMemOperand(),
17403 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17404 return SDValue();
17405 }
17406 case Intrinsic::riscv_masked_strided_store: {
17407 auto *Store = cast<MemIntrinsicSDNode>(N);
17408 SDValue Value = N->getOperand(2);
17409 SDValue Base = N->getOperand(3);
17410 SDValue Stride = N->getOperand(4);
17411 SDValue Mask = N->getOperand(5);
17412
17413 // If the stride is equal to the element size in bytes, we can use
17414 // a masked.store.
17415 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
17416 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17417 StrideC && StrideC->getZExtValue() == ElementSize)
17418 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
17419 DAG.getUNDEF(XLenVT), Mask,
17420 Value.getValueType(), Store->getMemOperand(),
17421 ISD::UNINDEXED, false);
17422 return SDValue();
17423 }
17424 case Intrinsic::riscv_vcpop:
17425 case Intrinsic::riscv_vcpop_mask:
17426 case Intrinsic::riscv_vfirst:
17427 case Intrinsic::riscv_vfirst_mask: {
17428 SDValue VL = N->getOperand(2);
17429 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17430 IntNo == Intrinsic::riscv_vfirst_mask)
17431 VL = N->getOperand(3);
17432 if (!isNullConstant(VL))
17433 return SDValue();
17434 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17435 SDLoc DL(N);
17436 EVT VT = N->getValueType(0);
17437 if (IntNo == Intrinsic::riscv_vfirst ||
17438 IntNo == Intrinsic::riscv_vfirst_mask)
17439 return DAG.getConstant(-1, DL, VT);
17440 return DAG.getConstant(0, DL, VT);
17441 }
17442 }
17443 }
17444 case ISD::BITCAST: {
17446 SDValue N0 = N->getOperand(0);
17447 EVT VT = N->getValueType(0);
17448 EVT SrcVT = N0.getValueType();
17449 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17450 // type, widen both sides to avoid a trip through memory.
17451 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17452 VT.isScalarInteger()) {
17453 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17454 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17455 Ops[0] = N0;
17456 SDLoc DL(N);
17457 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17458 N0 = DAG.getBitcast(MVT::i8, N0);
17459 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17460 }
17461
17462 return SDValue();
17463 }
17464 }
17465
17466 return SDValue();
17467}
17468
17469 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17470 EVT XVT, unsigned KeptBits) const {
17471 // For vectors, we don't have a preference.
17472 if (XVT.isVector())
17473 return false;
17474
17475 if (XVT != MVT::i32 && XVT != MVT::i64)
17476 return false;
17477
17478 // We can use sext.w for RV64 or an srai 31 on RV32.
17479 if (KeptBits == 32 || KeptBits == 64)
17480 return true;
17481
17482 // With Zbb we can use sext.h/sext.b.
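// For example, with KeptBits == 16 the check "x == sext_inreg(x, i16)" can be
// lowered to, e.g., "sext.h t0, a0; beq t0, a0, ...".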
17483 return Subtarget.hasStdExtZbb() &&
17484 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17485 KeptBits == 16);
17486}
17487
17488 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17489 const SDNode *N, CombineLevel Level) const {
17490 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17491 N->getOpcode() == ISD::SRL) &&
17492 "Expected shift op");
17493
17494 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17495 // materialised in fewer instructions than `(OP _, c1)`:
17496 //
17497 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17498 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
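// For example, (shl (add x, 4), 3) becomes (add (shl x, 3), 32) since 32 still
// fits in an ADDI immediate, whereas (shl (add x, 2047), 12) is rejected below
// because 2047 fits an ADDI immediate but 2047 << 12 does not.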
17499 SDValue N0 = N->getOperand(0);
17500 EVT Ty = N0.getValueType();
17501 if (Ty.isScalarInteger() &&
17502 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17503 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17504 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17505 if (C1 && C2) {
17506 const APInt &C1Int = C1->getAPIntValue();
17507 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17508
17509 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17510 // and the combine should happen, to potentially allow further combines
17511 // later.
17512 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17513 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17514 return true;
17515
17516 // We can materialise `c1` in an add immediate, so it's "free", and the
17517 // combine should be prevented.
17518 if (C1Int.getSignificantBits() <= 64 &&
17519 isLegalAddImmediate(C1Int.getSExtValue()))
17520 return false;
17521
17522 // Neither constant will fit into an immediate, so find materialisation
17523 // costs.
17524 int C1Cost =
17525 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17526 /*CompressionCost*/ true);
17527 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17528 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17529 /*CompressionCost*/ true);
17530
17531 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17532 // combine should be prevented.
17533 if (C1Cost < ShiftedC1Cost)
17534 return false;
17535 }
17536 }
17537 return true;
17538}
17539
17540 bool RISCVTargetLowering::targetShrinkDemandedConstant(
17541 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17542 TargetLoweringOpt &TLO) const {
17543 // Delay this optimization as late as possible.
17544 if (!TLO.LegalOps)
17545 return false;
17546
17547 EVT VT = Op.getValueType();
17548 if (VT.isVector())
17549 return false;
17550
17551 unsigned Opcode = Op.getOpcode();
17552 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17553 return false;
17554
17555 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17556 if (!C)
17557 return false;
17558
17559 const APInt &Mask = C->getAPIntValue();
17560
17561 // Clear all non-demanded bits initially.
17562 APInt ShrunkMask = Mask & DemandedBits;
17563
17564 // Try to make a smaller immediate by setting undemanded bits.
17565
17566 APInt ExpandedMask = Mask | ~DemandedBits;
17567
17568 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17569 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17570 };
17571 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17572 if (NewMask == Mask)
17573 return true;
17574 SDLoc DL(Op);
17575 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17576 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17577 Op.getOperand(0), NewC);
17578 return TLO.CombineTo(Op, NewOp);
17579 };
17580
17581 // If the shrunk mask fits in sign extended 12 bits, let the target
17582 // independent code apply it.
17583 if (ShrunkMask.isSignedIntN(12))
17584 return false;
17585
17586 // And has a few special cases for zext.
17587 if (Opcode == ISD::AND) {
17588 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17589 // otherwise use SLLI + SRLI.
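// For example, (and x, 0x1ffff) with only bits [15:0] demanded can use the
// 0xffff mask here and fold to zext.h instead of materializing 0x1ffff.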
17590 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17591 if (IsLegalMask(NewMask))
17592 return UseMask(NewMask);
17593
17594 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17595 if (VT == MVT::i64) {
17596 APInt NewMask = APInt(64, 0xffffffff);
17597 if (IsLegalMask(NewMask))
17598 return UseMask(NewMask);
17599 }
17600 }
17601
17602 // For the remaining optimizations, we need to be able to make a negative
17603 // number through a combination of mask and undemanded bits.
17604 if (!ExpandedMask.isNegative())
17605 return false;
17606
17607 // What is the fewest number of bits we need to represent the negative number?
17608 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17609
17610 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17611 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17612 // If we can't create a simm12, we shouldn't change opaque constants.
17613 APInt NewMask = ShrunkMask;
17614 if (MinSignedBits <= 12)
17615 NewMask.setBitsFrom(11);
17616 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17617 NewMask.setBitsFrom(31);
17618 else
17619 return false;
17620
17621 // Check that our new mask is a subset of the demanded mask.
17622 assert(IsLegalMask(NewMask));
17623 return UseMask(NewMask);
17624}
17625
17626static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17627 static const uint64_t GREVMasks[] = {
17628 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17629 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
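// With ShAmt == 7 the three byte-local stages (shifts of 1, 2 and 4) all run:
// GREV reverses the bits within each byte (brev8), e.g. 0x01 -> 0x80, while
// GORC ORs each byte with its shifted copies (orc.b), e.g. 0x10 -> 0xff.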
17630
17631 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17632 unsigned Shift = 1 << Stage;
17633 if (ShAmt & Shift) {
17634 uint64_t Mask = GREVMasks[Stage];
17635 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17636 if (IsGORC)
17637 Res |= x;
17638 x = Res;
17639 }
17640 }
17641
17642 return x;
17643}
17644
17645 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17646 KnownBits &Known,
17647 const APInt &DemandedElts,
17648 const SelectionDAG &DAG,
17649 unsigned Depth) const {
17650 unsigned BitWidth = Known.getBitWidth();
17651 unsigned Opc = Op.getOpcode();
17652 assert((Opc >= ISD::BUILTIN_OP_END ||
17653 Opc == ISD::INTRINSIC_WO_CHAIN ||
17654 Opc == ISD::INTRINSIC_W_CHAIN ||
17655 Opc == ISD::INTRINSIC_VOID) &&
17656 "Should use MaskedValueIsZero if you don't know whether Op"
17657 " is a target node!");
17658
17659 Known.resetAll();
17660 switch (Opc) {
17661 default: break;
17662 case RISCVISD::SELECT_CC: {
17663 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17664 // If we don't know any bits, early out.
17665 if (Known.isUnknown())
17666 break;
17667 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17668
17669 // Only known if known in both the LHS and RHS.
17670 Known = Known.intersectWith(Known2);
17671 break;
17672 }
17673 case RISCVISD::CZERO_EQZ:
17674 case RISCVISD::CZERO_NEZ:
17675 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17676 // Result is either all zero or operand 0. We can propagate zeros, but not
17677 // ones.
17678 Known.One.clearAllBits();
17679 break;
17680 case RISCVISD::REMUW: {
17681 KnownBits Known2;
17682 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17683 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17684 // We only care about the lower 32 bits.
17685 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17686 // Restore the original width by sign extending.
17687 Known = Known.sext(BitWidth);
17688 break;
17689 }
17690 case RISCVISD::DIVUW: {
17691 KnownBits Known2;
17692 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17693 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17694 // We only care about the lower 32 bits.
17695 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17696 // Restore the original width by sign extending.
17697 Known = Known.sext(BitWidth);
17698 break;
17699 }
17700 case RISCVISD::SLLW: {
17701 KnownBits Known2;
17702 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17703 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17704 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17705 // Restore the original width by sign extending.
17706 Known = Known.sext(BitWidth);
17707 break;
17708 }
17709 case RISCVISD::CTZW: {
17710 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17711 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17712 unsigned LowBits = llvm::bit_width(PossibleTZ);
17713 Known.Zero.setBitsFrom(LowBits);
17714 break;
17715 }
17716 case RISCVISD::CLZW: {
17717 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17718 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17719 unsigned LowBits = llvm::bit_width(PossibleLZ);
17720 Known.Zero.setBitsFrom(LowBits);
17721 break;
17722 }
17723 case RISCVISD::BREV8:
17724 case RISCVISD::ORC_B: {
17725 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17726 // control value of 7 is equivalent to brev8 and orc.b.
17727 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17728 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17729 // To compute zeros, we need to invert the value and invert it back after.
17730 Known.Zero =
17731 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17732 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17733 break;
17734 }
17735 case RISCVISD::READ_VLENB: {
17736 // We can use the minimum and maximum VLEN values to bound VLENB. We
17737 // know VLEN must be a power of two.
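// For example, with a guaranteed VLEN between 128 and 512 bits, VLENB is one
// of {16, 32, 64}: the low 4 bits and all bits above bit 6 are known zero.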
17738 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17739 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17740 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17741 Known.Zero.setLowBits(Log2_32(MinVLenB));
17742 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17743 if (MaxVLenB == MinVLenB)
17744 Known.One.setBit(Log2_32(MinVLenB));
17745 break;
17746 }
17747 case RISCVISD::FCLASS: {
17748 // fclass will only set one of the low 10 bits.
17749 Known.Zero.setBitsFrom(10);
17750 break;
17751 }
17752 case ISD::INTRINSIC_W_CHAIN:
17753 case ISD::INTRINSIC_WO_CHAIN: {
17754 unsigned IntNo =
17755 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17756 switch (IntNo) {
17757 default:
17758 // We can't do anything for most intrinsics.
17759 break;
17760 case Intrinsic::riscv_vsetvli:
17761 case Intrinsic::riscv_vsetvlimax: {
17762 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17763 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17764 RISCVII::VLMUL VLMUL =
17765 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17766 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17767 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17768 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17769 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17770
17771 // The result of vsetvli must not be larger than AVL.
17772 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17773 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17774
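// For example, with a maximum VLEN of 512, SEW=32 and LMUL=2, MaxVL is 32, so
// all bits above bit 5 of the returned VL are known zero.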
17775 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17776 if (BitWidth > KnownZeroFirstBit)
17777 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17778 break;
17779 }
17780 }
17781 break;
17782 }
17783 }
17784}
17785
17786 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17787 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17788 unsigned Depth) const {
17789 switch (Op.getOpcode()) {
17790 default:
17791 break;
17792 case RISCVISD::SELECT_CC: {
17793 unsigned Tmp =
17794 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17795 if (Tmp == 1) return 1; // Early out.
17796 unsigned Tmp2 =
17797 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17798 return std::min(Tmp, Tmp2);
17799 }
17800 case RISCVISD::CZERO_EQZ:
17801 case RISCVISD::CZERO_NEZ:
17802 // Output is either all zero or operand 0. We can propagate sign bit count
17803 // from operand 0.
17804 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17805 case RISCVISD::ABSW: {
17806 // We expand this at isel to negw+max. The result will have 33 sign bits
17807 // if the input has at least 33 sign bits.
17808 unsigned Tmp =
17809 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17810 if (Tmp < 33) return 1;
17811 return 33;
17812 }
17813 case RISCVISD::SLLW:
17814 case RISCVISD::SRAW:
17815 case RISCVISD::SRLW:
17816 case RISCVISD::DIVW:
17817 case RISCVISD::DIVUW:
17818 case RISCVISD::REMUW:
17819 case RISCVISD::ROLW:
17820 case RISCVISD::RORW:
17821 case RISCVISD::FCVT_W_RV64:
17822 case RISCVISD::FCVT_WU_RV64:
17823 case RISCVISD::STRICT_FCVT_W_RV64:
17824 case RISCVISD::STRICT_FCVT_WU_RV64:
17825 // TODO: As the result is sign-extended, this is conservatively correct. A
17826 // more precise answer could be calculated for SRAW depending on known
17827 // bits in the shift amount.
17828 return 33;
17829 case RISCVISD::VMV_X_S: {
17830 // The number of sign bits of the scalar result is computed by obtaining the
17831 // element type of the input vector operand, subtracting its width from the
17832 // XLEN, and then adding one (sign bit within the element type). If the
17833 // element type is wider than XLen, the least-significant XLEN bits are
17834 // taken.
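// For example, extracting from a vector of i8 elements on RV64 gives
// 64 - 8 + 1 = 57 known sign bits.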
17835 unsigned XLen = Subtarget.getXLen();
17836 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17837 if (EltBits <= XLen)
17838 return XLen - EltBits + 1;
17839 break;
17840 }
17841 case ISD::INTRINSIC_W_CHAIN: {
17842 unsigned IntNo = Op.getConstantOperandVal(1);
17843 switch (IntNo) {
17844 default:
17845 break;
17846 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17847 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17848 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17849 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17850 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17851 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17852 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17853 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17854 case Intrinsic::riscv_masked_cmpxchg_i64:
17855 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17856 // narrow atomic operation. These are implemented using atomic
17857 // operations at the minimum supported atomicrmw/cmpxchg width whose
17858 // result is then sign extended to XLEN. With +A, the minimum width is
17859 // 32 for both RV64 and RV32.
17860 assert(Subtarget.getXLen() == 64);
17861 assert(getMinCmpXchgSizeInBits() == 32);
17862 assert(Subtarget.hasStdExtA());
17863 return 33;
17864 }
17865 break;
17866 }
17867 }
17868
17869 return 1;
17870}
17871
17872 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17873 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17874 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17875
17876 // TODO: Add more target nodes.
17877 switch (Op.getOpcode()) {
17878 case RISCVISD::SELECT_CC:
17879 // Integer select_cc cannot create poison.
17880 // TODO: What are the FP poison semantics?
17881 // TODO: This instruction blocks poison from the unselected operand, can
17882 // we do anything with that?
17883 return !Op.getValueType().isInteger();
17884 }
17885 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17886 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17887}
17888
17889const Constant *
17890 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17891 assert(Ld && "Unexpected null LoadSDNode");
17892 if (!ISD::isNormalLoad(Ld))
17893 return nullptr;
17894
17895 SDValue Ptr = Ld->getBasePtr();
17896
17897 // Only constant pools with no offset are supported.
17898 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17899 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17900 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17901 CNode->getOffset() != 0)
17902 return nullptr;
17903
17904 return CNode;
17905 };
17906
17907 // Simple case, LLA.
17908 if (Ptr.getOpcode() == RISCVISD::LLA) {
17909 auto *CNode = GetSupportedConstantPool(Ptr);
17910 if (!CNode || CNode->getTargetFlags() != 0)
17911 return nullptr;
17912
17913 return CNode->getConstVal();
17914 }
17915
17916 // Look for a HI and ADD_LO pair.
17917 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17918 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17919 return nullptr;
17920
17921 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17922 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17923
17924 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17925 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17926 return nullptr;
17927
17928 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17929 return nullptr;
17930
17931 return CNodeLo->getConstVal();
17932}
17933
17934 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17935 MachineBasicBlock *BB) {
17936 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17937
17938 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17939 // Should the count have wrapped while it was being read, we need to try
17940 // again.
17941 // For example:
17942 // ```
17943 // read:
17944 // csrrs x3, counterh # load high word of counter
17945 // csrrs x2, counter # load low word of counter
17946 // csrrs x4, counterh # load high word of counter
17947 // bne x3, x4, read # check if high word reads match, otherwise try again
17948 // ```
17949
17950 MachineFunction &MF = *BB->getParent();
17951 const BasicBlock *LLVMBB = BB->getBasicBlock();
17952 MachineFunction::iterator It = ++BB->getIterator();
17953
17954 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17955 MF.insert(It, LoopMBB);
17956
17957 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17958 MF.insert(It, DoneMBB);
17959
17960 // Transfer the remainder of BB and its successor edges to DoneMBB.
17961 DoneMBB->splice(DoneMBB->begin(), BB,
17962 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17963 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17964
17965 BB->addSuccessor(LoopMBB);
17966
17967 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17968 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17969 Register LoReg = MI.getOperand(0).getReg();
17970 Register HiReg = MI.getOperand(1).getReg();
17971 int64_t LoCounter = MI.getOperand(2).getImm();
17972 int64_t HiCounter = MI.getOperand(3).getImm();
17973 DebugLoc DL = MI.getDebugLoc();
17974
17975 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17976 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17977 .addImm(HiCounter)
17978 .addReg(RISCV::X0);
17979 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17980 .addImm(LoCounter)
17981 .addReg(RISCV::X0);
17982 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17983 .addImm(HiCounter)
17984 .addReg(RISCV::X0);
17985
17986 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17987 .addReg(HiReg)
17988 .addReg(ReadAgainReg)
17989 .addMBB(LoopMBB);
17990
17991 LoopMBB->addSuccessor(LoopMBB);
17992 LoopMBB->addSuccessor(DoneMBB);
17993
17994 MI.eraseFromParent();
17995
17996 return DoneMBB;
17997}
17998
17999 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
18000 MachineBasicBlock *BB,
18001 const RISCVSubtarget &Subtarget) {
18002 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18003
18004 MachineFunction &MF = *BB->getParent();
18005 DebugLoc DL = MI.getDebugLoc();
18006 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
18007 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
18008 Register LoReg = MI.getOperand(0).getReg();
18009 Register HiReg = MI.getOperand(1).getReg();
18010 Register SrcReg = MI.getOperand(2).getReg();
18011
18012 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18013 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18014
18015 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
18016 RI, Register());
18017 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18018 MachineMemOperand *MMOLo =
18019 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
18020 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18021 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
18022 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
18023 .addFrameIndex(FI)
18024 .addImm(0)
18025 .addMemOperand(MMOLo);
18026 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
18027 .addFrameIndex(FI)
18028 .addImm(4)
18029 .addMemOperand(MMOHi);
18030 MI.eraseFromParent(); // The pseudo instruction is gone now.
18031 return BB;
18032}
18033
18034 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
18035 MachineBasicBlock *BB,
18036 const RISCVSubtarget &Subtarget) {
18037 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
18038 "Unexpected instruction");
18039
18040 MachineFunction &MF = *BB->getParent();
18041 DebugLoc DL = MI.getDebugLoc();
18042 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
18043 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
18044 Register DstReg = MI.getOperand(0).getReg();
18045 Register LoReg = MI.getOperand(1).getReg();
18046 Register HiReg = MI.getOperand(2).getReg();
18047
18048 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
18049 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18050
18051 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18052 MachineMemOperand *MMOLo =
18053 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
18054 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18055 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
18056 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18057 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
18058 .addFrameIndex(FI)
18059 .addImm(0)
18060 .addMemOperand(MMOLo);
18061 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18062 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
18063 .addFrameIndex(FI)
18064 .addImm(4)
18065 .addMemOperand(MMOHi);
18066 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
18067 MI.eraseFromParent(); // The pseudo instruction is gone now.
18068 return BB;
18069}
18070
18071 static bool isSelectPseudo(MachineInstr &MI) {
18072 switch (MI.getOpcode()) {
18073 default:
18074 return false;
18075 case RISCV::Select_GPR_Using_CC_GPR:
18076 case RISCV::Select_GPR_Using_CC_Imm:
18077 case RISCV::Select_FPR16_Using_CC_GPR:
18078 case RISCV::Select_FPR16INX_Using_CC_GPR:
18079 case RISCV::Select_FPR32_Using_CC_GPR:
18080 case RISCV::Select_FPR32INX_Using_CC_GPR:
18081 case RISCV::Select_FPR64_Using_CC_GPR:
18082 case RISCV::Select_FPR64INX_Using_CC_GPR:
18083 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18084 return true;
18085 }
18086}
18087
18088 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
18089 unsigned RelOpcode, unsigned EqOpcode,
18090 const RISCVSubtarget &Subtarget) {
18091 DebugLoc DL = MI.getDebugLoc();
18092 Register DstReg = MI.getOperand(0).getReg();
18093 Register Src1Reg = MI.getOperand(1).getReg();
18094 Register Src2Reg = MI.getOperand(2).getReg();
18095 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
18096 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18097 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
18098
18099 // Save the current FFLAGS.
18100 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
18101
18102 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
18103 .addReg(Src1Reg)
18104 .addReg(Src2Reg);
18105 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18106 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18107
18108 // Restore the FFLAGS.
18109 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18110 .addReg(SavedFFlags, RegState::Kill);
18111
18112 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
18113 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
18114 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
18115 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
18116 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18117 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
18118
18119 // Erase the pseudoinstruction.
18120 MI.eraseFromParent();
18121 return BB;
18122}
18123
18124static MachineBasicBlock *
18125 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
18126 MachineBasicBlock *ThisMBB,
18127 const RISCVSubtarget &Subtarget) {
18128 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
18129 // Without this, custom-inserter would have generated:
18130 //
18131 // A
18132 // | \
18133 // | B
18134 // | /
18135 // C
18136 // | \
18137 // | D
18138 // | /
18139 // E
18140 //
18141 // A: X = ...; Y = ...
18142 // B: empty
18143 // C: Z = PHI [X, A], [Y, B]
18144 // D: empty
18145 // E: PHI [X, C], [Z, D]
18146 //
18147 // If we lower both Select_FPRX_ in a single step, we can instead generate:
18148 //
18149 // A
18150 // | \
18151 // | C
18152 // | /|
18153 // |/ |
18154 // | |
18155 // | D
18156 // | /
18157 // E
18158 //
18159 // A: X = ...; Y = ...
18160 // D: empty
18161 // E: PHI [X, A], [X, C], [Y, D]
18162
18163 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18164 const DebugLoc &DL = First.getDebugLoc();
18165 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
18166 MachineFunction *F = ThisMBB->getParent();
18167 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
18168 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
18169 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
18170 MachineFunction::iterator It = ++ThisMBB->getIterator();
18171 F->insert(It, FirstMBB);
18172 F->insert(It, SecondMBB);
18173 F->insert(It, SinkMBB);
18174
18175 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
18176 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
18177 std::next(MachineBasicBlock::iterator(First)),
18178 ThisMBB->end());
18179 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
18180
18181 // Fallthrough block for ThisMBB.
18182 ThisMBB->addSuccessor(FirstMBB);
18183 // Fallthrough block for FirstMBB.
18184 FirstMBB->addSuccessor(SecondMBB);
18185 ThisMBB->addSuccessor(SinkMBB);
18186 FirstMBB->addSuccessor(SinkMBB);
18187 // This is fallthrough.
18188 SecondMBB->addSuccessor(SinkMBB);
18189
18190 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
18191 Register FLHS = First.getOperand(1).getReg();
18192 Register FRHS = First.getOperand(2).getReg();
18193 // Insert appropriate branch.
18194 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
18195 .addReg(FLHS)
18196 .addReg(FRHS)
18197 .addMBB(SinkMBB);
18198
18199 Register SLHS = Second.getOperand(1).getReg();
18200 Register SRHS = Second.getOperand(2).getReg();
18201 Register Op1Reg4 = First.getOperand(4).getReg();
18202 Register Op1Reg5 = First.getOperand(5).getReg();
18203
18204 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
18205 // Insert appropriate branch.
18206 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
18207 .addReg(SLHS)
18208 .addReg(SRHS)
18209 .addMBB(SinkMBB);
18210
18211 Register DestReg = Second.getOperand(0).getReg();
18212 Register Op2Reg4 = Second.getOperand(4).getReg();
18213 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
18214 .addReg(Op2Reg4)
18215 .addMBB(ThisMBB)
18216 .addReg(Op1Reg4)
18217 .addMBB(FirstMBB)
18218 .addReg(Op1Reg5)
18219 .addMBB(SecondMBB);
18220
18221 // Now remove the Select_FPRX_s.
18222 First.eraseFromParent();
18223 Second.eraseFromParent();
18224 return SinkMBB;
18225}
18226
18227 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
18228 MachineBasicBlock *BB,
18229 const RISCVSubtarget &Subtarget) {
18230 // To "insert" Select_* instructions, we actually have to insert the triangle
18231 // control-flow pattern. The incoming instructions know the destination vreg
18232 // to set, the condition code register to branch on, the true/false values to
18233 // select between, and the condcode to use to select the appropriate branch.
18234 //
18235 // We produce the following control flow:
18236 // HeadMBB
18237 // | \
18238 // | IfFalseMBB
18239 // | /
18240 // TailMBB
18241 //
18242 // When we find a sequence of selects we attempt to optimize their emission
18243 // by sharing the control flow. Currently we only handle cases where we have
18244 // multiple selects with the exact same condition (same LHS, RHS and CC).
18245 // The selects may be interleaved with other instructions if the other
18246 // instructions meet some requirements we deem safe:
18247 // - They are not pseudo instructions.
18248 // - They are debug instructions. Otherwise,
18249 // - They do not have side-effects, do not access memory and their inputs do
18250 // not depend on the results of the select pseudo-instructions.
18251 // The TrueV/FalseV operands of the selects cannot depend on the result of
18252 // previous selects in the sequence.
18253 // These conditions could be further relaxed. See the X86 target for a
18254 // related approach and more information.
18255 //
18256 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18257 // is checked here and handled by a separate function -
18258 // EmitLoweredCascadedSelect.
18259 Register LHS = MI.getOperand(1).getReg();
18260 Register RHS;
18261 if (MI.getOperand(2).isReg())
18262 RHS = MI.getOperand(2).getReg();
18263 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
18264
18265 SmallVector<MachineInstr *, 4> SelectDebugValues;
18266 SmallSet<Register, 4> SelectDests;
18267 SelectDests.insert(MI.getOperand(0).getReg());
18268
18269 MachineInstr *LastSelectPseudo = &MI;
18270 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
18271 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
18272 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
18273 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
18274 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
18275 Next->getOperand(5).isKill()) {
18276 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
18277 }
18278
18279 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
18280 SequenceMBBI != E; ++SequenceMBBI) {
18281 if (SequenceMBBI->isDebugInstr())
18282 continue;
18283 if (isSelectPseudo(*SequenceMBBI)) {
18284 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
18285 !SequenceMBBI->getOperand(2).isReg() ||
18286 SequenceMBBI->getOperand(2).getReg() != RHS ||
18287 SequenceMBBI->getOperand(3).getImm() != CC ||
18288 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
18289 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
18290 break;
18291 LastSelectPseudo = &*SequenceMBBI;
18292 SequenceMBBI->collectDebugValues(SelectDebugValues);
18293 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
18294 continue;
18295 }
18296 if (SequenceMBBI->hasUnmodeledSideEffects() ||
18297 SequenceMBBI->mayLoadOrStore() ||
18298 SequenceMBBI->usesCustomInsertionHook())
18299 break;
18300 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
18301 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
18302 }))
18303 break;
18304 }
18305
18306 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18307 const BasicBlock *LLVM_BB = BB->getBasicBlock();
18308 DebugLoc DL = MI.getDebugLoc();
18309 MachineFunction::iterator I = ++BB->getIterator();
18310
18311 MachineBasicBlock *HeadMBB = BB;
18312 MachineFunction *F = BB->getParent();
18313 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
18314 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
18315
18316 F->insert(I, IfFalseMBB);
18317 F->insert(I, TailMBB);
18318
18319 // Transfer debug instructions associated with the selects to TailMBB.
18320 for (MachineInstr *DebugInstr : SelectDebugValues) {
18321 TailMBB->push_back(DebugInstr->removeFromParent());
18322 }
18323
18324 // Move all instructions after the sequence to TailMBB.
18325 TailMBB->splice(TailMBB->end(), HeadMBB,
18326 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
18327 // Update machine-CFG edges by transferring all successors of the current
18328 // block to the new block which will contain the Phi nodes for the selects.
18329 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
18330 // Set the successors for HeadMBB.
18331 HeadMBB->addSuccessor(IfFalseMBB);
18332 HeadMBB->addSuccessor(TailMBB);
18333
18334 // Insert appropriate branch.
18335 if (MI.getOperand(2).isImm())
18336 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
18337 .addReg(LHS)
18338 .addImm(MI.getOperand(2).getImm())
18339 .addMBB(TailMBB);
18340 else
18341 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
18342 .addReg(LHS)
18343 .addReg(RHS)
18344 .addMBB(TailMBB);
18345
18346 // IfFalseMBB just falls through to TailMBB.
18347 IfFalseMBB->addSuccessor(TailMBB);
18348
18349 // Create PHIs for all of the select pseudo-instructions.
18350 auto SelectMBBI = MI.getIterator();
18351 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
18352 auto InsertionPoint = TailMBB->begin();
18353 while (SelectMBBI != SelectEnd) {
18354 auto Next = std::next(SelectMBBI);
18355 if (isSelectPseudo(*SelectMBBI)) {
18356 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
18357 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
18358 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
18359 .addReg(SelectMBBI->getOperand(4).getReg())
18360 .addMBB(HeadMBB)
18361 .addReg(SelectMBBI->getOperand(5).getReg())
18362 .addMBB(IfFalseMBB);
18363 SelectMBBI->eraseFromParent();
18364 }
18365 SelectMBBI = Next;
18366 }
18367
18368 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18369 return TailMBB;
18370}
18371
18372// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18373static const RISCV::RISCVMaskedPseudoInfo *
18374lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18375 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
18376 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18377 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18378 const RISCV::RISCVMaskedPseudoInfo *Masked =
18379 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18380 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18381 return Masked;
18382}
18383
18384 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18385 MachineBasicBlock *BB,
18386 unsigned CVTXOpc) {
18387 DebugLoc DL = MI.getDebugLoc();
18388
18389 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
18390
18391 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
18392 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18393
18394 // Save the old value of FFLAGS.
18395 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18396
18397 assert(MI.getNumOperands() == 7);
18398
18399 // Emit a VFCVT_X_F
18400 const TargetRegisterInfo *TRI =
18401 BB->getParent()->getSubtarget().getRegisterInfo();
18402 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18403 Register Tmp = MRI.createVirtualRegister(RC);
18404 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18405 .add(MI.getOperand(1))
18406 .add(MI.getOperand(2))
18407 .add(MI.getOperand(3))
18408 .add(MachineOperand::CreateImm(7)) // frm = DYN
18409 .add(MI.getOperand(4))
18410 .add(MI.getOperand(5))
18411 .add(MI.getOperand(6))
18412 .add(MachineOperand::CreateReg(RISCV::FRM,
18413 /*IsDef*/ false,
18414 /*IsImp*/ true));
18415
18416 // Emit a VFCVT_F_X
18417 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18418 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18419 // There is no E8 variant for VFCVT_F_X.
18420 assert(Log2SEW >= 4);
18421 unsigned CVTFOpc =
18422 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18423 ->MaskedPseudo;
18424
18425 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18426 .add(MI.getOperand(0))
18427 .add(MI.getOperand(1))
18428 .addReg(Tmp)
18429 .add(MI.getOperand(3))
18430 .add(MachineOperand::CreateImm(7)) // frm = DYN
18431 .add(MI.getOperand(4))
18432 .add(MI.getOperand(5))
18433 .add(MI.getOperand(6))
18434 .add(MachineOperand::CreateReg(RISCV::FRM,
18435 /*IsDef*/ false,
18436 /*IsImp*/ true));
18437
18438 // Restore FFLAGS.
18439 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18440 .addReg(SavedFFLAGS, RegState::Kill);
18441
18442 // Erase the pseudoinstruction.
18443 MI.eraseFromParent();
18444 return BB;
18445}
18446
18447 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18448 const RISCVSubtarget &Subtarget) {
18449 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18450 const TargetRegisterClass *RC;
18451 switch (MI.getOpcode()) {
18452 default:
18453 llvm_unreachable("Unexpected opcode");
18454 case RISCV::PseudoFROUND_H:
18455 CmpOpc = RISCV::FLT_H;
18456 F2IOpc = RISCV::FCVT_W_H;
18457 I2FOpc = RISCV::FCVT_H_W;
18458 FSGNJOpc = RISCV::FSGNJ_H;
18459 FSGNJXOpc = RISCV::FSGNJX_H;
18460 RC = &RISCV::FPR16RegClass;
18461 break;
18462 case RISCV::PseudoFROUND_H_INX:
18463 CmpOpc = RISCV::FLT_H_INX;
18464 F2IOpc = RISCV::FCVT_W_H_INX;
18465 I2FOpc = RISCV::FCVT_H_W_INX;
18466 FSGNJOpc = RISCV::FSGNJ_H_INX;
18467 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18468 RC = &RISCV::GPRF16RegClass;
18469 break;
18470 case RISCV::PseudoFROUND_S:
18471 CmpOpc = RISCV::FLT_S;
18472 F2IOpc = RISCV::FCVT_W_S;
18473 I2FOpc = RISCV::FCVT_S_W;
18474 FSGNJOpc = RISCV::FSGNJ_S;
18475 FSGNJXOpc = RISCV::FSGNJX_S;
18476 RC = &RISCV::FPR32RegClass;
18477 break;
18478 case RISCV::PseudoFROUND_S_INX:
18479 CmpOpc = RISCV::FLT_S_INX;
18480 F2IOpc = RISCV::FCVT_W_S_INX;
18481 I2FOpc = RISCV::FCVT_S_W_INX;
18482 FSGNJOpc = RISCV::FSGNJ_S_INX;
18483 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18484 RC = &RISCV::GPRF32RegClass;
18485 break;
18486 case RISCV::PseudoFROUND_D:
18487 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18488 CmpOpc = RISCV::FLT_D;
18489 F2IOpc = RISCV::FCVT_L_D;
18490 I2FOpc = RISCV::FCVT_D_L;
18491 FSGNJOpc = RISCV::FSGNJ_D;
18492 FSGNJXOpc = RISCV::FSGNJX_D;
18493 RC = &RISCV::FPR64RegClass;
18494 break;
18495 case RISCV::PseudoFROUND_D_INX:
18496 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18497 CmpOpc = RISCV::FLT_D_INX;
18498 F2IOpc = RISCV::FCVT_L_D_INX;
18499 I2FOpc = RISCV::FCVT_D_L_INX;
18500 FSGNJOpc = RISCV::FSGNJ_D_INX;
18501 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18502 RC = &RISCV::GPRRegClass;
18503 break;
18504 }
18505
18506 const BasicBlock *BB = MBB->getBasicBlock();
18507 DebugLoc DL = MI.getDebugLoc();
18508 MachineFunction::iterator I = ++MBB->getIterator();
18509
18510 MachineFunction *F = MBB->getParent();
18511 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18512 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18513
18514 F->insert(I, CvtMBB);
18515 F->insert(I, DoneMBB);
18516 // Move all instructions after the sequence to DoneMBB.
18517 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18518 MBB->end());
18519 // Update machine-CFG edges by transferring all successors of the current
18520 // block to the new block which will contain the Phi nodes for the selects.
18521 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
18522 // Set the successors for MBB.
18523 MBB->addSuccessor(CvtMBB);
18524 MBB->addSuccessor(DoneMBB);
18525
18526 Register DstReg = MI.getOperand(0).getReg();
18527 Register SrcReg = MI.getOperand(1).getReg();
18528 Register MaxReg = MI.getOperand(2).getReg();
18529 int64_t FRM = MI.getOperand(3).getImm();
18530
18531 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18532 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
18533
18534 Register FabsReg = MRI.createVirtualRegister(RC);
18535 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18536
18537 // Compare the FP value to the max value.
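// If the compare produces 0 (magnitude not below the threshold, or NaN, for
// which FLT yields 0), the BEQ below skips CvtMBB and the PHI in DoneMBB
// passes SrcReg through unchanged.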
18538 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18539 auto MIB =
18540 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18541 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18542 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18543
18544 // Insert branch.
18545 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18546 .addReg(CmpReg)
18547 .addReg(RISCV::X0)
18548 .addMBB(DoneMBB);
18549
18550 CvtMBB->addSuccessor(DoneMBB);
18551
18552 // Convert to integer.
18553 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18554 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18555 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18556 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18557
18558 // Convert back to FP.
18559 Register I2FReg = MRI.createVirtualRegister(RC);
18560 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18561 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18562 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18563
18564 // Restore the sign bit.
18565 Register CvtReg = MRI.createVirtualRegister(RC);
18566 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18567
18568 // Merge the results.
18569 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18570 .addReg(SrcReg)
18571 .addMBB(MBB)
18572 .addReg(CvtReg)
18573 .addMBB(CvtMBB);
18574
18575 MI.eraseFromParent();
18576 return DoneMBB;
18577}
18578
18579 MachineBasicBlock *
18580 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18581 MachineBasicBlock *BB) const {
18582 switch (MI.getOpcode()) {
18583 default:
18584 llvm_unreachable("Unexpected instr type to insert");
18585 case RISCV::ReadCounterWide:
18586 assert(!Subtarget.is64Bit() &&
18587 "ReadCounterWide is only to be used on riscv32");
18588 return emitReadCounterWidePseudo(MI, BB);
18589 case RISCV::Select_GPR_Using_CC_GPR:
18590 case RISCV::Select_GPR_Using_CC_Imm:
18591 case RISCV::Select_FPR16_Using_CC_GPR:
18592 case RISCV::Select_FPR16INX_Using_CC_GPR:
18593 case RISCV::Select_FPR32_Using_CC_GPR:
18594 case RISCV::Select_FPR32INX_Using_CC_GPR:
18595 case RISCV::Select_FPR64_Using_CC_GPR:
18596 case RISCV::Select_FPR64INX_Using_CC_GPR:
18597 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18598 return emitSelectPseudo(MI, BB, Subtarget);
18599 case RISCV::BuildPairF64Pseudo:
18600 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18601 case RISCV::SplitF64Pseudo:
18602 return emitSplitF64Pseudo(MI, BB, Subtarget);
18603 case RISCV::PseudoQuietFLE_H:
18604 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18605 case RISCV::PseudoQuietFLE_H_INX:
18606 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18607 case RISCV::PseudoQuietFLT_H:
18608 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18609 case RISCV::PseudoQuietFLT_H_INX:
18610 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18611 case RISCV::PseudoQuietFLE_S:
18612 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18613 case RISCV::PseudoQuietFLE_S_INX:
18614 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18615 case RISCV::PseudoQuietFLT_S:
18616 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18617 case RISCV::PseudoQuietFLT_S_INX:
18618 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18619 case RISCV::PseudoQuietFLE_D:
18620 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18621 case RISCV::PseudoQuietFLE_D_INX:
18622 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18623 case RISCV::PseudoQuietFLE_D_IN32X:
18624 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18625 Subtarget);
18626 case RISCV::PseudoQuietFLT_D:
18627 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18628 case RISCV::PseudoQuietFLT_D_INX:
18629 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18630 case RISCV::PseudoQuietFLT_D_IN32X:
18631 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18632 Subtarget);
18633
18634 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18635 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18636 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18637 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18638 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18639 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18640 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18641 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18642 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18643 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18644 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18645 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18646 case RISCV::PseudoFROUND_H:
18647 case RISCV::PseudoFROUND_H_INX:
18648 case RISCV::PseudoFROUND_S:
18649 case RISCV::PseudoFROUND_S_INX:
18650 case RISCV::PseudoFROUND_D:
18651 case RISCV::PseudoFROUND_D_INX:
18652 case RISCV::PseudoFROUND_D_IN32X:
18653 return emitFROUND(MI, BB, Subtarget);
18654 case TargetOpcode::STATEPOINT:
18655 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18656 // while jal call instruction (where statepoint will be lowered at the end)
18657 // has implicit def. This def is early-clobber as it will be set at
18658 // the moment of the call and earlier than any use is read.
18659 // Add this implicit dead def here as a workaround.
18660 MI.addOperand(*MI.getMF(),
18661 MachineOperand::CreateReg(
18662 RISCV::X1, /*isDef*/ true,
18663 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18664 /*isUndef*/ false, /*isEarlyClobber*/ true));
18665 [[fallthrough]];
18666 case TargetOpcode::STACKMAP:
18667 case TargetOpcode::PATCHPOINT:
18668 if (!Subtarget.is64Bit())
18669 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18670 "supported on 64-bit targets");
18671 return emitPatchPoint(MI, BB);
18672 }
18673}
18674
18675 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18676 SDNode *Node) const {
18677 // Add FRM dependency to any instructions with dynamic rounding mode.
18678 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18679 if (Idx < 0) {
18680 // Vector pseudos have FRM index indicated by TSFlags.
18681 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18682 if (Idx < 0)
18683 return;
18684 }
18685 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18686 return;
18687 // If the instruction already reads FRM, don't add another read.
18688 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18689 return;
18690 MI.addOperand(
18691 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18692}
18693
18694// Calling Convention Implementation.
18695// The expectations for frontend ABI lowering vary from target to target.
18696// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18697// details, but this is a longer term goal. For now, we simply try to keep the
18698// role of the frontend as simple and well-defined as possible. The rules can
18699// be summarised as:
18700// * Never split up large scalar arguments. We handle them here.
18701// * If a hardfloat calling convention is being used, and the struct may be
18702// passed in a pair of registers (fp+fp, int+fp), and both registers are
18703// available, then pass as two separate arguments. If either the GPRs or FPRs
18704// are exhausted, then pass according to the rule below.
18705// * If a struct could never be passed in registers or directly in a stack
18706// slot (as it is larger than 2*XLEN and the floating point rules don't
18707// apply), then pass it using a pointer with the byval attribute.
18708// * If a struct is less than 2*XLEN, then coerce to either a two-element
18709// word-sized array or a 2*XLEN scalar (depending on alignment).
18710// * The frontend can determine whether a struct is returned by reference or
18711// not based on its size and fields. If it will be returned by reference, the
18712// frontend must modify the prototype so a pointer with the sret annotation is
18713// passed as the first argument. This is not necessary for large scalar
18714// returns.
18715// * Struct return values and varargs should be coerced to structs containing
18716// register-size fields in the same situations they would be for fixed
18717// arguments.
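// For example, on an ilp32d target a struct { double d; int32_t i; } is passed
// as d in an FPR and i in a GPR while both kinds of registers are still
// available; once either runs out, it falls back to the integer rules above.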
18718
18719static const MCPhysReg ArgFPR16s[] = {
18720 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18721 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18722};
18723static const MCPhysReg ArgFPR32s[] = {
18724 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18725 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18726};
18727static const MCPhysReg ArgFPR64s[] = {
18728 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18729 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18730};
18731// This is an interim calling convention and it may be changed in the future.
18732static const MCPhysReg ArgVRs[] = {
18733 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18734 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18735 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18736static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18737 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18738 RISCV::V20M2, RISCV::V22M2};
18739static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18740 RISCV::V20M4};
18741static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18742
18743 ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
18744 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18745 // the ILP32E ABI.
18746 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18747 RISCV::X13, RISCV::X14, RISCV::X15,
18748 RISCV::X16, RISCV::X17};
18749 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18750 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18751 RISCV::X13, RISCV::X14, RISCV::X15};
18752
18753 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18754 return ArrayRef(ArgEGPRs);
18755
18756 return ArrayRef(ArgIGPRs);
18757}
18758
18759 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18760 // The GPRs used for passing arguments in the FastCC; X5 and X6 might be used
18761 // for the save-restore libcall, so we don't use them.
18762 static const MCPhysReg FastCCIGPRs[] = {
18763 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18764 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18765 RISCV::X29, RISCV::X30, RISCV::X31};
18766
18767 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18768 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18769 RISCV::X13, RISCV::X14, RISCV::X15,
18770 RISCV::X7};
18771
18772 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18773 return ArrayRef(FastCCEGPRs);
18774
18775 return ArrayRef(FastCCIGPRs);
18776}
18777
18778// Pass a 2*XLEN argument that has been split into two XLEN values through
18779// registers or the stack as necessary.
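// Illustrative example (assuming no other arguments interfere): an i64 on
// RV32 is split into two i32 halves. If two GPRs are free, the halves land
// in consecutive registers; if only a7 is free, the low half goes in a7 and
// the high half in a stack slot; if no GPRs remain, both halves are placed
// in a single suitably aligned 8-byte stack area.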
18780static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18781 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18782 MVT ValVT2, MVT LocVT2,
18783 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18784 unsigned XLenInBytes = XLen / 8;
18785 const RISCVSubtarget &STI =
18786 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18787 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());
18788
18789 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18790 // At least one half can be passed via register.
18791 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18792 VA1.getLocVT(), CCValAssign::Full));
18793 } else {
18794 // Both halves must be passed on the stack, with proper alignment.
18795 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18796 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18797 Align StackAlign(XLenInBytes);
18798 if (!EABI || XLen != 32)
18799 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18800 State.addLoc(
18801 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18802 State.AllocateStack(XLenInBytes, StackAlign),
18803 VA1.getLocVT(), CCValAssign::Full));
18804 State.addLoc(CCValAssign::getMem(
18805 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18806 LocVT2, CCValAssign::Full));
18807 return false;
18808 }
18809
18810 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18811 // The second half can also be passed via register.
18812 State.addLoc(
18813 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18814 } else {
18815 // The second half is passed via the stack, without additional alignment.
18816 State.addLoc(CCValAssign::getMem(
18817 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18818 LocVT2, CCValAssign::Full));
18819 }
18820
18821 return false;
18822}
18823
18824// Implements the RISC-V calling convention. Returns true upon failure.
18825bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18826 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18827 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18828 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18829 RVVArgDispatcher &RVVDispatcher) {
18830 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18831 assert(XLen == 32 || XLen == 64);
18832 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18833
18834 // Static chain parameter must not be passed in normal argument registers,
18835 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18836 if (ArgFlags.isNest()) {
18837 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18838 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18839 return false;
18840 }
18841 }
18842
18843 // Any return value split in to more than two values can't be returned
18844 // directly. Vectors are returned via the available vector registers.
18845 if (!LocVT.isVector() && IsRet && ValNo > 1)
18846 return true;
18847
18848 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18849 // variadic argument, or if no F16/F32 argument registers are available.
18850 bool UseGPRForF16_F32 = true;
18851 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18852 // variadic argument, or if no F64 argument registers are available.
18853 bool UseGPRForF64 = true;
18854
18855 switch (ABI) {
18856 default:
18857 llvm_unreachable("Unexpected ABI");
18858 case RISCVABI::ABI_ILP32:
18859 case RISCVABI::ABI_ILP32E:
18860 case RISCVABI::ABI_LP64:
18861 case RISCVABI::ABI_LP64E:
18862 break;
18863 case RISCVABI::ABI_ILP32F:
18864 case RISCVABI::ABI_LP64F:
18865 UseGPRForF16_F32 = !IsFixed;
18866 break;
18867 case RISCVABI::ABI_ILP32D:
18868 case RISCVABI::ABI_LP64D:
18869 UseGPRForF16_F32 = !IsFixed;
18870 UseGPRForF64 = !IsFixed;
18871 break;
18872 }
18873
18874 // FPR16, FPR32, and FPR64 alias each other.
18875 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18876 UseGPRForF16_F32 = true;
18877 UseGPRForF64 = true;
18878 }
18879
18880 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18881 // similar local variables rather than directly checking against the target
18882 // ABI.
18883
18884 if (UseGPRForF16_F32 &&
18885 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18886 LocVT = XLenVT;
18887 LocInfo = CCValAssign::BCvt;
18888 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18889 LocVT = MVT::i64;
18890 LocInfo = CCValAssign::BCvt;
18891 }
18892
18893 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
18894
18895 // If this is a variadic argument, the RISC-V calling convention requires
18896 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18897 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18898 // be used regardless of whether the original argument was split during
18899 // legalisation or not. The argument will not be passed by registers if the
18900 // original type is larger than 2*XLEN, so the register alignment rule does
18901 // not apply.
18902 // TODO: To be compatible with GCC's behaviors, we don't align registers
18903 // currently if we are using ILP32E calling convention. This behavior may be
18904 // changed when RV32E/ILP32E is ratified.
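// Illustrative example: for a call like printf("%lld", (long long)x) on
// RV32, the i64 vararg has 8-byte alignment, so if the next free GPR would
// be the 'odd' register a1, a1 is skipped and the value is passed in the
// aligned pair a2/a3.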
18905 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18906 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18907 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18908 ABI != RISCVABI::ABI_ILP32E) {
18909 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18910 // Skip 'odd' register if necessary.
18911 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18912 State.AllocateReg(ArgGPRs);
18913 }
18914
18915 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18916 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18917 State.getPendingArgFlags();
18918
18919 assert(PendingLocs.size() == PendingArgFlags.size() &&
18920 "PendingLocs and PendingArgFlags out of sync");
18921
18922 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18923 // registers are exhausted.
18924 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18925 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18926 // Depending on available argument GPRS, f64 may be passed in a pair of
18927 // GPRs, split between a GPR and the stack, or passed completely on the
18928 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18929 // cases.
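// Illustrative outcomes: with two GPRs free the f64 goes in a register
// pair (e.g. a6+a7); with only a7 free the low half goes in a7 and the
// high half in a 4-byte stack slot; with no GPRs free the whole f64 gets
// an 8-byte, 8-aligned stack slot.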
18930 Register Reg = State.AllocateReg(ArgGPRs);
18931 if (!Reg) {
18932 unsigned StackOffset = State.AllocateStack(8, Align(8));
18933 State.addLoc(
18934 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18935 return false;
18936 }
18937 LocVT = MVT::i32;
18938 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18939 Register HiReg = State.AllocateReg(ArgGPRs);
18940 if (HiReg) {
18941 State.addLoc(
18942 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18943 } else {
18944 unsigned StackOffset = State.AllocateStack(4, Align(4));
18945 State.addLoc(
18946 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18947 }
18948 return false;
18949 }
18950
18951 // Fixed-length vectors are located in the corresponding scalable-vector
18952 // container types.
18953 if (ValVT.isFixedLengthVector())
18954 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18955
18956 // Split arguments might be passed indirectly, so keep track of the pending
18957 // values. Split vectors are passed via a mix of registers and indirectly, so
18958 // treat them as we would any other argument.
18959 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18960 LocVT = XLenVT;
18961 LocInfo = CCValAssign::Indirect;
18962 PendingLocs.push_back(
18963 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18964 PendingArgFlags.push_back(ArgFlags);
18965 if (!ArgFlags.isSplitEnd()) {
18966 return false;
18967 }
18968 }
18969
18970 // If the split argument only had two elements, it should be passed directly
18971 // in registers or on the stack.
18972 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18973 PendingLocs.size() <= 2) {
18974 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18975 // Apply the normal calling convention rules to the first half of the
18976 // split argument.
18977 CCValAssign VA = PendingLocs[0];
18978 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18979 PendingLocs.clear();
18980 PendingArgFlags.clear();
18981 return CC_RISCVAssign2XLen(
18982 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18983 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18984 }
18985
18986 // Allocate to a register if possible, or else a stack slot.
18987 Register Reg;
18988 unsigned StoreSizeBytes = XLen / 8;
18989 Align StackAlign = Align(XLen / 8);
18990
18991 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18992 Reg = State.AllocateReg(ArgFPR16s);
18993 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18994 Reg = State.AllocateReg(ArgFPR32s);
18995 else if (ValVT == MVT::f64 && !UseGPRForF64)
18996 Reg = State.AllocateReg(ArgFPR64s);
18997 else if (ValVT.isVector()) {
18998 Reg = RVVDispatcher.getNextPhysReg();
18999 if (!Reg) {
19000 // For return values, the vector must be passed fully via registers or
19001 // via the stack.
19002 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
19003 // but we're using all of them.
19004 if (IsRet)
19005 return true;
19006 // Try using a GPR to pass the address
19007 if ((Reg = State.AllocateReg(ArgGPRs))) {
19008 LocVT = XLenVT;
19009 LocInfo = CCValAssign::Indirect;
19010 } else if (ValVT.isScalableVector()) {
19011 LocVT = XLenVT;
19012 LocInfo = CCValAssign::Indirect;
19013 } else {
19014 // Pass fixed-length vectors on the stack.
19015 LocVT = ValVT;
19016 StoreSizeBytes = ValVT.getStoreSize();
19017 // Align vectors to their element sizes, being careful for vXi1
19018 // vectors.
19019 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19020 }
19021 }
19022 } else {
19023 Reg = State.AllocateReg(ArgGPRs);
19024 }
19025
19026 unsigned StackOffset =
19027 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
19028
19029 // If we reach this point and PendingLocs is non-empty, we must be at the
19030 // end of a split argument that must be passed indirectly.
19031 if (!PendingLocs.empty()) {
19032 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
19033 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
19034
19035 for (auto &It : PendingLocs) {
19036 if (Reg)
19037 It.convertToReg(Reg);
19038 else
19039 It.convertToMem(StackOffset);
19040 State.addLoc(It);
19041 }
19042 PendingLocs.clear();
19043 PendingArgFlags.clear();
19044 return false;
19045 }
19046
19047 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
19048 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
19049 "Expected an XLenVT or vector types at this stage");
19050
19051 if (Reg) {
19052 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19053 return false;
19054 }
19055
19056 // When a scalar floating-point value is passed on the stack, no
19057 // bit-conversion is needed.
19058 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
19059 assert(!ValVT.isVector());
19060 LocVT = ValVT;
19061 LocInfo = CCValAssign::Full;
19062 }
19063 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19064 return false;
19065}
19066
19067template <typename ArgTy>
19068static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
19069 for (const auto &ArgIdx : enumerate(Args)) {
19070 MVT ArgVT = ArgIdx.value().VT;
19071 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
19072 return ArgIdx.index();
19073 }
19074 return std::nullopt;
19075}
19076
19077void RISCVTargetLowering::analyzeInputArgs(
19078 MachineFunction &MF, CCState &CCInfo,
19079 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19080 RISCVCCAssignFn Fn) const {
19081 unsigned NumArgs = Ins.size();
19082 FunctionType *FType = MF.getFunction().getFunctionType();
19083
19084 RVVArgDispatcher Dispatcher;
19085 if (IsRet) {
19086 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
19087 } else {
19088 SmallVector<Type *, 4> TypeList;
19089 for (const Argument &Arg : MF.getFunction().args())
19090 TypeList.push_back(Arg.getType());
19091 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
19092 }
19093
19094 for (unsigned i = 0; i != NumArgs; ++i) {
19095 MVT ArgVT = Ins[i].VT;
19096 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19097
19098 Type *ArgTy = nullptr;
19099 if (IsRet)
19100 ArgTy = FType->getReturnType();
19101 else if (Ins[i].isOrigArg())
19102 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19103
19104 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19105 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
19106 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
19107 Dispatcher)) {
19108 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19109 << ArgVT << '\n');
19110 llvm_unreachable(nullptr);
19111 }
19112 }
19113}
19114
19115void RISCVTargetLowering::analyzeOutputArgs(
19116 MachineFunction &MF, CCState &CCInfo,
19117 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19118 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19119 unsigned NumArgs = Outs.size();
19120
19121 SmallVector<Type *, 4> TypeList;
19122 if (IsRet)
19123 TypeList.push_back(MF.getFunction().getReturnType());
19124 else if (CLI)
19125 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
19126 TypeList.push_back(Arg.Ty);
19127 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
19128
19129 for (unsigned i = 0; i != NumArgs; i++) {
19130 MVT ArgVT = Outs[i].VT;
19131 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19132 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19133
19134 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19135 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
19136 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
19137 Dispatcher)) {
19138 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19139 << ArgVT << "\n");
19140 llvm_unreachable(nullptr);
19141 }
19142 }
19143}
19144
19145// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19146 // values.
19147 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19148 const CCValAssign &VA, const SDLoc &DL,
19149 const RISCVSubtarget &Subtarget) {
19150 switch (VA.getLocInfo()) {
19151 default:
19152 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19153 case CCValAssign::Full:
19154 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19155 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19156 break;
19157 case CCValAssign::BCvt:
19158 if (VA.getLocVT().isInteger() &&
19159 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19160 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19161 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
19162 if (RV64LegalI32) {
19163 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
19164 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
19165 } else {
19166 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19167 }
19168 } else {
19169 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19170 }
19171 break;
19172 }
19173 return Val;
19174}
19175
19176// The caller is responsible for loading the full value if the argument is
19177 // passed with CCValAssign::Indirect.
19178 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19179 const CCValAssign &VA, const SDLoc &DL,
19180 const ISD::InputArg &In,
19181 const RISCVTargetLowering &TLI) {
19182 MachineFunction &MF = DAG.getMachineFunction();
19183 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19184 EVT LocVT = VA.getLocVT();
19185 SDValue Val;
19186 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19187 Register VReg = RegInfo.createVirtualRegister(RC);
19188 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19189 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19190
19191 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19192 if (In.isOrigArg()) {
19193 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19194 if (OrigArg->getType()->isIntegerTy()) {
19195 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19196 // An input zero extended from i31 can also be considered sign extended.
19197 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19198 (BitWidth < 32 && In.Flags.isZExt())) {
19199 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19200 RVFI->addSExt32Register(VReg);
19201 }
19202 }
19203 }
19204
19205 if (VA.getLocInfo() == CCValAssign::Indirect)
19206 return Val;
19207
19208 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19209}
19210
19211 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19212 const CCValAssign &VA, const SDLoc &DL,
19213 const RISCVSubtarget &Subtarget) {
19214 EVT LocVT = VA.getLocVT();
19215
19216 switch (VA.getLocInfo()) {
19217 default:
19218 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19219 case CCValAssign::Full:
19220 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19221 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
19222 break;
19223 case CCValAssign::BCvt:
19224 if (LocVT.isInteger() &&
19225 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19226 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19227 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
19228 if (RV64LegalI32) {
19229 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
19230 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
19231 } else {
19232 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19233 }
19234 } else {
19235 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19236 }
19237 break;
19238 }
19239 return Val;
19240}
19241
19242// The caller is responsible for loading the full value if the argument is
19243 // passed with CCValAssign::Indirect.
19244 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19245 const CCValAssign &VA, const SDLoc &DL) {
19246 MachineFunction &MF = DAG.getMachineFunction();
19247 MachineFrameInfo &MFI = MF.getFrameInfo();
19248 EVT LocVT = VA.getLocVT();
19249 EVT ValVT = VA.getValVT();
19250 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
19251 if (ValVT.isScalableVector()) {
19252 // When the value is a scalable vector, we save the pointer which points to
19253 // the scalable vector value in the stack. The ValVT will be the pointer
19254 // type, instead of the scalable vector type.
19255 ValVT = LocVT;
19256 }
19257 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19258 /*IsImmutable=*/true);
19259 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19260 SDValue Val;
19261
19262 ISD::LoadExtType ExtType;
19263 switch (VA.getLocInfo()) {
19264 default:
19265 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19266 case CCValAssign::Full:
19267 case CCValAssign::Indirect:
19268 case CCValAssign::BCvt:
19269 ExtType = ISD::NON_EXTLOAD;
19270 break;
19271 }
19272 Val = DAG.getExtLoad(
19273 ExtType, DL, LocVT, Chain, FIN,
19274 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
19275 return Val;
19276}
19277
19278 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19279 const CCValAssign &VA,
19280 const CCValAssign &HiVA,
19281 const SDLoc &DL) {
19282 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19283 "Unexpected VA");
19285 MachineFrameInfo &MFI = MF.getFrameInfo();
19287
19288 assert(VA.isRegLoc() && "Expected register VA assignment");
19289
19290 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19291 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19292 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19293 SDValue Hi;
19294 if (HiVA.isMemLoc()) {
19295 // Second half of f64 is passed on the stack.
19296 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19297 /*IsImmutable=*/true);
19298 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19299 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19300 MachinePointerInfo::getFixedStack(MF, FI));
19301 } else {
19302 // Second half of f64 is passed in another GPR.
19303 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19304 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19305 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19306 }
19307 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19308}
19309
19310 // FastCC has less than 1% performance improvement for some particular
19311 // benchmarks. But theoretically, it may have a benefit for some cases.
19312 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
19313 unsigned ValNo, MVT ValVT, MVT LocVT,
19314 CCValAssign::LocInfo LocInfo,
19315 ISD::ArgFlagsTy ArgFlags, CCState &State,
19316 bool IsFixed, bool IsRet, Type *OrigTy,
19317 const RISCVTargetLowering &TLI,
19318 RVVArgDispatcher &RVVDispatcher) {
19319 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19320 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19321 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19322 return false;
19323 }
19324 }
19325
19326 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
19327
19328 if (LocVT == MVT::f16 &&
19329 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
19330 static const MCPhysReg FPR16List[] = {
19331 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
19332 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
19333 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
19334 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
19335 if (unsigned Reg = State.AllocateReg(FPR16List)) {
19336 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19337 return false;
19338 }
19339 }
19340
19341 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19342 static const MCPhysReg FPR32List[] = {
19343 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
19344 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
19345 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
19346 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
19347 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19348 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19349 return false;
19350 }
19351 }
19352
19353 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19354 static const MCPhysReg FPR64List[] = {
19355 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
19356 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
19357 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
19358 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
19359 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19360 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19361 return false;
19362 }
19363 }
19364
19365 // Check if there is an available GPR before hitting the stack.
19366 if ((LocVT == MVT::f16 &&
19367 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19368 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19369 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19370 Subtarget.hasStdExtZdinx())) {
19371 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19372 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19373 return false;
19374 }
19375 }
19376
19377 if (LocVT == MVT::f16) {
19378 unsigned Offset2 = State.AllocateStack(2, Align(2));
19379 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19380 return false;
19381 }
19382
19383 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19384 unsigned Offset4 = State.AllocateStack(4, Align(4));
19385 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19386 return false;
19387 }
19388
19389 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19390 unsigned Offset5 = State.AllocateStack(8, Align(8));
19391 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19392 return false;
19393 }
19394
19395 if (LocVT.isVector()) {
19396 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19397 if (AllocatedVReg) {
19398 // Fixed-length vectors are located in the corresponding scalable-vector
19399 // container types.
19400 if (ValVT.isFixedLengthVector())
19401 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19402 State.addLoc(
19403 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19404 } else {
19405 // Try and pass the address via a "fast" GPR.
19406 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19407 LocInfo = CCValAssign::Indirect;
19408 LocVT = TLI.getSubtarget().getXLenVT();
19409 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19410 } else if (ValVT.isFixedLengthVector()) {
19411 auto StackAlign =
19412 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19413 unsigned StackOffset =
19414 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19415 State.addLoc(
19416 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19417 } else {
19418 // Can't pass scalable vectors on the stack.
19419 return true;
19420 }
19421 }
19422
19423 return false;
19424 }
19425
19426 return true; // CC didn't match.
19427}
19428
19429bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19430 CCValAssign::LocInfo LocInfo,
19431 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19432 if (ArgFlags.isNest()) {
19433 report_fatal_error(
19434 "Attribute 'nest' is not supported in GHC calling convention");
19435 }
19436
19437 static const MCPhysReg GPRList[] = {
19438 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19439 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19440
19441 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19442 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19443 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19444 if (unsigned Reg = State.AllocateReg(GPRList)) {
19445 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19446 return false;
19447 }
19448 }
19449
19450 const RISCVSubtarget &Subtarget =
19451 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
19452
19453 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19454 // Pass in STG registers: F1, ..., F6
19455 // fs0 ... fs5
19456 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19457 RISCV::F18_F, RISCV::F19_F,
19458 RISCV::F20_F, RISCV::F21_F};
19459 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19460 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19461 return false;
19462 }
19463 }
19464
19465 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19466 // Pass in STG registers: D1, ..., D6
19467 // fs6 ... fs11
19468 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19469 RISCV::F24_D, RISCV::F25_D,
19470 RISCV::F26_D, RISCV::F27_D};
19471 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19472 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19473 return false;
19474 }
19475 }
19476
19477 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19478 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19479 Subtarget.is64Bit())) {
19480 if (unsigned Reg = State.AllocateReg(GPRList)) {
19481 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19482 return false;
19483 }
19484 }
19485
19486 report_fatal_error("No registers left in GHC calling convention");
19487 return true;
19488}
19489
19490 // Transform physical registers into virtual registers.
19491 SDValue RISCVTargetLowering::LowerFormalArguments(
19492 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19493 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19494 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19495
19496 MachineFunction &MF = DAG.getMachineFunction();
19497
19498 switch (CallConv) {
19499 default:
19500 report_fatal_error("Unsupported calling convention");
19501 case CallingConv::C:
19502 case CallingConv::Fast:
19503 case CallingConv::SPIR_KERNEL:
19504 case CallingConv::GRAAL:
19505 case CallingConv::RISCV_VectorCall:
19506 break;
19507 case CallingConv::GHC:
19508 if (Subtarget.hasStdExtE())
19509 report_fatal_error("GHC calling convention is not supported on RVE!");
19510 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19511 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19512 "(Zdinx/D) instruction set extensions");
19513 }
19514
19515 const Function &Func = MF.getFunction();
19516 if (Func.hasFnAttribute("interrupt")) {
19517 if (!Func.arg_empty())
19518 report_fatal_error(
19519 "Functions with the interrupt attribute cannot have arguments!");
19520
19521 StringRef Kind =
19522 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19523
19524 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19525 report_fatal_error(
19526 "Function interrupt attribute argument not supported!");
19527 }
19528
19529 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19530 MVT XLenVT = Subtarget.getXLenVT();
19531 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19532 // Used with varargs to accumulate store chains.
19533 std::vector<SDValue> OutChains;
19534
19535 // Assign locations to all of the incoming arguments.
19536 SmallVector<CCValAssign, 16> ArgLocs;
19537 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19538
19539 if (CallConv == CallingConv::GHC)
19540 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19541 else
19542 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19543 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19544 : RISCV::CC_RISCV);
19545
19546 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19547 CCValAssign &VA = ArgLocs[i];
19548 SDValue ArgValue;
19549 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19550 // case.
19551 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19552 assert(VA.needsCustom());
19553 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19554 } else if (VA.isRegLoc())
19555 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19556 else
19557 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19558
19559 if (VA.getLocInfo() == CCValAssign::Indirect) {
19560 // If the original argument was split and passed by reference (e.g. i128
19561 // on RV32), we need to load all parts of it here (using the same
19562 // address). Vectors may be partly split to registers and partly to the
19563 // stack, in which case the base address is partly offset and subsequent
19564 // stores are relative to that.
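// Illustrative example: an i128 argument on RV32 is passed Indirect as a
// single pointer; the first i32 part is loaded at offset 0 below and the
// remaining parts at offsets 4, 8 and 12 in the loop that follows.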
19565 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19566 MachinePointerInfo()));
19567 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19568 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19569 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19570 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19571 CCValAssign &PartVA = ArgLocs[i + 1];
19572 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19573 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19574 if (PartVA.getValVT().isScalableVector())
19575 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19576 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19577 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19578 MachinePointerInfo()));
19579 ++i;
19580 ++InsIdx;
19581 }
19582 continue;
19583 }
19584 InVals.push_back(ArgValue);
19585 }
19586
19587 if (any_of(ArgLocs,
19588 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19589 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19590
19591 if (IsVarArg) {
19592 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19593 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19594 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19595 MachineFrameInfo &MFI = MF.getFrameInfo();
19596 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19597 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19598
19599 // Size of the vararg save area. For now, the varargs save area is either
19600 // zero or large enough to hold a0-a7.
19601 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19602 int FI;
19603
19604 // If all registers are allocated, then all varargs must be passed on the
19605 // stack and we don't need to save any argregs.
19606 if (VarArgsSaveSize == 0) {
19607 int VaArgOffset = CCInfo.getStackSize();
19608 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19609 } else {
19610 int VaArgOffset = -VarArgsSaveSize;
19611 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19612
19613 // If saving an odd number of registers then create an extra stack slot to
19614 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19615 // offsets to even-numbered registers remain 2*XLEN-aligned.
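// Illustrative example: on RV64 with three fixed arguments, a3-a7 (five
// registers, 40 bytes) are saved; the extra slot grows the save area to 48
// bytes so it stays 2*XLEN (16-byte) aligned.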
19616 if (Idx % 2) {
19617 MFI.CreateFixedObject(
19618 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19619 VarArgsSaveSize += XLenInBytes;
19620 }
19621
19622 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19623
19624 // Copy the integer registers that may have been used for passing varargs
19625 // to the vararg save area.
19626 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19627 const Register Reg = RegInfo.createVirtualRegister(RC);
19628 RegInfo.addLiveIn(ArgRegs[I], Reg);
19629 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19630 SDValue Store = DAG.getStore(
19631 Chain, DL, ArgValue, FIN,
19632 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19633 OutChains.push_back(Store);
19634 FIN =
19635 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19636 }
19637 }
19638
19639 // Record the frame index of the first variable argument
19640 // which is a value necessary to VASTART.
19641 RVFI->setVarArgsFrameIndex(FI);
19642 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19643 }
19644
19645 // All stores are grouped in one node to allow the matching between
19646 // the size of Ins and InVals. This only happens for vararg functions.
19647 if (!OutChains.empty()) {
19648 OutChains.push_back(Chain);
19649 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19650 }
19651
19652 return Chain;
19653}
19654
19655/// isEligibleForTailCallOptimization - Check whether the call is eligible
19656/// for tail call optimization.
19657/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19658bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19659 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19660 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19661
19662 auto CalleeCC = CLI.CallConv;
19663 auto &Outs = CLI.Outs;
19664 auto &Caller = MF.getFunction();
19665 auto CallerCC = Caller.getCallingConv();
19666
19667 // Exception-handling functions need a special set of instructions to
19668 // indicate a return to the hardware. Tail-calling another function would
19669 // probably break this.
19670 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19671 // should be expanded as new function attributes are introduced.
19672 if (Caller.hasFnAttribute("interrupt"))
19673 return false;
19674
19675 // Do not tail call opt if the stack is used to pass parameters.
19676 if (CCInfo.getStackSize() != 0)
19677 return false;
19678
19679 // Do not tail call opt if any parameters need to be passed indirectly.
19680 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19681 // passed indirectly. So the address of the value will be passed in a
19682 // register, or if not available, then the address is put on the stack. In
19683 // order to pass indirectly, space on the stack often needs to be allocated
19684 // in order to store the value. In this case the CCInfo.getStackSize()
19685 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
19686 // are passed CCValAssign::Indirect.
19687 for (auto &VA : ArgLocs)
19688 if (VA.getLocInfo() == CCValAssign::Indirect)
19689 return false;
19690
19691 // Do not tail call opt if either caller or callee uses struct return
19692 // semantics.
19693 auto IsCallerStructRet = Caller.hasStructRetAttr();
19694 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19695 if (IsCallerStructRet || IsCalleeStructRet)
19696 return false;
19697
19698 // The callee has to preserve all registers the caller needs to preserve.
19699 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19700 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19701 if (CalleeCC != CallerCC) {
19702 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19703 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19704 return false;
19705 }
19706
19707 // Byval parameters hand the function a pointer directly into the stack area
19708 // we want to reuse during a tail call. Working around this *is* possible
19709 // but less efficient and uglier in LowerCall.
19710 for (auto &Arg : Outs)
19711 if (Arg.Flags.isByVal())
19712 return false;
19713
19714 return true;
19715}
19716
19717 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19718 return DAG.getDataLayout().getPrefTypeAlign(
19719 VT.getTypeForEVT(*DAG.getContext()));
19720}
19721
19722// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19723 // and output parameter nodes.
19724 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19725 SmallVectorImpl<SDValue> &InVals) const {
19726 SelectionDAG &DAG = CLI.DAG;
19727 SDLoc &DL = CLI.DL;
19728 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19729 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19730 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19731 SDValue Chain = CLI.Chain;
19732 SDValue Callee = CLI.Callee;
19733 bool &IsTailCall = CLI.IsTailCall;
19734 CallingConv::ID CallConv = CLI.CallConv;
19735 bool IsVarArg = CLI.IsVarArg;
19736 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19737 MVT XLenVT = Subtarget.getXLenVT();
19738
19739 MachineFunction &MF = DAG.getMachineFunction();
19740
19741 // Analyze the operands of the call, assigning locations to each operand.
19742 SmallVector<CCValAssign, 16> ArgLocs;
19743 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19744
19745 if (CallConv == CallingConv::GHC) {
19746 if (Subtarget.hasStdExtE())
19747 report_fatal_error("GHC calling convention is not supported on RVE!");
19748 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19749 } else
19750 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19752 : RISCV::CC_RISCV);
19753
19754 // Check if it's really possible to do a tail call.
19755 if (IsTailCall)
19756 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19757
19758 if (IsTailCall)
19759 ++NumTailCalls;
19760 else if (CLI.CB && CLI.CB->isMustTailCall())
19761 report_fatal_error("failed to perform tail call elimination on a call "
19762 "site marked musttail");
19763
19764 // Get a count of how many bytes are to be pushed on the stack.
19765 unsigned NumBytes = ArgCCInfo.getStackSize();
19766
19767 // Create local copies for byval args
19768 SmallVector<SDValue, 8> ByValArgs;
19769 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19770 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19771 if (!Flags.isByVal())
19772 continue;
19773
19774 SDValue Arg = OutVals[i];
19775 unsigned Size = Flags.getByValSize();
19776 Align Alignment = Flags.getNonZeroByValAlign();
19777
19778 int FI =
19779 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19780 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19781 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19782
19783 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19784 /*IsVolatile=*/false,
19785 /*AlwaysInline=*/false, IsTailCall,
19786 MachinePointerInfo(), MachinePointerInfo());
19787 ByValArgs.push_back(FIPtr);
19788 }
19789
19790 if (!IsTailCall)
19791 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19792
19793 // Copy argument values to their designated locations.
19794 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19795 SmallVector<SDValue, 8> MemOpChains;
19796 SDValue StackPtr;
19797 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19798 ++i, ++OutIdx) {
19799 CCValAssign &VA = ArgLocs[i];
19800 SDValue ArgValue = OutVals[OutIdx];
19801 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19802
19803 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19804 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19805 assert(VA.isRegLoc() && "Expected register VA assignment");
19806 assert(VA.needsCustom());
19807 SDValue SplitF64 = DAG.getNode(
19808 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19809 SDValue Lo = SplitF64.getValue(0);
19810 SDValue Hi = SplitF64.getValue(1);
19811
19812 Register RegLo = VA.getLocReg();
19813 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19814
19815 // Get the CCValAssign for the Hi part.
19816 CCValAssign &HiVA = ArgLocs[++i];
19817
19818 if (HiVA.isMemLoc()) {
19819 // Second half of f64 is passed on the stack.
19820 if (!StackPtr.getNode())
19821 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19822 SDValue Address =
19823 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19824 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19825 // Emit the store.
19826 MemOpChains.push_back(
19827 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19828 } else {
19829 // Second half of f64 is passed in another GPR.
19830 Register RegHigh = HiVA.getLocReg();
19831 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19832 }
19833 continue;
19834 }
19835
19836 // Promote the value if needed.
19837 // For now, only handle fully promoted and indirect arguments.
19838 if (VA.getLocInfo() == CCValAssign::Indirect) {
19839 // Store the argument in a stack slot and pass its address.
19840 Align StackAlign =
19841 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19842 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19843 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19844 // If the original argument was split (e.g. i128), we need
19845 // to store the required parts of it here (and pass just one address).
19846 // Vectors may be partly split to registers and partly to the stack, in
19847 // which case the base address is partly offset and subsequent stores are
19848 // relative to that.
19849 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19850 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19851 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19852 // Calculate the total size to store. We don't have access to what we're
19853 // actually storing other than performing the loop and collecting the
19854 // info.
19855 SmallVector<std::pair<SDValue, SDValue>> Parts;
19856 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19857 SDValue PartValue = OutVals[OutIdx + 1];
19858 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19859 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19860 EVT PartVT = PartValue.getValueType();
19861 if (PartVT.isScalableVector())
19862 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19863 StoredSize += PartVT.getStoreSize();
19864 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19865 Parts.push_back(std::make_pair(PartValue, Offset));
19866 ++i;
19867 ++OutIdx;
19868 }
19869 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19870 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19871 MemOpChains.push_back(
19872 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19873 MachinePointerInfo::getFixedStack(MF, FI)));
19874 for (const auto &Part : Parts) {
19875 SDValue PartValue = Part.first;
19876 SDValue PartOffset = Part.second;
19877 SDValue Address =
19878 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19879 MemOpChains.push_back(
19880 DAG.getStore(Chain, DL, PartValue, Address,
19881 MachinePointerInfo::getFixedStack(MF, FI)));
19882 }
19883 ArgValue = SpillSlot;
19884 } else {
19885 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19886 }
19887
19888 // Use local copy if it is a byval arg.
19889 if (Flags.isByVal())
19890 ArgValue = ByValArgs[j++];
19891
19892 if (VA.isRegLoc()) {
19893 // Queue up the argument copies and emit them at the end.
19894 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19895 } else {
19896 assert(VA.isMemLoc() && "Argument not register or memory");
19897 assert(!IsTailCall && "Tail call not allowed if stack is used "
19898 "for passing parameters");
19899
19900 // Work out the address of the stack slot.
19901 if (!StackPtr.getNode())
19902 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19903 SDValue Address =
19904 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19905 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19906
19907 // Emit the store.
19908 MemOpChains.push_back(
19909 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19910 }
19911 }
19912
19913 // Join the stores, which are independent of one another.
19914 if (!MemOpChains.empty())
19915 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19916
19917 SDValue Glue;
19918
19919 // Build a sequence of copy-to-reg nodes, chained and glued together.
19920 for (auto &Reg : RegsToPass) {
19921 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19922 Glue = Chain.getValue(1);
19923 }
19924
19925 // Validate that none of the argument registers have been marked as
19926 // reserved, if so report an error. Do the same for the return address if this
19927 // is not a tailcall.
19928 validateCCReservedRegs(RegsToPass, MF);
19929 if (!IsTailCall &&
19932 MF.getFunction(),
19933 "Return address register required, but has been reserved."});
19934
19935 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19936 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19937 // split it and then direct call can be matched by PseudoCALL.
19938 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19939 const GlobalValue *GV = S->getGlobal();
19940 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19941 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19942 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19943 }
19944
19945 // The first call operand is the chain and the second is the target address.
19946 SmallVector<SDValue, 8> Ops;
19947 Ops.push_back(Chain);
19948 Ops.push_back(Callee);
19949
19950 // Add argument registers to the end of the list so that they are
19951 // known live into the call.
19952 for (auto &Reg : RegsToPass)
19953 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19954
19955 if (!IsTailCall) {
19956 // Add a register mask operand representing the call-preserved registers.
19957 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19958 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19959 assert(Mask && "Missing call preserved mask for calling convention");
19960 Ops.push_back(DAG.getRegisterMask(Mask));
19961 }
19962
19963 // Glue the call to the argument copies, if any.
19964 if (Glue.getNode())
19965 Ops.push_back(Glue);
19966
19967 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19968 "Unexpected CFI type for a direct call");
19969
19970 // Emit the call.
19971 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19972
19973 if (IsTailCall) {
19974 MF.getFrameInfo().setHasTailCall();
19975 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19976 if (CLI.CFIType)
19977 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19978 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19979 return Ret;
19980 }
19981
19982 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19983 if (CLI.CFIType)
19984 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19985 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19986 Glue = Chain.getValue(1);
19987
19988 // Mark the end of the call, which is glued to the call itself.
19989 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19990 Glue = Chain.getValue(1);
19991
19992 // Assign locations to each value returned by this call.
19993 SmallVector<CCValAssign, 16> RVLocs;
19994 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19995 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19996
19997 // Copy all of the result registers out of their specified physreg.
19998 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19999 auto &VA = RVLocs[i];
20000 // Copy the value out
20001 SDValue RetValue =
20002 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20003 // Glue the RetValue to the end of the call sequence
20004 Chain = RetValue.getValue(1);
20005 Glue = RetValue.getValue(2);
20006
20007 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20008 assert(VA.needsCustom());
20009 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20010 MVT::i32, Glue);
20011 Chain = RetValue2.getValue(1);
20012 Glue = RetValue2.getValue(2);
20013 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20014 RetValue2);
20015 }
20016
20017 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20018
20019 InVals.push_back(RetValue);
20020 }
20021
20022 return Chain;
20023}
20024
20025 bool RISCVTargetLowering::CanLowerReturn(
20026 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20027 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20028 SmallVector<CCValAssign, 16> RVLocs;
20029 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20030
20031 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
20032
20033 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20034 MVT VT = Outs[i].VT;
20035 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20036 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
20037 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
20038 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
20039 nullptr, *this, Dispatcher))
20040 return false;
20041 }
20042 return true;
20043}
20044
20045 SDValue
20046 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20047 bool IsVarArg,
20048 const SmallVectorImpl<ISD::OutputArg> &Outs,
20049 const SmallVectorImpl<SDValue> &OutVals,
20050 const SDLoc &DL, SelectionDAG &DAG) const {
20051 MachineFunction &MF = DAG.getMachineFunction();
20052 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20053
20054 // Stores the assignment of the return value to a location.
20055 SmallVector<CCValAssign, 16> RVLocs;
20056
20057 // Info about the registers and stack slot.
20058 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20059 *DAG.getContext());
20060
20061 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20062 nullptr, RISCV::CC_RISCV);
20063
20064 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20065 report_fatal_error("GHC functions return void only");
20066
20067 SDValue Glue;
20068 SmallVector<SDValue, 4> RetOps(1, Chain);
20069
20070 // Copy the result values into the output registers.
20071 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20072 SDValue Val = OutVals[OutIdx];
20073 CCValAssign &VA = RVLocs[i];
20074 assert(VA.isRegLoc() && "Can only return in registers!");
20075
20076 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20077 // Handle returning f64 on RV32D with a soft float ABI.
20078 assert(VA.isRegLoc() && "Expected return via registers");
20079 assert(VA.needsCustom());
20080 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20081 DAG.getVTList(MVT::i32, MVT::i32), Val);
20082 SDValue Lo = SplitF64.getValue(0);
20083 SDValue Hi = SplitF64.getValue(1);
20084 Register RegLo = VA.getLocReg();
20085 Register RegHi = RVLocs[++i].getLocReg();
20086
20087 if (STI.isRegisterReservedByUser(RegLo) ||
20088 STI.isRegisterReservedByUser(RegHi))
20089 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20090 MF.getFunction(),
20091 "Return value register required, but has been reserved."});
20092
20093 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20094 Glue = Chain.getValue(1);
20095 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20096 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20097 Glue = Chain.getValue(1);
20098 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20099 } else {
20100 // Handle a 'normal' return.
20101 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20102 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20103
20104 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20105 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20106 MF.getFunction(),
20107 "Return value register required, but has been reserved."});
20108
20109 // Guarantee that all emitted copies are stuck together.
20110 Glue = Chain.getValue(1);
20111 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20112 }
20113 }
20114
20115 RetOps[0] = Chain; // Update chain.
20116
20117 // Add the glue node if we have it.
20118 if (Glue.getNode()) {
20119 RetOps.push_back(Glue);
20120 }
20121
20122 if (any_of(RVLocs,
20123 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20124 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20125
20126 unsigned RetOpc = RISCVISD::RET_GLUE;
20127 // Interrupt service routines use different return instructions.
20128 const Function &Func = DAG.getMachineFunction().getFunction();
20129 if (Func.hasFnAttribute("interrupt")) {
20130 if (!Func.getReturnType()->isVoidTy())
20131 report_fatal_error(
20132 "Functions with the interrupt attribute must have void return type!");
20133
20134 MachineFunction &MF = DAG.getMachineFunction();
20135 StringRef Kind =
20136 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20137
20138 if (Kind == "supervisor")
20139 RetOpc = RISCVISD::SRET_GLUE;
20140 else
20141 RetOpc = RISCVISD::MRET_GLUE;
20142 }
20143
20144 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20145}
20146
20147void RISCVTargetLowering::validateCCReservedRegs(
20148 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20149 MachineFunction &MF) const {
20150 const Function &F = MF.getFunction();
20151 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20152
20153 if (llvm::any_of(Regs, [&STI](auto Reg) {
20154 return STI.isRegisterReservedByUser(Reg.first);
20155 }))
20156 F.getContext().diagnose(DiagnosticInfoUnsupported{
20157 F, "Argument register required, but has been reserved."});
20158}
20159
20160// Check if the result of the node is only used as a return value, as
20161 // otherwise we can't perform a tail-call.
20162 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20163 if (N->getNumValues() != 1)
20164 return false;
20165 if (!N->hasNUsesOfValue(1, 0))
20166 return false;
20167
20168 SDNode *Copy = *N->use_begin();
20169
20170 if (Copy->getOpcode() == ISD::BITCAST) {
20171 return isUsedByReturnOnly(Copy, Chain);
20172 }
20173
20174 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20175 // with soft float ABIs.
20176 if (Copy->getOpcode() != ISD::CopyToReg) {
20177 return false;
20178 }
20179
20180 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20181 // isn't safe to perform a tail call.
20182 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20183 return false;
20184
20185 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20186 bool HasRet = false;
20187 for (SDNode *Node : Copy->uses()) {
20188 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20189 return false;
20190 HasRet = true;
20191 }
20192 if (!HasRet)
20193 return false;
20194
20195 Chain = Copy->getOperand(0);
20196 return true;
20197}
20198
20199 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20200 return CI->isTailCall();
20201}
20202
20203const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20204#define NODE_NAME_CASE(NODE) \
20205 case RISCVISD::NODE: \
20206 return "RISCVISD::" #NODE;
20207 // clang-format off
20208 switch ((RISCVISD::NodeType)Opcode) {
20209 case RISCVISD::FIRST_NUMBER:
20210 break;
20211 NODE_NAME_CASE(RET_GLUE)
20212 NODE_NAME_CASE(SRET_GLUE)
20213 NODE_NAME_CASE(MRET_GLUE)
20214 NODE_NAME_CASE(CALL)
20215 NODE_NAME_CASE(SELECT_CC)
20216 NODE_NAME_CASE(BR_CC)
20217 NODE_NAME_CASE(BuildPairF64)
20218 NODE_NAME_CASE(SplitF64)
20219 NODE_NAME_CASE(TAIL)
20220 NODE_NAME_CASE(ADD_LO)
20221 NODE_NAME_CASE(HI)
20222 NODE_NAME_CASE(LLA)
20223 NODE_NAME_CASE(ADD_TPREL)
20224 NODE_NAME_CASE(MULHSU)
20225 NODE_NAME_CASE(SHL_ADD)
20226 NODE_NAME_CASE(SLLW)
20227 NODE_NAME_CASE(SRAW)
20228 NODE_NAME_CASE(SRLW)
20229 NODE_NAME_CASE(DIVW)
20230 NODE_NAME_CASE(DIVUW)
20231 NODE_NAME_CASE(REMUW)
20232 NODE_NAME_CASE(ROLW)
20233 NODE_NAME_CASE(RORW)
20234 NODE_NAME_CASE(CLZW)
20235 NODE_NAME_CASE(CTZW)
20236 NODE_NAME_CASE(ABSW)
20237 NODE_NAME_CASE(FMV_H_X)
20238 NODE_NAME_CASE(FMV_X_ANYEXTH)
20239 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20240 NODE_NAME_CASE(FMV_W_X_RV64)
20241 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20242 NODE_NAME_CASE(FCVT_X)
20243 NODE_NAME_CASE(FCVT_XU)
20244 NODE_NAME_CASE(FCVT_W_RV64)
20245 NODE_NAME_CASE(FCVT_WU_RV64)
20246 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20247 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20248 NODE_NAME_CASE(FP_ROUND_BF16)
20249 NODE_NAME_CASE(FP_EXTEND_BF16)
20250 NODE_NAME_CASE(FROUND)
20251 NODE_NAME_CASE(FCLASS)
20252 NODE_NAME_CASE(FMAX)
20253 NODE_NAME_CASE(FMIN)
20254 NODE_NAME_CASE(READ_COUNTER_WIDE)
20255 NODE_NAME_CASE(BREV8)
20256 NODE_NAME_CASE(ORC_B)
20257 NODE_NAME_CASE(ZIP)
20258 NODE_NAME_CASE(UNZIP)
20259 NODE_NAME_CASE(CLMUL)
20260 NODE_NAME_CASE(CLMULH)
20261 NODE_NAME_CASE(CLMULR)
20262 NODE_NAME_CASE(MOPR)
20263 NODE_NAME_CASE(MOPRR)
20264 NODE_NAME_CASE(SHA256SIG0)
20265 NODE_NAME_CASE(SHA256SIG1)
20266 NODE_NAME_CASE(SHA256SUM0)
20267 NODE_NAME_CASE(SHA256SUM1)
20268 NODE_NAME_CASE(SM4KS)
20269 NODE_NAME_CASE(SM4ED)
20270 NODE_NAME_CASE(SM3P0)
20271 NODE_NAME_CASE(SM3P1)
20272 NODE_NAME_CASE(TH_LWD)
20273 NODE_NAME_CASE(TH_LWUD)
20274 NODE_NAME_CASE(TH_LDD)
20275 NODE_NAME_CASE(TH_SWD)
20276 NODE_NAME_CASE(TH_SDD)
20277 NODE_NAME_CASE(VMV_V_V_VL)
20278 NODE_NAME_CASE(VMV_V_X_VL)
20279 NODE_NAME_CASE(VFMV_V_F_VL)
20280 NODE_NAME_CASE(VMV_X_S)
20281 NODE_NAME_CASE(VMV_S_X_VL)
20282 NODE_NAME_CASE(VFMV_S_F_VL)
20283 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20284 NODE_NAME_CASE(READ_VLENB)
20285 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20286 NODE_NAME_CASE(VSLIDEUP_VL)
20287 NODE_NAME_CASE(VSLIDE1UP_VL)
20288 NODE_NAME_CASE(VSLIDEDOWN_VL)
20289 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20290 NODE_NAME_CASE(VFSLIDE1UP_VL)
20291 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20292 NODE_NAME_CASE(VID_VL)
20293 NODE_NAME_CASE(VFNCVT_ROD_VL)
20294 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20295 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20296 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20297 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20298 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20299 NODE_NAME_CASE(VECREDUCE_AND_VL)
20300 NODE_NAME_CASE(VECREDUCE_OR_VL)
20301 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20302 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20303 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20304 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20305 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20306 NODE_NAME_CASE(ADD_VL)
20307 NODE_NAME_CASE(AND_VL)
20308 NODE_NAME_CASE(MUL_VL)
20309 NODE_NAME_CASE(OR_VL)
20310 NODE_NAME_CASE(SDIV_VL)
20311 NODE_NAME_CASE(SHL_VL)
20312 NODE_NAME_CASE(SREM_VL)
20313 NODE_NAME_CASE(SRA_VL)
20314 NODE_NAME_CASE(SRL_VL)
20315 NODE_NAME_CASE(ROTL_VL)
20316 NODE_NAME_CASE(ROTR_VL)
20317 NODE_NAME_CASE(SUB_VL)
20318 NODE_NAME_CASE(UDIV_VL)
20319 NODE_NAME_CASE(UREM_VL)
20320 NODE_NAME_CASE(XOR_VL)
20321 NODE_NAME_CASE(AVGFLOORS_VL)
20322 NODE_NAME_CASE(AVGFLOORU_VL)
20323 NODE_NAME_CASE(AVGCEILS_VL)
20324 NODE_NAME_CASE(AVGCEILU_VL)
20325 NODE_NAME_CASE(SADDSAT_VL)
20326 NODE_NAME_CASE(UADDSAT_VL)
20327 NODE_NAME_CASE(SSUBSAT_VL)
20328 NODE_NAME_CASE(USUBSAT_VL)
20329 NODE_NAME_CASE(VNCLIP_VL)
20330 NODE_NAME_CASE(VNCLIPU_VL)
20331 NODE_NAME_CASE(FADD_VL)
20332 NODE_NAME_CASE(FSUB_VL)
20333 NODE_NAME_CASE(FMUL_VL)
20334 NODE_NAME_CASE(FDIV_VL)
20335 NODE_NAME_CASE(FNEG_VL)
20336 NODE_NAME_CASE(FABS_VL)
20337 NODE_NAME_CASE(FSQRT_VL)
20338 NODE_NAME_CASE(FCLASS_VL)
20339 NODE_NAME_CASE(VFMADD_VL)
20340 NODE_NAME_CASE(VFNMADD_VL)
20341 NODE_NAME_CASE(VFMSUB_VL)
20342 NODE_NAME_CASE(VFNMSUB_VL)
20343 NODE_NAME_CASE(VFWMADD_VL)
20344 NODE_NAME_CASE(VFWNMADD_VL)
20345 NODE_NAME_CASE(VFWMSUB_VL)
20346 NODE_NAME_CASE(VFWNMSUB_VL)
20347 NODE_NAME_CASE(FCOPYSIGN_VL)
20348 NODE_NAME_CASE(SMIN_VL)
20349 NODE_NAME_CASE(SMAX_VL)
20350 NODE_NAME_CASE(UMIN_VL)
20351 NODE_NAME_CASE(UMAX_VL)
20352 NODE_NAME_CASE(BITREVERSE_VL)
20353 NODE_NAME_CASE(BSWAP_VL)
20354 NODE_NAME_CASE(CTLZ_VL)
20355 NODE_NAME_CASE(CTTZ_VL)
20356 NODE_NAME_CASE(CTPOP_VL)
20357 NODE_NAME_CASE(VFMIN_VL)
20358 NODE_NAME_CASE(VFMAX_VL)
20359 NODE_NAME_CASE(MULHS_VL)
20360 NODE_NAME_CASE(MULHU_VL)
20361 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20362 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20363 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20364 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20365 NODE_NAME_CASE(VFCVT_X_F_VL)
20366 NODE_NAME_CASE(VFCVT_XU_F_VL)
20367 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20368 NODE_NAME_CASE(SINT_TO_FP_VL)
20369 NODE_NAME_CASE(UINT_TO_FP_VL)
20370 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20371 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20372 NODE_NAME_CASE(FP_EXTEND_VL)
20373 NODE_NAME_CASE(FP_ROUND_VL)
20374 NODE_NAME_CASE(STRICT_FADD_VL)
20375 NODE_NAME_CASE(STRICT_FSUB_VL)
20376 NODE_NAME_CASE(STRICT_FMUL_VL)
20377 NODE_NAME_CASE(STRICT_FDIV_VL)
20378 NODE_NAME_CASE(STRICT_FSQRT_VL)
20379 NODE_NAME_CASE(STRICT_VFMADD_VL)
20380 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20381 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20382 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20383 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20384 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20385 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20386 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20387 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20388 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20389 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20390 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20391 NODE_NAME_CASE(STRICT_FSETCC_VL)
20392 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20393 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20394 NODE_NAME_CASE(VWMUL_VL)
20395 NODE_NAME_CASE(VWMULU_VL)
20396 NODE_NAME_CASE(VWMULSU_VL)
20397 NODE_NAME_CASE(VWADD_VL)
20398 NODE_NAME_CASE(VWADDU_VL)
20399 NODE_NAME_CASE(VWSUB_VL)
20400 NODE_NAME_CASE(VWSUBU_VL)
20401 NODE_NAME_CASE(VWADD_W_VL)
20402 NODE_NAME_CASE(VWADDU_W_VL)
20403 NODE_NAME_CASE(VWSUB_W_VL)
20404 NODE_NAME_CASE(VWSUBU_W_VL)
20405 NODE_NAME_CASE(VWSLL_VL)
20406 NODE_NAME_CASE(VFWMUL_VL)
20407 NODE_NAME_CASE(VFWADD_VL)
20408 NODE_NAME_CASE(VFWSUB_VL)
20409 NODE_NAME_CASE(VFWADD_W_VL)
20410 NODE_NAME_CASE(VFWSUB_W_VL)
20411 NODE_NAME_CASE(VWMACC_VL)
20412 NODE_NAME_CASE(VWMACCU_VL)
20413 NODE_NAME_CASE(VWMACCSU_VL)
20414 NODE_NAME_CASE(VNSRL_VL)
20415 NODE_NAME_CASE(SETCC_VL)
20416 NODE_NAME_CASE(VMERGE_VL)
20417 NODE_NAME_CASE(VMAND_VL)
20418 NODE_NAME_CASE(VMOR_VL)
20419 NODE_NAME_CASE(VMXOR_VL)
20420 NODE_NAME_CASE(VMCLR_VL)
20421 NODE_NAME_CASE(VMSET_VL)
20422 NODE_NAME_CASE(VRGATHER_VX_VL)
20423 NODE_NAME_CASE(VRGATHER_VV_VL)
20424 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20425 NODE_NAME_CASE(VSEXT_VL)
20426 NODE_NAME_CASE(VZEXT_VL)
20427 NODE_NAME_CASE(VCPOP_VL)
20428 NODE_NAME_CASE(VFIRST_VL)
20429 NODE_NAME_CASE(READ_CSR)
20430 NODE_NAME_CASE(WRITE_CSR)
20431 NODE_NAME_CASE(SWAP_CSR)
20432 NODE_NAME_CASE(CZERO_EQZ)
20433 NODE_NAME_CASE(CZERO_NEZ)
20434 NODE_NAME_CASE(SW_GUARDED_BRIND)
20435 NODE_NAME_CASE(SF_VC_XV_SE)
20436 NODE_NAME_CASE(SF_VC_IV_SE)
20437 NODE_NAME_CASE(SF_VC_VV_SE)
20438 NODE_NAME_CASE(SF_VC_FV_SE)
20439 NODE_NAME_CASE(SF_VC_XVV_SE)
20440 NODE_NAME_CASE(SF_VC_IVV_SE)
20441 NODE_NAME_CASE(SF_VC_VVV_SE)
20442 NODE_NAME_CASE(SF_VC_FVV_SE)
20443 NODE_NAME_CASE(SF_VC_XVW_SE)
20444 NODE_NAME_CASE(SF_VC_IVW_SE)
20445 NODE_NAME_CASE(SF_VC_VVW_SE)
20446 NODE_NAME_CASE(SF_VC_FVW_SE)
20447 NODE_NAME_CASE(SF_VC_V_X_SE)
20448 NODE_NAME_CASE(SF_VC_V_I_SE)
20449 NODE_NAME_CASE(SF_VC_V_XV_SE)
20450 NODE_NAME_CASE(SF_VC_V_IV_SE)
20451 NODE_NAME_CASE(SF_VC_V_VV_SE)
20452 NODE_NAME_CASE(SF_VC_V_FV_SE)
20453 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20454 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20455 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20456 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20457 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20458 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20459 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20460 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20461 }
20462 // clang-format on
20463 return nullptr;
20464#undef NODE_NAME_CASE
20465}
20466
20467/// getConstraintType - Given a constraint letter, return the type of
20468/// constraint it is for this target.
20469RISCVTargetLowering::ConstraintType
20470RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20471 if (Constraint.size() == 1) {
20472 switch (Constraint[0]) {
20473 default:
20474 break;
20475 case 'f':
20476 return C_RegisterClass;
20477 case 'I':
20478 case 'J':
20479 case 'K':
20480 return C_Immediate;
20481 case 'A':
20482 return C_Memory;
20483 case 's':
20484 case 'S': // A symbolic address
20485 return C_Other;
20486 }
20487 } else {
20488 if (Constraint == "vr" || Constraint == "vm")
20489 return C_RegisterClass;
20490 }
20491 return TargetLowering::getConstraintType(Constraint);
20492}
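// Illustrative uses of the constraint letters classified above (GNU inline asm):
//   asm ("addi %0, %1, %2" : "=r"(res) : "r"(x), "I"(12));       // 'I': simm12
//   asm ("lw %0, %1" : "=r"(val) : "A"(*ptr));                   // 'A': address held in a register
//   asm ("vadd.vv %0, %1, %2" : "=vr"(vd) : "vr"(va), "vr"(vb)); // 'vr': RVV register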
20493
20494std::pair<unsigned, const TargetRegisterClass *>
20495RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20496 StringRef Constraint,
20497 MVT VT) const {
20498 // First, see if this is a constraint that directly corresponds to a RISC-V
20499 // register class.
20500 if (Constraint.size() == 1) {
20501 switch (Constraint[0]) {
20502 case 'r':
20503 // TODO: Support fixed vectors up to XLen for P extension?
20504 if (VT.isVector())
20505 break;
20506 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20507 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20508 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20509 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20510 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20511 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20512 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20513 case 'f':
20514 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20515 return std::make_pair(0U, &RISCV::FPR16RegClass);
20516 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20517 return std::make_pair(0U, &RISCV::FPR32RegClass);
20518 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20519 return std::make_pair(0U, &RISCV::FPR64RegClass);
20520 break;
20521 default:
20522 break;
20523 }
20524 } else if (Constraint == "vr") {
20525 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20526 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20527 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20528 return std::make_pair(0U, RC);
20529 }
20530 } else if (Constraint == "vm") {
20531 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20532 return std::make_pair(0U, &RISCV::VMV0RegClass);
20533 }
20534
20535 // Clang will correctly decode the usage of register name aliases into their
20536 // official names. However, other frontends like `rustc` do not. This allows
20537 // users of these frontends to use the ABI names for registers in LLVM-style
20538 // register constraints.
20539 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20540 .Case("{zero}", RISCV::X0)
20541 .Case("{ra}", RISCV::X1)
20542 .Case("{sp}", RISCV::X2)
20543 .Case("{gp}", RISCV::X3)
20544 .Case("{tp}", RISCV::X4)
20545 .Case("{t0}", RISCV::X5)
20546 .Case("{t1}", RISCV::X6)
20547 .Case("{t2}", RISCV::X7)
20548 .Cases("{s0}", "{fp}", RISCV::X8)
20549 .Case("{s1}", RISCV::X9)
20550 .Case("{a0}", RISCV::X10)
20551 .Case("{a1}", RISCV::X11)
20552 .Case("{a2}", RISCV::X12)
20553 .Case("{a3}", RISCV::X13)
20554 .Case("{a4}", RISCV::X14)
20555 .Case("{a5}", RISCV::X15)
20556 .Case("{a6}", RISCV::X16)
20557 .Case("{a7}", RISCV::X17)
20558 .Case("{s2}", RISCV::X18)
20559 .Case("{s3}", RISCV::X19)
20560 .Case("{s4}", RISCV::X20)
20561 .Case("{s5}", RISCV::X21)
20562 .Case("{s6}", RISCV::X22)
20563 .Case("{s7}", RISCV::X23)
20564 .Case("{s8}", RISCV::X24)
20565 .Case("{s9}", RISCV::X25)
20566 .Case("{s10}", RISCV::X26)
20567 .Case("{s11}", RISCV::X27)
20568 .Case("{t3}", RISCV::X28)
20569 .Case("{t4}", RISCV::X29)
20570 .Case("{t5}", RISCV::X30)
20571 .Case("{t6}", RISCV::X31)
20572 .Default(RISCV::NoRegister);
20573 if (XRegFromAlias != RISCV::NoRegister)
20574 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20575
20576 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20577 // TableGen record rather than the AsmName to choose registers for InlineAsm
20578 // constraints, plus we want to match those names to the widest floating point
20579 // register type available, manually select floating point registers here.
20580 //
20581 // The second case is the ABI name of the register, so that frontends can also
20582 // use the ABI names in register constraint lists.
20583 if (Subtarget.hasStdExtF()) {
20584 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20585 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20586 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20587 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20588 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20589 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20590 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20591 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20592 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20593 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20594 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20595 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20596 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20597 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20598 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20599 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20600 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20601 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20602 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20603 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20604 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20605 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20606 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20607 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20608 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20609 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20610 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20611 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20612 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20613 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20614 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20615 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20616 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20617 .Default(RISCV::NoRegister);
20618 if (FReg != RISCV::NoRegister) {
20619 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20620 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20621 unsigned RegNo = FReg - RISCV::F0_F;
20622 unsigned DReg = RISCV::F0_D + RegNo;
20623 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20624 }
20625 if (VT == MVT::f32 || VT == MVT::Other)
20626 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20627 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20628 unsigned RegNo = FReg - RISCV::F0_F;
20629 unsigned HReg = RISCV::F0_H + RegNo;
20630 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20631 }
20632 }
20633 }
20634
20635 if (Subtarget.hasVInstructions()) {
20636 Register VReg = StringSwitch<Register>(Constraint.lower())
20637 .Case("{v0}", RISCV::V0)
20638 .Case("{v1}", RISCV::V1)
20639 .Case("{v2}", RISCV::V2)
20640 .Case("{v3}", RISCV::V3)
20641 .Case("{v4}", RISCV::V4)
20642 .Case("{v5}", RISCV::V5)
20643 .Case("{v6}", RISCV::V6)
20644 .Case("{v7}", RISCV::V7)
20645 .Case("{v8}", RISCV::V8)
20646 .Case("{v9}", RISCV::V9)
20647 .Case("{v10}", RISCV::V10)
20648 .Case("{v11}", RISCV::V11)
20649 .Case("{v12}", RISCV::V12)
20650 .Case("{v13}", RISCV::V13)
20651 .Case("{v14}", RISCV::V14)
20652 .Case("{v15}", RISCV::V15)
20653 .Case("{v16}", RISCV::V16)
20654 .Case("{v17}", RISCV::V17)
20655 .Case("{v18}", RISCV::V18)
20656 .Case("{v19}", RISCV::V19)
20657 .Case("{v20}", RISCV::V20)
20658 .Case("{v21}", RISCV::V21)
20659 .Case("{v22}", RISCV::V22)
20660 .Case("{v23}", RISCV::V23)
20661 .Case("{v24}", RISCV::V24)
20662 .Case("{v25}", RISCV::V25)
20663 .Case("{v26}", RISCV::V26)
20664 .Case("{v27}", RISCV::V27)
20665 .Case("{v28}", RISCV::V28)
20666 .Case("{v29}", RISCV::V29)
20667 .Case("{v30}", RISCV::V30)
20668 .Case("{v31}", RISCV::V31)
20669 .Default(RISCV::NoRegister);
20670 if (VReg != RISCV::NoRegister) {
20671 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20672 return std::make_pair(VReg, &RISCV::VMRegClass);
20673 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20674 return std::make_pair(VReg, &RISCV::VRRegClass);
20675 for (const auto *RC :
20676 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20677 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20678 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20679 return std::make_pair(VReg, RC);
20680 }
20681 }
20682 }
20683 }
20684
20685 std::pair<Register, const TargetRegisterClass *> Res =
20686 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20687
20688 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20689 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20690 // Subtarget into account.
20691 if (Res.second == &RISCV::GPRF16RegClass ||
20692 Res.second == &RISCV::GPRF32RegClass ||
20693 Res.second == &RISCV::GPRPairRegClass)
20694 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20695
20696 return Res;
20697}
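// Note on the alias table above: a constraint such as "{a0}" resolves to
// RISCV::X10 here, while the TableGen record form "{x10}" is already handled
// by the generic TargetLowering path invoked at the end of this function.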
20698
20699InlineAsm::ConstraintCode
20700RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20701 // Currently only support length 1 constraints.
20702 if (ConstraintCode.size() == 1) {
20703 switch (ConstraintCode[0]) {
20704 case 'A':
20705 return InlineAsm::ConstraintCode::A;
20706 default:
20707 break;
20708 }
20709 }
20710
20711 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20712}
20713
20714void RISCVTargetLowering::LowerAsmOperandForConstraint(
20715 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20716 SelectionDAG &DAG) const {
20717 // Currently only support length 1 constraints.
20718 if (Constraint.size() == 1) {
20719 switch (Constraint[0]) {
20720 case 'I':
20721 // Validate & create a 12-bit signed immediate operand.
20722 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20723 uint64_t CVal = C->getSExtValue();
20724 if (isInt<12>(CVal))
20725 Ops.push_back(
20726 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20727 }
20728 return;
20729 case 'J':
20730 // Validate & create an integer zero operand.
20731 if (isNullConstant(Op))
20732 Ops.push_back(
20733 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20734 return;
20735 case 'K':
20736 // Validate & create a 5-bit unsigned immediate operand.
20737 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20738 uint64_t CVal = C->getZExtValue();
20739 if (isUInt<5>(CVal))
20740 Ops.push_back(
20741 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20742 }
20743 return;
20744 case 'S':
20745 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20746 return;
20747 default:
20748 break;
20749 }
20750 }
20751 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20752}
20753
20754Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20755 Instruction *Inst,
20756 AtomicOrdering Ord) const {
20757 if (Subtarget.hasStdExtZtso()) {
20758 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20759 return Builder.CreateFence(Ord);
20760 return nullptr;
20761 }
20762
20763 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20764 return Builder.CreateFence(Ord);
20765 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20766 return Builder.CreateFence(AtomicOrdering::Release);
20767 return nullptr;
20768}
20769
20770Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20771 Instruction *Inst,
20772 AtomicOrdering Ord) const {
20773 if (Subtarget.hasStdExtZtso()) {
20774 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20775 return Builder.CreateFence(Ord);
20776 return nullptr;
20777 }
20778
20779 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20780 return Builder.CreateFence(AtomicOrdering::Acquire);
20781 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20782 Ord == AtomicOrdering::SequentiallyConsistent)
20783 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20784 return nullptr;
20785}
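// Net effect of the two fence hooks above under RVWMO: a seq_cst load gets a
// leading "fence rw,rw" and a trailing "fence r,rw"; a seq_cst store gets a
// leading "fence rw,w" (plus a trailing "fence rw,rw" when
// enableSeqCstTrailingFence() is set). With Ztso, only the seq_cst cases keep
// a fence: leading for loads, trailing for stores.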
20786
20787TargetLowering::AtomicExpansionKind
20788RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20789 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20790 // point operations can't be used in an lr/sc sequence without breaking the
20791 // forward-progress guarantee.
20792 if (AI->isFloatingPointOperation() ||
20793 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20794 AI->getOperation() == AtomicRMWInst::UDecWrap)
20795 return AtomicExpansionKind::CmpXChg;
20796
20797 // Don't expand forced atomics, we want to have __sync libcalls instead.
20798 if (Subtarget.hasForcedAtomics())
20799 return AtomicExpansionKind::None;
20800
20801 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20802 if (AI->getOperation() == AtomicRMWInst::Nand) {
20803 if (Subtarget.hasStdExtZacas() &&
20804 (Size >= 32 || Subtarget.hasStdExtZabha()))
20805 return AtomicExpansionKind::CmpXChg;
20806 if (Size < 32)
20807 return AtomicExpansionKind::MaskedIntrinsic;
20808 }
20809
20810 if (Size < 32 && !Subtarget.hasStdExtZabha())
20811 return AtomicExpansionKind::MaskedIntrinsic;
20812
20813 return AtomicExpansionKind::None;
20814}
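// For example, without Zabha an `atomicrmw add ptr %p, i8 1 monotonic` is
// reported as MaskedIntrinsic here and is rewritten to operate on the aligned
// 32/64-bit word containing the byte via @llvm.riscv.masked.atomicrmw.add.*
// (see emitMaskedAtomicRMWIntrinsic below).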
20815
20816static Intrinsic::ID
20817getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20818 if (XLen == 32) {
20819 switch (BinOp) {
20820 default:
20821 llvm_unreachable("Unexpected AtomicRMW BinOp");
20822 case AtomicRMWInst::Xchg:
20823 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20824 case AtomicRMWInst::Add:
20825 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20826 case AtomicRMWInst::Sub:
20827 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20828 case AtomicRMWInst::Nand:
20829 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20830 case AtomicRMWInst::Max:
20831 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20832 case AtomicRMWInst::Min:
20833 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20834 case AtomicRMWInst::UMax:
20835 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20836 case AtomicRMWInst::UMin:
20837 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20838 }
20839 }
20840
20841 if (XLen == 64) {
20842 switch (BinOp) {
20843 default:
20844 llvm_unreachable("Unexpected AtomicRMW BinOp");
20845 case AtomicRMWInst::Xchg:
20846 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20847 case AtomicRMWInst::Add:
20848 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20849 case AtomicRMWInst::Sub:
20850 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20851 case AtomicRMWInst::Nand:
20852 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20853 case AtomicRMWInst::Max:
20854 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20855 case AtomicRMWInst::Min:
20856 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20857 case AtomicRMWInst::UMax:
20858 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20859 case AtomicRMWInst::UMin:
20860 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20861 }
20862 }
20863
20864 llvm_unreachable("Unexpected XLen\n");
20865}
20866
20867Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20868 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20869 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20870 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20871 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20872 // mask, as this produces better code than the LR/SC loop emitted by
20873 // int_riscv_masked_atomicrmw_xchg.
20874 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20875 isa<ConstantInt>(AI->getValOperand())) {
20876 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20877 if (CVal->isZero())
20878 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20879 Builder.CreateNot(Mask, "Inv_Mask"),
20880 AI->getAlign(), Ord);
20881 if (CVal->isMinusOne())
20882 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20883 AI->getAlign(), Ord);
20884 }
20885
20886 unsigned XLen = Subtarget.getXLen();
20887 Value *Ordering =
20888 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20889 Type *Tys[] = {AlignedAddr->getType()};
20890 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20891 AI->getModule(),
20892 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20893
20894 if (XLen == 64) {
20895 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20896 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20897 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20898 }
20899
20900 Value *Result;
20901
20902 // Must pass the shift amount needed to sign extend the loaded value prior
20903 // to performing a signed comparison for min/max. ShiftAmt is the number of
20904 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20905 // is the number of bits to left+right shift the value in order to
20906 // sign-extend.
20907 if (AI->getOperation() == AtomicRMWInst::Min ||
20908 AI->getOperation() == AtomicRMWInst::Max) {
20909 const DataLayout &DL = AI->getDataLayout();
20910 unsigned ValWidth =
20911 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20912 Value *SextShamt =
20913 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20914 Result = Builder.CreateCall(LrwOpScwLoop,
20915 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20916 } else {
20917 Result =
20918 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20919 }
20920
20921 if (XLen == 64)
20922 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20923 return Result;
20924}
20925
20926TargetLowering::AtomicExpansionKind
20927RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20928 AtomicCmpXchgInst *CI) const {
20929 // Don't expand forced atomics, we want to have __sync libcalls instead.
20930 if (Subtarget.hasForcedAtomics())
20931 return AtomicExpansionKind::None;
20932
20933 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20934 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20935 (Size == 8 || Size == 16))
20936 return AtomicExpansionKind::MaskedIntrinsic;
20937 return AtomicExpansionKind::None;
20938}
20939
20940Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20941 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20942 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20943 unsigned XLen = Subtarget.getXLen();
20944 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20945 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20946 if (XLen == 64) {
20947 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20948 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20949 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20950 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20951 }
20952 Type *Tys[] = {AlignedAddr->getType()};
20953 Function *MaskedCmpXchg =
20954 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20955 Value *Result = Builder.CreateCall(
20956 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20957 if (XLen == 64)
20958 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20959 return Result;
20960}
20961
20962bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20963 EVT DataVT) const {
20964 // We have indexed loads for all supported EEW types. Indices are always
20965 // zero extended.
20966 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20967 isTypeLegal(Extend.getValueType()) &&
20968 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20969 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20970}
20971
20972bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20973 EVT VT) const {
20974 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20975 return false;
20976
20977 switch (FPVT.getSimpleVT().SimpleTy) {
20978 case MVT::f16:
20979 return Subtarget.hasStdExtZfhmin();
20980 case MVT::f32:
20981 return Subtarget.hasStdExtF();
20982 case MVT::f64:
20983 return Subtarget.hasStdExtD();
20984 default:
20985 return false;
20986 }
20987}
20988
20989unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20990 // If we are using the small code model, we can reduce size of jump table
20991 // entry to 4 bytes.
20992 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20993 getTargetMachine().getCodeModel() == CodeModel::Small) {
20994 return MachineJumpTableInfo::EK_Custom32;
20995 }
20996 return TargetLowering::getJumpTableEncoding();
20997}
20998
20999const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21000 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21001 unsigned uid, MCContext &Ctx) const {
21002 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21003 "Unexpected custom legalisation");
21004 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21005}
21006
21007bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
21008 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21009 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21010 // a power of two as well.
21011 // FIXME: This doesn't work for zve32, but that's already broken
21012 // elsewhere for the same reason.
21013 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21014 static_assert(RISCV::RVVBitsPerBlock == 64,
21015 "RVVBitsPerBlock changed, audit needed");
21016 return true;
21017}
21018
21019bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
21020 SDValue &Offset,
21021 ISD::MemIndexedMode &AM,
21022 SelectionDAG &DAG) const {
21023 // Target does not support indexed loads.
21024 if (!Subtarget.hasVendorXTHeadMemIdx())
21025 return false;
21026
21027 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21028 return false;
21029
21030 Base = Op->getOperand(0);
21031 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21032 int64_t RHSC = RHS->getSExtValue();
21033 if (Op->getOpcode() == ISD::SUB)
21034 RHSC = -(uint64_t)RHSC;
21035
21036 // The constants that can be encoded in the THeadMemIdx instructions
21037 // are of the form (sign_extend(imm5) << imm2).
21038 bool isLegalIndexedOffset = false;
21039 for (unsigned i = 0; i < 4; i++)
21040 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21041 isLegalIndexedOffset = true;
21042 break;
21043 }
21044
21045 if (!isLegalIndexedOffset)
21046 return false;
21047
21048 Offset = Op->getOperand(1);
21049 return true;
21050 }
21051
21052 return false;
21053}
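// For example, an offset of 96 is accepted above because 96 == 3 << 5 fits the
// (sign_extend(imm5) << imm2) form, whereas 2048 does not and is rejected, so
// no pre/post-indexed access is formed for it.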
21054
21055bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21056 SDValue &Offset,
21057 ISD::MemIndexedMode &AM,
21058 SelectionDAG &DAG) const {
21059 EVT VT;
21060 SDValue Ptr;
21061 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21062 VT = LD->getMemoryVT();
21063 Ptr = LD->getBasePtr();
21064 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21065 VT = ST->getMemoryVT();
21066 Ptr = ST->getBasePtr();
21067 } else
21068 return false;
21069
21070 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21071 return false;
21072
21073 AM = ISD::PRE_INC;
21074 return true;
21075}
21076
21077bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
21078 SDValue &Base,
21079 SDValue &Offset,
21080 ISD::MemIndexedMode &AM,
21081 SelectionDAG &DAG) const {
21082 if (Subtarget.hasVendorXCVmem()) {
21083 if (Op->getOpcode() != ISD::ADD)
21084 return false;
21085
21086 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21087 Base = LS->getBasePtr();
21088 else
21089 return false;
21090
21091 if (Base == Op->getOperand(0))
21092 Offset = Op->getOperand(1);
21093 else if (Base == Op->getOperand(1))
21094 Offset = Op->getOperand(0);
21095 else
21096 return false;
21097
21098 AM = ISD::POST_INC;
21099 return true;
21100 }
21101
21102 EVT VT;
21103 SDValue Ptr;
21104 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21105 VT = LD->getMemoryVT();
21106 Ptr = LD->getBasePtr();
21107 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21108 VT = ST->getMemoryVT();
21109 Ptr = ST->getBasePtr();
21110 } else
21111 return false;
21112
21113 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21114 return false;
21115 // Post-indexing updates the base, so it's not a valid transform
21116 // if that's not the same as the load's pointer.
21117 if (Ptr != Base)
21118 return false;
21119
21120 AM = ISD::POST_INC;
21121 return true;
21122}
21123
21124bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
21125 EVT VT) const {
21126 EVT SVT = VT.getScalarType();
21127
21128 if (!SVT.isSimple())
21129 return false;
21130
21131 switch (SVT.getSimpleVT().SimpleTy) {
21132 case MVT::f16:
21133 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21134 : Subtarget.hasStdExtZfhOrZhinx();
21135 case MVT::f32:
21136 return Subtarget.hasStdExtFOrZfinx();
21137 case MVT::f64:
21138 return Subtarget.hasStdExtDOrZdinx();
21139 default:
21140 break;
21141 }
21142
21143 return false;
21144}
21145
21146ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
21147 // Zacas will use amocas.w which does not require extension.
21148 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21149}
21150
21151Register RISCVTargetLowering::getExceptionPointerRegister(
21152 const Constant *PersonalityFn) const {
21153 return RISCV::X10;
21154}
21155
21156Register RISCVTargetLowering::getExceptionSelectorRegister(
21157 const Constant *PersonalityFn) const {
21158 return RISCV::X11;
21159}
21160
21161bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
21162 // Return false to suppress the unnecessary extensions if the LibCall
21163 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21164 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21165 Type.getSizeInBits() < Subtarget.getXLen()))
21166 return false;
21167
21168 return true;
21169}
21170
21171bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
21172 if (Subtarget.is64Bit() && Type == MVT::i32)
21173 return true;
21174
21175 return IsSigned;
21176}
21177
21178bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
21179 SDValue C) const {
21180 // Check integral scalar types.
21181 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21182 if (!VT.isScalarInteger())
21183 return false;
21184
21185 // Omit the optimization if the subtarget has the M/Zmmul extension and the
21186 // data size exceeds XLen.
21187 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21188 return false;
21189
21190 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
21191 // Break the MUL to a SLLI and an ADD/SUB.
21192 const APInt &Imm = ConstNode->getAPIntValue();
21193 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21194 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21195 return true;
21196
21197 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
21198 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21199 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21200 (Imm - 8).isPowerOf2()))
21201 return true;
21202
21203 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21204 // a pair of LUI/ADDI.
21205 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21206 ConstNode->hasOneUse()) {
21207 APInt ImmS = Imm.ashr(Imm.countr_zero());
21208 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21209 (1 - ImmS).isPowerOf2())
21210 return true;
21211 }
21212 }
21213
21214 return false;
21215}
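// Worked examples for decomposeMulByConstant above: x*7 qualifies because
// Imm+1 == 8 is a power of 2 (x*7 == (x<<3)-x); with Zba, x*4100 qualifies via
// Imm-4 == 4096 (x*4100 == sh2add(x, x<<12), i.e. 4*x + 4096*x).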
21216
21217bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
21218 SDValue ConstNode) const {
21219 // Let the DAGCombiner decide for vectors.
21220 EVT VT = AddNode.getValueType();
21221 if (VT.isVector())
21222 return true;
21223
21224 // Let the DAGCombiner decide for larger types.
21225 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21226 return true;
21227
21228 // It is worse if c1 is simm12 while c1*c2 is not.
21229 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21230 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21231 const APInt &C1 = C1Node->getAPIntValue();
21232 const APInt &C2 = C2Node->getAPIntValue();
21233 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21234 return false;
21235
21236 // Default to true and let the DAGCombiner decide.
21237 return true;
21238}
21239
21240bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
21241 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21242 unsigned *Fast) const {
21243 if (!VT.isVector()) {
21244 if (Fast)
21245 *Fast = Subtarget.enableUnalignedScalarMem();
21246 return Subtarget.enableUnalignedScalarMem();
21247 }
21248
21249 // All vector implementations must support element alignment
21250 EVT ElemVT = VT.getVectorElementType();
21251 if (Alignment >= ElemVT.getStoreSize()) {
21252 if (Fast)
21253 *Fast = 1;
21254 return true;
21255 }
21256
21257 // Note: We lower an unmasked unaligned vector access to an equally sized
21258 // e8 element type access. Given this, we effectively support all unmasked
21259 // misaligned accesses. TODO: Work through the codegen implications of
21260 // allowing such accesses to be formed, and considered fast.
21261 if (Fast)
21262 *Fast = Subtarget.enableUnalignedVectorMem();
21263 return Subtarget.enableUnalignedVectorMem();
21264}
21265
21266
21267EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
21268 const AttributeList &FuncAttributes) const {
21269 if (!Subtarget.hasVInstructions())
21270 return MVT::Other;
21271
21272 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21273 return MVT::Other;
21274
21275 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21276 // has an expansion threshold, and we want the number of hardware memory
21277 // operations to correspond roughly to that threshold. LMUL>1 operations
21278 // are typically expanded linearly internally, and thus correspond to more
21279 // than one actual memory operation. Note that store merging and load
21280 // combining will typically form larger LMUL operations from the LMUL1
21281 // operations emitted here, and that's okay because combining isn't
21282 // introducing new memory operations; it's just merging existing ones.
21283 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21284 if (Op.size() < MinVLenInBytes)
21285 // TODO: Figure out short memops. For the moment, do the default thing
21286 // which ends up using scalar sequences.
21287 return MVT::Other;
21288
21289 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21290 // a large scalar constant and instead use vmv.v.x/i to do the
21291 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21292 // maximize the chance we can encode the size in the vsetvli.
21293 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21294 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21295
21296 // Do we have sufficient alignment for our preferred VT? If not, revert
21297 // to largest size allowed by our alignment criteria.
21298 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21299 Align RequiredAlign(PreferredVT.getStoreSize());
21300 if (Op.isFixedDstAlign())
21301 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21302 if (Op.isMemcpy())
21303 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21304 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21305 }
21306 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21307}
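// For example, with V (VLEN >= 128, ELEN = 64) MinVLenInBytes is 16, so a
// sufficiently large, suitably aligned memcpy is emitted with v2i64
// operations, while a non-zero memset prefers v16i8 so the fill value can be
// splatted with vmv.v.x.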
21308
21309bool RISCVTargetLowering::splitValueIntoRegisterParts(
21310 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21311 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21312 bool IsABIRegCopy = CC.has_value();
21313 EVT ValueVT = Val.getValueType();
21314 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21315 PartVT == MVT::f32) {
21316 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21317 // nan, and cast to f32.
21318 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21319 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21320 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21321 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21322 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
21323 Parts[0] = Val;
21324 return true;
21325 }
21326
21327 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21328 LLVMContext &Context = *DAG.getContext();
21329 EVT ValueEltVT = ValueVT.getVectorElementType();
21330 EVT PartEltVT = PartVT.getVectorElementType();
21331 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21332 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21333 if (PartVTBitSize % ValueVTBitSize == 0) {
21334 assert(PartVTBitSize >= ValueVTBitSize);
21335 // If the element types are different, bitcast to the same element type of
21336 // PartVT first.
21337 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
21338 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
21339 // insert_subvector, and then bitcast the result to
21340 // <vscale x 4 x i16>.
21341 if (ValueEltVT != PartEltVT) {
21342 if (PartVTBitSize > ValueVTBitSize) {
21343 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21344 assert(Count != 0 && "The number of element should not be zero.");
21345 EVT SameEltTypeVT =
21346 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21347 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21348 DAG.getUNDEF(SameEltTypeVT), Val,
21349 DAG.getVectorIdxConstant(0, DL));
21350 }
21351 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21352 } else {
21353 Val =
21354 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21355 Val, DAG.getVectorIdxConstant(0, DL));
21356 }
21357 Parts[0] = Val;
21358 return true;
21359 }
21360 }
21361 return false;
21362}
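// Note on the f16/bf16-in-f32 case above: when a half value is passed in an
// FPR32 (e.g. lp64f with Zfhmin), it is NaN-boxed by ORing 0xFFFF0000 into the
// upper bits before the bitcast to f32; joinRegisterPartsIntoValue below
// simply truncates the low 16 bits back out.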
21363
21364SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
21365 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21366 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21367 bool IsABIRegCopy = CC.has_value();
21368 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21369 PartVT == MVT::f32) {
21370 SDValue Val = Parts[0];
21371
21372 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21373 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21374 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21375 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21376 return Val;
21377 }
21378
21379 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21380 LLVMContext &Context = *DAG.getContext();
21381 SDValue Val = Parts[0];
21382 EVT ValueEltVT = ValueVT.getVectorElementType();
21383 EVT PartEltVT = PartVT.getVectorElementType();
21384 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21385 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21386 if (PartVTBitSize % ValueVTBitSize == 0) {
21387 assert(PartVTBitSize >= ValueVTBitSize);
21388 EVT SameEltTypeVT = ValueVT;
21389 // If the element types are different, convert it to the same element type
21390 // of PartVT.
21391 // For example, to copy a <vscale x 1 x i8> value out of
21392 // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
21393 // <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
21394 // subvector.
21395 if (ValueEltVT != PartEltVT) {
21396 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21397 assert(Count != 0 && "The number of element should not be zero.");
21398 SameEltTypeVT =
21399 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21400 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21401 }
21402 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21403 DAG.getVectorIdxConstant(0, DL));
21404 return Val;
21405 }
21406 }
21407 return SDValue();
21408}
21409
21410bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21411 // When aggressively optimizing for code size, we prefer to use a div
21412 // instruction, as it is usually smaller than the alternative sequence.
21413 // TODO: Add vector division?
21414 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21415 return OptSize && !VT.isVector();
21416}
21417
21418bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21419 // Scalarizing a splat of zero_ext or sign_ext can prevent it from matching
21420 // a widening instruction in some situations.
21421 unsigned Opc = N->getOpcode();
21422 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21423 return false;
21424 return true;
21425}
21426
21427static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21428 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21429 Function *ThreadPointerFunc =
21430 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21431 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21432 IRB.CreateCall(ThreadPointerFunc), Offset);
21433}
21434
21435Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21436 // Fuchsia provides a fixed TLS slot for the stack cookie.
21437 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21438 if (Subtarget.isTargetFuchsia())
21439 return useTpOffset(IRB, -0x10);
21440
21441 // Android provides a fixed TLS slot for the stack cookie. See the definition
21442 // of TLS_SLOT_STACK_GUARD in
21443 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21444 if (Subtarget.isTargetAndroid())
21445 return useTpOffset(IRB, -0x18);
21446
21447 return TargetLowering::getIRStackGuard(IRB);
21448
21449
21450bool RISCVTargetLowering::isLegalInterleavedAccessType(
21451 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21452 const DataLayout &DL) const {
21453 EVT VT = getValueType(DL, VTy);
21454 // Don't lower vlseg/vsseg for vector types that can't be split.
21455 if (!isTypeLegal(VT))
21456 return false;
21457
21458 if (!isLegalElementTypeForRVV(VT.getVectorElementType()) ||
21459 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21460 Alignment))
21461 return false;
21462
21463 MVT ContainerVT = VT.getSimpleVT();
21464
21465 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21466 if (!Subtarget.useRVVForFixedLengthVectors())
21467 return false;
21468 // Sometimes the interleaved access pass picks up splats as interleaves of
21469 // one element. Don't lower these.
21470 if (FVTy->getNumElements() < 2)
21471 return false;
21472
21473 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
21474 } else {
21475 // The intrinsics for scalable vectors are not overloaded on pointer type
21476 // and can only handle the default address space.
21477 if (AddrSpace)
21478 return false;
21479 }
21480
21481 // Need to make sure that EMUL * NFIELDS ≤ 8
21482 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21483 if (Fractional)
21484 return true;
21485 return Factor * LMUL <= 8;
21486}
21487
21488bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
21489 Align Alignment) const {
21490 if (!Subtarget.hasVInstructions())
21491 return false;
21492
21493 // Only support fixed vectors if we know the minimum vector size.
21494 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21495 return false;
21496
21497 EVT ScalarType = DataType.getScalarType();
21498 if (!isLegalElementTypeForRVV(ScalarType))
21499 return false;
21500
21501 if (!Subtarget.enableUnalignedVectorMem() &&
21502 Alignment < ScalarType.getStoreSize())
21503 return false;
21504
21505 return true;
21506}
21507
21508static const Intrinsic::ID FixedVlsegIntrIds[] = {
21509 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21510 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21511 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21512 Intrinsic::riscv_seg8_load};
21513
21514/// Lower an interleaved load into a vlsegN intrinsic.
21515///
21516/// E.g. Lower an interleaved load (Factor = 2):
21517/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21518/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21519/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21520///
21521/// Into:
21522/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21523/// %ptr, i64 4)
21524/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21525/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21526bool RISCVTargetLowering::lowerInterleavedLoad(
21527 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21528 ArrayRef<unsigned> Indices, unsigned Factor) const {
21529 IRBuilder<> Builder(LI);
21530
21531 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21532 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21533 LI->getPointerAddressSpace(),
21534 LI->getDataLayout()))
21535 return false;
21536
21537 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21538
21539 Function *VlsegNFunc =
21540 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21541 {VTy, LI->getPointerOperandType(), XLenTy});
21542
21543 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21544
21545 CallInst *VlsegN =
21546 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21547
21548 for (unsigned i = 0; i < Shuffles.size(); i++) {
21549 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21550 Shuffles[i]->replaceAllUsesWith(SubVec);
21551 }
21552
21553 return true;
21554}
21555
21556static const Intrinsic::ID FixedVssegIntrIds[] = {
21557 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21558 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21559 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21560 Intrinsic::riscv_seg8_store};
21561
21562/// Lower an interleaved store into a vssegN intrinsic.
21563///
21564/// E.g. Lower an interleaved store (Factor = 3):
21565/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21566/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21567/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21568///
21569/// Into:
21570/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21571/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21572/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21573/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21574/// %ptr, i32 4)
21575///
21576/// Note that the new shufflevectors will be removed and we'll only generate one
21577/// vsseg3 instruction in CodeGen.
21578bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21579 ShuffleVectorInst *SVI,
21580 unsigned Factor) const {
21581 IRBuilder<> Builder(SI);
21582 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21583 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21584 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21585 ShuffleVTy->getNumElements() / Factor);
21586 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21587 SI->getPointerAddressSpace(),
21588 SI->getDataLayout()))
21589 return false;
21590
21591 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21592
21593 Function *VssegNFunc =
21594 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21595 {VTy, SI->getPointerOperandType(), XLenTy});
21596
21597 auto Mask = SVI->getShuffleMask();
21598 SmallVector<Value *, 10> Ops;
21599
21600 for (unsigned i = 0; i < Factor; i++) {
21601 Value *Shuffle = Builder.CreateShuffleVector(
21602 SVI->getOperand(0), SVI->getOperand(1),
21603 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21604 Ops.push_back(Shuffle);
21605 }
21606 // This VL should be OK (should be executable in one vsseg instruction,
21607 // potentially under larger LMULs) because we checked that the fixed vector
21608 // type fits in isLegalInterleavedAccessType
21609 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21610 Ops.append({SI->getPointerOperand(), VL});
21611
21612 Builder.CreateCall(VssegNFunc, Ops);
21613
21614 return true;
21615}
21616
21617bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21618 LoadInst *LI) const {
21619 assert(LI->isSimple());
21620 IRBuilder<> Builder(LI);
21621
21622 // Only deinterleave2 supported at present.
21623 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21624 return false;
21625
21626 unsigned Factor = 2;
21627
21628 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21629 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21630
21631 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21632 LI->getPointerAddressSpace(),
21633 LI->getDataLayout()))
21634 return false;
21635
21636 Function *VlsegNFunc;
21637 Value *VL;
21638 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21639 SmallVector<Value *, 10> Ops;
21640
21641 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21642 VlsegNFunc = Intrinsic::getDeclaration(
21643 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21644 {ResVTy, LI->getPointerOperandType(), XLenTy});
21645 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21646 } else {
21647 static const Intrinsic::ID IntrIds[] = {
21648 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21649 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21650 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21651 Intrinsic::riscv_vlseg8};
21652
21653 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21654 {ResVTy, XLenTy});
21655 VL = Constant::getAllOnesValue(XLenTy);
21656 Ops.append(Factor, PoisonValue::get(ResVTy));
21657 }
21658
21659 Ops.append({LI->getPointerOperand(), VL});
21660
21661 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21662 DI->replaceAllUsesWith(Vlseg);
21663
21664 return true;
21665}
21666
21667bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21668 StoreInst *SI) const {
21669 assert(SI->isSimple());
21670 IRBuilder<> Builder(SI);
21671
21672 // Only interleave2 supported at present.
21673 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21674 return false;
21675
21676 unsigned Factor = 2;
21677
21678 VectorType *VTy = cast<VectorType>(II->getType());
21679 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21680
21681 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21682 SI->getPointerAddressSpace(),
21683 SI->getDataLayout()))
21684 return false;
21685
21686 Function *VssegNFunc;
21687 Value *VL;
21688 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21689
21690 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21691 VssegNFunc = Intrinsic::getDeclaration(
21692 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21693 {InVTy, SI->getPointerOperandType(), XLenTy});
21694 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21695 } else {
21696 static const Intrinsic::ID IntrIds[] = {
21697 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21698 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21699 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21700 Intrinsic::riscv_vsseg8};
21701
21702 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21703 {InVTy, XLenTy});
21704 VL = Constant::getAllOnesValue(XLenTy);
21705 }
21706
21707 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21708 SI->getPointerOperand(), VL});
21709
21710 return true;
21711}
21712
21713MachineInstr *
21714RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21715 MachineBasicBlock::instr_iterator &MBBI,
21716 const TargetInstrInfo *TII) const {
21717 assert(MBBI->isCall() && MBBI->getCFIType() &&
21718 "Invalid call instruction for a KCFI check");
21719 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21720 MBBI->getOpcode()));
21721
21722 MachineOperand &Target = MBBI->getOperand(0);
21723 Target.setIsRenamable(false);
21724
21725 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21726 .addReg(Target.getReg())
21727 .addImm(MBBI->getCFIType())
21728 .getInstr();
21729}
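// For example, a kcfi-annotated indirect call selected to PseudoCALLIndirect
// gets a KCFI_CHECK of the target register against the call's CFI type id
// inserted immediately before it; the check traps if the callee's type hash
// does not match.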
21730
21731#define GET_REGISTER_MATCHER
21732#include "RISCVGenAsmMatcher.inc"
21733
21734Register
21735RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21736 const MachineFunction &MF) const {
21737 Register Reg = MatchRegisterAltName(RegName);
21738 if (Reg == RISCV::NoRegister)
21739 Reg = MatchRegisterName(RegName);
21740 if (Reg == RISCV::NoRegister)
21741 report_fatal_error(
21742 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21743 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21744 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21745 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21746 StringRef(RegName) + "\"."));
21747 return Reg;
21748}
21749
21750MachineMemOperand::Flags
21751RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21752 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21753
21754 if (NontemporalInfo == nullptr)
21755 return MachineMemOperand::MONone;
21756
21757 // 1 is the default value and works as __RISCV_NTLH_ALL
21758 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21759 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21760 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21761 // 5 -> __RISCV_NTLH_ALL
21762 int NontemporalLevel = 5;
21763 const MDNode *RISCVNontemporalInfo =
21764 I.getMetadata("riscv-nontemporal-domain");
21765 if (RISCVNontemporalInfo != nullptr)
21766 NontemporalLevel =
21767 cast<ConstantInt>(
21768 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21769 ->getValue())
21770 ->getZExtValue();
21771
21772 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21773 "RISC-V target doesn't support this non-temporal domain.");
21774
21775 NontemporalLevel -= 2;
21776 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21777 if (NontemporalLevel & 0b1)
21778 Flags |= MONontemporalBit0;
21779 if (NontemporalLevel & 0b10)
21780 Flags |= MONontemporalBit1;
21781
21782 return Flags;
21783}
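// For example, a load tagged with !riscv-nontemporal-domain !{i32 3}
// (__RISCV_NTLH_ALL_PRIVATE) yields level 3, i.e. 3 - 2 == 0b01, so
// MONontemporalBit0 is set and MONontemporalBit1 is left clear.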
21784
21785MachineMemOperand::Flags
21786RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21787
21788 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21789 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21790 TargetFlags |= (NodeFlags & MONontemporalBit0);
21791 TargetFlags |= (NodeFlags & MONontemporalBit1);
21792 return TargetFlags;
21793}
21794
21795bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21796 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21797 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21798}
21799
21800bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21801 if (VT.isScalableVector())
21802 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21803 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21804 return true;
21805 return Subtarget.hasStdExtZbb() &&
21806 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21807}
21808
21809unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21810 ISD::CondCode Cond) const {
21811 return isCtpopFast(VT) ? 0 : 1;
21812}
21813
21814bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21815
21816 // GISel support is in progress or complete for these opcodes.
21817 unsigned Op = Inst.getOpcode();
21818 if (Op == Instruction::Add || Op == Instruction::Sub ||
21819 Op == Instruction::And || Op == Instruction::Or ||
21820 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21821 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
21822 Op == Instruction::Freeze || Op == Instruction::Store)
21823 return false;
21824
21825 if (Inst.getType()->isScalableTy())
21826 return true;
21827
21828 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21829 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21830 !isa<ReturnInst>(&Inst))
21831 return true;
21832
21833 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21834 if (AI->getAllocatedType()->isScalableTy())
21835 return true;
21836 }
21837
21838 return false;
21839}
21840
21841SDValue
21842RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21843 SelectionDAG &DAG,
21844 SmallVectorImpl<SDNode *> &Created) const {
21845 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21846 if (isIntDivCheap(N->getValueType(0), Attr))
21847 return SDValue(N, 0); // Lower SDIV as SDIV
21848
21849 // Only perform this transform if short forward branch opt is supported.
21850 if (!Subtarget.hasShortForwardBranchOpt())
21851 return SDValue();
21852 EVT VT = N->getValueType(0);
21853 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21854 return SDValue();
21855
21856 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21857 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21858 return SDValue();
21859 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21860}
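// Rough shape of the expansion chosen above for sdiv by 2**k on cores with
// short forward branches: conditionally add 2**k-1 to the dividend when it is
// negative (a conditional-move style sequence), then arithmetic-shift right by
// k; divisors whose 2**k-1 addend is >= 2048 are rejected so a single
// addi/addiw always suffices.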
21861
21862bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21863 EVT VT, const APInt &AndMask) const {
21864 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21865 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21866 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21867}
21868
21869unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21870 return Subtarget.getMinimumJumpTableEntries();
21871}
21872
21873// Handle single arg such as return value.
21874template <typename Arg>
21875void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21876 // This lambda determines whether an array of types are constructed by
21877 // homogeneous vector types.
21878 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21879 // First, extract the first element in the argument type.
21880 auto It = ArgList.begin();
21881 MVT FirstArgRegType = It->VT;
21882
21883 // Return if there is no return or the type needs split.
21884 if (It == ArgList.end() || It->Flags.isSplit())
21885 return false;
21886
21887 ++It;
21888
21889 // Return if this argument type contains only 1 element, or it's not a
21890 // vector type.
21891 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21892 return false;
21893
21894 // Second, check if the following elements in this argument type are all the
21895 // same.
21896 for (; It != ArgList.end(); ++It)
21897 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21898 return false;
21899
21900 return true;
21901 };
21902
21903 if (isHomogeneousScalableVectorType(ArgList)) {
21904 // Handle as tuple type
21905 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21906 } else {
21907 // Handle as normal vector type
21908 bool FirstVMaskAssigned = false;
21909 for (const auto &OutArg : ArgList) {
21910 MVT RegisterVT = OutArg.VT;
21911
21912 // Skip non-RVV register type
21913 if (!RegisterVT.isVector())
21914 continue;
21915
21916 if (RegisterVT.isFixedLengthVector())
21917 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21918
21919 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21920 RVVArgInfos.push_back({1, RegisterVT, true});
21921 FirstVMaskAssigned = true;
21922 continue;
21923 }
21924
21925 RVVArgInfos.push_back({1, RegisterVT, false});
21926 }
21927 }
21928}
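Restated as a small standalone predicate (a sketch over made-up part descriptors, not the ISD argument types the code above inspects), the lambda answers: at least two parts, none split, the first part a scalable vector, and every later part identical to the first.

#include <algorithm>
#include <vector>

// Stand-in for one register-sized argument part; the fields are assumptions.
struct Part {
  int VT;           // opaque value-type tag (plays the role of MVT)
  bool Split;       // plays the role of Flags.isSplit()
  bool ScalableVec; // plays the role of VT.isScalableVector()
};

static bool isHomogeneousScalableVectorType(const std::vector<Part> &Parts) {
  if (Parts.size() < 2 || Parts.front().Split || !Parts.front().ScalableVec)
    return false;
  return std::all_of(Parts.begin() + 1, Parts.end(), [&](const Part &P) {
    return !P.Split && P.VT == Parts.front().VT;
  });
}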
21929
21930// Handle multiple args.
21931template <>
21932void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21933 const DataLayout &DL = MF->getDataLayout();
21934 const Function &F = MF->getFunction();
21935 LLVMContext &Context = F.getContext();
21936
21937 bool FirstVMaskAssigned = false;
21938 for (Type *Ty : TypeList) {
21939 StructType *STy = dyn_cast<StructType>(Ty);
21940 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21941 Type *ElemTy = STy->getTypeAtIndex(0U);
21942 EVT VT = TLI->getValueType(DL, ElemTy);
21943 MVT RegisterVT =
21944 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21945 unsigned NumRegs =
21946 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21947
21948 RVVArgInfos.push_back(
21949 {NumRegs * STy->getNumElements(), RegisterVT, false});
21950 } else {
21951 SmallVector<EVT, 4> ValueVTs;
21952 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21953
21954 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21955 ++Value) {
21956 EVT VT = ValueVTs[Value];
21957 MVT RegisterVT =
21958 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21959 unsigned NumRegs =
21960 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21961
21962 // Skip non-RVV register type
21963 if (!RegisterVT.isVector())
21964 continue;
21965
21966 if (RegisterVT.isFixedLengthVector())
21967 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21968
21969 if (!FirstVMaskAssigned &&
21970 RegisterVT.getVectorElementType() == MVT::i1) {
21971 RVVArgInfos.push_back({1, RegisterVT, true});
21972 FirstVMaskAssigned = true;
21973 --NumRegs;
21974 }
21975
21976 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21977 }
21978 }
21979 }
21980}
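As a rough illustration of the Type * path (the argument types here are assumptions chosen for the example): a parameter of type { <vscale x 4 x i32>, <vscale x 4 x i32> } contains homogeneous scalable vectors, so it is recorded as one tuple entry spanning both fields, while a later <vscale x 16 x i1> parameter is the first mask type encountered and is recorded with its FirstVMask flag set so that compute() can pin it to V0.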
21981
21982void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21983 unsigned StartReg) {
21984 assert((StartReg % LMul) == 0 &&
21985 "Start register number should be multiple of lmul");
21986 const MCPhysReg *VRArrays;
21987 switch (LMul) {
21988 default:
21989 report_fatal_error("Invalid lmul");
21990 case 1:
21991 VRArrays = ArgVRs;
21992 break;
21993 case 2:
21994 VRArrays = ArgVRM2s;
21995 break;
21996 case 4:
21997 VRArrays = ArgVRM4s;
21998 break;
21999 case 8:
22000 VRArrays = ArgVRM8s;
22001 break;
22002 }
22003
22004 for (unsigned i = 0; i < NF; ++i)
22005 if (StartReg)
22006 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
22007 else
22008 AllocatedPhysRegs.push_back(MCPhysReg());
22009}
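For instance, with LMul == 2 and StartReg == 12 (that is, v12), the loop above reads VRArrays[(12 - 8) / 2 + i], i.e. entries 2 and 3 for a two-field tuple; assuming ArgVRM2s lists the even-numbered LMUL-2 groups starting at v8m2, those are v12m2 and v14m2. A StartReg of 0 instead records an invalid MCPhysReg for each field, signalling that the argument did not get a register.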
22010
22011/// This function determines whether each RVV argument is passed by register.
22012/// If an argument can be assigned to a VR, give it a specific register;
22013/// otherwise, assign it 0, which is an invalid MCPhysReg.
22014void RVVArgDispatcher::compute() {
22015 uint32_t AssignedMap = 0;
22016 auto allocate = [&](const RVVArgInfo &ArgInfo) {
22017 // Allocate first vector mask argument to V0.
22018 if (ArgInfo.FirstVMask) {
22019 AllocatedPhysRegs.push_back(RISCV::V0);
22020 return;
22021 }
22022
22023 unsigned RegsNeeded = divideCeil(
22024 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
22025 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
22026 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
22027 StartReg += RegsNeeded) {
22028 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
22029 if ((AssignedMap & Map) == 0) {
22030 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
22031 AssignedMap |= Map;
22032 return;
22033 }
22034 }
22035
22036 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
22037 };
22038
22039 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
22040 allocate(RVVArgInfos[i]);
22041}
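The scan in compute() is a first-fit search over an occupancy bitmap of the argument vector registers. A compact standalone sketch of that search (illustrative only; the 16-register count, the v8 base, and the helper name are assumptions for the example, not the LLVM API):

#include <cstdint>
#include <optional>

// Illustrative only: first-fit scan over an occupancy bitmap of the argument
// vector registers, mirroring the loop in compute() above.
static std::optional<unsigned> firstFit(uint32_t &AssignedMap, unsigned NF,
                                        unsigned RegsPerField,
                                        unsigned NumArgVRs = 16) {
  unsigned Total = NF * RegsPerField;
  for (unsigned Start = 0; Start + Total <= NumArgVRs; Start += RegsPerField) {
    uint32_t Window = ((1u << Total) - 1) << Start; // candidate register span
    if ((AssignedMap & Window) == 0) {
      AssignedMap |= Window;
      return Start + 8; // argument VRs are numbered from v8
    }
  }
  // No contiguous group left; the real code records invalid MCPhysRegs here.
  return std::nullopt;
}

int main() {
  uint32_t Map = 0;
  auto A = firstFit(Map, /*NF=*/1, /*RegsPerField=*/2); // an LMUL-2 value -> v8
  auto B = firstFit(Map, /*NF=*/3, /*RegsPerField=*/2); // 3-field LMUL-2 tuple -> v10
  (void)A;
  (void)B;
  return 0;
}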
22042
22043MCPhysReg RVVArgDispatcher::getNextPhysReg() {
22044   assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
22045 return AllocatedPhysRegs[CurIdx++];
22046}
22047
22048SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
22049                                              SDValue Value, SDValue Addr,
22050                                              int JTI,
22051 SelectionDAG &DAG) const {
22052 if (Subtarget.hasStdExtZicfilp()) {
22053     // When Zicfilp is enabled, we need to use a software-guarded branch for
22054     // the jump table branch.
22055 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
22056 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
22057 Addr);
22058 }
22059 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22060}
22061
22062namespace llvm::RISCVVIntrinsicsTable {
22063
22064#define GET_RISCVVIntrinsicsTable_IMPL
22065#include "RISCVGenSearchableTables.inc"
22066
22067} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1237
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1229
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1009
Class for arbitrary precision integers.
Definition: APInt.h:77
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:208
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1365
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1471
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1309
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1180
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:350
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1161
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:188
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:308
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1376
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1597
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:414
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:198
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1490
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isMask(unsigned numBits) const
Definition: APInt.h:467
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:313
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1236
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:419
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:285
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1109
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:275
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1368
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:265
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1521
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:60
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:494
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:695
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:808
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:707
@ Add
*p = old + v
Definition: Instructions.h:711
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:725
@ Or
*p = old | v
Definition: Instructions.h:719
@ Sub
*p = old - v
Definition: Instructions.h:713
@ And
*p = old & v
Definition: Instructions.h:715
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:747
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:723
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:729
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:727
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:751
@ Nand
*p = ~(old & v)
Definition: Instructions.h:717
bool isFloatingPointOperation() const
Definition: Instructions.h:863
BinOp getOperation() const
Definition: Instructions.h:786
Value * getValOperand()
Definition: Instructions.h:855
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:828
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
iterator_range< arg_iterator > args()
Definition: Function.h:855
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:716
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Argument * getArg(unsigned i) const
Definition: Function.h:849
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:92
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1877
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2514
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1832
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2031
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:524
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:172
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:529
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1747
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1342
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:495
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2492
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1852
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2005
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:514
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:173
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:258
Value * getPointerOperand()
Definition: Instructions.h:252
bool isSimple() const
Definition: Instructions.h:244
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:208
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:403
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
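For orientation, the quantity these VLMAX helpers compute follows the RVV relationship VLMAX = LMUL * VLEN / SEW. A hedged worked example with illustrative numbers (not values taken from this file):
// Illustrative arithmetic only; VLEN/SEW values here are assumptions.
unsigned VLEN = 128;                      // vector register width in bits
unsigned SEW = 32;                        // selected element width in bits
unsigned VLMAXAtLMUL1 = VLEN / SEW;       // 4 elements at LMUL=1
unsigned VLMAXAtLMUL8 = 8 * VLMAXAtLMUL1; // 32 elements when grouping 8 registers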
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
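The SDNode/SDValue accessors above are the building blocks of DAG pattern matching. A minimal sketch of the common idiom, using a hypothetical matcher (not code from this file):
static bool matchAddOfOneUseShl(SDNode *N) {
  if (N->getOpcode() != ISD::ADD)
    return false;
  SDValue RHS = N->getOperand(1);
  // Only match when the shift feeds nothing else and its amount is constant.
  return RHS.getOpcode() == ISD::SHL && RHS.hasOneUse() &&
         isa<ConstantSDNode>(RHS.getOperand(1));
}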
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:390
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
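The SelectionDAG factory methods above are how replacement nodes get built during lowering and combining. A minimal sketch of their typical use, with a hypothetical helper (not code from this file):
static SDValue buildGuardedSum(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                               SDValue Cond, SDValue X, SDValue Y) {
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Y);   // x + y
  SDValue Zero = DAG.getConstant(0, DL, VT);           // integer zero of VT
  return DAG.getSelect(DL, VT, Cond, Sum, Zero);       // cond ? x + y : 0
}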
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
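These static mask classifiers are typically queried before deciding on a shuffle lowering strategy. A minimal sketch, using a hypothetical wrapper (not code from this file):
static bool isReverseOrInterleave(ArrayRef<int> Mask, unsigned Factor,
                                  unsigned NumElts) {
  if (ShuffleVectorInst::isReverseMask(Mask, NumElts))
    return true;                               // e.g. <3,2,1,0>
  SmallVector<unsigned> StartIndexes;
  return ShuffleVectorInst::isInterleaveMask(Mask, Factor, NumElts,
                                             StartIndexes);
}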
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
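The SmallSet and SmallVector members above combine into the usual worklist idiom of this kind of code. A minimal sketch, using a hypothetical counter (not code from this file):
static unsigned countUniqueOpcodes(ArrayRef<SDNode *> Roots) {
  SmallSet<unsigned, 8> Seen;
  SmallVector<SDNode *, 8> Worklist(Roots.begin(), Roots.end());
  unsigned Unique = 0;
  while (!Worklist.empty())
    if (Seen.insert(Worklist.pop_back_val()->getOpcode()).second)
      ++Unique;                                // first time seeing this opcode
  return Unique;
}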
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:289
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
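StringSwitch is the idiom behind name-to-value lookups such as getRegisterByName. A minimal sketch with made-up mappings (not code from this file):
static unsigned parseExampleRegName(StringRef Name) {
  return StringSwitch<unsigned>(Name.lower())
      .Case("zero", 0)                         // x0
      .Cases("ra", "x1", 1)                    // ABI and architectural names
      .Default(~0U);                           // not recognised
}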
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
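Calls of the shape below are how a TargetLowering subclass constructor normally uses these configuration hooks; the opcodes, types and actions shown are placeholders, not this target's actual configuration (see the constructor in this file for the real settings).
// Illustrative fragment as it would appear inside a TargetLowering subclass
// constructor; choices below are placeholders.
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);        // lower via branches
setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i1, Promote);  // widen i1 extending loads
setTruncStoreAction(MVT::f64, MVT::f16, Expand);             // no direct f64->f16 store
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);            // rewrite this condition code
setMaxAtomicSizeInBitsSupported(64);                         // wider atomics use libcalls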
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
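The Value accessors above appear most often in a simple replace-if-safe combination. A minimal sketch, using a hypothetical helper (not code from this file):
static bool replaceIfSingleUse(Value *Old, Value *New) {
  if (!Old->hasOneUse() || Old->getType() != New->getType())
    return false;                  // only rewrite a sole, type-compatible use
  Old->replaceAllUsesWith(New);
  return true;
}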
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
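The TypeSize helpers above carry the fixed-versus-scalable distinction through size arithmetic. A small sketch with illustrative values (not code from this file):
// Illustrative values only.
TypeSize Fixed = TypeSize::getFixed(128);              // exactly 128 bits
TypeSize Scalable = TypeSize::getScalable(64);         // 64 x vscale bits
uint64_t MinBits = Scalable.getKnownMinValue();        // 64, the vscale=1 lower bound
TypeSize Doubled = Scalable.multiplyCoefficientBy(2);  // 128 x vscale bits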
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
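The two ISD predicates above are typical guards checked before rewriting memory or splat nodes. A minimal sketch, using a hypothetical check (not code from this file):
static bool isPlainLoadMaskedByAllOnes(SDNode *Load, SDNode *Mask) {
  return ISD::isNON_EXTLoad(Load) &&                    // no sext/zext/anyext load
         ISD::isConstantSplatVectorAllOnes(Mask);       // every mask element is ~0
}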
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:764
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1147
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1143
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:737
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:484
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1391
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1290
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:567
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:728
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1176
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1292
@ STRICT_FCEIL
Definition: ISDOpcodes.h:434
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1293
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:797
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:491
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:804
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:551
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1376
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1380
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:702
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1249
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1254
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1390
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:485
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:927
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1288
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1289
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1431
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:899
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:788
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:670
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:458
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:628
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readfixedcounter intrinsic.
Definition: ISDOpcodes.h:1209
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1373
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:736
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1242
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1377
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1009
@ STRICT_LROUND
Definition: ISDOpcodes.h:439
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:944
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1098
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1291
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1077
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:594
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:654
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:741
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1392
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:635
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1172
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:438
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1385
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:894
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:659
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:608
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1286
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:581
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:794
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1232
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:870
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:756
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1350
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1269
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1294
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:986
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1062
@ STRICT_LRINT
Definition: ISDOpcodes.h:441
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:682
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:599
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:902
@ STRICT_FROUND
Definition: ISDOpcodes.h:436
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:750
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:457
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1393
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:435
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:437
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:936
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1284
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:451
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:473
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:450
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1005
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1285
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1203
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:478
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1229
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:665
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:644
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:442
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:620
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1283
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ STRICT_LLROUND
Definition: ISDOpcodes.h:440
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:431
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:869
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1381
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:800
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1167
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1091
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:777
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:501
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:430
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:588
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:523
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
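The ISD helpers just listed (getSetCCInverse, getSetCCSwappedOperands, getVPMaskIdx, getVPExplicitVectorLengthIdx) are small pure queries. Below is a minimal sketch of how a DAG combine might use them; the wrapper names swapAndMaybeInvert and getVPMaskAndEVL are illustrative only, and the include paths are best-effort.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <optional>

using namespace llvm;

// Canonicalise a comparison: produce the condition code for (Y op X), and
// optionally fold a logical negation of the result into the code as well.
static ISD::CondCode swapAndMaybeInvert(ISD::CondCode CC, EVT OpVT,
                                        bool Negate) {
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);
  return Negate ? ISD::getSetCCInverse(Swapped, OpVT) : Swapped;
}

// Pull the mask and explicit-vector-length operands off a VP node, if the
// opcode has them.
static void getVPMaskAndEVL(const SDNode *N, SDValue &Mask, SDValue &EVL) {
  if (std::optional<unsigned> MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
    Mask = N->getOperand(*MaskIdx);
  if (std::optional<unsigned> EVLIdx =
          ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
    EVL = N->getOperand(*EVLIdx);
}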
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1516
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1516
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1503
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1437
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
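getVecReduceBaseOpcode maps a reduction opcode onto the scalar operation it folds with. A one-line sketch, assuming the usual ISD namespace placement of this helper:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cassert>

using namespace llvm;

// The scalar opcode underlying a vector reduction, e.g. VECREDUCE_ADD -> ADD.
static void vecReduceBaseExample() {
  assert(ISD::getVecReduceBaseOpcode(ISD::VECREDUCE_ADD) == ISD::ADD);
}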
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1534
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1599
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1484
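Intrinsic::getDeclaration, listed above, is the usual way IR-level lowering hooks materialise a call to an intrinsic. A minimal sketch; the wrapper name emitIntrinsicCall and its parameters are illustrative, and the overload-type list must match whatever the chosen intrinsic actually expects.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Get (or create) the declaration for an overloaded intrinsic in module M and
// emit a call to it at the builder's insertion point.
static Value *emitIntrinsicCall(IRBuilder<> &Builder, Module *M,
                                Intrinsic::ID IID, Type *OverloadTy,
                                ArrayRef<Value *> Args) {
  Function *Decl = Intrinsic::getDeclaration(M, IID, {OverloadTy});
  return Builder.CreateCall(Decl, Args);
}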
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
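The PatternMatch combinators above (match, m_Shuffle, m_InsertElt, m_Value, m_Undef, m_ZeroInt) compose into IR-level pattern queries. A sketch that recognises the common insertelement-then-shufflevector splat idiom; note that the two-operand m_Shuffle matches independently of the mask, so a caller that needs a true splat must still inspect the shuffle mask. The helper name matchSplatScalar is illustrative.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Return the scalar inserted into lane 0 of an undef vector that is then
// shuffled, or nullptr if V does not have that shape. The shuffle mask is not
// checked here.
static Value *matchSplatScalar(Value *V) {
  Value *Scalar = nullptr;
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()),
                         m_Undef())))
    return Scalar;
  return nullptr;
}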
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
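RISCVMatInt::generateInstSeq above computes the instruction sequence needed to materialise a 64-bit immediate. A small sketch of the typical cost-style query built on top of it; the helper name isCheapImmediate and the threshold of two instructions are illustrative, and the include path assumes the backend's MCTargetDesc layout.

#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

// Consider an immediate cheap if it can be materialised in at most two
// scalar instructions on the given subtarget.
static bool isCheapImmediate(int64_t Imm, const MCSubtargetInfo &STI) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  return Seq.size() <= 2;
}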
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
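The SEW/LMUL helpers just listed convert between architectural values and their vtype encodings. A minimal round-trip sketch, assuming the usual llvm::RISCVVType placement of these functions; the include path and namespace may differ between LLVM versions.

#include "llvm/TargetParser/RISCVTargetParser.h"

using namespace llvm;

// Round-trip SEW and LMUL through their vtype encodings.
static void vtypeEncodingExample() {
  unsigned VSEW = RISCVVType::encodeSEW(32);       // SEW=32 encodes as 2
  unsigned SEW = RISCVVType::decodeVSEW(VSEW);     // back to 32
  RISCVII::VLMUL VLMul =
      RISCVVType::encodeLMUL(/*LMUL=*/2, /*Fractional=*/true); // LMUL=1/2
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);    // {2, true}
  (void)SEW; (void)LMul; (void)Fractional;
}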
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
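The RTLIB getters above map an operand/result type pair to a runtime-library call, returning UNKNOWN_LIBCALL when none exists. A short sketch of the usual select-and-check pattern; the wrapper name pickFPToUI is illustrative and the RuntimeLibcalls.h include path may vary across LLVM versions.

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

using namespace llvm;

// Pick the libcall for a floating-point to unsigned-integer conversion and
// verify that one exists for this pair of types.
static RTLIB::Libcall pickFPToUI(EVT SrcVT, EVT DstVT) {
  RTLIB::Libcall LC = RTLIB::getFPTOUINT(SrcVT, DstVT);
  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
         "no runtime call for this FP-to-unsigned conversion");
  return LC;
}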
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:431
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1522
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
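The STLExtras range helpers referenced above (all_of, any_of, enumerate, transform) take ranges directly instead of begin/end iterator pairs. A small sketch over a shuffle mask; the helper name maskIsIdentity is illustrative.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

// Check that a shuffle mask has no undef (-1) lanes and maps every lane to
// itself.
static bool maskIsIdentity(ArrayRef<int> Mask) {
  if (any_of(Mask, [](int M) { return M < 0; }))
    return false;
  // enumerate pairs each element with its index.
  return all_of(enumerate(Mask), [](auto P) {
    return static_cast<int>(P.index()) == P.value();
  });
}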
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
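The MathExtras and bit.h helpers above are plain bit-math utilities; a few worked values as a sketch, with the expected results in the comments under my reading of these helpers.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

static void bitMathExamples() {
  static_assert(isPowerOf2_64(64), "64 is a power of two");
  static_assert(isMask_64(0xFF), "0xFF is a low-bit mask");
  unsigned FloorLog = Log2_64(40);        // 5
  uint64_t RoundedUp = PowerOf2Ceil(40);  // 64
  uint64_t Ceil = divideCeil(10, 3);      // 4
  int TrailingZeros = countr_zero(0x8u);  // 3
  int Width = bit_width(40u);             // 6
  (void)FloorLog; (void)RoundedUp; (void)Ceil; (void)TrailingZeros; (void)Width;
}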
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:509
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
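SignExtend64 and Log2(Align), listed just above, appear throughout immediate and alignment handling; a two-line sketch of typical values.

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Sign-extend an all-ones 12-bit field and take the log2 of a 16-byte alignment.
static void immAndAlignExample() {
  int64_t Imm = SignExtend64<12>(0xFFF); // -1
  unsigned ShAmt = Log2(Align(16));      // 4
  (void)Imm; (void)ShAmt;
}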
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
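createSequentialMask builds shuffle masks of consecutive indices padded with undef lanes; a one-call sketch, with the expected mask in the comment.

#include "llvm/Analysis/VectorUtils.h"

// Produces {2, 3, 4, 5, -1, -1}: four sequential indices starting at 2,
// followed by two undef (-1) lanes.
static llvm::SmallVector<int, 16> exampleSequentialMask() {
  return llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
}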
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:317
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
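The EVT accessors above are the main way lowering code inspects and rewrites value types. A small self-contained sketch that builds a fixed <4 x f32> type and queries it; the function name evtExample is illustrative.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

static void evtExample(LLVMContext &Ctx) {
  EVT VecVT = EVT::getVectorVT(Ctx, MVT::f32, 4);
  assert(VecVT.isVector() && VecVT.isFixedLengthVector());
  assert(VecVT.getVectorNumElements() == 4);
  assert(VecVT.getScalarSizeInBits() == 32);
  // Same shape, but with i32 elements instead of f32.
  EVT IntVT = VecVT.changeVectorElementTypeToInteger();
  assert(IntVT.getVectorElementType() == MVT::i32);
  (void)IntVT;
}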
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1042
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1002
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
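The KnownBits operations above carry partial knowledge of a value's bits through extensions and arithmetic. A minimal sketch under my reading of KnownBits.h, where Zero and One are the public APInt members holding the known-zero and known-one masks.

#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

// Start with an 8-bit value whose low two bits are known zero, zero-extend it
// to 32 bits, and query what is still known.
static void knownBitsExample() {
  KnownBits Known(8);
  Known.Zero.setLowBits(2);                 // low two bits are known zero
  assert(Known.countMaxTrailingZeros() >= 2);
  KnownBits Wide = Known.zext(32);
  assert(Wide.getBitWidth() == 32);
  assert(Wide.countMaxActiveBits() <= 8);   // top bits are known zero after zext
  (void)Wide;
}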
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)